Home | History | Annotate | Download | only in enc
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Author: Jyrki Alakuijala (jyrki (at) google.com)
     11 //
     12 
     13 #include <assert.h>
     14 #include <math.h>
     15 
     16 #include "./backward_references.h"
     17 #include "./histogram.h"
     18 #include "../dsp/lossless.h"
     19 #include "../dsp/dsp.h"
     20 #include "../utils/color_cache.h"
     21 #include "../utils/utils.h"
     22 
     23 #define VALUES_IN_BYTE 256
     24 
     25 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references
     26 
     27 #define MAX_ENTROPY    (1e30f)
     28 
     29 // 1M window (4M bytes) minus 120 special codes for short distances.
     30 #define WINDOW_SIZE ((1 << 20) - 120)
     31 
     32 // Bounds for the match length.
     33 #define MIN_LENGTH 2
     34 #define MAX_LENGTH 4096
     35 
     36 // -----------------------------------------------------------------------------
     37 
     38 static const uint8_t plane_to_code_lut[128] = {
     39  96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
     40  101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
     41  102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
     42  105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
     43  110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
     44  115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
     45  118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
     46  119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
     47 };
     48 
     49 static int DistanceToPlaneCode(int xsize, int dist) {
     50   const int yoffset = dist / xsize;
     51   const int xoffset = dist - yoffset * xsize;
     52   if (xoffset <= 8 && yoffset < 8) {
     53     return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
     54   } else if (xoffset > xsize - 8 && yoffset < 7) {
     55     return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
     56   }
     57   return dist + 120;
     58 }
     59 
     60 // Returns the exact index where array1 and array2 are different if this
     61 // index is strictly superior to best_len_match. Otherwise, it returns 0.
     62 // If no two elements are the same, it returns max_limit.
     63 static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
     64                                        const uint32_t* const array2,
     65                                        int best_len_match,
     66                                        int max_limit) {
     67   int match_len;
     68 
     69   // Before 'expensive' linear match, check if the two arrays match at the
     70   // current best length index.
     71   if (array1[best_len_match] != array2[best_len_match]) return 0;
     72 
     73 #if defined(WEBP_USE_SSE2)
     74   // Check if anything is different up to best_len_match excluded.
     75   // memcmp seems to be slower on ARM so it is disabled for now.
     76   if (memcmp(array1, array2, best_len_match * sizeof(*array1))) return 0;
     77   match_len = best_len_match + 1;
     78 #else
     79   match_len = 0;
     80 #endif
     81 
     82   while (match_len < max_limit && array1[match_len] == array2[match_len]) {
     83     ++match_len;
     84   }
     85   return match_len;
     86 }
     87 
     88 // -----------------------------------------------------------------------------
     89 //  VP8LBackwardRefs
     90 
     91 struct PixOrCopyBlock {
     92   PixOrCopyBlock* next_;   // next block (or NULL)
     93   PixOrCopy* start_;       // data start
     94   int size_;               // currently used size
     95 };
     96 
     97 static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
     98   assert(refs != NULL);
     99   if (refs->tail_ != NULL) {
    100     *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
    101   }
    102   refs->free_blocks_ = refs->refs_;
    103   refs->tail_ = &refs->refs_;
    104   refs->last_block_ = NULL;
    105   refs->refs_ = NULL;
    106 }
    107 
    108 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
    109   assert(refs != NULL);
    110   ClearBackwardRefs(refs);
    111   while (refs->free_blocks_ != NULL) {
    112     PixOrCopyBlock* const next = refs->free_blocks_->next_;
    113     WebPSafeFree(refs->free_blocks_);
    114     refs->free_blocks_ = next;
    115   }
    116 }
    117 
    118 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
    119   assert(refs != NULL);
    120   memset(refs, 0, sizeof(*refs));
    121   refs->tail_ = &refs->refs_;
    122   refs->block_size_ =
    123       (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
    124 }
    125 
    126 VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
    127   VP8LRefsCursor c;
    128   c.cur_block_ = refs->refs_;
    129   if (refs->refs_ != NULL) {
    130     c.cur_pos = c.cur_block_->start_;
    131     c.last_pos_ = c.cur_pos + c.cur_block_->size_;
    132   } else {
    133     c.cur_pos = NULL;
    134     c.last_pos_ = NULL;
    135   }
    136   return c;
    137 }
    138 
    139 void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
    140   PixOrCopyBlock* const b = c->cur_block_->next_;
    141   c->cur_pos = (b == NULL) ? NULL : b->start_;
    142   c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
    143   c->cur_block_ = b;
    144 }
    145 
    146 // Create a new block, either from the free list or allocated
    147 static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
    148   PixOrCopyBlock* b = refs->free_blocks_;
    149   if (b == NULL) {   // allocate new memory chunk
    150     const size_t total_size =
    151         sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
    152     b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
    153     if (b == NULL) {
    154       refs->error_ |= 1;
    155       return NULL;
    156     }
    157     b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
    158   } else {  // recycle from free-list
    159     refs->free_blocks_ = b->next_;
    160   }
    161   *refs->tail_ = b;
    162   refs->tail_ = &b->next_;
    163   refs->last_block_ = b;
    164   b->next_ = NULL;
    165   b->size_ = 0;
    166   return b;
    167 }
    168 
    169 static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
    170                                               const PixOrCopy v) {
    171   PixOrCopyBlock* b = refs->last_block_;
    172   if (b == NULL || b->size_ == refs->block_size_) {
    173     b = BackwardRefsNewBlock(refs);
    174     if (b == NULL) return;   // refs->error_ is set
    175   }
    176   b->start_[b->size_++] = v;
    177 }
    178 
    179 int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
    180                          VP8LBackwardRefs* const dst) {
    181   const PixOrCopyBlock* b = src->refs_;
    182   ClearBackwardRefs(dst);
    183   assert(src->block_size_ == dst->block_size_);
    184   while (b != NULL) {
    185     PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
    186     if (new_b == NULL) return 0;   // dst->error_ is set
    187     memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
    188     new_b->size_ = b->size_;
    189     b = b->next_;
    190   }
    191   return 1;
    192 }
    193 
    194 // -----------------------------------------------------------------------------
    195 // Hash chains
    196 
    197 // initialize as empty
    198 static void HashChainReset(VP8LHashChain* const p) {
    199   assert(p != NULL);
    200   // Set the int32_t arrays to -1.
    201   memset(p->chain_, 0xff, p->size_ * sizeof(*p->chain_));
    202   memset(p->hash_to_first_index_, 0xff,
    203          HASH_SIZE * sizeof(*p->hash_to_first_index_));
    204 }
    205 
    206 int VP8LHashChainInit(VP8LHashChain* const p, int size) {
    207   assert(p->size_ == 0);
    208   assert(p->chain_ == NULL);
    209   assert(size > 0);
    210   p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
    211   if (p->chain_ == NULL) return 0;
    212   p->size_ = size;
    213   HashChainReset(p);
    214   return 1;
    215 }
    216 
    217 void VP8LHashChainClear(VP8LHashChain* const p) {
    218   assert(p != NULL);
    219   WebPSafeFree(p->chain_);
    220   p->size_ = 0;
    221   p->chain_ = NULL;
    222 }
    223 
    224 // -----------------------------------------------------------------------------
    225 
    226 #define HASH_MULTIPLIER_HI (0xc6a4a793U)
    227 #define HASH_MULTIPLIER_LO (0x5bd1e996U)
    228 
    229 static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
    230   uint32_t key;
    231   key  = argb[1] * HASH_MULTIPLIER_HI;
    232   key += argb[0] * HASH_MULTIPLIER_LO;
    233   key = key >> (32 - HASH_BITS);
    234   return key;
    235 }
    236 
    237 // Insertion of two pixels at a time.
    238 static void HashChainInsert(VP8LHashChain* const p,
    239                             const uint32_t* const argb, int pos) {
    240   const uint32_t hash_code = GetPixPairHash64(argb);
    241   p->chain_[pos] = p->hash_to_first_index_[hash_code];
    242   p->hash_to_first_index_[hash_code] = pos;
    243 }
    244 
    245 // Returns the maximum number of hash chain lookups to do for a
    246 // given compression quality. Return value in range [6, 86].
    247 static int GetMaxItersForQuality(int quality, int low_effort) {
    248   return (low_effort ? 6 : 8) + (quality * quality) / 128;
    249 }
    250 
    251 static int GetWindowSizeForHashChain(int quality, int xsize) {
    252   const int max_window_size = (quality > 75) ? WINDOW_SIZE
    253                             : (quality > 50) ? (xsize << 8)
    254                             : (quality > 25) ? (xsize << 6)
    255                             : (xsize << 4);
    256   assert(xsize > 0);
    257   return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
    258 }
    259 
    260 static WEBP_INLINE int MaxFindCopyLength(int len) {
    261   return (len < MAX_LENGTH) ? len : MAX_LENGTH;
    262 }
    263 
    264 static void HashChainFindOffset(const VP8LHashChain* const p, int base_position,
    265                                 const uint32_t* const argb, int len,
    266                                 int window_size, int* const distance_ptr) {
    267   const uint32_t* const argb_start = argb + base_position;
    268   const int min_pos =
    269       (base_position > window_size) ? base_position - window_size : 0;
    270   int pos;
    271   assert(len <= MAX_LENGTH);
    272   for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
    273        pos >= min_pos;
    274        pos = p->chain_[pos]) {
    275     const int curr_length =
    276         FindMatchLength(argb + pos, argb_start, len - 1, len);
    277     if (curr_length == len) break;
    278   }
    279   *distance_ptr = base_position - pos;
    280 }
    281 
    282 static int HashChainFindCopy(const VP8LHashChain* const p,
    283                              int base_position,
    284                              const uint32_t* const argb, int max_len,
    285                              int window_size, int iter_max,
    286                              int* const distance_ptr,
    287                              int* const length_ptr) {
    288   const uint32_t* const argb_start = argb + base_position;
    289   int iter = iter_max;
    290   int best_length = 0;
    291   int best_distance = 0;
    292   const int min_pos =
    293       (base_position > window_size) ? base_position - window_size : 0;
    294   int pos;
    295   int length_max = 256;
    296   if (max_len < length_max) {
    297     length_max = max_len;
    298   }
    299   for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
    300        pos >= min_pos;
    301        pos = p->chain_[pos]) {
    302     int curr_length;
    303     int distance;
    304     if (--iter < 0) {
    305       break;
    306     }
    307 
    308     curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len);
    309     if (best_length < curr_length) {
    310       distance = base_position - pos;
    311       best_length = curr_length;
    312       best_distance = distance;
    313       if (curr_length >= length_max) {
    314         break;
    315       }
    316     }
    317   }
    318   *distance_ptr = best_distance;
    319   *length_ptr = best_length;
    320   return (best_length >= MIN_LENGTH);
    321 }
    322 
    323 static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
    324                                          VP8LColorCache* const hashers,
    325                                          VP8LBackwardRefs* const refs) {
    326   PixOrCopy v;
    327   if (use_color_cache) {
    328     const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
    329     if (VP8LColorCacheLookup(hashers, key) == pixel) {
    330       v = PixOrCopyCreateCacheIdx(key);
    331     } else {
    332       v = PixOrCopyCreateLiteral(pixel);
    333       VP8LColorCacheSet(hashers, key, pixel);
    334     }
    335   } else {
    336     v = PixOrCopyCreateLiteral(pixel);
    337   }
    338   BackwardRefsCursorAdd(refs, v);
    339 }
    340 
    341 static int BackwardReferencesRle(int xsize, int ysize,
    342                                  const uint32_t* const argb,
    343                                  int cache_bits, VP8LBackwardRefs* const refs) {
    344   const int pix_count = xsize * ysize;
    345   int i, k;
    346   const int use_color_cache = (cache_bits > 0);
    347   VP8LColorCache hashers;
    348 
    349   if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
    350     return 0;
    351   }
    352   ClearBackwardRefs(refs);
    353   // Add first pixel as literal.
    354   AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
    355   i = 1;
    356   while (i < pix_count) {
    357     const int max_len = MaxFindCopyLength(pix_count - i);
    358     const int kMinLength = 4;
    359     const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
    360     const int prev_row_len = (i < xsize) ? 0 :
    361         FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
    362     if (rle_len >= prev_row_len && rle_len >= kMinLength) {
    363       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
    364       // We don't need to update the color cache here since it is always the
    365       // same pixel being copied, and that does not change the color cache
    366       // state.
    367       i += rle_len;
    368     } else if (prev_row_len >= kMinLength) {
    369       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
    370       if (use_color_cache) {
    371         for (k = 0; k < prev_row_len; ++k) {
    372           VP8LColorCacheInsert(&hashers, argb[i + k]);
    373         }
    374       }
    375       i += prev_row_len;
    376     } else {
    377       AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    378       i++;
    379     }
    380   }
    381   if (use_color_cache) VP8LColorCacheClear(&hashers);
    382   return !refs->error_;
    383 }
    384 
    385 static int BackwardReferencesLz77(int xsize, int ysize,
    386                                   const uint32_t* const argb, int cache_bits,
    387                                   int quality, int low_effort,
    388                                   VP8LHashChain* const hash_chain,
    389                                   VP8LBackwardRefs* const refs) {
    390   int i;
    391   int ok = 0;
    392   int cc_init = 0;
    393   const int use_color_cache = (cache_bits > 0);
    394   const int pix_count = xsize * ysize;
    395   VP8LColorCache hashers;
    396   int iter_max = GetMaxItersForQuality(quality, low_effort);
    397   const int window_size = GetWindowSizeForHashChain(quality, xsize);
    398   int min_matches = 32;
    399 
    400   if (use_color_cache) {
    401     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    402     if (!cc_init) goto Error;
    403   }
    404   ClearBackwardRefs(refs);
    405   HashChainReset(hash_chain);
    406   for (i = 0; i < pix_count - 2; ) {
    407     // Alternative#1: Code the pixels starting at 'i' using backward reference.
    408     int offset = 0;
    409     int len = 0;
    410     const int max_len = MaxFindCopyLength(pix_count - i);
    411     HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
    412                       iter_max, &offset, &len);
    413     if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) {
    414       int offset2 = 0;
    415       int len2 = 0;
    416       int k;
    417       min_matches = 8;
    418       HashChainInsert(hash_chain, &argb[i], i);
    419       if ((len < (max_len >> 2)) && !low_effort) {
    420         // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code
    421         // the pixels starting at 'i + 1' using backward reference.
    422         HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1,
    423                           window_size, iter_max, &offset2,
    424                           &len2);
    425         if (len2 > len + 1) {
    426           AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    427           i++;  // Backward reference to be done for next pixel.
    428           len = len2;
    429           offset = offset2;
    430         }
    431       }
    432       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
    433       if (use_color_cache) {
    434         for (k = 0; k < len; ++k) {
    435           VP8LColorCacheInsert(&hashers, argb[i + k]);
    436         }
    437       }
    438       // Add to the hash_chain (but cannot add the last pixel).
    439       if (offset >= 3 && offset != xsize) {
    440         const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
    441         for (k = 2; k < last - 8; k += 2) {
    442           HashChainInsert(hash_chain, &argb[i + k], i + k);
    443         }
    444         for (; k < last; ++k) {
    445           HashChainInsert(hash_chain, &argb[i + k], i + k);
    446         }
    447       }
    448       i += len;
    449     } else {
    450       AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    451       HashChainInsert(hash_chain, &argb[i], i);
    452       ++i;
    453       --min_matches;
    454       if (min_matches <= 0) {
    455         AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    456         HashChainInsert(hash_chain, &argb[i], i);
    457         ++i;
    458       }
    459     }
    460   }
    461   while (i < pix_count) {
    462     // Handle the last pixel(s).
    463     AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
    464     ++i;
    465   }
    466 
    467   ok = !refs->error_;
    468  Error:
    469   if (cc_init) VP8LColorCacheClear(&hashers);
    470   return ok;
    471 }
    472 
    473 // -----------------------------------------------------------------------------
    474 
    475 typedef struct {
    476   double alpha_[VALUES_IN_BYTE];
    477   double red_[VALUES_IN_BYTE];
    478   double blue_[VALUES_IN_BYTE];
    479   double distance_[NUM_DISTANCE_CODES];
    480   double* literal_;
    481 } CostModel;
    482 
    483 static int BackwardReferencesTraceBackwards(
    484     int xsize, int ysize, const uint32_t* const argb, int quality,
    485     int cache_bits, VP8LHashChain* const hash_chain,
    486     VP8LBackwardRefs* const refs);
    487 
    488 static void ConvertPopulationCountTableToBitEstimates(
    489     int num_symbols, const uint32_t population_counts[], double output[]) {
    490   uint32_t sum = 0;
    491   int nonzeros = 0;
    492   int i;
    493   for (i = 0; i < num_symbols; ++i) {
    494     sum += population_counts[i];
    495     if (population_counts[i] > 0) {
    496       ++nonzeros;
    497     }
    498   }
    499   if (nonzeros <= 1) {
    500     memset(output, 0, num_symbols * sizeof(*output));
    501   } else {
    502     const double logsum = VP8LFastLog2(sum);
    503     for (i = 0; i < num_symbols; ++i) {
    504       output[i] = logsum - VP8LFastLog2(population_counts[i]);
    505     }
    506   }
    507 }
    508 
    509 static int CostModelBuild(CostModel* const m, int cache_bits,
    510                           VP8LBackwardRefs* const refs) {
    511   int ok = 0;
    512   VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
    513   if (histo == NULL) goto Error;
    514 
    515   VP8LHistogramCreate(histo, refs, cache_bits);
    516 
    517   ConvertPopulationCountTableToBitEstimates(
    518       VP8LHistogramNumCodes(histo->palette_code_bits_),
    519       histo->literal_, m->literal_);
    520   ConvertPopulationCountTableToBitEstimates(
    521       VALUES_IN_BYTE, histo->red_, m->red_);
    522   ConvertPopulationCountTableToBitEstimates(
    523       VALUES_IN_BYTE, histo->blue_, m->blue_);
    524   ConvertPopulationCountTableToBitEstimates(
    525       VALUES_IN_BYTE, histo->alpha_, m->alpha_);
    526   ConvertPopulationCountTableToBitEstimates(
    527       NUM_DISTANCE_CODES, histo->distance_, m->distance_);
    528   ok = 1;
    529 
    530  Error:
    531   VP8LFreeHistogram(histo);
    532   return ok;
    533 }
    534 
    535 static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
    536   return m->alpha_[v >> 24] +
    537          m->red_[(v >> 16) & 0xff] +
    538          m->literal_[(v >> 8) & 0xff] +
    539          m->blue_[v & 0xff];
    540 }
    541 
    542 static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
    543   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
    544   return m->literal_[literal_idx];
    545 }
    546 
    547 static WEBP_INLINE double GetLengthCost(const CostModel* const m,
    548                                         uint32_t length) {
    549   int code, extra_bits;
    550   VP8LPrefixEncodeBits(length, &code, &extra_bits);
    551   return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
    552 }
    553 
    554 static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
    555                                           uint32_t distance) {
    556   int code, extra_bits;
    557   VP8LPrefixEncodeBits(distance, &code, &extra_bits);
    558   return m->distance_[code] + extra_bits;
    559 }
    560 
    561 static void AddSingleLiteralWithCostModel(
    562     const uint32_t* const argb, VP8LHashChain* const hash_chain,
    563     VP8LColorCache* const hashers, const CostModel* const cost_model, int idx,
    564     int is_last, int use_color_cache, double prev_cost, float* const cost,
    565     uint16_t* const dist_array) {
    566   double cost_val = prev_cost;
    567   const uint32_t color = argb[0];
    568   if (!is_last) {
    569     HashChainInsert(hash_chain, argb, idx);
    570   }
    571   if (use_color_cache && VP8LColorCacheContains(hashers, color)) {
    572     const double mul0 = 0.68;
    573     const int ix = VP8LColorCacheGetIndex(hashers, color);
    574     cost_val += GetCacheCost(cost_model, ix) * mul0;
    575   } else {
    576     const double mul1 = 0.82;
    577     if (use_color_cache) VP8LColorCacheInsert(hashers, color);
    578     cost_val += GetLiteralCost(cost_model, color) * mul1;
    579   }
    580   if (cost[idx] > cost_val) {
    581     cost[idx] = (float)cost_val;
    582     dist_array[idx] = 1;  // only one is inserted.
    583   }
    584 }
    585 
    586 static int BackwardReferencesHashChainDistanceOnly(
    587     int xsize, int ysize, const uint32_t* const argb,
    588     int quality, int cache_bits, VP8LHashChain* const hash_chain,
    589     VP8LBackwardRefs* const refs, uint16_t* const dist_array) {
    590   int i;
    591   int ok = 0;
    592   int cc_init = 0;
    593   const int pix_count = xsize * ysize;
    594   const int use_color_cache = (cache_bits > 0);
    595   float* const cost =
    596       (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
    597   const size_t literal_array_size = sizeof(double) *
    598       (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
    599        ((cache_bits > 0) ? (1 << cache_bits) : 0));
    600   const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
    601   CostModel* const cost_model =
    602       (CostModel*)WebPSafeMalloc(1ULL, cost_model_size);
    603   VP8LColorCache hashers;
    604   const int skip_length = 32 + quality;
    605   const int skip_min_distance_code = 2;
    606   int iter_max = GetMaxItersForQuality(quality, 0);
    607   const int window_size = GetWindowSizeForHashChain(quality, xsize);
    608 
    609   if (cost == NULL || cost_model == NULL) goto Error;
    610 
    611   cost_model->literal_ = (double*)(cost_model + 1);
    612   if (use_color_cache) {
    613     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    614     if (!cc_init) goto Error;
    615   }
    616 
    617   if (!CostModelBuild(cost_model, cache_bits, refs)) {
    618     goto Error;
    619   }
    620 
    621   for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;
    622 
    623   // We loop one pixel at a time, but store all currently best points to
    624   // non-processed locations from this point.
    625   dist_array[0] = 0;
    626   HashChainReset(hash_chain);
    627   // Add first pixel as literal.
    628   AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0,
    629                                 0, use_color_cache, 0.0, cost, dist_array);
    630   for (i = 1; i < pix_count - 1; ++i) {
    631     int offset = 0;
    632     int len = 0;
    633     double prev_cost = cost[i - 1];
    634     const int max_len = MaxFindCopyLength(pix_count - i);
    635     HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
    636                       iter_max, &offset, &len);
    637     if (len >= MIN_LENGTH) {
    638       const int code = DistanceToPlaneCode(xsize, offset);
    639       const double distance_cost =
    640           prev_cost + GetDistanceCost(cost_model, code);
    641       int k;
    642       for (k = 1; k < len; ++k) {
    643         const double cost_val = distance_cost + GetLengthCost(cost_model, k);
    644         if (cost[i + k] > cost_val) {
    645           cost[i + k] = (float)cost_val;
    646           dist_array[i + k] = k + 1;
    647         }
    648       }
    649       // This if is for speedup only. It roughly doubles the speed, and
    650       // makes compression worse by .1 %.
    651       if (len >= skip_length && code <= skip_min_distance_code) {
    652         // Long copy for short distances, let's skip the middle
    653         // lookups for better copies.
    654         // 1) insert the hashes.
    655         if (use_color_cache) {
    656           for (k = 0; k < len; ++k) {
    657             VP8LColorCacheInsert(&hashers, argb[i + k]);
    658           }
    659         }
    660         // 2) Add to the hash_chain (but cannot add the last pixel)
    661         {
    662           const int last = (len + i < pix_count - 1) ? len + i
    663                                                      : pix_count - 1;
    664           for (k = i; k < last; ++k) {
    665             HashChainInsert(hash_chain, &argb[k], k);
    666           }
    667         }
    668         // 3) jump.
    669         i += len - 1;  // for loop does ++i, thus -1 here.
    670         goto next_symbol;
    671       }
    672       if (len != MIN_LENGTH) {
    673         int code_min_length;
    674         double cost_total;
    675         HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size,
    676                             &offset);
    677         code_min_length = DistanceToPlaneCode(xsize, offset);
    678         cost_total = prev_cost +
    679             GetDistanceCost(cost_model, code_min_length) +
    680             GetLengthCost(cost_model, 1);
    681         if (cost[i + 1] > cost_total) {
    682           cost[i + 1] = (float)cost_total;
    683           dist_array[i + 1] = 2;
    684         }
    685       }
    686     }
    687     AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
    688                                   0, use_color_cache, prev_cost, cost,
    689                                   dist_array);
    690  next_symbol: ;
    691   }
    692   // Handle the last pixel.
    693   if (i == (pix_count - 1)) {
    694     AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
    695                                   1, use_color_cache, cost[pix_count - 2], cost,
    696                                   dist_array);
    697   }
    698   ok = !refs->error_;
    699  Error:
    700   if (cc_init) VP8LColorCacheClear(&hashers);
    701   WebPSafeFree(cost_model);
    702   WebPSafeFree(cost);
    703   return ok;
    704 }
    705 
    706 // We pack the path at the end of *dist_array and return
    707 // a pointer to this part of the array. Example:
    708 // dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
    709 static void TraceBackwards(uint16_t* const dist_array,
    710                            int dist_array_size,
    711                            uint16_t** const chosen_path,
    712                            int* const chosen_path_size) {
    713   uint16_t* path = dist_array + dist_array_size;
    714   uint16_t* cur = dist_array + dist_array_size - 1;
    715   while (cur >= dist_array) {
    716     const int k = *cur;
    717     --path;
    718     *path = k;
    719     cur -= k;
    720   }
    721   *chosen_path = path;
    722   *chosen_path_size = (int)(dist_array + dist_array_size - path);
    723 }
    724 
    725 static int BackwardReferencesHashChainFollowChosenPath(
    726     int xsize, int ysize, const uint32_t* const argb,
    727     int quality, int cache_bits,
    728     const uint16_t* const chosen_path, int chosen_path_size,
    729     VP8LHashChain* const hash_chain,
    730     VP8LBackwardRefs* const refs) {
    731   const int pix_count = xsize * ysize;
    732   const int use_color_cache = (cache_bits > 0);
    733   int ix;
    734   int i = 0;
    735   int ok = 0;
    736   int cc_init = 0;
    737   const int window_size = GetWindowSizeForHashChain(quality, xsize);
    738   VP8LColorCache hashers;
    739 
    740   if (use_color_cache) {
    741     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    742     if (!cc_init) goto Error;
    743   }
    744 
    745   ClearBackwardRefs(refs);
    746   HashChainReset(hash_chain);
    747   for (ix = 0; ix < chosen_path_size; ++ix) {
    748     int offset = 0;
    749     const int len = chosen_path[ix];
    750     if (len != 1) {
    751       int k;
    752       HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset);
    753       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
    754       if (use_color_cache) {
    755         for (k = 0; k < len; ++k) {
    756           VP8LColorCacheInsert(&hashers, argb[i + k]);
    757         }
    758       }
    759       {
    760         const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
    761         for (k = 0; k < last; ++k) {
    762           HashChainInsert(hash_chain, &argb[i + k], i + k);
    763         }
    764       }
    765       i += len;
    766     } else {
    767       PixOrCopy v;
    768       if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
    769         // push pixel as a color cache index
    770         const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
    771         v = PixOrCopyCreateCacheIdx(idx);
    772       } else {
    773         if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
    774         v = PixOrCopyCreateLiteral(argb[i]);
    775       }
    776       BackwardRefsCursorAdd(refs, v);
    777       if (i + 1 < pix_count) {
    778         HashChainInsert(hash_chain, &argb[i], i);
    779       }
    780       ++i;
    781     }
    782   }
    783   ok = !refs->error_;
    784  Error:
    785   if (cc_init) VP8LColorCacheClear(&hashers);
    786   return ok;
    787 }
    788 
    789 // Returns 1 on success.
    790 static int BackwardReferencesTraceBackwards(int xsize, int ysize,
    791                                             const uint32_t* const argb,
    792                                             int quality, int cache_bits,
    793                                             VP8LHashChain* const hash_chain,
    794                                             VP8LBackwardRefs* const refs) {
    795   int ok = 0;
    796   const int dist_array_size = xsize * ysize;
    797   uint16_t* chosen_path = NULL;
    798   int chosen_path_size = 0;
    799   uint16_t* dist_array =
    800       (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
    801 
    802   if (dist_array == NULL) goto Error;
    803 
    804   if (!BackwardReferencesHashChainDistanceOnly(
    805       xsize, ysize, argb, quality, cache_bits, hash_chain,
    806       refs, dist_array)) {
    807     goto Error;
    808   }
    809   TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
    810   if (!BackwardReferencesHashChainFollowChosenPath(
    811       xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
    812       hash_chain, refs)) {
    813     goto Error;
    814   }
    815   ok = 1;
    816  Error:
    817   WebPSafeFree(dist_array);
    818   return ok;
    819 }
    820 
    821 static void BackwardReferences2DLocality(int xsize,
    822                                          const VP8LBackwardRefs* const refs) {
    823   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    824   while (VP8LRefsCursorOk(&c)) {
    825     if (PixOrCopyIsCopy(c.cur_pos)) {
    826       const int dist = c.cur_pos->argb_or_distance;
    827       const int transformed_dist = DistanceToPlaneCode(xsize, dist);
    828       c.cur_pos->argb_or_distance = transformed_dist;
    829     }
    830     VP8LRefsCursorNext(&c);
    831   }
    832 }
    833 
    834 // Returns entropy for the given cache bits.
    835 static double ComputeCacheEntropy(const uint32_t* argb,
    836                                   const VP8LBackwardRefs* const refs,
    837                                   int cache_bits) {
    838   const int use_color_cache = (cache_bits > 0);
    839   int cc_init = 0;
    840   double entropy = MAX_ENTROPY;
    841   const double kSmallPenaltyForLargeCache = 4.0;
    842   VP8LColorCache hashers;
    843   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    844   VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
    845   if (histo == NULL) goto Error;
    846 
    847   if (use_color_cache) {
    848     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    849     if (!cc_init) goto Error;
    850   }
    851   if (!use_color_cache) {
    852     while (VP8LRefsCursorOk(&c)) {
    853       VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
    854       VP8LRefsCursorNext(&c);
    855     }
    856   } else {
    857     while (VP8LRefsCursorOk(&c)) {
    858       const PixOrCopy* const v = c.cur_pos;
    859       if (PixOrCopyIsLiteral(v)) {
    860         const uint32_t pix = *argb++;
    861         const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix);
    862         if (VP8LColorCacheLookup(&hashers, key) == pix) {
    863           ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
    864         } else {
    865           VP8LColorCacheSet(&hashers, key, pix);
    866           ++histo->blue_[pix & 0xff];
    867           ++histo->literal_[(pix >> 8) & 0xff];
    868           ++histo->red_[(pix >> 16) & 0xff];
    869           ++histo->alpha_[pix >> 24];
    870         }
    871       } else {
    872         int len = PixOrCopyLength(v);
    873         int code, extra_bits;
    874         VP8LPrefixEncodeBits(len, &code, &extra_bits);
    875         ++histo->literal_[NUM_LITERAL_CODES + code];
    876         VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
    877         ++histo->distance_[code];
    878         do {
    879           VP8LColorCacheInsert(&hashers, *argb++);
    880         } while(--len != 0);
    881       }
    882       VP8LRefsCursorNext(&c);
    883     }
    884   }
    885   entropy = VP8LHistogramEstimateBits(histo) +
    886       kSmallPenaltyForLargeCache * cache_bits;
    887  Error:
    888   if (cc_init) VP8LColorCacheClear(&hashers);
    889   VP8LFreeHistogram(histo);
    890   return entropy;
    891 }
    892 
    893 // Evaluate optimal cache bits for the local color cache.
    894 // The input *best_cache_bits sets the maximum cache bits to use (passing 0
    895 // implies disabling the local color cache). The local color cache is also
    896 // disabled for the lower (<= 25) quality.
    897 // Returns 0 in case of memory error.
    898 static int CalculateBestCacheSize(const uint32_t* const argb,
    899                                   int xsize, int ysize, int quality,
    900                                   VP8LHashChain* const hash_chain,
    901                                   VP8LBackwardRefs* const refs,
    902                                   int* const lz77_computed,
    903                                   int* const best_cache_bits) {
    904   int eval_low = 1;
    905   int eval_high = 1;
    906   double entropy_low = MAX_ENTROPY;
    907   double entropy_high = MAX_ENTROPY;
    908   const double cost_mul = 5e-4;
    909   int cache_bits_low = 0;
    910   int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
    911 
    912   assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
    913 
    914   *lz77_computed = 0;
    915   if (cache_bits_high == 0) {
    916     *best_cache_bits = 0;
    917     // Local color cache is disabled.
    918     return 1;
    919   }
    920   if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0,
    921                               hash_chain, refs)) {
    922     return 0;
    923   }
    924   // Do a binary search to find the optimal entropy for cache_bits.
    925   while (eval_low || eval_high) {
    926     if (eval_low) {
    927       entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low);
    928       entropy_low += entropy_low * cache_bits_low * cost_mul;
    929       eval_low = 0;
    930     }
    931     if (eval_high) {
    932       entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high);
    933       entropy_high += entropy_high * cache_bits_high * cost_mul;
    934       eval_high = 0;
    935     }
    936     if (entropy_high < entropy_low) {
    937       const int prev_cache_bits_low = cache_bits_low;
    938       *best_cache_bits = cache_bits_high;
    939       cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
    940       if (cache_bits_low != prev_cache_bits_low) eval_low = 1;
    941     } else {
    942       *best_cache_bits = cache_bits_low;
    943       cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
    944       if (cache_bits_high != cache_bits_low) eval_high = 1;
    945     }
    946   }
    947   *lz77_computed = 1;
    948   return 1;
    949 }
    950 
    951 // Update (in-place) backward references for specified cache_bits.
    952 static int BackwardRefsWithLocalCache(const uint32_t* const argb,
    953                                       int cache_bits,
    954                                       VP8LBackwardRefs* const refs) {
    955   int pixel_index = 0;
    956   VP8LColorCache hashers;
    957   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    958   if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
    959 
    960   while (VP8LRefsCursorOk(&c)) {
    961     PixOrCopy* const v = c.cur_pos;
    962     if (PixOrCopyIsLiteral(v)) {
    963       const uint32_t argb_literal = v->argb_or_distance;
    964       if (VP8LColorCacheContains(&hashers, argb_literal)) {
    965         const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal);
    966         *v = PixOrCopyCreateCacheIdx(ix);
    967       } else {
    968         VP8LColorCacheInsert(&hashers, argb_literal);
    969       }
    970       ++pixel_index;
    971     } else {
    972       // refs was created without local cache, so it can not have cache indexes.
    973       int k;
    974       assert(PixOrCopyIsCopy(v));
    975       for (k = 0; k < v->len; ++k) {
    976         VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
    977       }
    978     }
    979     VP8LRefsCursorNext(&c);
    980   }
    981   VP8LColorCacheClear(&hashers);
    982   return 1;
    983 }
    984 
    985 static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
    986     int width, int height, const uint32_t* const argb, int quality,
    987     int* const cache_bits, VP8LHashChain* const hash_chain,
    988     VP8LBackwardRefs refs_array[2]) {
    989   VP8LBackwardRefs* refs_lz77 = &refs_array[0];
    990   *cache_bits = 0;
    991   if (!BackwardReferencesLz77(width, height, argb, 0, quality,
    992                               1 /* Low effort. */, hash_chain, refs_lz77)) {
    993     return NULL;
    994   }
    995   BackwardReferences2DLocality(width, refs_lz77);
    996   return refs_lz77;
    997 }
    998 
    999 static VP8LBackwardRefs* GetBackwardReferences(
   1000     int width, int height, const uint32_t* const argb, int quality,
   1001     int* const cache_bits, VP8LHashChain* const hash_chain,
   1002     VP8LBackwardRefs refs_array[2]) {
   1003   int lz77_is_useful;
   1004   int lz77_computed;
   1005   double bit_cost_lz77, bit_cost_rle;
   1006   VP8LBackwardRefs* best = NULL;
   1007   VP8LBackwardRefs* refs_lz77 = &refs_array[0];
   1008   VP8LBackwardRefs* refs_rle = &refs_array[1];
   1009   VP8LHistogram* histo = NULL;
   1010 
   1011   if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain,
   1012                               refs_lz77, &lz77_computed, cache_bits)) {
   1013     goto Error;
   1014   }
   1015 
   1016   if (lz77_computed) {
   1017     // Transform refs_lz77 for the optimized cache_bits.
   1018     if (*cache_bits > 0) {
   1019       if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) {
   1020         goto Error;
   1021       }
   1022     }
   1023   } else {
   1024     if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality,
   1025                                 0 /* Low effort. */, hash_chain, refs_lz77)) {
   1026       goto Error;
   1027     }
   1028   }
   1029 
   1030   if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) {
   1031     goto Error;
   1032   }
   1033 
   1034   histo = VP8LAllocateHistogram(*cache_bits);
   1035   if (histo == NULL) goto Error;
   1036 
   1037   {
   1038     // Evaluate LZ77 coding.
   1039     VP8LHistogramCreate(histo, refs_lz77, *cache_bits);
   1040     bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
   1041     // Evaluate RLE coding.
   1042     VP8LHistogramCreate(histo, refs_rle, *cache_bits);
   1043     bit_cost_rle = VP8LHistogramEstimateBits(histo);
   1044     // Decide if LZ77 is useful.
   1045     lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
   1046   }
   1047 
   1048   // Choose appropriate backward reference.
   1049   if (lz77_is_useful) {
   1050     // TraceBackwards is costly. Don't execute it at lower quality.
   1051     const int try_lz77_trace_backwards = (quality >= 25);
   1052     best = refs_lz77;   // default guess: lz77 is better
   1053     if (try_lz77_trace_backwards) {
   1054       VP8LBackwardRefs* const refs_trace = refs_rle;
   1055       if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) {
   1056         best = NULL;
   1057         goto Error;
   1058       }
   1059       if (BackwardReferencesTraceBackwards(width, height, argb, quality,
   1060                                            *cache_bits, hash_chain,
   1061                                            refs_trace)) {
   1062         double bit_cost_trace;
   1063         // Evaluate LZ77 coding.
   1064         VP8LHistogramCreate(histo, refs_trace, *cache_bits);
   1065         bit_cost_trace = VP8LHistogramEstimateBits(histo);
   1066         if (bit_cost_trace < bit_cost_lz77) {
   1067           best = refs_trace;
   1068         }
   1069       }
   1070     }
   1071   } else {
   1072     best = refs_rle;
   1073   }
   1074 
   1075   BackwardReferences2DLocality(width, best);
   1076 
   1077  Error:
   1078   VP8LFreeHistogram(histo);
   1079   return best;
   1080 }
   1081 
   1082 VP8LBackwardRefs* VP8LGetBackwardReferences(
   1083     int width, int height, const uint32_t* const argb, int quality,
   1084     int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
   1085     VP8LBackwardRefs refs_array[2]) {
   1086   if (low_effort) {
   1087     return GetBackwardReferencesLowEffort(width, height, argb, quality,
   1088                                           cache_bits, hash_chain, refs_array);
   1089   } else {
   1090     return GetBackwardReferences(width, height, argb, quality, cache_bits,
   1091                                  hash_chain, refs_array);
   1092   }
   1093 }
   1094