Home | History | Annotate | Download | only in enc
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Author: Jyrki Alakuijala (jyrki (at) google.com)
     11 //
     12 
     13 #include <assert.h>
     14 #include <math.h>
     15 
     16 #include "./backward_references.h"
     17 #include "./histogram.h"
     18 #include "../dsp/lossless.h"
     19 #include "../utils/color_cache.h"
     20 #include "../utils/utils.h"
     21 
     22 #define VALUES_IN_BYTE 256
     23 
     24 #define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)
     25 
     26 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references
     27 
     28 #define MAX_ENTROPY    (1e30f)
     29 
     30 // 1M window (4M bytes) minus 120 special codes for short distances.
     31 #define WINDOW_SIZE ((1 << 20) - 120)
     32 
     33 // Bounds for the match length.
     34 #define MIN_LENGTH 2
     35 #define MAX_LENGTH 4096
     36 
     37 // -----------------------------------------------------------------------------
     38 
     39 static const uint8_t plane_to_code_lut[128] = {
     40  96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
     41  101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
     42  102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
     43  105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
     44  110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
     45  115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
     46  118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
     47  119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
     48 };
     49 
     50 static int DistanceToPlaneCode(int xsize, int dist) {
     51   const int yoffset = dist / xsize;
     52   const int xoffset = dist - yoffset * xsize;
     53   if (xoffset <= 8 && yoffset < 8) {
     54     return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
     55   } else if (xoffset > xsize - 8 && yoffset < 7) {
     56     return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
     57   }
     58   return dist + 120;
     59 }
     60 
     61 static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
     62                                        const uint32_t* const array2,
     63                                        const int max_limit) {
     64   int match_len = 0;
     65   while (match_len < max_limit && array1[match_len] == array2[match_len]) {
     66     ++match_len;
     67   }
     68   return match_len;
     69 }
     70 
     71 // -----------------------------------------------------------------------------
     72 //  VP8LBackwardRefs
     73 
     74 struct PixOrCopyBlock {
     75   PixOrCopyBlock* next_;   // next block (or NULL)
     76   PixOrCopy* start_;       // data start
     77   int size_;               // currently used size
     78 };
     79 
     80 static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
     81   assert(refs != NULL);
     82   if (refs->tail_ != NULL) {
     83     *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
     84   }
     85   refs->free_blocks_ = refs->refs_;
     86   refs->tail_ = &refs->refs_;
     87   refs->last_block_ = NULL;
     88   refs->refs_ = NULL;
     89 }
     90 
     91 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
     92   assert(refs != NULL);
     93   ClearBackwardRefs(refs);
     94   while (refs->free_blocks_ != NULL) {
     95     PixOrCopyBlock* const next = refs->free_blocks_->next_;
     96     WebPSafeFree(refs->free_blocks_);
     97     refs->free_blocks_ = next;
     98   }
     99 }
    100 
    101 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
    102   assert(refs != NULL);
    103   memset(refs, 0, sizeof(*refs));
    104   refs->tail_ = &refs->refs_;
    105   refs->block_size_ =
    106       (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
    107 }
    108 
    109 VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
    110   VP8LRefsCursor c;
    111   c.cur_block_ = refs->refs_;
    112   if (refs->refs_ != NULL) {
    113     c.cur_pos = c.cur_block_->start_;
    114     c.last_pos_ = c.cur_pos + c.cur_block_->size_;
    115   } else {
    116     c.cur_pos = NULL;
    117     c.last_pos_ = NULL;
    118   }
    119   return c;
    120 }
    121 
    122 void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
    123   PixOrCopyBlock* const b = c->cur_block_->next_;
    124   c->cur_pos = (b == NULL) ? NULL : b->start_;
    125   c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
    126   c->cur_block_ = b;
    127 }
    128 
    129 // Create a new block, either from the free list or allocated
    130 static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
    131   PixOrCopyBlock* b = refs->free_blocks_;
    132   if (b == NULL) {   // allocate new memory chunk
    133     const size_t total_size =
    134         sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
    135     b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
    136     if (b == NULL) {
    137       refs->error_ |= 1;
    138       return NULL;
    139     }
    140     b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
    141   } else {  // recycle from free-list
    142     refs->free_blocks_ = b->next_;
    143   }
    144   *refs->tail_ = b;
    145   refs->tail_ = &b->next_;
    146   refs->last_block_ = b;
    147   b->next_ = NULL;
    148   b->size_ = 0;
    149   return b;
    150 }
    151 
    152 static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
    153                                               const PixOrCopy v) {
    154   PixOrCopyBlock* b = refs->last_block_;
    155   if (b == NULL || b->size_ == refs->block_size_) {
    156     b = BackwardRefsNewBlock(refs);
    157     if (b == NULL) return;   // refs->error_ is set
    158   }
    159   b->start_[b->size_++] = v;
    160 }
    161 
    162 int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
    163                          VP8LBackwardRefs* const dst) {
    164   const PixOrCopyBlock* b = src->refs_;
    165   ClearBackwardRefs(dst);
    166   assert(src->block_size_ == dst->block_size_);
    167   while (b != NULL) {
    168     PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
    169     if (new_b == NULL) return 0;   // dst->error_ is set
    170     memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
    171     new_b->size_ = b->size_;
    172     b = b->next_;
    173   }
    174   return 1;
    175 }
    176 
    177 // -----------------------------------------------------------------------------
    178 // Hash chains
    179 
    180 // initialize as empty
    181 static void HashChainInit(VP8LHashChain* const p) {
    182   int i;
    183   assert(p != NULL);
    184   for (i = 0; i < p->size_; ++i) {
    185     p->chain_[i] = -1;
    186   }
    187   for (i = 0; i < HASH_SIZE; ++i) {
    188     p->hash_to_first_index_[i] = -1;
    189   }
    190 }
    191 
    192 int VP8LHashChainInit(VP8LHashChain* const p, int size) {
    193   assert(p->size_ == 0);
    194   assert(p->chain_ == NULL);
    195   assert(size > 0);
    196   p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
    197   if (p->chain_ == NULL) return 0;
    198   p->size_ = size;
    199   HashChainInit(p);
    200   return 1;
    201 }
    202 
    203 void VP8LHashChainClear(VP8LHashChain* const p) {
    204   assert(p != NULL);
    205   WebPSafeFree(p->chain_);
    206   p->size_ = 0;
    207   p->chain_ = NULL;
    208 }
    209 
    210 // -----------------------------------------------------------------------------
    211 
    212 static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
    213   uint64_t key = ((uint64_t)argb[1] << 32) | argb[0];
    214   key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
    215   return key;
    216 }
    217 
    218 // Insertion of two pixels at a time.
    219 static void HashChainInsert(VP8LHashChain* const p,
    220                             const uint32_t* const argb, int pos) {
    221   const uint64_t hash_code = GetPixPairHash64(argb);
    222   p->chain_[pos] = p->hash_to_first_index_[hash_code];
    223   p->hash_to_first_index_[hash_code] = pos;
    224 }
    225 
    226 static void GetParamsForHashChainFindCopy(int quality, int xsize,
    227                                           int cache_bits, int* window_size,
    228                                           int* iter_pos, int* iter_limit) {
    229   const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
    230   const int iter_neg = -iter_mult * (quality >> 1);
    231   // Limit the backward-ref window size for lower qualities.
    232   const int max_window_size = (quality > 50) ? WINDOW_SIZE
    233                             : (quality > 25) ? (xsize << 8)
    234                             : (xsize << 4);
    235   assert(xsize > 0);
    236   *window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
    237                : max_window_size;
    238   *iter_pos = 8 + (quality >> 3);
    239   // For lower entropy images, the rigorous search loop in HashChainFindCopy
    240   // can be relaxed.
    241   *iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
    242 }
    243 
    244 static int HashChainFindCopy(const VP8LHashChain* const p,
    245                              int base_position, int xsize_signed,
    246                              const uint32_t* const argb, int max_len,
    247                              int window_size, int iter_pos, int iter_limit,
    248                              int* const distance_ptr,
    249                              int* const length_ptr) {
    250   const uint32_t* const argb_start = argb + base_position;
    251   uint64_t best_val = 0;
    252   uint32_t best_length = 1;
    253   uint32_t best_distance = 0;
    254   const uint32_t xsize = (uint32_t)xsize_signed;
    255   const int min_pos =
    256       (base_position > window_size) ? base_position - window_size : 0;
    257   int pos;
    258   assert(xsize > 0);
    259   if (max_len > MAX_LENGTH) {
    260     max_len = MAX_LENGTH;
    261   }
    262   for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
    263        pos >= min_pos;
    264        pos = p->chain_[pos]) {
    265     uint64_t val;
    266     uint32_t curr_length;
    267     uint32_t distance;
    268     const uint32_t* const ptr1 = (argb + pos + best_length - 1);
    269     const uint32_t* const ptr2 = (argb_start + best_length - 1);
    270 
    271     if (iter_pos < 0) {
    272       if (iter_pos < iter_limit || best_val >= 0xff0000) {
    273         break;
    274       }
    275     }
    276     --iter_pos;
    277 
    278     // Before 'expensive' linear match, check if the two arrays match at the
    279     // current best length index and also for the succeeding elements.
    280     if (ptr1[0] != ptr2[0] || ptr1[1] != ptr2[1]) continue;
    281 
    282     curr_length = FindMatchLength(argb + pos, argb_start, max_len);
    283     if (curr_length < best_length) continue;
    284 
    285     distance = (uint32_t)(base_position - pos);
    286     val = curr_length << 16;
    287     // Favoring 2d locality here gives savings for certain images.
    288     if (distance < 9 * xsize) {
    289       const uint32_t y = distance / xsize;
    290       uint32_t x = distance % xsize;
    291       if (x > (xsize >> 1)) {
    292         x = xsize - x;
    293       }
    294       if (x <= 7) {
    295         val += 9 * 9 + 9 * 9;
    296         val -= y * y + x * x;
    297       }
    298     }
    299     if (best_val < val) {
    300       best_val = val;
    301       best_length = curr_length;
    302       best_distance = distance;
    303       if (curr_length >= (uint32_t)max_len) {
    304         break;
    305       }
    306       if ((best_distance == 1 || distance == xsize) &&
    307           best_length >= 128) {
    308         break;
    309       }
    310     }
    311   }
    312   *distance_ptr = (int)best_distance;
    313   *length_ptr = best_length;
    314   return (best_length >= MIN_LENGTH);
    315 }
    316 
    317 static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
    318   while (length >= MAX_LENGTH) {
    319     BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, MAX_LENGTH));
    320     length -= MAX_LENGTH;
    321   }
    322   if (length > 0) {
    323     BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, length));
    324   }
    325 }
    326 
    327 static int BackwardReferencesRle(int xsize, int ysize,
    328                                  const uint32_t* const argb,
    329                                  VP8LBackwardRefs* const refs) {
    330   const int pix_count = xsize * ysize;
    331   int match_len = 0;
    332   int i;
    333   ClearBackwardRefs(refs);
    334   PushBackCopy(refs, match_len);    // i=0 case
    335   BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[0]));
    336   for (i = 1; i < pix_count; ++i) {
    337     if (argb[i] == argb[i - 1]) {
    338       ++match_len;
    339     } else {
    340       PushBackCopy(refs, match_len);
    341       match_len = 0;
    342       BackwardRefsCursorAdd(refs, PixOrCopyCreateLiteral(argb[i]));
    343     }
    344   }
    345   PushBackCopy(refs, match_len);
    346   return !refs->error_;
    347 }
    348 
    349 static int BackwardReferencesHashChain(int xsize, int ysize,
    350                                        const uint32_t* const argb,
    351                                        int cache_bits, int quality,
    352                                        VP8LHashChain* const hash_chain,
    353                                        VP8LBackwardRefs* const refs) {
    354   int i;
    355   int ok = 0;
    356   int cc_init = 0;
    357   const int use_color_cache = (cache_bits > 0);
    358   const int pix_count = xsize * ysize;
    359   VP8LColorCache hashers;
    360   int window_size = WINDOW_SIZE;
    361   int iter_pos = 1;
    362   int iter_limit = -1;
    363 
    364   if (use_color_cache) {
    365     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    366     if (!cc_init) goto Error;
    367   }
    368 
    369   ClearBackwardRefs(refs);
    370   GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
    371                                 &window_size, &iter_pos, &iter_limit);
    372   HashChainInit(hash_chain);
    373   for (i = 0; i < pix_count; ) {
    374     // Alternative#1: Code the pixels starting at 'i' using backward reference.
    375     int offset = 0;
    376     int len = 0;
    377     if (i < pix_count - 1) {  // FindCopy(i,..) reads pixels at [i] and [i + 1].
    378       int max_len = pix_count - i;
    379       HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
    380                         window_size, iter_pos, iter_limit,
    381                         &offset, &len);
    382     }
    383     if (len >= MIN_LENGTH) {
    384       // Alternative#2: Insert the pixel at 'i' as literal, and code the
    385       // pixels starting at 'i + 1' using backward reference.
    386       int offset2 = 0;
    387       int len2 = 0;
    388       int k;
    389       HashChainInsert(hash_chain, &argb[i], i);
    390       if (i < pix_count - 2) {  // FindCopy(i+1,..) reads [i + 1] and [i + 2].
    391         int max_len = pix_count - (i + 1);
    392         HashChainFindCopy(hash_chain, i + 1, xsize, argb, max_len,
    393                           window_size, iter_pos, iter_limit,
    394                           &offset2, &len2);
    395         if (len2 > len + 1) {
    396           const uint32_t pixel = argb[i];
    397           // Alternative#2 is a better match. So push pixel at 'i' as literal.
    398           PixOrCopy v;
    399           if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
    400             const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
    401             v = PixOrCopyCreateCacheIdx(ix);
    402           } else {
    403             if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
    404             v = PixOrCopyCreateLiteral(pixel);
    405           }
    406           BackwardRefsCursorAdd(refs, v);
    407           i++;  // Backward reference to be done for next pixel.
    408           len = len2;
    409           offset = offset2;
    410         }
    411       }
    412       if (len >= MAX_LENGTH) {
    413         len = MAX_LENGTH - 1;
    414       }
    415       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
    416       if (use_color_cache) {
    417         for (k = 0; k < len; ++k) {
    418           VP8LColorCacheInsert(&hashers, argb[i + k]);
    419         }
    420       }
    421       // Add to the hash_chain (but cannot add the last pixel).
    422       {
    423         const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
    424         for (k = 1; k < last; ++k) {
    425           HashChainInsert(hash_chain, &argb[i + k], i + k);
    426         }
    427       }
    428       i += len;
    429     } else {
    430       const uint32_t pixel = argb[i];
    431       PixOrCopy v;
    432       if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
    433         // push pixel as a PixOrCopyCreateCacheIdx pixel
    434         const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
    435         v = PixOrCopyCreateCacheIdx(ix);
    436       } else {
    437         if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
    438         v = PixOrCopyCreateLiteral(pixel);
    439       }
    440       BackwardRefsCursorAdd(refs, v);
    441       if (i + 1 < pix_count) {
    442         HashChainInsert(hash_chain, &argb[i], i);
    443       }
    444       ++i;
    445     }
    446   }
    447   ok = !refs->error_;
    448 Error:
    449   if (cc_init) VP8LColorCacheClear(&hashers);
    450   return ok;
    451 }
    452 
    453 // -----------------------------------------------------------------------------
    454 
    455 typedef struct {
    456   double alpha_[VALUES_IN_BYTE];
    457   double red_[VALUES_IN_BYTE];
    458   double literal_[PIX_OR_COPY_CODES_MAX];
    459   double blue_[VALUES_IN_BYTE];
    460   double distance_[NUM_DISTANCE_CODES];
    461 } CostModel;
    462 
    463 static int BackwardReferencesTraceBackwards(
    464     int xsize, int ysize, int recursive_cost_model,
    465     const uint32_t* const argb, int quality, int cache_bits,
    466     VP8LHashChain* const hash_chain,
    467     VP8LBackwardRefs* const refs);
    468 
    469 static void ConvertPopulationCountTableToBitEstimates(
    470     int num_symbols, const uint32_t population_counts[], double output[]) {
    471   uint32_t sum = 0;
    472   int nonzeros = 0;
    473   int i;
    474   for (i = 0; i < num_symbols; ++i) {
    475     sum += population_counts[i];
    476     if (population_counts[i] > 0) {
    477       ++nonzeros;
    478     }
    479   }
    480   if (nonzeros <= 1) {
    481     memset(output, 0, num_symbols * sizeof(*output));
    482   } else {
    483     const double logsum = VP8LFastLog2(sum);
    484     for (i = 0; i < num_symbols; ++i) {
    485       output[i] = logsum - VP8LFastLog2(population_counts[i]);
    486     }
    487   }
    488 }
    489 
    490 static int CostModelBuild(CostModel* const m, int xsize, int ysize,
    491                           int recursion_level, const uint32_t* const argb,
    492                           int quality, int cache_bits,
    493                           VP8LHashChain* const hash_chain,
    494                           VP8LBackwardRefs* const refs) {
    495   int ok = 0;
    496   VP8LHistogram* histo = NULL;
    497 
    498   ClearBackwardRefs(refs);
    499   if (recursion_level > 0) {
    500     if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
    501                                           argb, quality, cache_bits, hash_chain,
    502                                           refs)) {
    503       goto Error;
    504     }
    505   } else {
    506     if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
    507                                      hash_chain, refs)) {
    508       goto Error;
    509     }
    510   }
    511   histo = VP8LAllocateHistogram(cache_bits);
    512   if (histo == NULL) goto Error;
    513 
    514   VP8LHistogramCreate(histo, refs, cache_bits);
    515 
    516   ConvertPopulationCountTableToBitEstimates(
    517       VP8LHistogramNumCodes(histo->palette_code_bits_),
    518       histo->literal_, m->literal_);
    519   ConvertPopulationCountTableToBitEstimates(
    520       VALUES_IN_BYTE, histo->red_, m->red_);
    521   ConvertPopulationCountTableToBitEstimates(
    522       VALUES_IN_BYTE, histo->blue_, m->blue_);
    523   ConvertPopulationCountTableToBitEstimates(
    524       VALUES_IN_BYTE, histo->alpha_, m->alpha_);
    525   ConvertPopulationCountTableToBitEstimates(
    526       NUM_DISTANCE_CODES, histo->distance_, m->distance_);
    527   ok = 1;
    528 
    529  Error:
    530   VP8LFreeHistogram(histo);
    531   return ok;
    532 }
    533 
    534 static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
    535   return m->alpha_[v >> 24] +
    536          m->red_[(v >> 16) & 0xff] +
    537          m->literal_[(v >> 8) & 0xff] +
    538          m->blue_[v & 0xff];
    539 }
    540 
    541 static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
    542   const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx;
    543   return m->literal_[literal_idx];
    544 }
    545 
    546 static WEBP_INLINE double GetLengthCost(const CostModel* const m,
    547                                         uint32_t length) {
    548   int code, extra_bits;
    549   VP8LPrefixEncodeBits(length, &code, &extra_bits);
    550   return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
    551 }
    552 
    553 static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
    554                                           uint32_t distance) {
    555   int code, extra_bits;
    556   VP8LPrefixEncodeBits(distance, &code, &extra_bits);
    557   return m->distance_[code] + extra_bits;
    558 }
    559 
    560 static int BackwardReferencesHashChainDistanceOnly(
    561     int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
    562     int quality, int cache_bits, VP8LHashChain* const hash_chain,
    563     VP8LBackwardRefs* const refs, uint32_t* const dist_array) {
    564   int i;
    565   int ok = 0;
    566   int cc_init = 0;
    567   const int pix_count = xsize * ysize;
    568   const int use_color_cache = (cache_bits > 0);
    569   float* const cost =
    570       (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
    571   CostModel* cost_model = (CostModel*)WebPSafeMalloc(1ULL, sizeof(*cost_model));
    572   VP8LColorCache hashers;
    573   const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
    574   const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
    575   const int min_distance_code = 2;  // TODO(vikasa): tune as function of quality
    576   int window_size = WINDOW_SIZE;
    577   int iter_pos = 1;
    578   int iter_limit = -1;
    579 
    580   if (cost == NULL || cost_model == NULL) goto Error;
    581 
    582   if (use_color_cache) {
    583     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    584     if (!cc_init) goto Error;
    585   }
    586 
    587   if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
    588                       quality, cache_bits, hash_chain, refs)) {
    589     goto Error;
    590   }
    591 
    592   for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;
    593 
    594   // We loop one pixel at a time, but store all currently best points to
    595   // non-processed locations from this point.
    596   dist_array[0] = 0;
    597   GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
    598                                 &window_size, &iter_pos, &iter_limit);
    599   HashChainInit(hash_chain);
    600   for (i = 0; i < pix_count; ++i) {
    601     double prev_cost = 0.0;
    602     int shortmax;
    603     if (i > 0) {
    604       prev_cost = cost[i - 1];
    605     }
    606     for (shortmax = 0; shortmax < 2; ++shortmax) {
    607       int offset = 0;
    608       int len = 0;
    609       if (i < pix_count - 1) {  // FindCopy reads pixels at [i] and [i + 1].
    610         int max_len = shortmax ? 2 : pix_count - i;
    611         HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
    612                           window_size, iter_pos, iter_limit,
    613                           &offset, &len);
    614       }
    615       if (len >= MIN_LENGTH) {
    616         const int code = DistanceToPlaneCode(xsize, offset);
    617         const double distance_cost =
    618             prev_cost + GetDistanceCost(cost_model, code);
    619         int k;
    620         for (k = 1; k < len; ++k) {
    621           const double cost_val = distance_cost + GetLengthCost(cost_model, k);
    622           if (cost[i + k] > cost_val) {
    623             cost[i + k] = (float)cost_val;
    624             dist_array[i + k] = k + 1;
    625           }
    626         }
    627         // This if is for speedup only. It roughly doubles the speed, and
    628         // makes compression worse by .1 %.
    629         if (len >= 128 && code <= min_distance_code) {
    630           // Long copy for short distances, let's skip the middle
    631           // lookups for better copies.
    632           // 1) insert the hashes.
    633           if (use_color_cache) {
    634             for (k = 0; k < len; ++k) {
    635               VP8LColorCacheInsert(&hashers, argb[i + k]);
    636             }
    637           }
    638           // 2) Add to the hash_chain (but cannot add the last pixel)
    639           {
    640             const int last = (len + i < pix_count - 1) ? len + i
    641                                                        : pix_count - 1;
    642             for (k = i; k < last; ++k) {
    643               HashChainInsert(hash_chain, &argb[k], k);
    644             }
    645           }
    646           // 3) jump.
    647           i += len - 1;  // for loop does ++i, thus -1 here.
    648           goto next_symbol;
    649         }
    650       }
    651     }
    652     if (i < pix_count - 1) {
    653       HashChainInsert(hash_chain, &argb[i], i);
    654     }
    655     {
    656       // inserting a literal pixel
    657       double cost_val = prev_cost;
    658       if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
    659         const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
    660         cost_val += GetCacheCost(cost_model, ix) * mul0;
    661       } else {
    662         if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
    663         cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
    664       }
    665       if (cost[i] > cost_val) {
    666         cost[i] = (float)cost_val;
    667         dist_array[i] = 1;  // only one is inserted.
    668       }
    669     }
    670  next_symbol: ;
    671   }
    672   // Last pixel still to do, it can only be a single step if not reached
    673   // through cheaper means already.
    674   ok = !refs->error_;
    675 Error:
    676   if (cc_init) VP8LColorCacheClear(&hashers);
    677   WebPSafeFree(cost_model);
    678   WebPSafeFree(cost);
    679   return ok;
    680 }
    681 
    682 // We pack the path at the end of *dist_array and return
    683 // a pointer to this part of the array. Example:
    684 // dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
    685 static void TraceBackwards(uint32_t* const dist_array,
    686                            int dist_array_size,
    687                            uint32_t** const chosen_path,
    688                            int* const chosen_path_size) {
    689   uint32_t* path = dist_array + dist_array_size;
    690   uint32_t* cur = dist_array + dist_array_size - 1;
    691   while (cur >= dist_array) {
    692     const int k = *cur;
    693     --path;
    694     *path = k;
    695     cur -= k;
    696   }
    697   *chosen_path = path;
    698   *chosen_path_size = (int)(dist_array + dist_array_size - path);
    699 }
    700 
    701 static int BackwardReferencesHashChainFollowChosenPath(
    702     int xsize, int ysize, const uint32_t* const argb,
    703     int quality, int cache_bits,
    704     const uint32_t* const chosen_path, int chosen_path_size,
    705     VP8LHashChain* const hash_chain,
    706     VP8LBackwardRefs* const refs) {
    707   const int pix_count = xsize * ysize;
    708   const int use_color_cache = (cache_bits > 0);
    709   int size = 0;
    710   int i = 0;
    711   int k;
    712   int ix;
    713   int ok = 0;
    714   int cc_init = 0;
    715   int window_size = WINDOW_SIZE;
    716   int iter_pos = 1;
    717   int iter_limit = -1;
    718   VP8LColorCache hashers;
    719 
    720   if (use_color_cache) {
    721     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    722     if (!cc_init) goto Error;
    723   }
    724 
    725   ClearBackwardRefs(refs);
    726   GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
    727                                 &window_size, &iter_pos, &iter_limit);
    728   HashChainInit(hash_chain);
    729   for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
    730     int offset = 0;
    731     int len = 0;
    732     int max_len = chosen_path[ix];
    733     if (max_len != 1) {
    734       HashChainFindCopy(hash_chain, i, xsize, argb, max_len,
    735                         window_size, iter_pos, iter_limit,
    736                         &offset, &len);
    737       assert(len == max_len);
    738       BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
    739       if (use_color_cache) {
    740         for (k = 0; k < len; ++k) {
    741           VP8LColorCacheInsert(&hashers, argb[i + k]);
    742         }
    743       }
    744       {
    745         const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
    746         for (k = 0; k < last; ++k) {
    747           HashChainInsert(hash_chain, &argb[i + k], i + k);
    748         }
    749       }
    750       i += len;
    751     } else {
    752       PixOrCopy v;
    753       if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
    754         // push pixel as a color cache index
    755         const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
    756         v = PixOrCopyCreateCacheIdx(idx);
    757       } else {
    758         if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
    759         v = PixOrCopyCreateLiteral(argb[i]);
    760       }
    761       BackwardRefsCursorAdd(refs, v);
    762       if (i + 1 < pix_count) {
    763         HashChainInsert(hash_chain, &argb[i], i);
    764       }
    765       ++i;
    766     }
    767   }
    768   ok = !refs->error_;
    769 Error:
    770   if (cc_init) VP8LColorCacheClear(&hashers);
    771   return ok;
    772 }
    773 
    774 // Returns 1 on success.
    775 static int BackwardReferencesTraceBackwards(int xsize, int ysize,
    776                                             int recursive_cost_model,
    777                                             const uint32_t* const argb,
    778                                             int quality, int cache_bits,
    779                                             VP8LHashChain* const hash_chain,
    780                                             VP8LBackwardRefs* const refs) {
    781   int ok = 0;
    782   const int dist_array_size = xsize * ysize;
    783   uint32_t* chosen_path = NULL;
    784   int chosen_path_size = 0;
    785   uint32_t* dist_array =
    786       (uint32_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
    787 
    788   if (dist_array == NULL) goto Error;
    789 
    790   if (!BackwardReferencesHashChainDistanceOnly(
    791       xsize, ysize, recursive_cost_model, argb, quality, cache_bits, hash_chain,
    792       refs, dist_array)) {
    793     goto Error;
    794   }
    795   TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
    796   if (!BackwardReferencesHashChainFollowChosenPath(
    797       xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
    798       hash_chain, refs)) {
    799     goto Error;
    800   }
    801   ok = 1;
    802  Error:
    803   WebPSafeFree(dist_array);
    804   return ok;
    805 }
    806 
    807 static void BackwardReferences2DLocality(int xsize,
    808                                          const VP8LBackwardRefs* const refs) {
    809   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    810   while (VP8LRefsCursorOk(&c)) {
    811     if (PixOrCopyIsCopy(c.cur_pos)) {
    812       const int dist = c.cur_pos->argb_or_distance;
    813       const int transformed_dist = DistanceToPlaneCode(xsize, dist);
    814       c.cur_pos->argb_or_distance = transformed_dist;
    815     }
    816     VP8LRefsCursorNext(&c);
    817   }
    818 }
    819 
    820 VP8LBackwardRefs* VP8LGetBackwardReferences(
    821     int width, int height, const uint32_t* const argb, int quality,
    822     int cache_bits, int use_2d_locality, VP8LHashChain* const hash_chain,
    823     VP8LBackwardRefs refs_array[2]) {
    824   int lz77_is_useful;
    825   const int num_pix = width * height;
    826   VP8LBackwardRefs* best = NULL;
    827   VP8LBackwardRefs* const refs_lz77 = &refs_array[0];
    828   VP8LBackwardRefs* const refs_rle = &refs_array[1];
    829 
    830   if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
    831                                    hash_chain, refs_lz77)) {
    832     return NULL;
    833   }
    834   if (!BackwardReferencesRle(width, height, argb, refs_rle)) {
    835     return NULL;
    836   }
    837 
    838   {
    839     double bit_cost_lz77, bit_cost_rle;
    840     VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
    841     if (histo == NULL) return NULL;
    842     // Evaluate LZ77 coding.
    843     VP8LHistogramCreate(histo, refs_lz77, cache_bits);
    844     bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
    845     // Evaluate RLE coding.
    846     VP8LHistogramCreate(histo, refs_rle, cache_bits);
    847     bit_cost_rle = VP8LHistogramEstimateBits(histo);
    848     // Decide if LZ77 is useful.
    849     lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
    850     VP8LFreeHistogram(histo);
    851   }
    852 
    853   // Choose appropriate backward reference.
    854   if (lz77_is_useful) {
    855     // TraceBackwards is costly. Don't execute it at lower quality.
    856     const int try_lz77_trace_backwards = (quality >= 25);
    857     best = refs_lz77;   // default guess: lz77 is better
    858     if (try_lz77_trace_backwards) {
    859       // Set recursion level for large images using a color cache.
    860       const int recursion_level =
    861           (num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
    862       VP8LBackwardRefs* const refs_trace = &refs_array[1];
    863       ClearBackwardRefs(refs_trace);
    864       if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
    865                                            quality, cache_bits, hash_chain,
    866                                            refs_trace)) {
    867         best = refs_trace;
    868       }
    869     }
    870   } else {
    871     best = refs_rle;
    872   }
    873 
    874   if (use_2d_locality) BackwardReferences2DLocality(width, best);
    875 
    876   return best;
    877 }
    878 
    879 // Returns entropy for the given cache bits.
    880 static double ComputeCacheEntropy(const uint32_t* const argb,
    881                                   int xsize, int ysize,
    882                                   const VP8LBackwardRefs* const refs,
    883                                   int cache_bits) {
    884   int pixel_index = 0;
    885   uint32_t k;
    886   const int use_color_cache = (cache_bits > 0);
    887   int cc_init = 0;
    888   double entropy = MAX_ENTROPY;
    889   const double kSmallPenaltyForLargeCache = 4.0;
    890   VP8LColorCache hashers;
    891   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
    892   VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
    893   if (histo == NULL) goto Error;
    894 
    895   if (use_color_cache) {
    896     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
    897     if (!cc_init) goto Error;
    898   }
    899 
    900   while (VP8LRefsCursorOk(&c)) {
    901     const PixOrCopy* const v = c.cur_pos;
    902     if (PixOrCopyIsLiteral(v)) {
    903       if (use_color_cache &&
    904           VP8LColorCacheContains(&hashers, argb[pixel_index])) {
    905         // push pixel as a cache index
    906         const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
    907         const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
    908         VP8LHistogramAddSinglePixOrCopy(histo, &token);
    909       } else {
    910         VP8LHistogramAddSinglePixOrCopy(histo, v);
    911       }
    912     } else {
    913       VP8LHistogramAddSinglePixOrCopy(histo, v);
    914     }
    915     if (use_color_cache) {
    916       for (k = 0; k < PixOrCopyLength(v); ++k) {
    917         VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
    918       }
    919     }
    920     pixel_index += PixOrCopyLength(v);
    921     VP8LRefsCursorNext(&c);
    922   }
    923   assert(pixel_index == xsize * ysize);
    924   (void)xsize;  // xsize is not used in non-debug compilations otherwise.
    925   (void)ysize;  // ysize is not used in non-debug compilations otherwise.
    926   entropy = VP8LHistogramEstimateBits(histo) +
    927       kSmallPenaltyForLargeCache * cache_bits;
    928  Error:
    929   if (cc_init) VP8LColorCacheClear(&hashers);
    930   VP8LFreeHistogram(histo);
    931   return entropy;
    932 }
    933 
    934 // *best_cache_bits will contain how many bits are to be used for a color cache.
    935 // Returns 0 in case of memory error.
    936 int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
    937                                       int xsize, int ysize, int quality,
    938                                       VP8LHashChain* const hash_chain,
    939                                       VP8LBackwardRefs* const refs,
    940                                       int* const best_cache_bits) {
    941   int eval_low = 1;
    942   int eval_high = 1;
    943   double entropy_low = MAX_ENTROPY;
    944   double entropy_high = MAX_ENTROPY;
    945   int cache_bits_low = 0;
    946   int cache_bits_high = MAX_COLOR_CACHE_BITS;
    947 
    948   if (!BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, hash_chain,
    949                                    refs)) {
    950     return 0;
    951   }
    952   // Do a binary search to find the optimal entropy for cache_bits.
    953   while (cache_bits_high - cache_bits_low > 1) {
    954     if (eval_low) {
    955       entropy_low =
    956           ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_low);
    957       eval_low = 0;
    958     }
    959     if (eval_high) {
    960       entropy_high =
    961           ComputeCacheEntropy(argb, xsize, ysize, refs, cache_bits_high);
    962       eval_high = 0;
    963     }
    964     if (entropy_high < entropy_low) {
    965       *best_cache_bits = cache_bits_high;
    966       cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
    967       eval_low = 1;
    968     } else {
    969       *best_cache_bits = cache_bits_low;
    970       cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
    971       eval_high = 1;
    972     }
    973   }
    974   return 1;
    975 }
    976