Home | History | Annotate | Download | only in enc
      1 // Copyright 2012 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Author: Jyrki Alakuijala (jyrki (at) google.com)
     11 //
     12 #ifdef HAVE_CONFIG_H
     13 #include "../webp/config.h"
     14 #endif
     15 
     16 #include <math.h>
     17 
     18 #include "./backward_references.h"
     19 #include "./histogram.h"
     20 #include "../dsp/lossless.h"
     21 #include "../utils/utils.h"
     22 
     23 #define MAX_COST 1.e38
     24 
     25 // Number of partitions for the three dominant (literal, red and blue) symbol
     26 // costs.
     27 #define NUM_PARTITIONS 4
     28 // The size of the bin-hash corresponding to the three dominant costs.
     29 #define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
     30 // Maximum number of histograms allowed in greedy combining algorithm.
     31 #define MAX_HISTO_GREEDY 100
     32 
     33 static void HistogramClear(VP8LHistogram* const p) {
     34   uint32_t* const literal = p->literal_;
     35   const int cache_bits = p->palette_code_bits_;
     36   const int histo_size = VP8LGetHistogramSize(cache_bits);
     37   memset(p, 0, histo_size);
     38   p->palette_code_bits_ = cache_bits;
     39   p->literal_ = literal;
     40 }
     41 
     42 // Swap two histogram pointers.
     43 static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {
     44   VP8LHistogram* const tmp = *A;
     45   *A = *B;
     46   *B = tmp;
     47 }
     48 
     49 static void HistogramCopy(const VP8LHistogram* const src,
     50                           VP8LHistogram* const dst) {
     51   uint32_t* const dst_literal = dst->literal_;
     52   const int dst_cache_bits = dst->palette_code_bits_;
     53   const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
     54   assert(src->palette_code_bits_ == dst_cache_bits);
     55   memcpy(dst, src, histo_size);
     56   dst->literal_ = dst_literal;
     57 }
     58 
     59 int VP8LGetHistogramSize(int cache_bits) {
     60   const int literal_size = VP8LHistogramNumCodes(cache_bits);
     61   const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
     62   assert(total_size <= (size_t)0x7fffffff);
     63   return (int)total_size;
     64 }
     65 
     66 void VP8LFreeHistogram(VP8LHistogram* const histo) {
     67   WebPSafeFree(histo);
     68 }
     69 
     70 void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {
     71   WebPSafeFree(histo);
     72 }
     73 
     74 void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
     75                             VP8LHistogram* const histo) {
     76   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
     77   while (VP8LRefsCursorOk(&c)) {
     78     VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
     79     VP8LRefsCursorNext(&c);
     80   }
     81 }
     82 
     83 void VP8LHistogramCreate(VP8LHistogram* const p,
     84                          const VP8LBackwardRefs* const refs,
     85                          int palette_code_bits) {
     86   if (palette_code_bits >= 0) {
     87     p->palette_code_bits_ = palette_code_bits;
     88   }
     89   HistogramClear(p);
     90   VP8LHistogramStoreRefs(refs, p);
     91 }
     92 
     93 void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
     94   p->palette_code_bits_ = palette_code_bits;
     95   HistogramClear(p);
     96 }
     97 
     98 VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
     99   VP8LHistogram* histo = NULL;
    100   const int total_size = VP8LGetHistogramSize(cache_bits);
    101   uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
    102   if (memory == NULL) return NULL;
    103   histo = (VP8LHistogram*)memory;
    104   // literal_ won't necessary be aligned.
    105   histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
    106   VP8LHistogramInit(histo, cache_bits);
    107   return histo;
    108 }
    109 
    110 VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
    111   int i;
    112   VP8LHistogramSet* set;
    113   const int histo_size = VP8LGetHistogramSize(cache_bits);
    114   const size_t total_size =
    115       sizeof(*set) + size * (sizeof(*set->histograms) +
    116       histo_size + WEBP_ALIGN_CST);
    117   uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
    118   if (memory == NULL) return NULL;
    119 
    120   set = (VP8LHistogramSet*)memory;
    121   memory += sizeof(*set);
    122   set->histograms = (VP8LHistogram**)memory;
    123   memory += size * sizeof(*set->histograms);
    124   set->max_size = size;
    125   set->size = size;
    126   for (i = 0; i < size; ++i) {
    127     memory = (uint8_t*)WEBP_ALIGN(memory);
    128     set->histograms[i] = (VP8LHistogram*)memory;
    129     // literal_ won't necessary be aligned.
    130     set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
    131     VP8LHistogramInit(set->histograms[i], cache_bits);
    132     memory += histo_size;
    133   }
    134   return set;
    135 }
    136 
    137 // -----------------------------------------------------------------------------
    138 
    139 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
    140                                      const PixOrCopy* const v) {
    141   if (PixOrCopyIsLiteral(v)) {
    142     ++histo->alpha_[PixOrCopyLiteral(v, 3)];
    143     ++histo->red_[PixOrCopyLiteral(v, 2)];
    144     ++histo->literal_[PixOrCopyLiteral(v, 1)];
    145     ++histo->blue_[PixOrCopyLiteral(v, 0)];
    146   } else if (PixOrCopyIsCacheIdx(v)) {
    147     const int literal_ix =
    148         NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
    149     ++histo->literal_[literal_ix];
    150   } else {
    151     int code, extra_bits;
    152     VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
    153     ++histo->literal_[NUM_LITERAL_CODES + code];
    154     VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
    155     ++histo->distance_[code];
    156   }
    157 }
    158 
    159 // -----------------------------------------------------------------------------
    160 // Entropy-related functions.
    161 
    162 static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
    163   double mix;
    164   if (entropy->nonzeros < 5) {
    165     if (entropy->nonzeros <= 1) {
    166       return 0;
    167     }
    168     // Two symbols, they will be 0 and 1 in a Huffman code.
    169     // Let's mix in a bit of entropy to favor good clustering when
    170     // distributions of these are combined.
    171     if (entropy->nonzeros == 2) {
    172       return 0.99 * entropy->sum + 0.01 * entropy->entropy;
    173     }
    174     // No matter what the entropy says, we cannot be better than min_limit
    175     // with Huffman coding. I am mixing a bit of entropy into the
    176     // min_limit since it produces much better (~0.5 %) compression results
    177     // perhaps because of better entropy clustering.
    178     if (entropy->nonzeros == 3) {
    179       mix = 0.95;
    180     } else {
    181       mix = 0.7;  // nonzeros == 4.
    182     }
    183   } else {
    184     mix = 0.627;
    185   }
    186 
    187   {
    188     double min_limit = 2 * entropy->sum - entropy->max_val;
    189     min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy;
    190     return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
    191   }
    192 }
    193 
    194 double VP8LBitsEntropy(const uint32_t* const array, int n,
    195                        uint32_t* const trivial_symbol) {
    196   VP8LBitEntropy entropy;
    197   VP8LBitsEntropyUnrefined(array, n, &entropy);
    198   if (trivial_symbol != NULL) {
    199     *trivial_symbol =
    200         (entropy.nonzeros == 1) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM;
    201   }
    202 
    203   return BitsEntropyRefine(&entropy);
    204 }
    205 
    206 static double InitialHuffmanCost(void) {
    207   // Small bias because Huffman code length is typically not stored in
    208   // full length.
    209   static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
    210   static const double kSmallBias = 9.1;
    211   return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
    212 }
    213 
    214 // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
    215 static double FinalHuffmanCost(const VP8LStreaks* const stats) {
    216   double retval = InitialHuffmanCost();
    217   retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
    218   retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
    219   retval += 1.796875 * stats->streaks[0][0];
    220   retval += 3.28125 * stats->streaks[1][0];
    221   return retval;
    222 }
    223 
    224 // Get the symbol entropy for the distribution 'population'.
    225 // Set 'trivial_sym', if there's only one symbol present in the distribution.
    226 static double PopulationCost(const uint32_t* const population, int length,
    227                              uint32_t* const trivial_sym) {
    228   VP8LBitEntropy bit_entropy;
    229   VP8LStreaks stats;
    230   VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
    231   if (trivial_sym != NULL) {
    232     *trivial_sym = (bit_entropy.nonzeros == 1) ? bit_entropy.nonzero_code
    233                                                : VP8L_NON_TRIVIAL_SYM;
    234   }
    235 
    236   return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
    237 }
    238 
    239 static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
    240                                              const uint32_t* const Y,
    241                                              int length) {
    242   VP8LBitEntropy bit_entropy;
    243   VP8LStreaks stats;
    244   VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
    245 
    246   return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
    247 }
    248 
    249 // Estimates the Entropy + Huffman + other block overhead size cost.
    250 double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
    251   return
    252       PopulationCost(
    253           p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
    254       + PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
    255       + PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
    256       + PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
    257       + PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
    258       + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
    259       + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
    260 }
    261 
    262 // -----------------------------------------------------------------------------
    263 // Various histogram combine/cost-eval functions
    264 
    265 static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
    266                                        const VP8LHistogram* const b,
    267                                        double cost_threshold,
    268                                        double* cost) {
    269   const int palette_code_bits = a->palette_code_bits_;
    270   assert(a->palette_code_bits_ == b->palette_code_bits_);
    271   *cost += GetCombinedEntropy(a->literal_, b->literal_,
    272                               VP8LHistogramNumCodes(palette_code_bits));
    273   *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
    274                                  b->literal_ + NUM_LITERAL_CODES,
    275                                  NUM_LENGTH_CODES);
    276   if (*cost > cost_threshold) return 0;
    277 
    278   *cost += GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
    279   if (*cost > cost_threshold) return 0;
    280 
    281   *cost += GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
    282   if (*cost > cost_threshold) return 0;
    283 
    284   *cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
    285   if (*cost > cost_threshold) return 0;
    286 
    287   *cost += GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES);
    288   *cost +=
    289       VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
    290   if (*cost > cost_threshold) return 0;
    291 
    292   return 1;
    293 }
    294 
    295 // Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
    296 // to the threshold value 'cost_threshold'. The score returned is
    297 //  Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
    298 // Since the previous score passed is 'cost_threshold', we only need to compare
    299 // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
    300 // early.
    301 static double HistogramAddEval(const VP8LHistogram* const a,
    302                                const VP8LHistogram* const b,
    303                                VP8LHistogram* const out,
    304                                double cost_threshold) {
    305   double cost = 0;
    306   const double sum_cost = a->bit_cost_ + b->bit_cost_;
    307   cost_threshold += sum_cost;
    308 
    309   if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
    310     VP8LHistogramAdd(a, b, out);
    311     out->bit_cost_ = cost;
    312     out->palette_code_bits_ = a->palette_code_bits_;
    313     out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ?
    314         a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM;
    315   }
    316 
    317   return cost - sum_cost;
    318 }
    319 
    320 // Same as HistogramAddEval(), except that the resulting histogram
    321 // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
    322 // the term C(b) which is constant over all the evaluations.
    323 static double HistogramAddThresh(const VP8LHistogram* const a,
    324                                  const VP8LHistogram* const b,
    325                                  double cost_threshold) {
    326   double cost = -a->bit_cost_;
    327   GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
    328   return cost;
    329 }
    330 
    331 // -----------------------------------------------------------------------------
    332 
    333 // The structure to keep track of cost range for the three dominant entropy
    334 // symbols.
    335 // TODO(skal): Evaluate if float can be used here instead of double for
    336 // representing the entropy costs.
    337 typedef struct {
    338   double literal_max_;
    339   double literal_min_;
    340   double red_max_;
    341   double red_min_;
    342   double blue_max_;
    343   double blue_min_;
    344 } DominantCostRange;
    345 
    346 static void DominantCostRangeInit(DominantCostRange* const c) {
    347   c->literal_max_ = 0.;
    348   c->literal_min_ = MAX_COST;
    349   c->red_max_ = 0.;
    350   c->red_min_ = MAX_COST;
    351   c->blue_max_ = 0.;
    352   c->blue_min_ = MAX_COST;
    353 }
    354 
    355 static void UpdateDominantCostRange(
    356     const VP8LHistogram* const h, DominantCostRange* const c) {
    357   if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;
    358   if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;
    359   if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;
    360   if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;
    361   if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;
    362   if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;
    363 }
    364 
    365 static void UpdateHistogramCost(VP8LHistogram* const h) {
    366   uint32_t alpha_sym, red_sym, blue_sym;
    367   const double alpha_cost =
    368       PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
    369   const double distance_cost =
    370       PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
    371       VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
    372   const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
    373   h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
    374                      VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
    375                                    NUM_LENGTH_CODES);
    376   h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
    377   h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
    378   h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
    379                  alpha_cost + distance_cost;
    380   if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
    381     h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
    382   } else {
    383     h->trivial_symbol_ =
    384         ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0);
    385   }
    386 }
    387 
    388 static int GetBinIdForEntropy(double min, double max, double val) {
    389   const double range = max - min + 1e-6;
    390   const double delta = val - min;
    391   return (int)(NUM_PARTITIONS * delta / range);
    392 }
    393 
    394 static int GetHistoBinIndexLowEffort(
    395     const VP8LHistogram* const h, const DominantCostRange* const c) {
    396   const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
    397                                         h->literal_cost_);
    398   assert(bin_id < NUM_PARTITIONS);
    399   return bin_id;
    400 }
    401 
    402 static int GetHistoBinIndex(
    403     const VP8LHistogram* const h, const DominantCostRange* const c) {
    404   const int bin_id =
    405       GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) +
    406       NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_,
    407                                           h->red_cost_) +
    408       NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_,
    409                                                            c->literal_max_,
    410                                                            h->literal_cost_);
    411   assert(bin_id < BIN_SIZE);
    412   return bin_id;
    413 }
    414 
    415 // Construct the histograms from backward references.
    416 static void HistogramBuild(
    417     int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,
    418     VP8LHistogramSet* const image_histo) {
    419   int x = 0, y = 0;
    420   const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
    421   VP8LHistogram** const histograms = image_histo->histograms;
    422   VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
    423   assert(histo_bits > 0);
    424   while (VP8LRefsCursorOk(&c)) {
    425     const PixOrCopy* const v = c.cur_pos;
    426     const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
    427     VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
    428     x += PixOrCopyLength(v);
    429     while (x >= xsize) {
    430       x -= xsize;
    431       ++y;
    432     }
    433     VP8LRefsCursorNext(&c);
    434   }
    435 }
    436 
    437 // Copies the histograms and computes its bit_cost.
    438 static void HistogramCopyAndAnalyze(
    439     VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
    440   int i;
    441   const int histo_size = orig_histo->size;
    442   VP8LHistogram** const orig_histograms = orig_histo->histograms;
    443   VP8LHistogram** const histograms = image_histo->histograms;
    444   for (i = 0; i < histo_size; ++i) {
    445     VP8LHistogram* const histo = orig_histograms[i];
    446     UpdateHistogramCost(histo);
    447     // Copy histograms from orig_histo[] to image_histo[].
    448     HistogramCopy(histo, histograms[i]);
    449   }
    450 }
    451 
    452 // Partition histograms to different entropy bins for three dominant (literal,
    453 // red and blue) symbol costs and compute the histogram aggregate bit_cost.
    454 static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
    455                                        int16_t* const bin_map, int low_effort) {
    456   int i;
    457   VP8LHistogram** const histograms = image_histo->histograms;
    458   const int histo_size = image_histo->size;
    459   const int bin_depth = histo_size + 1;
    460   DominantCostRange cost_range;
    461   DominantCostRangeInit(&cost_range);
    462 
    463   // Analyze the dominant (literal, red and blue) entropy costs.
    464   for (i = 0; i < histo_size; ++i) {
    465     VP8LHistogram* const histo = histograms[i];
    466     UpdateDominantCostRange(histo, &cost_range);
    467   }
    468 
    469   // bin-hash histograms on three of the dominant (literal, red and blue)
    470   // symbol costs.
    471   for (i = 0; i < histo_size; ++i) {
    472     int num_histos;
    473     VP8LHistogram* const histo = histograms[i];
    474     const int16_t bin_id = low_effort ?
    475         (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) :
    476         (int16_t)GetHistoBinIndex(histo, &cost_range);
    477     const int bin_offset = bin_id * bin_depth;
    478     // bin_map[n][0] for every bin 'n' maintains the counter for the number of
    479     // histograms in that bin.
    480     // Get and increment the num_histos in that bin.
    481     num_histos = ++bin_map[bin_offset];
    482     assert(bin_offset + num_histos < bin_depth * BIN_SIZE);
    483     // Add histogram i'th index at num_histos (last) position in the bin_map.
    484     bin_map[bin_offset + num_histos] = i;
    485   }
    486 }
    487 
    488 // Compact the histogram set by removing unused entries.
    489 static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
    490   VP8LHistogram** const histograms = image_histo->histograms;
    491   int i, j;
    492 
    493   for (i = 0, j = 0; i < image_histo->size; ++i) {
    494     if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) {
    495       if (j < i) {
    496         histograms[j] = histograms[i];
    497         histograms[i] = NULL;
    498       }
    499       ++j;
    500     }
    501   }
    502   image_histo->size = j;
    503 }
    504 
    505 static VP8LHistogram* HistogramCombineEntropyBin(
    506     VP8LHistogramSet* const image_histo,
    507     VP8LHistogram* cur_combo,
    508     int16_t* const bin_map, int bin_depth, int num_bins,
    509     double combine_cost_factor, int low_effort) {
    510   int bin_id;
    511   VP8LHistogram** const histograms = image_histo->histograms;
    512 
    513   for (bin_id = 0; bin_id < num_bins; ++bin_id) {
    514     const int bin_offset = bin_id * bin_depth;
    515     const int num_histos = bin_map[bin_offset];
    516     const int idx1 = bin_map[bin_offset + 1];
    517     int num_combine_failures = 0;
    518     int n;
    519     for (n = 2; n <= num_histos; ++n) {
    520       const int idx2 = bin_map[bin_offset + n];
    521       if (low_effort) {
    522         // Merge all histograms with the same bin index, irrespective of cost of
    523         // the merged histograms.
    524         VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]);
    525         histograms[idx2]->bit_cost_ = 0.;
    526       } else {
    527         const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
    528         if (bit_cost_idx2 > 0.) {
    529           const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
    530           const double curr_cost_diff =
    531               HistogramAddEval(histograms[idx1], histograms[idx2],
    532                                cur_combo, bit_cost_thresh);
    533           if (curr_cost_diff < bit_cost_thresh) {
    534             // Try to merge two histograms only if the combo is a trivial one or
    535             // the two candidate histograms are already non-trivial.
    536             // For some images, 'try_combine' turns out to be false for a lot of
    537             // histogram pairs. In that case, we fallback to combining
    538             // histograms as usual to avoid increasing the header size.
    539             const int try_combine =
    540                 (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
    541                 ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
    542                  (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
    543             const int max_combine_failures = 32;
    544             if (try_combine || (num_combine_failures >= max_combine_failures)) {
    545               HistogramSwap(&cur_combo, &histograms[idx1]);
    546               histograms[idx2]->bit_cost_ = 0.;
    547             } else {
    548               ++num_combine_failures;
    549             }
    550           }
    551         }
    552       }
    553     }
    554     if (low_effort) {
    555       // Update the bit_cost for the merged histograms (per bin index).
    556       UpdateHistogramCost(histograms[idx1]);
    557     }
    558   }
    559   HistogramCompactBins(image_histo);
    560   return cur_combo;
    561 }
    562 
    563 static uint32_t MyRand(uint32_t *seed) {
    564   *seed *= 16807U;
    565   if (*seed == 0) {
    566     *seed = 1;
    567   }
    568   return *seed;
    569 }
    570 
    571 // -----------------------------------------------------------------------------
    572 // Histogram pairs priority queue
    573 
    574 // Pair of histograms. Negative idx1 value means that pair is out-of-date.
    575 typedef struct {
    576   int idx1;
    577   int idx2;
    578   double cost_diff;
    579   double cost_combo;
    580 } HistogramPair;
    581 
    582 typedef struct {
    583   HistogramPair* queue;
    584   int size;
    585   int max_size;
    586 } HistoQueue;
    587 
    588 static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
    589   histo_queue->size = 0;
    590   // max_index^2 for the queue size is safe. If you look at
    591   // HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes
    592   // data to the queue, you insert at most:
    593   // - max_index*(max_index-1)/2 (the first two for loops)
    594   // - max_index - 1 in the last for loop at the first iteration of the while
    595   //   loop, max_index - 2 at the second iteration ... therefore
    596   //   max_index*(max_index-1)/2 overall too
    597   histo_queue->max_size = max_index * max_index;
    598   // We allocate max_size + 1 because the last element at index "size" is
    599   // used as temporary data (and it could be up to max_size).
    600   histo_queue->queue = WebPSafeMalloc(histo_queue->max_size + 1,
    601                                       sizeof(*histo_queue->queue));
    602   return histo_queue->queue != NULL;
    603 }
    604 
    605 static void HistoQueueClear(HistoQueue* const histo_queue) {
    606   assert(histo_queue != NULL);
    607   WebPSafeFree(histo_queue->queue);
    608 }
    609 
    610 static void SwapHistogramPairs(HistogramPair *p1,
    611                                HistogramPair *p2) {
    612   const HistogramPair tmp = *p1;
    613   *p1 = *p2;
    614   *p2 = tmp;
    615 }
    616 
    617 // Given a valid priority queue in range [0, queue_size) this function checks
    618 // whether histo_queue[queue_size] should be accepted and swaps it with the
    619 // front if it is smaller. Otherwise, it leaves it as is.
    620 static void UpdateQueueFront(HistoQueue* const histo_queue) {
    621   if (histo_queue->queue[histo_queue->size].cost_diff >= 0) return;
    622 
    623   if (histo_queue->queue[histo_queue->size].cost_diff <
    624       histo_queue->queue[0].cost_diff) {
    625     SwapHistogramPairs(histo_queue->queue,
    626                        histo_queue->queue + histo_queue->size);
    627   }
    628   ++histo_queue->size;
    629 
    630   // We cannot add more elements than the capacity.
    631   // The allocation adds an extra element to the official capacity so that
    632   // histo_queue->queue[histo_queue->max_size] is read/written within bound.
    633   assert(histo_queue->size <= histo_queue->max_size);
    634 }
    635 
    636 // -----------------------------------------------------------------------------
    637 
    638 static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
    639                         HistogramPair* const pair,
    640                         VP8LHistogram* const histos) {
    641   if (idx1 > idx2) {
    642     const int tmp = idx2;
    643     idx2 = idx1;
    644     idx1 = tmp;
    645   }
    646   pair->idx1 = idx1;
    647   pair->idx2 = idx2;
    648   pair->cost_diff =
    649       HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0);
    650   pair->cost_combo = histos->bit_cost_;
    651 }
    652 
    653 // Combines histograms by continuously choosing the one with the highest cost
    654 // reduction.
    655 static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
    656                                   VP8LHistogram* const histos) {
    657   int ok = 0;
    658   int image_histo_size = image_histo->size;
    659   int i, j;
    660   VP8LHistogram** const histograms = image_histo->histograms;
    661   // Indexes of remaining histograms.
    662   int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters));
    663   // Priority queue of histogram pairs.
    664   HistoQueue histo_queue;
    665 
    666   if (!HistoQueueInit(&histo_queue, image_histo_size) || clusters == NULL) {
    667     goto End;
    668   }
    669 
    670   for (i = 0; i < image_histo_size; ++i) {
    671     // Initialize clusters indexes.
    672     clusters[i] = i;
    673     for (j = i + 1; j < image_histo_size; ++j) {
    674       // Initialize positions array.
    675       PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size],
    676                   histos);
    677       UpdateQueueFront(&histo_queue);
    678     }
    679   }
    680 
    681   while (image_histo_size > 1 && histo_queue.size > 0) {
    682     HistogramPair* copy_to;
    683     const int idx1 = histo_queue.queue[0].idx1;
    684     const int idx2 = histo_queue.queue[0].idx2;
    685     VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
    686     histograms[idx1]->bit_cost_ = histo_queue.queue[0].cost_combo;
    687     // Remove merged histogram.
    688     for (i = 0; i + 1 < image_histo_size; ++i) {
    689       if (clusters[i] >= idx2) {
    690         clusters[i] = clusters[i + 1];
    691       }
    692     }
    693     --image_histo_size;
    694 
    695     // Remove pairs intersecting the just combined best pair. This will
    696     // therefore pop the head of the queue.
    697     copy_to = histo_queue.queue;
    698     for (i = 0; i < histo_queue.size; ++i) {
    699       HistogramPair* const p = histo_queue.queue + i;
    700       if (p->idx1 == idx1 || p->idx2 == idx1 ||
    701           p->idx1 == idx2 || p->idx2 == idx2) {
    702         // Do not copy the invalid pair.
    703         continue;
    704       }
    705       if (p->cost_diff < histo_queue.queue[0].cost_diff) {
    706         // Replace the top of the queue if we found better.
    707         SwapHistogramPairs(histo_queue.queue, p);
    708       }
    709       SwapHistogramPairs(copy_to, p);
    710       ++copy_to;
    711     }
    712     histo_queue.size = (int)(copy_to - histo_queue.queue);
    713 
    714     // Push new pairs formed with combined histogram to the queue.
    715     for (i = 0; i < image_histo_size; ++i) {
    716       if (clusters[i] != idx1) {
    717         PreparePair(histograms, idx1, clusters[i],
    718                     &histo_queue.queue[histo_queue.size], histos);
    719         UpdateQueueFront(&histo_queue);
    720       }
    721     }
    722   }
    723   // Move remaining histograms to the beginning of the array.
    724   for (i = 0; i < image_histo_size; ++i) {
    725     if (i != clusters[i]) {  // swap the two histograms
    726       HistogramSwap(&histograms[i], &histograms[clusters[i]]);
    727     }
    728   }
    729 
    730   image_histo->size = image_histo_size;
    731   ok = 1;
    732 
    733  End:
    734   WebPSafeFree(clusters);
    735   HistoQueueClear(&histo_queue);
    736   return ok;
    737 }
    738 
    739 static VP8LHistogram* HistogramCombineStochastic(
    740     VP8LHistogramSet* const image_histo,
    741     VP8LHistogram* tmp_histo,
    742     VP8LHistogram* best_combo,
    743     int quality, int min_cluster_size) {
    744   int iter;
    745   uint32_t seed = 0;
    746   int tries_with_no_success = 0;
    747   int image_histo_size = image_histo->size;
    748   const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8;
    749   const int outer_iters = image_histo_size * iter_mult;
    750   const int num_pairs = image_histo_size / 2;
    751   const int num_tries_no_success = outer_iters / 2;
    752   VP8LHistogram** const histograms = image_histo->histograms;
    753 
    754   // Collapse similar histograms in 'image_histo'.
    755   ++min_cluster_size;
    756   for (iter = 0;
    757        iter < outer_iters && image_histo_size >= min_cluster_size;
    758        ++iter) {
    759     double best_cost_diff = 0.;
    760     int best_idx1 = -1, best_idx2 = 1;
    761     int j;
    762     const int num_tries =
    763         (num_pairs < image_histo_size) ? num_pairs : image_histo_size;
    764     seed += iter;
    765     for (j = 0; j < num_tries; ++j) {
    766       double curr_cost_diff;
    767       // Choose two histograms at random and try to combine them.
    768       const uint32_t idx1 = MyRand(&seed) % image_histo_size;
    769       const uint32_t tmp = (j & 7) + 1;
    770       const uint32_t diff =
    771           (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1);
    772       const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size;
    773       if (idx1 == idx2) {
    774         continue;
    775       }
    776 
    777       // Calculate cost reduction on combining.
    778       curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
    779                                         tmp_histo, best_cost_diff);
    780       if (curr_cost_diff < best_cost_diff) {    // found a better pair?
    781         HistogramSwap(&best_combo, &tmp_histo);
    782         best_cost_diff = curr_cost_diff;
    783         best_idx1 = idx1;
    784         best_idx2 = idx2;
    785       }
    786     }
    787 
    788     if (best_idx1 >= 0) {
    789       HistogramSwap(&best_combo, &histograms[best_idx1]);
    790       // swap best_idx2 slot with last one (which is now unused)
    791       --image_histo_size;
    792       if (best_idx2 != image_histo_size) {
    793         HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
    794         histograms[image_histo_size] = NULL;
    795       }
    796       tries_with_no_success = 0;
    797     }
    798     if (++tries_with_no_success >= num_tries_no_success) {
    799       break;
    800     }
    801   }
    802   image_histo->size = image_histo_size;
    803   return best_combo;
    804 }
    805 
    806 // -----------------------------------------------------------------------------
    807 // Histogram refinement
    808 
    809 // Find the best 'out' histogram for each of the 'in' histograms.
    810 // Note: we assume that out[]->bit_cost_ is already up-to-date.
    811 static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
    812                            const VP8LHistogramSet* const image_histo,
    813                            uint16_t* const symbols) {
    814   int i;
    815   VP8LHistogram** const orig_histograms = orig_histo->histograms;
    816   VP8LHistogram** const histograms = image_histo->histograms;
    817   const int orig_histo_size = orig_histo->size;
    818   const int image_histo_size = image_histo->size;
    819   if (image_histo_size > 1) {
    820     for (i = 0; i < orig_histo_size; ++i) {
    821       int best_out = 0;
    822       double best_bits =
    823           HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
    824       int k;
    825       for (k = 1; k < image_histo_size; ++k) {
    826         const double cur_bits =
    827             HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
    828         if (cur_bits < best_bits) {
    829           best_bits = cur_bits;
    830           best_out = k;
    831         }
    832       }
    833       symbols[i] = best_out;
    834     }
    835   } else {
    836     assert(image_histo_size == 1);
    837     for (i = 0; i < orig_histo_size; ++i) {
    838       symbols[i] = 0;
    839     }
    840   }
    841 
    842   // Recompute each out based on raw and symbols.
    843   for (i = 0; i < image_histo_size; ++i) {
    844     HistogramClear(histograms[i]);
    845   }
    846 
    847   for (i = 0; i < orig_histo_size; ++i) {
    848     const int idx = symbols[i];
    849     VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]);
    850   }
    851 }
    852 
    853 static double GetCombineCostFactor(int histo_size, int quality) {
    854   double combine_cost_factor = 0.16;
    855   if (quality < 90) {
    856     if (histo_size > 256) combine_cost_factor /= 2.;
    857     if (histo_size > 512) combine_cost_factor /= 2.;
    858     if (histo_size > 1024) combine_cost_factor /= 2.;
    859     if (quality <= 50) combine_cost_factor /= 2.;
    860   }
    861   return combine_cost_factor;
    862 }
    863 
    864 int VP8LGetHistoImageSymbols(int xsize, int ysize,
    865                              const VP8LBackwardRefs* const refs,
    866                              int quality, int low_effort,
    867                              int histo_bits, int cache_bits,
    868                              VP8LHistogramSet* const image_histo,
    869                              VP8LHistogramSet* const tmp_histos,
    870                              uint16_t* const histogram_symbols) {
    871   int ok = 0;
    872   const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
    873   const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
    874   const int image_histo_raw_size = histo_xsize * histo_ysize;
    875   const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
    876 
    877   // The bin_map for every bin follows following semantics:
    878   // bin_map[n][0] = num_histo; // The number of histograms in that bin.
    879   // bin_map[n][1] = index of first histogram in that bin;
    880   // bin_map[n][num_histo] = index of last histogram in that bin;
    881   // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices.
    882   const int bin_depth = image_histo_raw_size + 1;
    883   int16_t* bin_map = NULL;
    884   VP8LHistogramSet* const orig_histo =
    885       VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
    886   VP8LHistogram* cur_combo;
    887   const int entropy_combine =
    888       (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
    889 
    890   if (orig_histo == NULL) goto Error;
    891 
    892   // Don't attempt linear bin-partition heuristic for:
    893   // histograms of small sizes, as bin_map will be very sparse and;
    894   // Maximum quality (q==100), to preserve the compression gains at that level.
    895   if (entropy_combine) {
    896     const int bin_map_size = bin_depth * entropy_combine_num_bins;
    897     bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
    898     if (bin_map == NULL) goto Error;
    899   }
    900 
    901   // Construct the histograms from backward references.
    902   HistogramBuild(xsize, histo_bits, refs, orig_histo);
    903   // Copies the histograms and computes its bit_cost.
    904   HistogramCopyAndAnalyze(orig_histo, image_histo);
    905 
    906   cur_combo = tmp_histos->histograms[1];  // pick up working slot
    907   if (entropy_combine) {
    908     const double combine_cost_factor =
    909         GetCombineCostFactor(image_histo_raw_size, quality);
    910     HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
    911     // Collapse histograms with similar entropy.
    912     cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map,
    913                                            bin_depth, entropy_combine_num_bins,
    914                                            combine_cost_factor, low_effort);
    915   }
    916 
    917   // Don't combine the histograms using stochastic and greedy heuristics for
    918   // low-effort compression mode.
    919   if (!low_effort || !entropy_combine) {
    920     const float x = quality / 100.f;
    921     // cubic ramp between 1 and MAX_HISTO_GREEDY:
    922     const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
    923     cur_combo = HistogramCombineStochastic(image_histo,
    924                                            tmp_histos->histograms[0],
    925                                            cur_combo, quality, threshold_size);
    926     if ((image_histo->size <= threshold_size) &&
    927         !HistogramCombineGreedy(image_histo, cur_combo)) {
    928       goto Error;
    929     }
    930   }
    931 
    932   // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
    933   // Find the optimal map from original histograms to the final ones.
    934   HistogramRemap(orig_histo, image_histo, histogram_symbols);
    935 
    936   ok = 1;
    937 
    938  Error:
    939   WebPSafeFree(bin_map);
    940   VP8LFreeHistogramSet(orig_histo);
    941   return ok;
    942 }
    943