Home | History | Annotate | Download | only in enc
      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2015 Google Inc. All Rights Reserved.
      3 
      4    Distributed under MIT license.
      5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN */
      9 
     10 #define HistogramType FN(Histogram)
     11 
     12 /* Greedy block splitter for one block category (literal, command or distance).
     13 */
     14 typedef struct FN(BlockSplitter) {
     15   /* Alphabet size of particular block category. */
     16   size_t alphabet_size_;
     17   /* We collect at least this many symbols for each block. */
     18   size_t min_block_size_;
     19   /* We merge histograms A and B if
     20        entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
     21      where A is the current histogram and B is the histogram of the last or the
     22      second last block type. */
     23   double split_threshold_;
     24 
     25   size_t num_blocks_;
     26   BlockSplit* split_;  /* not owned */
     27   HistogramType* histograms_;  /* not owned */
     28   size_t* histograms_size_;  /* not owned */
     29 
     30   /* The number of symbols that we want to collect before deciding on whether
     31      or not to merge the block with a previous one or emit a new block. */
     32   size_t target_block_size_;
     33   /* The number of symbols in the current histogram. */
     34   size_t block_size_;
     35   /* Offset of the current histogram. */
     36   size_t curr_histogram_ix_;
     37   /* Offset of the histograms of the previous two block types. */
     38   size_t last_histogram_ix_[2];
     39   /* Entropy of the previous two block types. */
     40   double last_entropy_[2];
     41   /* The number of times we merged the current block with the last one. */
     42   size_t merge_last_count_;
     43 } FN(BlockSplitter);
     44 
     45 static void FN(InitBlockSplitter)(
     46     MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
     47     size_t min_block_size, double split_threshold, size_t num_symbols,
     48     BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
     49   size_t max_num_blocks = num_symbols / min_block_size + 1;
     50   /* We have to allocate one more histogram than the maximum number of block
     51      types for the current histogram when the meta-block is too big. */
     52   size_t max_num_types =
     53       BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
     54   self->alphabet_size_ = alphabet_size;
     55   self->min_block_size_ = min_block_size;
     56   self->split_threshold_ = split_threshold;
     57   self->num_blocks_ = 0;
     58   self->split_ = split;
     59   self->histograms_size_ = histograms_size;
     60   self->target_block_size_ = min_block_size;
     61   self->block_size_ = 0;
     62   self->curr_histogram_ix_ = 0;
     63   self->merge_last_count_ = 0;
     64   BROTLI_ENSURE_CAPACITY(m, uint8_t,
     65       split->types, split->types_alloc_size, max_num_blocks);
     66   BROTLI_ENSURE_CAPACITY(m, uint32_t,
     67       split->lengths, split->lengths_alloc_size, max_num_blocks);
     68   if (BROTLI_IS_OOM(m)) return;
     69   self->split_->num_blocks = max_num_blocks;
     70   assert(*histograms == 0);
     71   *histograms_size = max_num_types;
     72   *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
     73   self->histograms_ = *histograms;
     74   if (BROTLI_IS_OOM(m)) return;
     75   /* Clear only current histogram. */
     76   FN(HistogramClear)(&self->histograms_[0]);
     77   self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
     78 }
     79 
     80 /* Does either of three things:
     81      (1) emits the current block with a new block type;
     82      (2) emits the current block with the type of the second last block;
     83      (3) merges the current block with the last block. */
     84 static void FN(BlockSplitterFinishBlock)(
     85     FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
     86   BlockSplit* split = self->split_;
     87   double* last_entropy = self->last_entropy_;
     88   HistogramType* histograms = self->histograms_;
     89   self->block_size_ =
     90       BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
     91   if (self->num_blocks_ == 0) {
     92     /* Create first block. */
     93     split->lengths[0] = (uint32_t)self->block_size_;
     94     split->types[0] = 0;
     95     last_entropy[0] =
     96         BitsEntropy(histograms[0].data_, self->alphabet_size_);
     97     last_entropy[1] = last_entropy[0];
     98     ++self->num_blocks_;
     99     ++split->num_types;
    100     ++self->curr_histogram_ix_;
    101     if (self->curr_histogram_ix_ < *self->histograms_size_)
    102       FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    103     self->block_size_ = 0;
    104   } else if (self->block_size_ > 0) {
    105     double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
    106                                  self->alphabet_size_);
    107     HistogramType combined_histo[2];
    108     double combined_entropy[2];
    109     double diff[2];
    110     size_t j;
    111     for (j = 0; j < 2; ++j) {
    112       size_t last_histogram_ix = self->last_histogram_ix_[j];
    113       combined_histo[j] = histograms[self->curr_histogram_ix_];
    114       FN(HistogramAddHistogram)(&combined_histo[j],
    115           &histograms[last_histogram_ix]);
    116       combined_entropy[j] = BitsEntropy(
    117           &combined_histo[j].data_[0], self->alphabet_size_);
    118       diff[j] = combined_entropy[j] - entropy - last_entropy[j];
    119     }
    120 
    121     if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
    122         diff[0] > self->split_threshold_ &&
    123         diff[1] > self->split_threshold_) {
    124       /* Create new block. */
    125       split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
    126       split->types[self->num_blocks_] = (uint8_t)split->num_types;
    127       self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
    128       self->last_histogram_ix_[0] = (uint8_t)split->num_types;
    129       last_entropy[1] = last_entropy[0];
    130       last_entropy[0] = entropy;
    131       ++self->num_blocks_;
    132       ++split->num_types;
    133       ++self->curr_histogram_ix_;
    134       if (self->curr_histogram_ix_ < *self->histograms_size_)
    135         FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    136       self->block_size_ = 0;
    137       self->merge_last_count_ = 0;
    138       self->target_block_size_ = self->min_block_size_;
    139     } else if (diff[1] < diff[0] - 20.0) {
    140       /* Combine this block with second last block. */
    141       split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
    142       split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
    143       BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
    144       histograms[self->last_histogram_ix_[0]] = combined_histo[1];
    145       last_entropy[1] = last_entropy[0];
    146       last_entropy[0] = combined_entropy[1];
    147       ++self->num_blocks_;
    148       self->block_size_ = 0;
    149       FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    150       self->merge_last_count_ = 0;
    151       self->target_block_size_ = self->min_block_size_;
    152     } else {
    153       /* Combine this block with last block. */
    154       split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
    155       histograms[self->last_histogram_ix_[0]] = combined_histo[0];
    156       last_entropy[0] = combined_entropy[0];
    157       if (split->num_types == 1) {
    158         last_entropy[1] = last_entropy[0];
    159       }
    160       self->block_size_ = 0;
    161       FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
    162       if (++self->merge_last_count_ > 1) {
    163         self->target_block_size_ += self->min_block_size_;
    164       }
    165     }
    166   }
    167   if (is_final) {
    168     *self->histograms_size_ = split->num_types;
    169     split->num_blocks = self->num_blocks_;
    170   }
    171 }
    172 
    173 /* Adds the next symbol to the current histogram. When the current histogram
    174    reaches the target size, decides on merging the block. */
    175 static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
    176   FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
    177   ++self->block_size_;
    178   if (self->block_size_ == self->target_block_size_) {
    179     FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
    180   }
    181 }
    182 
    183 #undef HistogramType
    184