1 /* NOLINT(build/header_guard) */ 2 /* Copyright 2015 Google Inc. All Rights Reserved. 3 4 Distributed under MIT license. 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 6 */ 7 8 /* template parameters: FN */ 9 10 #define HistogramType FN(Histogram) 11 12 /* Greedy block splitter for one block category (literal, command or distance). 13 */ 14 typedef struct FN(BlockSplitter) { 15 /* Alphabet size of particular block category. */ 16 size_t alphabet_size_; 17 /* We collect at least this many symbols for each block. */ 18 size_t min_block_size_; 19 /* We merge histograms A and B if 20 entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, 21 where A is the current histogram and B is the histogram of the last or the 22 second last block type. */ 23 double split_threshold_; 24 25 size_t num_blocks_; 26 BlockSplit* split_; /* not owned */ 27 HistogramType* histograms_; /* not owned */ 28 size_t* histograms_size_; /* not owned */ 29 30 /* The number of symbols that we want to collect before deciding on whether 31 or not to merge the block with a previous one or emit a new block. */ 32 size_t target_block_size_; 33 /* The number of symbols in the current histogram. */ 34 size_t block_size_; 35 /* Offset of the current histogram. */ 36 size_t curr_histogram_ix_; 37 /* Offset of the histograms of the previous two block types. */ 38 size_t last_histogram_ix_[2]; 39 /* Entropy of the previous two block types. */ 40 double last_entropy_[2]; 41 /* The number of times we merged the current block with the last one. */ 42 size_t merge_last_count_; 43 } FN(BlockSplitter); 44 45 static void FN(InitBlockSplitter)( 46 MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size, 47 size_t min_block_size, double split_threshold, size_t num_symbols, 48 BlockSplit* split, HistogramType** histograms, size_t* histograms_size) { 49 size_t max_num_blocks = num_symbols / min_block_size + 1; 50 /* We have to allocate one more histogram than the maximum number of block 51 types for the current histogram when the meta-block is too big. */ 52 size_t max_num_types = 53 BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1); 54 self->alphabet_size_ = alphabet_size; 55 self->min_block_size_ = min_block_size; 56 self->split_threshold_ = split_threshold; 57 self->num_blocks_ = 0; 58 self->split_ = split; 59 self->histograms_size_ = histograms_size; 60 self->target_block_size_ = min_block_size; 61 self->block_size_ = 0; 62 self->curr_histogram_ix_ = 0; 63 self->merge_last_count_ = 0; 64 BROTLI_ENSURE_CAPACITY(m, uint8_t, 65 split->types, split->types_alloc_size, max_num_blocks); 66 BROTLI_ENSURE_CAPACITY(m, uint32_t, 67 split->lengths, split->lengths_alloc_size, max_num_blocks); 68 if (BROTLI_IS_OOM(m)) return; 69 self->split_->num_blocks = max_num_blocks; 70 BROTLI_DCHECK(*histograms == 0); 71 *histograms_size = max_num_types; 72 *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size); 73 self->histograms_ = *histograms; 74 if (BROTLI_IS_OOM(m)) return; 75 /* Clear only current histogram. */ 76 FN(HistogramClear)(&self->histograms_[0]); 77 self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; 78 } 79 80 /* Does either of three things: 81 (1) emits the current block with a new block type; 82 (2) emits the current block with the type of the second last block; 83 (3) merges the current block with the last block. */ 84 static void FN(BlockSplitterFinishBlock)( 85 FN(BlockSplitter)* self, BROTLI_BOOL is_final) { 86 BlockSplit* split = self->split_; 87 double* last_entropy = self->last_entropy_; 88 HistogramType* histograms = self->histograms_; 89 self->block_size_ = 90 BROTLI_MAX(size_t, self->block_size_, self->min_block_size_); 91 if (self->num_blocks_ == 0) { 92 /* Create first block. */ 93 split->lengths[0] = (uint32_t)self->block_size_; 94 split->types[0] = 0; 95 last_entropy[0] = 96 BitsEntropy(histograms[0].data_, self->alphabet_size_); 97 last_entropy[1] = last_entropy[0]; 98 ++self->num_blocks_; 99 ++split->num_types; 100 ++self->curr_histogram_ix_; 101 if (self->curr_histogram_ix_ < *self->histograms_size_) 102 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 103 self->block_size_ = 0; 104 } else if (self->block_size_ > 0) { 105 double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, 106 self->alphabet_size_); 107 HistogramType combined_histo[2]; 108 double combined_entropy[2]; 109 double diff[2]; 110 size_t j; 111 for (j = 0; j < 2; ++j) { 112 size_t last_histogram_ix = self->last_histogram_ix_[j]; 113 combined_histo[j] = histograms[self->curr_histogram_ix_]; 114 FN(HistogramAddHistogram)(&combined_histo[j], 115 &histograms[last_histogram_ix]); 116 combined_entropy[j] = BitsEntropy( 117 &combined_histo[j].data_[0], self->alphabet_size_); 118 diff[j] = combined_entropy[j] - entropy - last_entropy[j]; 119 } 120 121 if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && 122 diff[0] > self->split_threshold_ && 123 diff[1] > self->split_threshold_) { 124 /* Create new block. */ 125 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 126 split->types[self->num_blocks_] = (uint8_t)split->num_types; 127 self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; 128 self->last_histogram_ix_[0] = (uint8_t)split->num_types; 129 last_entropy[1] = last_entropy[0]; 130 last_entropy[0] = entropy; 131 ++self->num_blocks_; 132 ++split->num_types; 133 ++self->curr_histogram_ix_; 134 if (self->curr_histogram_ix_ < *self->histograms_size_) 135 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 136 self->block_size_ = 0; 137 self->merge_last_count_ = 0; 138 self->target_block_size_ = self->min_block_size_; 139 } else if (diff[1] < diff[0] - 20.0) { 140 /* Combine this block with second last block. */ 141 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 142 split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; 143 BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); 144 histograms[self->last_histogram_ix_[0]] = combined_histo[1]; 145 last_entropy[1] = last_entropy[0]; 146 last_entropy[0] = combined_entropy[1]; 147 ++self->num_blocks_; 148 self->block_size_ = 0; 149 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 150 self->merge_last_count_ = 0; 151 self->target_block_size_ = self->min_block_size_; 152 } else { 153 /* Combine this block with last block. */ 154 split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; 155 histograms[self->last_histogram_ix_[0]] = combined_histo[0]; 156 last_entropy[0] = combined_entropy[0]; 157 if (split->num_types == 1) { 158 last_entropy[1] = last_entropy[0]; 159 } 160 self->block_size_ = 0; 161 FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); 162 if (++self->merge_last_count_ > 1) { 163 self->target_block_size_ += self->min_block_size_; 164 } 165 } 166 } 167 if (is_final) { 168 *self->histograms_size_ = split->num_types; 169 split->num_blocks = self->num_blocks_; 170 } 171 } 172 173 /* Adds the next symbol to the current histogram. When the current histogram 174 reaches the target size, decides on merging the block. */ 175 static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) { 176 FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol); 177 ++self->block_size_; 178 if (self->block_size_ == self->target_block_size_) { 179 FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE); 180 } 181 } 182 183 #undef HistogramType 184