1 /* Copyright 2015 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* Algorithms for distributing the literals and commands of a metablock between 8 block types and contexts. */ 9 10 #include "./metablock.h" 11 12 #include "../common/constants.h" 13 #include <brotli/types.h> 14 #include "./bit_cost.h" 15 #include "./block_splitter.h" 16 #include "./cluster.h" 17 #include "./context.h" 18 #include "./entropy_encode.h" 19 #include "./histogram.h" 20 #include "./memory.h" 21 #include "./port.h" 22 #include "./quality.h" 23 24 #if defined(__cplusplus) || defined(c_plusplus) 25 extern "C" { 26 #endif 27 28 void BrotliBuildMetaBlock(MemoryManager* m, 29 const uint8_t* ringbuffer, 30 const size_t pos, 31 const size_t mask, 32 const BrotliEncoderParams* params, 33 uint8_t prev_byte, 34 uint8_t prev_byte2, 35 const Command* cmds, 36 size_t num_commands, 37 ContextType literal_context_mode, 38 MetaBlockSplit* mb) { 39 /* Histogram ids need to fit in one byte. */ 40 static const size_t kMaxNumberOfHistograms = 256; 41 HistogramDistance* distance_histograms; 42 HistogramLiteral* literal_histograms; 43 ContextType* literal_context_modes = NULL; 44 size_t literal_histograms_size; 45 size_t distance_histograms_size; 46 size_t i; 47 size_t literal_context_multiplier = 1; 48 49 BrotliSplitBlock(m, cmds, num_commands, 50 ringbuffer, pos, mask, params, 51 &mb->literal_split, 52 &mb->command_split, 53 &mb->distance_split); 54 if (BROTLI_IS_OOM(m)) return; 55 56 if (!params->disable_literal_context_modeling) { 57 literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS; 58 literal_context_modes = 59 BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types); 60 if (BROTLI_IS_OOM(m)) return; 61 for (i = 0; i < mb->literal_split.num_types; ++i) { 62 literal_context_modes[i] = literal_context_mode; 63 } 64 } 65 66 literal_histograms_size = 67 mb->literal_split.num_types * literal_context_multiplier; 68 literal_histograms = 69 BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size); 70 if (BROTLI_IS_OOM(m)) return; 71 ClearHistogramsLiteral(literal_histograms, literal_histograms_size); 72 73 distance_histograms_size = 74 mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS; 75 distance_histograms = 76 BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size); 77 if (BROTLI_IS_OOM(m)) return; 78 ClearHistogramsDistance(distance_histograms, distance_histograms_size); 79 80 assert(mb->command_histograms == 0); 81 mb->command_histograms_size = mb->command_split.num_types; 82 mb->command_histograms = 83 BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size); 84 if (BROTLI_IS_OOM(m)) return; 85 ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size); 86 87 BrotliBuildHistogramsWithContext(cmds, num_commands, 88 &mb->literal_split, &mb->command_split, &mb->distance_split, 89 ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, 90 literal_histograms, mb->command_histograms, distance_histograms); 91 BROTLI_FREE(m, literal_context_modes); 92 93 assert(mb->literal_context_map == 0); 94 mb->literal_context_map_size = 95 mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS; 96 mb->literal_context_map = 97 BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size); 98 if (BROTLI_IS_OOM(m)) return; 99 100 assert(mb->literal_histograms == 0); 101 mb->literal_histograms_size = mb->literal_context_map_size; 102 mb->literal_histograms = 103 BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size); 104 if (BROTLI_IS_OOM(m)) return; 105 106 BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size, 107 kMaxNumberOfHistograms, mb->literal_histograms, 108 &mb->literal_histograms_size, mb->literal_context_map); 109 if (BROTLI_IS_OOM(m)) return; 110 BROTLI_FREE(m, literal_histograms); 111 112 if (params->disable_literal_context_modeling) { 113 /* Distribute assignment to all contexts. */ 114 for (i = mb->literal_split.num_types; i != 0;) { 115 size_t j = 0; 116 i--; 117 for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) { 118 mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] = 119 mb->literal_context_map[i]; 120 } 121 } 122 } 123 124 assert(mb->distance_context_map == 0); 125 mb->distance_context_map_size = 126 mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS; 127 mb->distance_context_map = 128 BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size); 129 if (BROTLI_IS_OOM(m)) return; 130 131 assert(mb->distance_histograms == 0); 132 mb->distance_histograms_size = mb->distance_context_map_size; 133 mb->distance_histograms = 134 BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size); 135 if (BROTLI_IS_OOM(m)) return; 136 137 BrotliClusterHistogramsDistance(m, distance_histograms, 138 mb->distance_context_map_size, 139 kMaxNumberOfHistograms, 140 mb->distance_histograms, 141 &mb->distance_histograms_size, 142 mb->distance_context_map); 143 if (BROTLI_IS_OOM(m)) return; 144 BROTLI_FREE(m, distance_histograms); 145 } 146 147 #define FN(X) X ## Literal 148 #include "./metablock_inc.h" /* NOLINT(build/include) */ 149 #undef FN 150 151 #define FN(X) X ## Command 152 #include "./metablock_inc.h" /* NOLINT(build/include) */ 153 #undef FN 154 155 #define FN(X) X ## Distance 156 #include "./metablock_inc.h" /* NOLINT(build/include) */ 157 #undef FN 158 159 #define BROTLI_MAX_STATIC_CONTEXTS 13 160 161 /* Greedy block splitter for one block category (literal, command or distance). 162 Gathers histograms for all context buckets. */ 163 typedef struct ContextBlockSplitter { 164 /* Alphabet size of particular block category. */ 165 size_t alphabet_size_; 166 size_t num_contexts_; 167 size_t max_block_types_; 168 /* We collect at least this many symbols for each block. */ 169 size_t min_block_size_; 170 /* We merge histograms A and B if 171 entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, 172 where A is the current histogram and B is the histogram of the last or the 173 second last block type. */ 174 double split_threshold_; 175 176 size_t num_blocks_; 177 BlockSplit* split_; /* not owned */ 178 HistogramLiteral* histograms_; /* not owned */ 179 size_t* histograms_size_; /* not owned */ 180 181 /* The number of symbols that we want to collect before deciding on whether 182 or not to merge the block with a previous one or emit a new block. */ 183 size_t target_block_size_; 184 /* The number of symbols in the current histogram. */ 185 size_t block_size_; 186 /* Offset of the current histogram. */ 187 size_t curr_histogram_ix_; 188 /* Offset of the histograms of the previous two block types. */ 189 size_t last_histogram_ix_[2]; 190 /* Entropy of the previous two block types. */ 191 double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS]; 192 /* The number of times we merged the current block with the last one. */ 193 size_t merge_last_count_; 194 } ContextBlockSplitter; 195 196 static void InitContextBlockSplitter( 197 MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size, 198 size_t num_contexts, size_t min_block_size, double split_threshold, 199 size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms, 200 size_t* histograms_size) { 201 size_t max_num_blocks = num_symbols / min_block_size + 1; 202 size_t max_num_types; 203 assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS); 204 205 self->alphabet_size_ = alphabet_size; 206 self->num_contexts_ = num_contexts; 207 self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts; 208 self->min_block_size_ = min_block_size; 209 self->split_threshold_ = split_threshold; 210 self->num_blocks_ = 0; 211 self->split_ = split; 212 self->histograms_size_ = histograms_size; 213 self->target_block_size_ = min_block_size; 214 self->block_size_ = 0; 215 self->curr_histogram_ix_ = 0; 216 self->merge_last_count_ = 0; 217 218 /* We have to allocate one more histogram than the maximum number of block 219 types for the current histogram when the meta-block is too big. */ 220 max_num_types = 221 BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1); 222 BROTLI_ENSURE_CAPACITY(m, uint8_t, 223 split->types, split->types_alloc_size, max_num_blocks); 224 BROTLI_ENSURE_CAPACITY(m, uint32_t, 225 split->lengths, split->lengths_alloc_size, max_num_blocks); 226 if (BROTLI_IS_OOM(m)) return; 227 split->num_blocks = max_num_blocks; 228 if (BROTLI_IS_OOM(m)) return; 229 assert(*histograms == 0); 230 *histograms_size = max_num_types * num_contexts; 231 *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size); 232 self->histograms_ = *histograms; 233 if (BROTLI_IS_OOM(m)) return; 234 /* Clear only current histogram. */ 235 ClearHistogramsLiteral(&self->histograms_[0], num_contexts); 236 self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; 237 } 238 239 /* Does either of three things: 240 (1) emits the current block with a new block type; 241 (2) emits the current block with the type of the second last block; 242 (3) merges the current block with the last block. */ 243 static void ContextBlockSplitterFinishBlock( 244 ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) { 245 BlockSplit* split = self->split_; 246 const size_t num_contexts = self->num_contexts_; 247 double* last_entropy = self->last_entropy_; 248 HistogramLiteral* histograms = self->histograms_; 249 250 if (self->block_size_ < self->min_block_size_) { 251 self->block_size_ = self->min_block_size_; 252 } 253 if (self->num_blocks_ == 0) { 254 size_t i; 255 /* Create first block. */ 256 split->lengths[0] = (uint32_t)self->block_size_; 257 split->types[0] = 0; 258 259 for (i = 0; i < num_contexts; ++i) { 260 last_entropy[i] = 261 BitsEntropy(histograms[i].data_, self->alphabet_size_); 262 last_entropy[num_contexts + i] = last_entropy[i]; 263 } 264 ++self->num_blocks_; 265 ++split->num_types; 266 self->curr_histogram_ix_ += num_contexts; 267 if (self->curr_histogram_ix_ < *self->histograms_size_) { 268 ClearHistogramsLiteral( 269 &self->histograms_[self->curr_histogram_ix_], self->num_contexts_); 270 } 271 self->block_size_ = 0; 272 } else if (self->block_size_ > 0) { 273 /* Try merging the set of histograms for the current block type with the 274 respective set of histograms for the last and second last block types. 275 Decide over the split based on the total reduction of entropy across 276 all contexts. */ 277 double entropy[BROTLI_MAX_STATIC_CONTEXTS]; 278 HistogramLiteral* combined_histo = 279 BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts); 280 double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS]; 281 double diff[2] = { 0.0 }; 282 size_t i; 283 if (BROTLI_IS_OOM(m)) return; 284 for (i = 0; i < num_contexts; ++i) { 285 size_t curr_histo_ix = self->curr_histogram_ix_ + i; 286 size_t j; 287 entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_, 288 self->alphabet_size_); 289 for (j = 0; j < 2; ++j) { 290 size_t jx = j * num_contexts + i; 291 size_t last_histogram_ix = self->last_histogram_ix_[j] + i; 292 combined_histo[jx] = histograms[curr_histo_ix]; 293 HistogramAddHistogramLiteral(&combined_histo[jx], 294 &histograms[last_histogram_ix]); 295 combined_entropy[jx] = BitsEntropy( 296 &combined_histo[jx].data_[0], self->alphabet_size_); 297 diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx]; 298 } 299 } 300 301 if (split->num_types < self->max_block_types_ && 302 diff[0] > self->split_threshold_ && 303 diff[1] > self->split_threshold_) { 304 /* Create new block. */ 305 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 306 split->types[self->num_blocks_] = (uint8_t)split->num_types; 307 self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; 308 self->last_histogram_ix_[0] = split->num_types * num_contexts; 309 for (i = 0; i < num_contexts; ++i) { 310 last_entropy[num_contexts + i] = last_entropy[i]; 311 last_entropy[i] = entropy[i]; 312 } 313 ++self->num_blocks_; 314 ++split->num_types; 315 self->curr_histogram_ix_ += num_contexts; 316 if (self->curr_histogram_ix_ < *self->histograms_size_) { 317 ClearHistogramsLiteral( 318 &self->histograms_[self->curr_histogram_ix_], self->num_contexts_); 319 } 320 self->block_size_ = 0; 321 self->merge_last_count_ = 0; 322 self->target_block_size_ = self->min_block_size_; 323 } else if (diff[1] < diff[0] - 20.0) { 324 /* Combine this block with second last block. */ 325 split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; 326 split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; 327 BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); 328 for (i = 0; i < num_contexts; ++i) { 329 histograms[self->last_histogram_ix_[0] + i] = 330 combined_histo[num_contexts + i]; 331 last_entropy[num_contexts + i] = last_entropy[i]; 332 last_entropy[i] = combined_entropy[num_contexts + i]; 333 HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]); 334 } 335 ++self->num_blocks_; 336 self->block_size_ = 0; 337 self->merge_last_count_ = 0; 338 self->target_block_size_ = self->min_block_size_; 339 } else { 340 /* Combine this block with last block. */ 341 split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; 342 for (i = 0; i < num_contexts; ++i) { 343 histograms[self->last_histogram_ix_[0] + i] = combined_histo[i]; 344 last_entropy[i] = combined_entropy[i]; 345 if (split->num_types == 1) { 346 last_entropy[num_contexts + i] = last_entropy[i]; 347 } 348 HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]); 349 } 350 self->block_size_ = 0; 351 if (++self->merge_last_count_ > 1) { 352 self->target_block_size_ += self->min_block_size_; 353 } 354 } 355 BROTLI_FREE(m, combined_histo); 356 } 357 if (is_final) { 358 *self->histograms_size_ = split->num_types * num_contexts; 359 split->num_blocks = self->num_blocks_; 360 } 361 } 362 363 /* Adds the next symbol to the current block type and context. When the 364 current block reaches the target size, decides on merging the block. */ 365 static void ContextBlockSplitterAddSymbol( 366 ContextBlockSplitter* self, MemoryManager* m, 367 size_t symbol, size_t context) { 368 HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context], 369 symbol); 370 ++self->block_size_; 371 if (self->block_size_ == self->target_block_size_) { 372 ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE); 373 if (BROTLI_IS_OOM(m)) return; 374 } 375 } 376 377 static void MapStaticContexts(MemoryManager* m, 378 size_t num_contexts, 379 const uint32_t* static_context_map, 380 MetaBlockSplit* mb) { 381 size_t i; 382 assert(mb->literal_context_map == 0); 383 mb->literal_context_map_size = 384 mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS; 385 mb->literal_context_map = 386 BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size); 387 if (BROTLI_IS_OOM(m)) return; 388 389 for (i = 0; i < mb->literal_split.num_types; ++i) { 390 uint32_t offset = (uint32_t)(i * num_contexts); 391 size_t j; 392 for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) { 393 mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] = 394 offset + static_context_map[j]; 395 } 396 } 397 } 398 399 static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( 400 MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, 401 uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, 402 const size_t num_contexts, const uint32_t* static_context_map, 403 const Command *commands, size_t n_commands, MetaBlockSplit* mb) { 404 union { 405 BlockSplitterLiteral plain; 406 ContextBlockSplitter ctx; 407 } lit_blocks; 408 BlockSplitterCommand cmd_blocks; 409 BlockSplitterDistance dist_blocks; 410 size_t num_literals = 0; 411 size_t i; 412 for (i = 0; i < n_commands; ++i) { 413 num_literals += commands[i].insert_len_; 414 } 415 416 if (num_contexts == 1) { 417 InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0, 418 num_literals, &mb->literal_split, &mb->literal_histograms, 419 &mb->literal_histograms_size); 420 } else { 421 InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0, 422 num_literals, &mb->literal_split, &mb->literal_histograms, 423 &mb->literal_histograms_size); 424 } 425 if (BROTLI_IS_OOM(m)) return; 426 InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024, 427 500.0, n_commands, &mb->command_split, &mb->command_histograms, 428 &mb->command_histograms_size); 429 if (BROTLI_IS_OOM(m)) return; 430 InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands, 431 &mb->distance_split, &mb->distance_histograms, 432 &mb->distance_histograms_size); 433 if (BROTLI_IS_OOM(m)) return; 434 435 for (i = 0; i < n_commands; ++i) { 436 const Command cmd = commands[i]; 437 size_t j; 438 BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_); 439 for (j = cmd.insert_len_; j != 0; --j) { 440 uint8_t literal = ringbuffer[pos & mask]; 441 if (num_contexts == 1) { 442 BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal); 443 } else { 444 size_t context = Context(prev_byte, prev_byte2, literal_context_mode); 445 ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal, 446 static_context_map[context]); 447 if (BROTLI_IS_OOM(m)) return; 448 } 449 prev_byte2 = prev_byte; 450 prev_byte = literal; 451 ++pos; 452 } 453 pos += CommandCopyLen(&cmd); 454 if (CommandCopyLen(&cmd)) { 455 prev_byte2 = ringbuffer[(pos - 2) & mask]; 456 prev_byte = ringbuffer[(pos - 1) & mask]; 457 if (cmd.cmd_prefix_ >= 128) { 458 BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_); 459 } 460 } 461 } 462 463 if (num_contexts == 1) { 464 BlockSplitterFinishBlockLiteral( 465 &lit_blocks.plain, /* is_final = */ BROTLI_TRUE); 466 } else { 467 ContextBlockSplitterFinishBlock( 468 &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE); 469 if (BROTLI_IS_OOM(m)) return; 470 } 471 BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE); 472 BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE); 473 474 if (num_contexts > 1) { 475 MapStaticContexts(m, num_contexts, static_context_map, mb); 476 } 477 } 478 479 void BrotliBuildMetaBlockGreedy(MemoryManager* m, 480 const uint8_t* ringbuffer, 481 size_t pos, 482 size_t mask, 483 uint8_t prev_byte, 484 uint8_t prev_byte2, 485 ContextType literal_context_mode, 486 size_t num_contexts, 487 const uint32_t* static_context_map, 488 const Command* commands, 489 size_t n_commands, 490 MetaBlockSplit* mb) { 491 if (num_contexts == 1) { 492 BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, 493 prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb); 494 } else { 495 BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, 496 prev_byte2, literal_context_mode, num_contexts, static_context_map, 497 commands, n_commands, mb); 498 } 499 } 500 501 void BrotliOptimizeHistograms(size_t num_direct_distance_codes, 502 size_t distance_postfix_bits, 503 MetaBlockSplit* mb) { 504 uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS]; 505 size_t num_distance_codes; 506 size_t i; 507 for (i = 0; i < mb->literal_histograms_size; ++i) { 508 BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_, 509 good_for_rle); 510 } 511 for (i = 0; i < mb->command_histograms_size; ++i) { 512 BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS, 513 mb->command_histograms[i].data_, 514 good_for_rle); 515 } 516 num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES + 517 num_direct_distance_codes + 518 ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits); 519 for (i = 0; i < mb->distance_histograms_size; ++i) { 520 BrotliOptimizeHuffmanCountsForRle(num_distance_codes, 521 mb->distance_histograms[i].data_, 522 good_for_rle); 523 } 524 } 525 526 #if defined(__cplusplus) || defined(c_plusplus) 527 } /* extern "C" */ 528 #endif 529