1 /* Copyright 2013 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 /* Implementation of Brotli compressor. */ 8 9 #include <brotli/encode.h> 10 11 #include <stdlib.h> /* free, malloc */ 12 #include <string.h> /* memcpy, memset */ 13 14 #include "../common/version.h" 15 #include "./backward_references.h" 16 #include "./backward_references_hq.h" 17 #include "./bit_cost.h" 18 #include "./brotli_bit_stream.h" 19 #include "./compress_fragment.h" 20 #include "./compress_fragment_two_pass.h" 21 #include "./context.h" 22 #include "./entropy_encode.h" 23 #include "./fast_log.h" 24 #include "./hash.h" 25 #include "./histogram.h" 26 #include "./memory.h" 27 #include "./metablock.h" 28 #include "./port.h" 29 #include "./prefix.h" 30 #include "./quality.h" 31 #include "./ringbuffer.h" 32 #include "./utf8_util.h" 33 #include "./write_bits.h" 34 35 #if defined(__cplusplus) || defined(c_plusplus) 36 extern "C" { 37 #endif 38 39 #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); 40 41 typedef enum BrotliEncoderStreamState { 42 /* Default state. */ 43 BROTLI_STREAM_PROCESSING = 0, 44 /* Intermediate state; after next block is emitted, byte-padding should be 45 performed before getting back to default state. */ 46 BROTLI_STREAM_FLUSH_REQUESTED = 1, 47 /* Last metablock was produced; no more input is acceptable. */ 48 BROTLI_STREAM_FINISHED = 2, 49 /* Flushing compressed block and writing meta-data block header. */ 50 BROTLI_STREAM_METADATA_HEAD = 3, 51 /* Writing metadata block body. 
*/ 52 BROTLI_STREAM_METADATA_BODY = 4 53 } BrotliEncoderStreamState; 54 55 typedef struct BrotliEncoderStateStruct { 56 BrotliEncoderParams params; 57 58 MemoryManager memory_manager_; 59 60 HasherHandle hasher_; 61 uint64_t input_pos_; 62 RingBuffer ringbuffer_; 63 size_t cmd_alloc_size_; 64 Command* commands_; 65 size_t num_commands_; 66 size_t num_literals_; 67 size_t last_insert_len_; 68 uint64_t last_flush_pos_; 69 uint64_t last_processed_pos_; 70 int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; 71 int saved_dist_cache_[4]; 72 uint8_t last_byte_; 73 uint8_t last_byte_bits_; 74 uint8_t prev_byte_; 75 uint8_t prev_byte2_; 76 size_t storage_size_; 77 uint8_t* storage_; 78 /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */ 79 int small_table_[1 << 10]; /* 4KiB */ 80 int* large_table_; /* Allocated only when needed */ 81 size_t large_table_size_; 82 /* Command and distance prefix codes (each 64 symbols, stored back-to-back) 83 used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command 84 prefix code is over a smaller alphabet with the following 64 symbols: 85 0 - 15: insert length code 0, copy length code 0 - 15, same distance 86 16 - 39: insert length code 0, copy length code 0 - 23 87 40 - 63: insert length code 0 - 23, copy length code 0 88 Note that symbols 16 and 40 represent the same code in the full alphabet, 89 but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */ 90 uint8_t cmd_depths_[128]; 91 uint16_t cmd_bits_[128]; 92 /* The compressed form of the command and distance prefix codes for the next 93 block in FAST_ONE_PASS_COMPRESSION_QUALITY. */ 94 uint8_t cmd_code_[512]; 95 size_t cmd_code_numbits_; 96 /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ 97 uint32_t* command_buf_; 98 uint8_t* literal_buf_; 99 100 uint8_t* next_out_; 101 size_t available_out_; 102 size_t total_out_; 103 /* Temporary buffer for padding flush bits or metadata block header / body. 
*/ 104 union { 105 uint64_t u64[2]; 106 uint8_t u8[16]; 107 } tiny_buf_; 108 uint32_t remaining_metadata_bytes_; 109 BrotliEncoderStreamState stream_state_; 110 111 BROTLI_BOOL is_last_block_emitted_; 112 BROTLI_BOOL is_initialized_; 113 } BrotliEncoderStateStruct; 114 115 static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s); 116 117 static size_t InputBlockSize(BrotliEncoderState* s) { 118 if (!EnsureInitialized(s)) return 0; 119 return (size_t)1 << s->params.lgblock; 120 } 121 122 static uint64_t UnprocessedInputSize(BrotliEncoderState* s) { 123 return s->input_pos_ - s->last_processed_pos_; 124 } 125 126 static size_t RemainingInputBlockSize(BrotliEncoderState* s) { 127 const uint64_t delta = UnprocessedInputSize(s); 128 size_t block_size = InputBlockSize(s); 129 if (delta >= block_size) return 0; 130 return block_size - (size_t)delta; 131 } 132 133 BROTLI_BOOL BrotliEncoderSetParameter( 134 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) { 135 /* Changing parameters on the fly is not implemented yet. */ 136 if (state->is_initialized_) return BROTLI_FALSE; 137 /* TODO: Validate/clamp parameters here. 
 */
  switch (p) {
    case BROTLI_PARAM_MODE:
      state->params.mode = (BrotliEncoderMode)value;
      return BROTLI_TRUE;

    case BROTLI_PARAM_QUALITY:
      state->params.quality = (int)value;
      return BROTLI_TRUE;

    case BROTLI_PARAM_LGWIN:
      state->params.lgwin = (int)value;
      return BROTLI_TRUE;

    case BROTLI_PARAM_LGBLOCK:
      state->params.lgblock = (int)value;
      return BROTLI_TRUE;

    case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
      /* Only a strict boolean is accepted for this parameter. */
      if ((value != 0) && (value != 1)) return BROTLI_FALSE;
      state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
      return BROTLI_TRUE;

    case BROTLI_PARAM_SIZE_HINT:
      state->params.size_hint = value;
      return BROTLI_TRUE;

    default: return BROTLI_FALSE;
  }
}

/* Re-encodes dist_prefix_/dist_extra_ of every copy command for the given
   direct-distance-code / postfix-bit configuration. A no-op when both are
   zero, since commands were produced with that default encoding. Only
   commands with cmd_prefix_ >= 128 carry an explicit distance. */
static void RecomputeDistancePrefixes(Command* cmds,
                                      size_t num_commands,
                                      uint32_t num_direct_distance_codes,
                                      uint32_t distance_postfix_bits) {
  size_t i;
  if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
    return;
  }
  for (i = 0; i < num_commands; ++i) {
    Command* cmd = &cmds[i];
    if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
      PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd),
                               num_direct_distance_codes,
                               distance_postfix_bits,
                               &cmd->dist_prefix_,
                               &cmd->dist_extra_);
    }
  }
}

/* Wraps 64-bit input position to 32-bit ring-buffer position preserving
   "not-a-first-lap" feature. */
static uint32_t WrapPosition(uint64_t position) {
  uint32_t result = (uint32_t)position;
  uint64_t gb = position >> 30;
  if (gb > 2) {
    /* Wrap every 2GiB; The first 3GB are continuous.
       Keep the low 30 bits and alternate bit 30/31 so that a wrapped
       position is never mistaken for a first-lap position. */
    result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;
  }
  return result;
}

/* Returns a scratch buffer of at least |size| bytes, growing (never
   shrinking) the cached allocation. NULL on OOM. */
static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
  MemoryManager* m = &s->memory_manager_;
  if (s->storage_size_ < size) {
    BROTLI_FREE(m, s->storage_);
    s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
    if (BROTLI_IS_OOM(m)) return NULL;
    s->storage_size_ = size;
  }
  return s->storage_;
}

/* Smallest power of two >= 256 that reaches either the quality-imposed
   maximum or the input size. */
static size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;
  while (htsize < max_table_size && htsize < input_size) {
    htsize <<= 1;
  }
  return htsize;
}

static int* GetHashTable(BrotliEncoderState* s, int quality,
                         size_t input_size, size_t* table_size) {
  /* Use smaller hash table when input.size() is smaller, since we
     fill the table, incurring O(hash table size) overhead for
     compression, and if the input is short, we won't need that
     many hash table entries anyway. */
  MemoryManager* m = &s->memory_manager_;
  const size_t max_table_size = MaxHashTableSize(quality);
  size_t htsize = HashTableSize(max_table_size, input_size);
  int* table;
  /* NOTE(review): plain assert() — confirm <assert.h> is pulled in via one
     of the project headers (e.g. ./port.h). */
  assert(max_table_size >= 256);
  if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    /* Only odd shifts are supported by fast-one-pass.
*/ 232 if ((htsize & 0xAAAAA) == 0) { 233 htsize <<= 1; 234 } 235 } 236 237 if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) { 238 table = s->small_table_; 239 } else { 240 if (htsize > s->large_table_size_) { 241 s->large_table_size_ = htsize; 242 BROTLI_FREE(m, s->large_table_); 243 s->large_table_ = BROTLI_ALLOC(m, int, htsize); 244 if (BROTLI_IS_OOM(m)) return 0; 245 } 246 table = s->large_table_; 247 } 248 249 *table_size = htsize; 250 memset(table, 0, htsize * sizeof(*table)); 251 return table; 252 } 253 254 static void EncodeWindowBits(int lgwin, uint8_t* last_byte, 255 uint8_t* last_byte_bits) { 256 if (lgwin == 16) { 257 *last_byte = 0; 258 *last_byte_bits = 1; 259 } else if (lgwin == 17) { 260 *last_byte = 1; 261 *last_byte_bits = 7; 262 } else if (lgwin > 17) { 263 *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); 264 *last_byte_bits = 4; 265 } else { 266 *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); 267 *last_byte_bits = 7; 268 } 269 } 270 271 /* Initializes the command and distance prefix codes for the first block. 
*/ 272 static void InitCommandPrefixCodes(uint8_t cmd_depths[128], 273 uint16_t cmd_bits[128], 274 uint8_t cmd_code[512], 275 size_t* cmd_code_numbits) { 276 static const uint8_t kDefaultCommandDepths[128] = { 277 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 278 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 279 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6, 280 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 281 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 283 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10, 284 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 285 }; 286 static const uint16_t kDefaultCommandBits[128] = { 287 0, 0, 8, 9, 3, 35, 7, 71, 288 39, 103, 23, 47, 175, 111, 239, 31, 289 0, 0, 0, 4, 12, 2, 10, 6, 290 13, 29, 11, 43, 27, 59, 87, 55, 291 15, 79, 319, 831, 191, 703, 447, 959, 292 0, 14, 1, 25, 5, 21, 19, 51, 293 119, 159, 95, 223, 479, 991, 63, 575, 294 127, 639, 383, 895, 255, 767, 511, 1023, 295 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 296 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12, 297 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255, 298 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095, 299 }; 300 static const uint8_t kDefaultCommandCode[] = { 301 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6, 302 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c, 303 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa, 304 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94, 305 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00, 306 }; 307 static const size_t kDefaultCommandCodeNumBits = 448; 308 COPY_ARRAY(cmd_depths, kDefaultCommandDepths); 309 COPY_ARRAY(cmd_bits, kDefaultCommandBits); 310 311 /* Initialize the pre-compressed form of the command and distance prefix 312 codes. 
*/ 313 COPY_ARRAY(cmd_code, kDefaultCommandCode); 314 *cmd_code_numbits = kDefaultCommandCodeNumBits; 315 } 316 317 /* Decide about the context map based on the ability of the prediction 318 ability of the previous byte UTF8-prefix on the next byte. The 319 prediction ability is calculated as Shannon entropy. Here we need 320 Shannon entropy instead of 'BitsEntropy' since the prefix will be 321 encoded with the remaining 6 bits of the following byte, and 322 BitsEntropy will assume that symbol to be stored alone using Huffman 323 coding. */ 324 static void ChooseContextMap(int quality, 325 uint32_t* bigram_histo, 326 size_t* num_literal_contexts, 327 const uint32_t** literal_context_map) { 328 static const uint32_t kStaticContextMapContinuation[64] = { 329 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 333 }; 334 static const uint32_t kStaticContextMapSimpleUTF8[64] = { 335 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339 }; 340 341 uint32_t monogram_histo[3] = { 0 }; 342 uint32_t two_prefix_histo[6] = { 0 }; 343 size_t total; 344 size_t i; 345 size_t dummy; 346 double entropy[4]; 347 for (i = 0; i < 9; ++i) { 348 monogram_histo[i % 3] += bigram_histo[i]; 349 two_prefix_histo[i % 6] += bigram_histo[i]; 350 } 351 entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy); 352 entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) + 353 ShannonEntropy(two_prefix_histo + 3, 3, &dummy)); 354 entropy[3] = 0; 355 for (i = 0; i < 3; ++i) { 356 entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy); 357 } 358 359 total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2]; 360 assert(total != 0); 361 entropy[0] = 1.0 / (double)total; 362 entropy[1] *= 
entropy[0]; 363 entropy[2] *= entropy[0]; 364 entropy[3] *= entropy[0]; 365 366 if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) { 367 /* 3 context models is a bit slower, don't use it at lower qualities. */ 368 entropy[3] = entropy[1] * 10; 369 } 370 /* If expected savings by symbol are less than 0.2 bits, skip the 371 context modeling -- in exchange for faster decoding speed. */ 372 if (entropy[1] - entropy[2] < 0.2 && 373 entropy[1] - entropy[3] < 0.2) { 374 *num_literal_contexts = 1; 375 } else if (entropy[2] - entropy[3] < 0.02) { 376 *num_literal_contexts = 2; 377 *literal_context_map = kStaticContextMapSimpleUTF8; 378 } else { 379 *num_literal_contexts = 3; 380 *literal_context_map = kStaticContextMapContinuation; 381 } 382 } 383 384 /* Decide if we want to use a more complex static context map containing 13 385 context values, based on the entropy reduction of histograms over the 386 first 5 bits of literals. */ 387 static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, 388 size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint, 389 size_t* num_literal_contexts, const uint32_t** literal_context_map) { 390 static const uint32_t kStaticContextMapComplexUTF8[64] = { 391 11, 11, 12, 12, /* 0 special */ 392 0, 0, 0, 0, /* 4 lf */ 393 1, 1, 9, 9, /* 8 space */ 394 2, 2, 2, 2, /* !, first after space/lf and after something else. */ 395 1, 1, 1, 1, /* " */ 396 8, 3, 3, 3, /* % */ 397 1, 1, 1, 1, /* ({[ */ 398 2, 2, 2, 2, /* }]) */ 399 8, 4, 4, 4, /* :; */ 400 8, 7, 4, 4, /* . */ 401 8, 0, 0, 0, /* > */ 402 3, 3, 3, 3, /* [0..9] */ 403 5, 5, 10, 5, /* [A-Z] */ 404 5, 5, 10, 5, 405 6, 6, 6, 6, /* [a-z] */ 406 6, 6, 6, 6, 407 }; 408 BROTLI_UNUSED(quality); 409 /* Try the more complex static context map only for long data. 
 */
  if (size_hint < (1 << 20)) {
    return BROTLI_FALSE;
  } else {
    const size_t end_pos = start_pos + length;
    /* To make entropy calculations faster and to fit on the stack, we collect
       histograms over the 5 most significant bits of literals. One histogram
       without context and 13 additional histograms for each context value. */
    uint32_t combined_histo[32] = { 0 };
    uint32_t context_histo[13][32] = { { 0 } };
    uint32_t total = 0;
    double entropy[3];
    size_t dummy;
    size_t i;
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      const size_t stride_end_pos = start_pos + 64;
      uint8_t prev2 = input[start_pos & mask];
      uint8_t prev1 = input[(start_pos + 1) & mask];
      size_t pos;
      /* To make the analysis of the data faster we only examine 64 byte long
         strides at every 4kB intervals. */
      for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
            Context(prev1, prev2, CONTEXT_UTF8)];
        ++total;
        ++combined_histo[literal >> 3];
        ++context_histo[context][literal >> 3];
        prev2 = prev1;
        prev1 = literal;
      }
    }
    /* entropy[1]: context-free bits/symbol; entropy[2]: with 13 contexts. */
    entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
    entropy[2] = 0;
    for (i = 0; i < 13; ++i) {
      entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);
    }
    entropy[0] = 1.0 / (double)total;
    entropy[1] *= entropy[0];
    entropy[2] *= entropy[0];
    /* The triggering heuristics below were tuned by compressing the individual
       files of the silesia corpus. If we skip this kind of context modeling
       for not very well compressible input (i.e. entropy using context modeling
       is 60% of maximal entropy) or if expected savings by symbol are less
       than 0.2 bits, then in every case when it triggers, the final compression
       ratio is improved. Note however that this heuristics might be too strict
       for some cases and could be tuned further. */
    if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
      return BROTLI_FALSE;
    } else {
      *num_literal_contexts = 13;
      *literal_context_map = kStaticContextMapComplexUTF8;
      return BROTLI_TRUE;
    }
  }
}

/* Picks the literal context map for the next metablock: leaves the defaults
   (1 context) for low quality or short input, otherwise tries the complex
   13-context map and falls back to bigram-based selection. */
static void DecideOverLiteralContextModeling(const uint8_t* input,
    size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
    size_t* num_literal_contexts, const uint32_t** literal_context_map) {
  if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
    return;
  } else if (ShouldUseComplexStaticContextMap(
      input, start_pos, length, mask, quality, size_hint,
      num_literal_contexts, literal_context_map)) {
    /* Context map was already set, nothing else to do. */
  } else {
    /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
       UTF8 data faster we only examine 64 byte long strides at every 4kB
       intervals. */
    const size_t end_pos = start_pos + length;
    uint32_t bigram_prefix_histo[9] = { 0 };
    for (; start_pos + 64 <= end_pos; start_pos += 4096) {
      /* lut maps the top-2 bits of a byte to {ASCII, continuation, lead}. */
      static const int lut[4] = { 0, 0, 1, 2 };
      const size_t stride_end_pos = start_pos + 64;
      int prev = lut[input[start_pos & mask] >> 6] * 3;
      size_t pos;
      for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
        const uint8_t literal = input[pos & mask];
        ++bigram_prefix_histo[prev + lut[literal >> 6]];
        prev = lut[literal >> 6] * 3;
      }
    }
    ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                     literal_context_map);
  }
}

/* Heuristic: returns BROTLI_FALSE when the block looks incompressible —
   few commands relative to size, nearly all literals, and a sampled
   literal histogram whose entropy exceeds 7.92 bits/byte. */
static BROTLI_BOOL ShouldCompress(
    const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
    const size_t bytes, const size_t num_literals, const size_t num_commands) {
  if (num_commands < (bytes >> 8) + 2) {
    if (num_literals > 0.99 * (double)bytes) {
      uint32_t literal_histo[256] = { 0 };
      static const uint32_t kSampleRate = 13;
      static const double kMinEntropy = 7.92;
      const double bit_cost_threshold =
          (double)bytes * kMinEntropy / kSampleRate;
      size_t t = (bytes + kSampleRate - 1) / kSampleRate;
      uint32_t pos = (uint32_t)last_flush_pos;
      size_t i;
      /* Sample every kSampleRate-th literal. */
      for (i = 0; i < t; i++) {
        ++literal_histo[data[pos & mask]];
        pos += kSampleRate;
      }
      if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
        return BROTLI_FALSE;
      }
    }
  }
  return BROTLI_TRUE;
}

/* Serializes one metablock into |storage| at bit offset |*storage_ix|.
   Chooses between: empty metablock (bytes == 0), uncompressed metablock
   (incompressible data), fast/trivial/full entropy-coded forms depending
   on quality. Falls back to the uncompressed form if the compressed output
   turned out larger than bytes + 4. */
static void WriteMetaBlockInternal(MemoryManager* m,
                                   const uint8_t* data,
                                   const size_t mask,
                                   const uint64_t last_flush_pos,
                                   const size_t bytes,
                                   const BROTLI_BOOL is_last,
                                   const BrotliEncoderParams* params,
                                   const uint8_t prev_byte,
                                   const uint8_t prev_byte2,
                                   const size_t num_literals,
                                   const size_t num_commands,
                                   Command* commands,
                                   const int* saved_dist_cache,
                                   int* dist_cache,
                                   size_t* storage_ix,
                                   uint8_t* storage) {
  const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
  uint8_t last_byte;
  uint8_t last_byte_bits;
  uint32_t num_direct_distance_codes = 0;
  uint32_t distance_postfix_bits = 0;

  if (bytes == 0) {
    /* Write the ISLAST and ISEMPTY bits. */
    BrotliWriteBits(2, 3, storage_ix, storage);
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
    /* Restore the distance cache, as its last update by
       CreateBackwardReferences is now unused. */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask, bytes,
                                     storage_ix, storage);
    return;
  }

  /* Remember the output position so the compressed attempt can be rolled
     back and replaced by an uncompressed metablock below. */
  last_byte = storage[0];
  last_byte_bits = (uint8_t)(*storage_ix & 0xff);
  if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES &&
      params->mode == BROTLI_MODE_FONT) {
    num_direct_distance_codes = 12;
    distance_postfix_bits = 1;
    RecomputeDistancePrefixes(commands,
                              num_commands,
                              num_direct_distance_codes,
                              distance_postfix_bits);
  }
  if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
    BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
                             bytes, mask, is_last,
                             commands, num_commands,
                             storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
    BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
                                bytes, mask, is_last,
                                commands, num_commands,
                                storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
  } else {
    ContextType literal_context_mode = CONTEXT_UTF8;
    MetaBlockSplit mb;
    InitMetaBlockSplit(&mb);
    if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
      size_t num_literal_contexts = 1;
      const uint32_t* literal_context_map = NULL;
      if (!params->disable_literal_context_modeling) {
        DecideOverLiteralContextModeling(
            data, wrapped_last_flush_pos, bytes, mask, params->quality,
            params->size_hint, &num_literal_contexts,
            &literal_context_map);
      }
      BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
          prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
          literal_context_map, commands, num_commands, &mb);
      if (BROTLI_IS_OOM(m)) return;
    } else {
      if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,
                              kMinUTF8Ratio)) {
        literal_context_mode = CONTEXT_SIGNED;
      }
      BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) return;
    }
    if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
      BrotliOptimizeHistograms(num_direct_distance_codes,
                               distance_postfix_bits,
                               &mb);
    }
    BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
                         prev_byte, prev_byte2,
                         is_last,
                         num_direct_distance_codes,
                         distance_postfix_bits,
                         literal_context_mode,
                         commands, num_commands,
                         &mb,
                         storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    DestroyMetaBlockSplit(m, &mb);
  }
  if (bytes + 4 < (*storage_ix >> 3)) {
    /* Restore the distance cache and last byte.
 */
    memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
    storage[0] = last_byte;
    *storage_ix = last_byte_bits;
    BrotliStoreUncompressedMetaBlock(is_last, data,
                                     wrapped_last_flush_pos, mask,
                                     bytes, storage_ix, storage);
  }
}

/* Lazily finalizes parameters and writes the stream header into the
   pending last byte. Idempotent; returns BROTLI_FALSE only when a
   previous OOM was recorded in the memory manager. */
static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
  if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
  if (s->is_initialized_) return BROTLI_TRUE;

  SanitizeParams(&s->params);
  s->params.lgblock = ComputeLgBlock(&s->params);

  s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;

  RingBufferSetup(&s->params, &s->ringbuffer_);

  /* Initialize last byte with stream header. */
  {
    int lgwin = s->params.lgwin;
    if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
        s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
      /* Fast qualities advertise a window of at least 18 bits. */
      lgwin = BROTLI_MAX(int, lgwin, 18);
    }
    EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_);
  }

  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
    InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
                           s->cmd_code_, &s->cmd_code_numbits_);
  }

  s->is_initialized_ = BROTLI_TRUE;
  return BROTLI_TRUE;
}

/* Resets encoder parameters to library defaults. */
static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
  params->mode = BROTLI_DEFAULT_MODE;
  params->quality = BROTLI_DEFAULT_QUALITY;
  params->lgwin = BROTLI_DEFAULT_WINDOW;
  params->lgblock = 0;
  params->size_hint = 0;
  params->disable_literal_context_modeling = BROTLI_FALSE;
}

/* Puts a freshly created encoder state into its default condition.
   Does not touch memory_manager_, which the caller initializes. */
static void BrotliEncoderInitState(BrotliEncoderState* s) {
  BrotliEncoderInitParams(&s->params);
  s->input_pos_ = 0;
  s->num_commands_ = 0;
  s->num_literals_ = 0;
  s->last_insert_len_ = 0;
  s->last_flush_pos_ = 0;
  s->last_processed_pos_ = 0;
  s->prev_byte_ = 0;
  s->prev_byte2_ = 0;
  s->storage_size_ = 0;
  s->storage_ = 0;
  s->hasher_ = NULL;
  s->large_table_ = NULL;
  s->large_table_size_ = 0;
  s->cmd_code_numbits_ = 0;
  s->command_buf_ = NULL;
  s->literal_buf_ = NULL;
  s->next_out_ = NULL;
  s->available_out_ = 0;
  s->total_out_ = 0;
  s->stream_state_ = BROTLI_STREAM_PROCESSING;
  s->is_last_block_emitted_ = BROTLI_FALSE;
  s->is_initialized_ = BROTLI_FALSE;

  RingBufferInit(&s->ringbuffer_);

  s->commands_ = 0;
  s->cmd_alloc_size_ = 0;

  /* Initialize distance cache. */
  s->dist_cache_[0] = 4;
  s->dist_cache_[1] = 11;
  s->dist_cache_[2] = 15;
  s->dist_cache_[3] = 16;
  /* Save the state of the distance cache in case we need to restore it for
     emitting an uncompressed block. */
  memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}

/* Allocates and initializes an encoder instance. Custom allocators must be
   provided as a pair (both or neither); a half-specified pair yields NULL. */
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
                                                brotli_free_func free_func,
                                                void* opaque) {
  BrotliEncoderState* state = 0;
  if (!alloc_func && !free_func) {
    state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
  } else if (alloc_func && free_func) {
    state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));
  }
  if (state == 0) {
    /* BROTLI_DUMP(); */
    return 0;
  }
  BrotliInitMemoryManager(
      &state->memory_manager_, alloc_func, free_func, opaque);
  BrotliEncoderInitState(state);
  return state;
}

/* Releases all heap memory owned by the state (but not the state itself).
   After an OOM, the memory manager performs its own wipe-out instead. */
static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
  MemoryManager* m = &s->memory_manager_;
  if (BROTLI_IS_OOM(m)) {
    BrotliWipeOutMemoryManager(m);
    return;
  }
  BROTLI_FREE(m, s->storage_);
  BROTLI_FREE(m, s->commands_);
  RingBufferFree(m, &s->ringbuffer_);
  DestroyHasher(m, &s->hasher_);
  BROTLI_FREE(m, s->large_table_);
  BROTLI_FREE(m, s->command_buf_);
  BROTLI_FREE(m, s->literal_buf_);
}

/* Deinitializes and frees BrotliEncoderState instance.
   The free function and opaque pointer are copied out first, since the
   cleanup invalidates the state they live in. */
void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
  if (!state) {
    return;
  } else {
    MemoryManager* m = &state->memory_manager_;
    brotli_free_func free_func = m->free_func;
    void* opaque = m->opaque;
    BrotliEncoderCleanupState(state);
    free_func(opaque, state);
  }
}

/*
   Copies the given input data to the internal ring buffer of the compressor.
   No processing of the data occurs at this time and this function can be
   called multiple times before calling WriteBrotliData() to process the
   accumulated input. At most input_block_size() bytes of input data can be
   copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
 */
static void CopyInputToRingBuffer(BrotliEncoderState* s,
                                  const size_t input_size,
                                  const uint8_t* input_buffer) {
  RingBuffer* ringbuffer_ = &s->ringbuffer_;
  MemoryManager* m = &s->memory_manager_;
  if (!EnsureInitialized(s)) return;
  RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
  if (BROTLI_IS_OOM(m)) return;
  s->input_pos_ += input_size;

  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
     hashing not depend on uninitialized data. This makes compression
     deterministic and it prevents uninitialized memory warnings in Valgrind.
     Even without erasing, the output would be valid (but nondeterministic).

     Background information: The compressor stores short (at most 8 bytes)
     substrings of the input already read in a hash table, and detects
     repetitions by looking up such substrings in the hash table. If it
     can find a substring, it checks whether the substring is really there
     in the ring buffer (or it's just a hash collision). Should the hash
     table become corrupt, this check makes sure that the output is
     still valid, albeit the compression ratio would be bad.

     The compressor populates the hash table from the ring buffer as it's
     reading new bytes from the input. However, at the last few indexes of
     the ring buffer, there are not enough bytes to build full-length
     substrings from. Since the hash table always contains full-length
     substrings, we erase with dummy zeros here to make sure that those
     substrings will contain zeros at the end instead of uninitialized
     data.

     Please note that erasing is not necessary (because the
     memory region is already initialized since the ring buffer
     has a `tail' that holds a copy of the beginning,) so we
     skip erasing if we have already gone around at least once in
     the ring buffer.

     Only clear during the first round of ring-buffer writes. On
     subsequent rounds data in the ring-buffer would be affected. */
  if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
    /* This is the first time when the ring buffer is being written.
       We clear 7 bytes just after the bytes that have been copied from
       the input buffer.

       The ring-buffer has a "tail" that holds a copy of the beginning,
       but only once the ring buffer has been fully written once, i.e.,
       pos <= mask. For the first time, we need to write values
       in this tail (where index may be larger than mask), so that
       we have exactly defined behavior and don't read uninitialized
       memory. Due to performance reasons, hashing reads data using a
       LOAD64, which can go 7 bytes beyond the bytes written in the
       ring-buffer. */
    memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
  }
}

/* Marks all input as processed.
   Returns true if position wrapping occurs.
 */
static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
  uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
  uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
  s->last_processed_pos_ = s->input_pos_;
  /* Wrapping happened iff the new wrapped position is behind the old one. */
  return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
}

/*
   Processes the accumulated input data and sets |*out_size| to the length of
   the new output meta-block, or to zero if no new output meta-block has been
   created (in this case the processed input data is buffered internally).
   If |*out_size| is positive, |*output| points to the start of the output
   data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
   always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
   to 7 bits of the last byte of output. To force encoder to dump the remaining
   bits use WriteMetadata() to append an empty meta-data block.
   Returns BROTLI_FALSE if the size of the input data is larger than
   input_block_size().
 */
static BROTLI_BOOL EncodeData(
    BrotliEncoderState* s, const BROTLI_BOOL is_last,
    const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
  const uint64_t delta = UnprocessedInputSize(s);
  const uint32_t bytes = (uint32_t)delta;
  const uint32_t wrapped_last_processed_pos =
      WrapPosition(s->last_processed_pos_);
  uint8_t* data;
  uint32_t mask;
  MemoryManager* m = &s->memory_manager_;
  const BrotliDictionary* dictionary = BrotliGetDictionary();

  if (!EnsureInitialized(s)) return BROTLI_FALSE;
  data = s->ringbuffer_.buffer_;
  mask = s->ringbuffer_.mask_;

  /* Adding more blocks after "last" block is forbidden.
*/ 869 if (s->is_last_block_emitted_) return BROTLI_FALSE; 870 if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE; 871 872 if (delta > InputBlockSize(s)) { 873 return BROTLI_FALSE; 874 } 875 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY && 876 !s->command_buf_) { 877 s->command_buf_ = 878 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); 879 s->literal_buf_ = 880 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); 881 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 882 } 883 884 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || 885 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 886 uint8_t* storage; 887 size_t storage_ix = s->last_byte_bits_; 888 size_t table_size; 889 int* table; 890 891 if (delta == 0 && !is_last) { 892 /* We have no new input data and we don't have to finish the stream, so 893 nothing to do. */ 894 *out_size = 0; 895 return BROTLI_TRUE; 896 } 897 storage = GetBrotliStorage(s, 2 * bytes + 502); 898 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 899 storage[0] = s->last_byte_; 900 table = GetHashTable(s, s->params.quality, bytes, &table_size); 901 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 902 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { 903 BrotliCompressFragmentFast( 904 m, &data[wrapped_last_processed_pos & mask], 905 bytes, is_last, 906 table, table_size, 907 s->cmd_depths_, s->cmd_bits_, 908 &s->cmd_code_numbits_, s->cmd_code_, 909 &storage_ix, storage); 910 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 911 } else { 912 BrotliCompressFragmentTwoPass( 913 m, &data[wrapped_last_processed_pos & mask], 914 bytes, is_last, 915 s->command_buf_, s->literal_buf_, 916 table, table_size, 917 &storage_ix, storage); 918 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 919 } 920 s->last_byte_ = storage[storage_ix >> 3]; 921 s->last_byte_bits_ = storage_ix & 7u; 922 UpdateLastProcessedPos(s); 923 *output = &storage[0]; 924 *out_size = storage_ix >> 3; 925 return BROTLI_TRUE; 926 } 927 928 { 929 /* 
Theoretical max number of commands is 1 per 2 bytes. */ 930 size_t newsize = s->num_commands_ + bytes / 2 + 1; 931 if (newsize > s->cmd_alloc_size_) { 932 Command* new_commands; 933 /* Reserve a bit more memory to allow merging with a next block 934 without reallocation: that would impact speed. */ 935 newsize += (bytes / 4) + 16; 936 s->cmd_alloc_size_ = newsize; 937 new_commands = BROTLI_ALLOC(m, Command, newsize); 938 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 939 if (s->commands_) { 940 memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_); 941 BROTLI_FREE(m, s->commands_); 942 } 943 s->commands_ = new_commands; 944 } 945 } 946 947 InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params, 948 wrapped_last_processed_pos, bytes, is_last); 949 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 950 951 if (s->params.quality == ZOPFLIFICATION_QUALITY) { 952 assert(s->params.hasher.type == 10); 953 BrotliCreateZopfliBackwardReferences( 954 m, dictionary, bytes, wrapped_last_processed_pos, data, mask, 955 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 956 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 957 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 958 } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) { 959 assert(s->params.hasher.type == 10); 960 BrotliCreateHqZopfliBackwardReferences( 961 m, dictionary, bytes, wrapped_last_processed_pos, data, mask, 962 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 963 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 964 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 965 } else { 966 BrotliCreateBackwardReferences( 967 dictionary, bytes, wrapped_last_processed_pos, data, mask, 968 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 969 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 970 } 971 972 { 973 const size_t max_length = MaxMetablockSize(&s->params); 974 const size_t max_literals = max_length 
/ 8; 975 const size_t max_commands = max_length / 8; 976 const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_); 977 /* If maximal possible additional block doesn't fit metablock, flush now. */ 978 /* TODO: Postpone decision until next block arrives? */ 979 const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL( 980 processed_bytes + InputBlockSize(s) <= max_length); 981 /* If block splitting is not used, then flush as soon as there is some 982 amount of commands / literals produced. */ 983 const BROTLI_BOOL should_flush = TO_BROTLI_BOOL( 984 s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT && 985 s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS); 986 if (!is_last && !force_flush && !should_flush && 987 next_input_fits_metablock && 988 s->num_literals_ < max_literals && 989 s->num_commands_ < max_commands) { 990 /* Merge with next input block. Everything will happen later. */ 991 if (UpdateLastProcessedPos(s)) { 992 HasherReset(s->hasher_); 993 } 994 *out_size = 0; 995 return BROTLI_TRUE; 996 } 997 } 998 999 /* Create the last insert-only command. */ 1000 if (s->last_insert_len_ > 0) { 1001 InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_); 1002 s->num_literals_ += s->last_insert_len_; 1003 s->last_insert_len_ = 0; 1004 } 1005 1006 if (!is_last && s->input_pos_ == s->last_flush_pos_) { 1007 /* We have no new input data and we don't have to finish the stream, so 1008 nothing to do. 
*/ 1009 *out_size = 0; 1010 return BROTLI_TRUE; 1011 } 1012 assert(s->input_pos_ >= s->last_flush_pos_); 1013 assert(s->input_pos_ > s->last_flush_pos_ || is_last); 1014 assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24); 1015 { 1016 const uint32_t metablock_size = 1017 (uint32_t)(s->input_pos_ - s->last_flush_pos_); 1018 uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); 1019 size_t storage_ix = s->last_byte_bits_; 1020 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1021 storage[0] = s->last_byte_; 1022 WriteMetaBlockInternal( 1023 m, data, mask, s->last_flush_pos_, metablock_size, is_last, 1024 &s->params, s->prev_byte_, s->prev_byte2_, 1025 s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, 1026 s->dist_cache_, &storage_ix, storage); 1027 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1028 s->last_byte_ = storage[storage_ix >> 3]; 1029 s->last_byte_bits_ = storage_ix & 7u; 1030 s->last_flush_pos_ = s->input_pos_; 1031 if (UpdateLastProcessedPos(s)) { 1032 HasherReset(s->hasher_); 1033 } 1034 if (s->last_flush_pos_ > 0) { 1035 s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask]; 1036 } 1037 if (s->last_flush_pos_ > 1) { 1038 s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask]; 1039 } 1040 s->num_commands_ = 0; 1041 s->num_literals_ = 0; 1042 /* Save the state of the distance cache in case we need to restore it for 1043 emitting an uncompressed block. */ 1044 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_)); 1045 *output = &storage[0]; 1046 *out_size = storage_ix >> 3; 1047 return BROTLI_TRUE; 1048 } 1049 } 1050 1051 /* Dumps remaining output bits and metadata header to |header|. 1052 Returns number of produced bytes. 1053 REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long. 1054 REQUIRED: |block_size| <= (1 << 24). 
*/
static size_t WriteMetadataHeader(
    BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
  size_t storage_ix;
  /* Start from the pending partial byte of the previous meta-block, then
     clear it: the header is padded to a whole byte below. */
  storage_ix = s->last_byte_bits_;
  header[0] = s->last_byte_;
  s->last_byte_ = 0;
  s->last_byte_bits_ = 0;

  /* ISLAST = 0. */
  BrotliWriteBits(1, 0, &storage_ix, header);
  /* MNIBBLES = "11" marks a metadata meta-block (see RFC 7932). */
  BrotliWriteBits(2, 3, &storage_ix, header);
  /* Reserved bit, must be zero. */
  BrotliWriteBits(1, 0, &storage_ix, header);
  if (block_size == 0) {
    /* MSKIPBYTES = 0: empty metadata block. */
    BrotliWriteBits(2, 0, &storage_ix, header);
  } else {
    /* MSKIPBYTES = number of bytes needed to encode (block_size - 1),
       followed by that little-endian length value. */
    uint32_t nbits = (block_size == 1) ? 0 :
        (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
    uint32_t nbytes = (nbits + 7) / 8;
    BrotliWriteBits(2, nbytes, &storage_ix, header);
    BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
  }
  /* Round up: the metadata body starts on a byte boundary. */
  return (storage_ix + 7u) >> 3;
}

/* One-shot quality-10 (HQ Zopfli) compression of |input_buffer| directly into
   |encoded_buffer|. On entry |*encoded_size| is the output capacity; on
   success it is set to the produced length. Returns BROTLI_FALSE if the
   output did not fit or on OOM. The input is split into meta-blocks of at
   most 2^min(24, lgwin + 1) bytes, each stored compressed or, if that would
   expand it, uncompressed. */
static BROTLI_BOOL BrotliCompressBufferQuality10(
    int lgwin, size_t input_size, const uint8_t* input_buffer,
    size_t* encoded_size, uint8_t* encoded_buffer) {
  MemoryManager memory_manager;
  MemoryManager* m = &memory_manager;

  /* The whole input is addressed linearly; mask is effectively a no-op. */
  const size_t mask = BROTLI_SIZE_MAX >> 1;
  const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(lgwin);
  int dist_cache[4] = { 4, 11, 15, 16 };
  int saved_dist_cache[4] = { 4, 11, 15, 16 };
  BROTLI_BOOL ok = BROTLI_TRUE;
  const size_t max_out_size = *encoded_size;
  size_t total_out_size = 0;
  uint8_t last_byte;
  uint8_t last_byte_bits;
  HasherHandle hasher = NULL;

  const size_t hasher_eff_size =
      BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);

  BrotliEncoderParams params;
  const BrotliDictionary* dictionary = BrotliGetDictionary();

  const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
  size_t max_block_size;
  const size_t max_metablock_size = (size_t)1 << lgmetablock;
  const size_t max_literals_per_metablock = max_metablock_size / 8;
  const size_t max_commands_per_metablock = max_metablock_size / 8;
  size_t metablock_start = 0;
  uint8_t prev_byte = 0;
  uint8_t prev_byte2 = 0;

  BrotliEncoderInitParams(&params);
  params.quality = 10;
  params.lgwin = lgwin;
  SanitizeParams(&params);
  params.lgblock = ComputeLgBlock(&params);
  max_block_size = (size_t)1 << params.lgblock;

  BrotliInitMemoryManager(m, 0, 0, 0);

  assert(input_size <= mask + 1);
  EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
  InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
                              0, hasher_eff_size, BROTLI_TRUE);
  if (BROTLI_IS_OOM(m)) goto oom;

  while (ok && metablock_start < input_size) {
    const size_t metablock_end =
        BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
    const size_t expected_num_commands =
        (metablock_end - metablock_start) / 12 + 16;
    Command* commands = 0;
    size_t num_commands = 0;
    size_t last_insert_len = 0;
    size_t num_literals = 0;
    size_t metablock_size = 0;
    size_t cmd_alloc_size = 0;
    BROTLI_BOOL is_last;
    uint8_t* storage;
    size_t storage_ix;

    /* Accumulate Zopfli commands block by block until the meta-block is
       full (or the literal/command budget is exceeded). */
    size_t block_start;
    for (block_start = metablock_start; block_start < metablock_end; ) {
      size_t block_size =
          BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
      ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
      size_t path_size;
      size_t new_cmd_alloc_size;
      if (BROTLI_IS_OOM(m)) goto oom;
      BrotliInitZopfliNodes(nodes, block_size + 1);
      StitchToPreviousBlockH10(hasher, block_size, block_start,
                               input_buffer, mask);
      path_size = BrotliZopfliComputeShortestPath(
          m, dictionary, block_size, block_start, input_buffer, mask, &params,
          max_backward_limit, dist_cache, hasher, nodes);
      if (BROTLI_IS_OOM(m)) goto oom;
      /* We allocate a command buffer in the first iteration of this loop that
         will be likely big enough for the whole metablock, so that for most
         inputs we will not have to reallocate in later iterations. We do the
         allocation here and not before the loop, because if the input is
         small, this will be allocated after the Zopfli cost model is freed,
         so this will not increase peak memory usage.
         TODO: If the first allocation is too small, increase command
         buffer size exponentially. */
      new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
                                      num_commands + path_size + 1);
      if (cmd_alloc_size != new_cmd_alloc_size) {
        Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
        if (BROTLI_IS_OOM(m)) goto oom;
        cmd_alloc_size = new_cmd_alloc_size;
        if (commands) {
          memcpy(new_commands, commands, sizeof(Command) * num_commands);
          BROTLI_FREE(m, commands);
        }
        commands = new_commands;
      }
      BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit,
                                 &nodes[0], dist_cache, &last_insert_len,
                                 &params, &commands[num_commands],
                                 &num_literals);
      num_commands += path_size;
      block_start += block_size;
      metablock_size += block_size;
      BROTLI_FREE(m, nodes);
      if (num_literals > max_literals_per_metablock ||
          num_commands > max_commands_per_metablock) {
        break;
      }
    }

    if (last_insert_len > 0) {
      InitInsertCommand(&commands[num_commands++], last_insert_len);
      num_literals += last_insert_len;
    }

    is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
    storage = NULL;
    storage_ix = last_byte_bits;

    if (metablock_size == 0) {
      /* Write the ISLAST and ISEMPTY bits. */
      storage = BROTLI_ALLOC(m, uint8_t, 16);
      if (BROTLI_IS_OOM(m)) goto oom;
      storage[0] = last_byte;
      BrotliWriteBits(2, 3, &storage_ix, storage);
      storage_ix = (storage_ix + 7u) & ~7u;
    } else if (!ShouldCompress(input_buffer, mask, metablock_start,
                               metablock_size, num_literals, num_commands)) {
      /* Restore the distance cache, as its last update by
         CreateBackwardReferences is now unused. */
      memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
      storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
      if (BROTLI_IS_OOM(m)) goto oom;
      storage[0] = last_byte;
      BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
                                       metablock_start, mask, metablock_size,
                                       &storage_ix, storage);
    } else {
      uint32_t num_direct_distance_codes = 0;
      uint32_t distance_postfix_bits = 0;
      ContextType literal_context_mode = CONTEXT_UTF8;
      MetaBlockSplit mb;
      InitMetaBlockSplit(&mb);
      if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask,
                              metablock_size, kMinUTF8Ratio)) {
        literal_context_mode = CONTEXT_SIGNED;
      }
      BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, &params,
                           prev_byte, prev_byte2,
                           commands, num_commands,
                           literal_context_mode,
                           &mb);
      if (BROTLI_IS_OOM(m)) goto oom;
      BrotliOptimizeHistograms(num_direct_distance_codes,
                               distance_postfix_bits,
                               &mb);
      storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502);
      if (BROTLI_IS_OOM(m)) goto oom;
      storage[0] = last_byte;
      BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
                           mask, prev_byte, prev_byte2,
                           is_last,
                           num_direct_distance_codes,
                           distance_postfix_bits,
                           literal_context_mode,
                           commands, num_commands,
                           &mb,
                           &storage_ix, storage);
      if (BROTLI_IS_OOM(m)) goto oom;
      /* If the compressed form expanded the data, fall back to storing it
         uncompressed instead. */
      if (metablock_size + 4 < (storage_ix >> 3)) {
        /* Restore the distance cache and last byte. */
        memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
        storage[0] = last_byte;
        storage_ix = last_byte_bits;
        BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
                                         metablock_start, mask,
                                         metablock_size, &storage_ix, storage);
      }
      DestroyMetaBlockSplit(m, &mb);
    }
    last_byte = storage[storage_ix >> 3];
    last_byte_bits = storage_ix & 7u;
    metablock_start += metablock_size;
    prev_byte = input_buffer[metablock_start - 1];
    prev_byte2 = input_buffer[metablock_start - 2];
    /* Save the state of the distance cache in case we need to restore it for
       emitting an uncompressed block. */
    memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));

    {
      const size_t out_size = storage_ix >> 3;
      total_out_size += out_size;
      if (total_out_size <= max_out_size) {
        memcpy(encoded_buffer, storage, out_size);
        encoded_buffer += out_size;
      } else {
        ok = BROTLI_FALSE;
      }
    }
    BROTLI_FREE(m, storage);
    BROTLI_FREE(m, commands);
  }

  *encoded_size = total_out_size;
  DestroyHasher(m, &hasher);
  return ok;

oom:
  BrotliWipeOutMemoryManager(m);
  return BROTLI_FALSE;
}

/* Upper bound on the size of a Brotli stream wrapping |input_size| bytes
   uncompressed; 0 if the bound itself overflows size_t. */
size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
  /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
  size_t num_large_blocks = input_size >> 24;
  size_t tail = input_size - (num_large_blocks << 24);
  size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3;
  size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1;
  size_t result = input_size + overhead;
  if (input_size == 0) return 1;
  return (result < input_size) ? 0 : result;
}

/* Wraps data to uncompressed brotli stream with minimal window size.
   |output| should point at region with at least BrotliEncoderMaxCompressedSize
   addressable bytes.
   Returns the length of stream.
*/
static size_t MakeUncompressedStream(
    const uint8_t* input, size_t input_size, uint8_t* output) {
  size_t size = input_size;
  size_t result = 0;
  size_t offset = 0;
  if (input_size == 0) {
    /* Shortest valid stream: single byte 6 (empty last block). */
    output[0] = 6;
    return 1;
  }
  output[result++] = 0x21;  /* window bits = 10, is_last = false */
  output[result++] = 0x03;  /* empty metadata, padding */
  /* Emit uncompressed meta-blocks of at most 2^24 bytes each. */
  while (size > 0) {
    uint32_t nibbles = 0;
    uint32_t chunk_size;
    uint32_t bits;
    chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
    if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
    bits =
        (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
    output[result++] = (uint8_t)bits;
    output[result++] = (uint8_t)(bits >> 8);
    output[result++] = (uint8_t)(bits >> 16);
    if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
    memcpy(&output[result], &input[offset], chunk_size);
    result += chunk_size;
    offset += chunk_size;
    size -= chunk_size;
  }
  output[result++] = 3;  /* Empty last meta-block. */
  return result;
}

/* One-shot public compression entry point. On entry |*encoded_size| is the
   capacity of |encoded_buffer|; on success it is set to the produced length.
   Falls back to wrapping the input uncompressed when the compressed stream
   would exceed BrotliEncoderMaxCompressedSize() and the output buffer is
   large enough for that. */
BROTLI_BOOL BrotliEncoderCompress(
    int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
    const uint8_t* input_buffer, size_t* encoded_size,
    uint8_t* encoded_buffer) {
  BrotliEncoderState* s;
  size_t out_size = *encoded_size;
  const uint8_t* input_start = input_buffer;
  uint8_t* output_start = encoded_buffer;
  size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
  if (out_size == 0) {
    /* Output buffer needs at least one byte. */
    return BROTLI_FALSE;
  }
  if (input_size == 0) {
    /* Handle the special case of empty input. */
    *encoded_size = 1;
    *encoded_buffer = 6;
    return BROTLI_TRUE;
  }
  if (quality == 10) {
    /* TODO: Implement this direct path for all quality levels. */
    const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS,
                                  BROTLI_MAX(int, 16, lgwin));
    int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
                                           encoded_size, encoded_buffer);
    if (!ok || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }

  s = BrotliEncoderCreateInstance(0, 0, 0);
  if (!s) {
    return BROTLI_FALSE;
  } else {
    /* Drive the streaming API over the whole buffer in one FINISH call. */
    size_t available_in = input_size;
    const uint8_t* next_in = input_buffer;
    size_t available_out = *encoded_size;
    uint8_t* next_out = encoded_buffer;
    size_t total_out = 0;
    BROTLI_BOOL result = BROTLI_FALSE;
    BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
    BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
    result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, &total_out);
    if (!BrotliEncoderIsFinished(s)) result = 0;
    *encoded_size = total_out;
    BrotliEncoderDestroyInstance(s);
    if (!result || (max_out_size && *encoded_size > max_out_size)) {
      goto fallback;
    }
    return BROTLI_TRUE;
  }
fallback:
  *encoded_size = 0;
  if (!max_out_size) return BROTLI_FALSE;
  if (out_size >= max_out_size) {
    *encoded_size =
        MakeUncompressedStream(input_start, input_size, output_start);
    return BROTLI_TRUE;
  }
  return BROTLI_FALSE;
}

/* Appends an empty metadata block so the stream becomes byte-aligned;
   consumes the pending partial byte (last_byte_/last_byte_bits_) and queues
   the padding bytes as internal output. */
static void InjectBytePaddingBlock(BrotliEncoderState* s) {
  uint32_t seal = s->last_byte_;
  size_t seal_bits = s->last_byte_bits_;
  uint8_t* destination;
  s->last_byte_ = 0;
  s->last_byte_bits_ = 0;
  /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
  seal |= 0x6u << seal_bits;
  seal_bits += 6;
  /* If we have already created storage, then append to it.
     Storage is valid until next block is being compressed. */
  if (s->next_out_) {
    destination = s->next_out_ + s->available_out_;
  } else {
    destination = s->tiny_buf_.u8;
    s->next_out_ = destination;
  }
  destination[0] = (uint8_t)seal;
  if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
  s->available_out_ += (seal_bits + 7) >> 3;
}

/* Injects padding bits or pushes compressed data to output.
   Returns false if nothing is done. */
static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
      s->last_byte_bits_ != 0) {
    InjectBytePaddingBlock(s);
    return BROTLI_TRUE;
  }

  if (s->available_out_ != 0 && *available_out != 0) {
    size_t copy_output_size =
        BROTLI_MIN(size_t, s->available_out_, *available_out);
    memcpy(*next_out, s->next_out_, copy_output_size);
    *next_out += copy_output_size;
    *available_out -= copy_output_size;
    s->next_out_ += copy_output_size;
    s->available_out_ -= copy_output_size;
    s->total_out_ += copy_output_size;
    if (total_out) *total_out = s->total_out_;
    return BROTLI_TRUE;
  }

  return BROTLI_FALSE;
}

/* Returns to the default PROCESSING state once a requested flush has been
   fully pushed out. */
static void CheckFlushComplete(BrotliEncoderState* s) {
  if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
      s->available_out_ == 0) {
    s->stream_state_ = BROTLI_STREAM_PROCESSING;
    s->next_out_ = 0;
  }
}

/* Streaming workflow for the two fastest quality levels: compresses
   fragments straight from |*next_in| (bypassing the ring buffer), writing
   into the caller's buffer when it is large enough, otherwise into internal
   storage to be pushed out later. */
static BROTLI_BOOL BrotliEncoderCompressStreamFast(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  const size_t block_size_limit = (size_t)1 << s->params.lgwin;
  const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
      BROTLI_MIN(size_t, *available_in, block_size_limit));
  uint32_t* tmp_command_buf = NULL;
  uint32_t* command_buf = NULL;
  uint8_t* tmp_literal_buf = NULL;
  uint8_t* literal_buf = NULL;
  MemoryManager* m = &s->memory_manager_;
  if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
      s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    /* Keep full-size buffers in the state object for reuse; smaller needs
       are served by temporary allocations freed at the end of this call. */
    if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
      s->command_buf_ =
          BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
      s->literal_buf_ =
          BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
    }
    if (s->command_buf_) {
      command_buf = s->command_buf_;
      literal_buf = s->literal_buf_;
    } else {
      tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
      tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      command_buf = tmp_command_buf;
      literal_buf = tmp_literal_buf;
    }
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress block only when internal output buffer is empty, stream is not
       finished, there is no pending flush request, and there is either
       additional input or pending operation. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING &&
        (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
      size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
      BROTLI_BOOL is_last =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
      BROTLI_BOOL force_flush =
          (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
      size_t max_out_size = 2 * block_size + 502;
      BROTLI_BOOL inplace = BROTLI_TRUE;
      uint8_t* storage = NULL;
      size_t storage_ix = s->last_byte_bits_;
      size_t table_size;
      int* table;

      if (force_flush && block_size == 0) {
        s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        continue;
      }
      /* Write straight to the caller's buffer if the worst case fits. */
      if (max_out_size <= *available_out) {
        storage = *next_out;
      } else {
        inplace = BROTLI_FALSE;
        storage = GetBrotliStorage(s, max_out_size);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      storage[0] = s->last_byte_;
      table = GetHashTable(s, s->params.quality, block_size, &table_size);
      if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;

      if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
        BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
            table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
            s->cmd_code_, &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      } else {
        BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
            command_buf, literal_buf, table, table_size,
            &storage_ix, storage);
        if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
      }
      *next_in += block_size;
      *available_in -= block_size;
      if (inplace) {
        size_t out_bytes = storage_ix >> 3;
        assert(out_bytes <= *available_out);
        assert((storage_ix & 7) == 0 || out_bytes < *available_out);
        *next_out += out_bytes;
        *available_out -= out_bytes;
        s->total_out_ += out_bytes;
        if (total_out) *total_out = s->total_out_;
      } else {
        size_t out_bytes = storage_ix >> 3;
        s->next_out_ = storage;
        s->available_out_ = out_bytes;
      }
      /* Carry the trailing partial byte into the next fragment. */
      s->last_byte_ = storage[storage_ix >> 3];
      s->last_byte_bits_ = storage_ix & 7u;

      if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
      if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
      continue;
    }
    break;
  }
  BROTLI_FREE(m, tmp_command_buf);
  BROTLI_FREE(m, tmp_literal_buf);
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}

/* Handles BROTLI_OPERATION_EMIT_METADATA: flushes any buffered compressed
   data, emits the metadata block header, then copies the metadata body from
   input to output. Requires |*available_in| <= 1 << 24. */
static BROTLI_BOOL ProcessMetadata(
    BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
    size_t* available_out, uint8_t** next_out, size_t* total_out) {
  if (*available_in > (1u << 24)) return BROTLI_FALSE;
  /* Switch to metadata block workflow, if required. */
  if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
    s->remaining_metadata_bytes_ = (uint32_t)*available_in;
    s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
  }
  if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
      s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  while (BROTLI_TRUE) {
    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }
    if (s->available_out_ != 0) break;

    /* Flush pending compressed data before emitting metadata. */
    if (s->input_pos_ != s->last_flush_pos_) {
      BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
          &s->available_out_, &s->next_out_);
      if (!result) return BROTLI_FALSE;
      continue;
    }

    if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
      s->next_out_ = s->tiny_buf_.u8;
      s->available_out_ =
          WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
      s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
      continue;
    } else {
      /* Exit workflow only when there is no more input and no more output.
         Otherwise client may continue producing empty metadata blocks. */
      if (s->remaining_metadata_bytes_ == 0) {
        s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
        s->stream_state_ = BROTLI_STREAM_PROCESSING;
        break;
      }
      if (*available_out) {
        /* Directly copy input to output. */
        uint32_t copy = (uint32_t)BROTLI_MIN(
            size_t, s->remaining_metadata_bytes_, *available_out);
        memcpy(*next_out, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->remaining_metadata_bytes_ -= copy;
        *next_out += copy;
        *available_out -= copy;
      } else {
        /* This guarantees progress in "TakeOutput" workflow. */
        uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
        s->next_out_ = s->tiny_buf_.u8;
        memcpy(s->next_out_, *next_in, copy);
        *next_in += copy;
        *available_in -= copy;
        s->remaining_metadata_bytes_ -= copy;
        s->available_out_ = copy;
      }
      continue;
    }
  }

  return BROTLI_TRUE;
}

/* Sets params.size_hint, if not yet set, to the total input known so far
   (unprocessed buffered bytes plus |available_in|), saturated at 1 << 30. */
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
  if (s->params.size_hint == 0) {
    uint64_t delta = UnprocessedInputSize(s);
    uint64_t tail = available_in;
    uint32_t limit = 1u << 30;
    uint32_t total;
    if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
      total = limit;
    } else {
      total = (uint32_t)(delta + tail);
    }
    s->params.size_hint = total;
  }
}

/* Public streaming entry point: consumes input into the ring buffer, drives
   EncodeData() according to |op|, and pushes produced output. Dispatches to
   the metadata workflow and to the fast-quality workflow as needed. */
BROTLI_BOOL BrotliEncoderCompressStream(
    BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
    const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
    size_t* total_out) {
  if (!EnsureInitialized(s)) return BROTLI_FALSE;

  /* Unfinished metadata block; check requirements. */
  if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
    if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
    if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
  }

  if (op == BROTLI_OPERATION_EMIT_METADATA) {
    UpdateSizeHint(s, 0);  /* First data metablock might be emitted here. */
    return ProcessMetadata(
        s, available_in, next_in, available_out, next_out, total_out);
  }

  if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
      s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
    return BROTLI_FALSE;
  }

  /* After FINISH / during FLUSH no new input is acceptable. */
  if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
    return BROTLI_FALSE;
  }
  if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
      s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
    return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
        available_out, next_out, total_out);
  }
  while (BROTLI_TRUE) {
    size_t remaining_block_size = RemainingInputBlockSize(s);

    if (remaining_block_size != 0 && *available_in != 0) {
      size_t copy_input_size =
          BROTLI_MIN(size_t, remaining_block_size, *available_in);
      CopyInputToRingBuffer(s, copy_input_size, *next_in);
      *next_in += copy_input_size;
      *available_in -= copy_input_size;
      continue;
    }

    if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
      continue;
    }

    /* Compress data only when internal output buffer is empty, stream is not
       finished and there is no pending flush request. */
    if (s->available_out_ == 0 &&
        s->stream_state_ == BROTLI_STREAM_PROCESSING) {
      if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
        BROTLI_BOOL is_last = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
        BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
            (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
        BROTLI_BOOL result;
        UpdateSizeHint(s, *available_in);
        result = EncodeData(s, is_last, force_flush,
            &s->available_out_, &s->next_out_);
        if (!result) return BROTLI_FALSE;
        if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
        if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
        continue;
      }
    }
    break;
  }
  CheckFlushComplete(s);
  return BROTLI_TRUE;
}

/* True once the last meta-block was emitted and all output was consumed. */
BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
  return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
      !BrotliEncoderHasMoreOutput(s));
}

/* True while the encoder holds output not yet handed to the client. */
BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
  return TO_BROTLI_BOOL(s->available_out_ != 0);
}

/* Hands out a pointer to (up to |*size| bytes of) pending internal output;
   sets |*size| to the number of bytes actually taken, or 0 when none. */
const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
  size_t consumed_size = s->available_out_;
  uint8_t* result = s->next_out_;
  if (*size) {
    consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
  }
  if (consumed_size) {
    s->next_out_ += consumed_size;
    s->available_out_ -= consumed_size;
    s->total_out_ += consumed_size;
    CheckFlushComplete(s);
    *size = consumed_size;
  } else {
    *size = 0;
    result = 0;
  }
  return result;
}

uint32_t BrotliEncoderVersion(void) {
  return BROTLI_VERSION;
}

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif