/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently from the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

#include "./compress_fragment.h"

#include <string.h>  /* memcmp, memcpy, memset */

#include "../common/constants.h"
#include <brotli/types.h>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;

static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  assert(offset >= 0);
  assert(offset <= 3);
  {
    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
    return (uint32_t)(h >> shift);
  }
}

static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return TO_BROTLI_BOOL(
      BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
      p1[4] == p2[4]);
}
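
/* A note on the hashing scheme above: BrotliCompressFragmentFastImpl() passes
   shift = 64 - table_bits, so keeping only the top bits of the 64-bit product
   yields a value in [0, 1 << table_bits), i.e. a valid index into the hash
   table. The "<< 24" discards three of the eight loaded bytes, so the hash
   depends on five consecutive input bytes, in line with the five-byte matches
   that IsMatch() verifies. */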

/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth for each literal.
   Returns estimated compression ratio millibytes/char for encoding given input
   with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead of as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
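  /* The code lengths in "depths" are bit costs per symbol; scaling the total
     bit cost by 125 = 1000 / 8 converts bits to millibytes, so the block below
     computes the estimated cost in millibytes per input character. */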
  {
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
    return (literal_ratio * 125) / histogram_total;
  }
}

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}

/* REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[21];
  }
}
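
/* The branches above cover insert lengths up to 2114 + (1 << 12) - 1 = 6209,
   hence the REQUIRES above. EmitLongInsertLen() below continues the scheme:
   code 62 with 14 extra bits covers lengths up to 6210 + (1 << 14) - 1 = 22593,
   and code 63 with 24 extra bits covers the rest. */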

static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[22];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[23];
  }
}

static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[47];
  }
}

static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[47];
    ++histo[64];
  }
}

static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}
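
/* Worked example for EmitDistance(): distance 5 gives d = 8, nbits = 2,
   prefix = (8 >> 2) & 1 = 0, so distcode = 2 * (2 - 1) + 0 + 80 = 82 and the
   extra-bits value d - offset = 0 is written in 2 bits. Entries 80 and up of
   the depth/bits/histo arrays hold the explicitly coded distances, while
   entry 64 is the "last distance" code used by EmitCopyLenLastDistance()
   above and by the main loop below. */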

static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t j;
  for (j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles = 6;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
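
/* Example: for len = 100000 the header above is ISLAST = 0, the 2-bit value
   MNIBBLES - 4 = 1, the 20-bit value len - 1 = 99999, and the ISUNCOMPRESSED
   bit. The MLEN field therefore starts 3 bits after the header start, which is
   why BrotliCompressFragmentFastImpl() below records mlen_storage_ix as
   *storage_ix + 3 and later patches that field with UpdateBits() when
   meta-blocks are merged. */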

static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t *array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask =
        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    array[byte_pos] =
        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}
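
/* UpdateBits() rewrites n_bits bits in place at bit position "pos" without
   disturbing the surrounding bits: in each byte it keeps the bits selected by
   "mask" (everything below the write position and everything above the changed
   range) and ORs in the new low bits of "bits". It is used below to patch the
   MLEN field of an already written meta-block header. */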

static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= (uint8_t)mask;
  *storage_ix = new_storage_ix;
}

static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  size_t i;
  for (i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  {
    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    for (i = 0; i < 256; ++i) {
      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    }
    return TO_BROTLI_BOOL(r >= 0.0);
  }
}

/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980

static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    const uint8_t* metablock_start, const uint8_t* next_emit,
    const size_t insertlen, const size_t literal_ratio) {
  const size_t compressed = (size_t)(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return BROTLI_FALSE;
  } else {
    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
  }
}
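
/* "literal_ratio" is the estimate returned by BuildAndStoreLiteralPrefixCode()
   in millibytes per character, so the MIN_RATIO threshold of 980 means:
   switch to an uncompressed meta-block only when the literals would shrink by
   less than 2%, and only when the already compressed part of the meta-block is
   small relative to the pending insert (at most 1/50 of it). */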

static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
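
/* Layout of the 128-entry command histogram (and of the "depth"/"bits" arrays
   used by the Emit* functions): entries 0..63 are the insert-and-copy command
   codes in the local order produced by BuildAndStoreCommandPrefixCode(), and
   entries 64..127 are the distance codes, with 64 being the "last distance"
   symbol. Most seeds are 1, which guarantees those codes a non-zero depth when
   the prefix code is rebuilt for the next block, even if they did not occur in
   the current one. */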

static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance computations.
  */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16 bytes margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc.. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      assert(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        assert(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        assert(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        assert(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

emit_remainder:
  assert(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    assert(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting commands.
       We can do this because the current size and the new size both have 5
       nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so that
       we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}
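
/* The macros below stamp out one copy of BrotliCompressFragmentFastImpl() per
   supported table size (9, 11, 13 and 15 bits), so that "table_bits", and with
   it the hash shift, is a known constant in each specialization.
   BrotliCompressFragmentFast() then dispatches on the actual table size. */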

#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

#define BAKE_METHOD_PARAM_(B)                                              \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(           \
    MemoryManager* m, const uint8_t* input, size_t input_size,             \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],               \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,   \
    size_t* storage_ix, uint8_t* storage) {                                \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,  \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix,         \
      storage);                                                            \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_

void BrotliCompressFragmentFast(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    assert(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  switch (table_bits) {
#define CASE_(B)                                                         \
    case B:                                                              \
      BrotliCompressFragmentFastImpl ## B(                               \
          m, input, input_size, is_last, table, cmd_depth, cmd_bits,     \
          cmd_code_numbits, cmd_code, storage_ix, storage);              \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: assert(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}

#undef FOR_TABLE_BITS_

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif