/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently from the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

#include "./compress_fragment.h"

#include <string.h>  /* memcmp, memcpy, memset */

#include "../common/constants.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./write_bits.h"

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

/* Maximum backward-reference distance for an 18-bit window; candidates
   farther away than this are rejected (see the "trawl" loop below). */
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;

/* Hashes 5 bytes starting at "p" into "64 - shift" bits. The "<< 24"
   discards all but the low 40 bits (5 bytes) of the little-endian load
   before the multiplicative mix. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
  return (uint32_t)(h >> shift);
}

/* Same hash as Hash(), but computed from 5 bytes starting at byte "offset"
   (0..3) of an already-loaded 64-bit little-endian word "v"; used to avoid
   re-reading memory when hashing several adjacent positions. */
static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  BROTLI_DCHECK(offset >= 0);
  BROTLI_DCHECK(offset <= 3);
  {
    const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
    return (uint32_t)(h >> shift);
  }
}

/* Returns BROTLI_TRUE iff the first 5 bytes at p1 and p2 are equal; 5 is the
   minimum match length used by this compressor (kMinMatchLen below). */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return TO_BROTLI_BOOL(
      BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
      p1[4] == p2[4]);
}

/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth for each literal.
   Returns estimated compression ratio millibytes/char for encoding given input
   with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
                                             const uint8_t* input,
                                             const size_t input_size,
                                             uint8_t depths[256],
                                             uint16_t bits[256],
                                             size_t* storage_ix,
                                             uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  size_t i;
  if (input_size < (1 << 15)) {
    /* Short input: histogram every byte exactly. */
    for (i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (i = 0; i < 256; ++i) {
      /* We weigh the first 11 samples with weight 3 to account for the
         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    /* Long input: histogram a sparse sample only. */
    static const size_t kSampleRate = 29;
    for (i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (i = 0; i < 256; ++i) {
      /* We add 1 to each population count to avoid 0 bit depths (since this is
         only a sample and we don't know if the symbol appears or not), and we
         weigh the first 11 samples with weight 3 to account for the balancing
         effect of the LZ77 phase on the histogram (more frequent symbols are
         more likely to be in backward references instead as literals). */
      const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
                                     /* max_bits = */ 8,
                                     depths, bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return 0;
  {
    size_t literal_ratio = 0;
    for (i = 0; i < 256; ++i) {
      if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    }
    /* Estimated encoding ratio, millibytes per symbol. */
    return (literal_ratio * 125) / histogram_total;
  }
}

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream.
   depth/bits layout: [0, 64) command code, [64, 128) distance code. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}

/* Emits an insert-length command code (plus extra bits) and updates "histo".
   REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[61];
  }
}

/* Emits an insert-length command for long inserts (>= 6210). */
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[62];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[63];
  }
}

/* Emits a copy-length command code (plus extra bits) and updates "histo". */
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[39];
  }
}

/* Emits a copy-length command for a copy that reuses the last distance
   (the distance itself is implicit; symbol 64 in depth/bits is the extra
   code written for the longer-copy cases). */
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[39];
    ++histo[64];
  }
}

/* Emits a distance code (symbols 80..127 of depth/bits) plus its extra bits
   and updates "histo". */
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}

/* Emits "len" literal bytes from "input" using the given literal code. */
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t j;
  for (j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

/* Writes a (non-last) meta-block header: ISLAST=0, the MLEN nibble count,
   MLEN itself, and the ISUNCOMPRESSED flag.
   REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles = 6;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}

/* Overwrites "n_bits" bits of "array" at bit position "pos" with the low bits
   of "bits", one byte-span at a time; used to patch the MLEN field of an
   already-written meta-block header. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask =
        (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    array[byte_pos] =
        (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}

/* Truncates the bit stream back to "new_storage_ix", clearing the unused
   high bits of the now-partial last byte. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= (uint8_t)mask;
  *storage_ix = new_storage_ix;
}

/* Heuristically decides, from a sparse sample of "data", whether appending
   the next "len" bytes to the current meta-block (reusing the literal code
   "depths") is estimated cheaper than starting a fresh meta-block. */
static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  size_t i;
  for (i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  {
    const size_t total = (len + kSampleRate - 1) / kSampleRate;
    double r = (FastLog2(total) + 0.5) * (double)total + 200;
    for (i = 0; i < 256; ++i) {
      r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    }
    return TO_BROTLI_BOOL(r >= 0.0);
  }
}

/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980

/* Returns whether a long insert should instead be emitted as an uncompressed
   meta-block: only when the insert dominates the data emitted so far (the
   compressed prefix is < 2% of the insert) and the estimated literal encoding
   ratio is poor. */
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    const uint8_t* metablock_start, const uint8_t* next_emit,
    const size_t insertlen, const size_t literal_ratio) {
  const size_t compressed = (size_t)(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return BROTLI_FALSE;
  } else {
    return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
  }
}

/* Rewinds the stream to "storage_ix_start" (discarding everything emitted for
   the current meta-block) and re-emits [begin, end) as a single uncompressed
   meta-block; output is left byte-aligned with a zeroed trailing byte. */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Uncompressed data must start at a byte boundary. */
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

/* Seed for the command/distance histogram gathered per block; entries seeded
   with 0 appear to be symbols this encoder never emits — TODO confirm against
   the Emit* code ranges. */
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};

/* Core one-pass compressor; "table_bits" is a compile-time constant in the
   baked specializations below so that "shift" folds to a constant. */
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  uint32_t cmd_histo[128];
  const uint8_t* ip_end;

  /* "next_emit" is a pointer to the first byte that is not covered by a
     previous copy. Bytes between "next_emit" and the start of the next copy or
     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;
  /* Save the start of the first block for position and distance computations.
  */
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
  const size_t kMinMatchLen = 5;

  const uint8_t* metablock_start = input;
  size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  /* Save the bit position of the MLEN field of the meta-block header, so that
     we can update it later if we decide to extend this meta-block. */
  size_t mlen_storage_ix = *storage_ix + 3;

  uint8_t lit_depth[256];
  uint16_t lit_bits[256];

  size_t literal_ratio;

  const uint8_t* ip;
  int last_distance;

  const size_t shift = 64u - table_bits;

  BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  /* No block splits, no contexts. */
  BrotliWriteBits(13, 0, storage_ix, storage);

  literal_ratio = BuildAndStoreLiteralPrefixCode(
      m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  {
    /* Store the pre-compressed command and distance prefix codes. */
    size_t i;
    for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
      BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    }
  }
  BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
                  storage_ix, storage);

 emit_commands:
  /* Initialize the command and distance histograms. We will gather
     statistics of command and distance codes during the processing
     of this block and use it to update the command and distance
     prefix codes for the next block. */
  memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));

  /* "ip" is the input pointer. */
  ip = input;
  last_distance = -1;
  ip_end = input + block_size;

  if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    /* For the last block, we need to keep a 16 bytes margin so that we can be
       sure that all distances are at most window size - 16.
       For all other blocks, we only need to keep a margin of 5 bytes so that
       we don't go over the block size with a copy. */
    const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
                                        input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    uint32_t next_hash;
    for (next_hash = Hash(++ip, shift); ; ) {
      /* Step 1: Scan forward in the input looking for a 5-byte-long match.
         If we get close to exhausting the input then goto emit_remainder.

         Heuristic match skipping: If 32 bytes are scanned with no matches
         found, start looking only at every other byte. If 32 more bytes are
         scanned, look at every third byte, etc.. When a match is found,
         immediately go back to looking at every byte. This is a small loss
         (~5% performance, ~0.1% density) for compressible data due to more
         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
         win since the compressor quickly "realizes" the data is incompressible
         and doesn't bother looking for matches everywhere.

         The "skip" variable keeps track of how many bytes there are since the
         last match; dividing it by 32 (i.e. right-shifting by five) gives the
         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      BROTLI_DCHECK(next_emit < ip);
trawl:
      do {
        uint32_t hash = next_hash;
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        BROTLI_DCHECK(hash == Hash(next_ip, shift));
        ip = next_ip;
        next_ip = ip + bytes_between_hash_lookups;
        if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        /* First try the position that would reuse the last distance. */
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (BROTLI_PREDICT_TRUE(candidate < ip)) {
            table[hash] = (int)(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        BROTLI_DCHECK(candidate >= base_ip);
        BROTLI_DCHECK(candidate < ip);

        table[hash] = (int)(ip - base_ip);
      } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));

      /* Check copy distance. If candidate is not feasible, continue search.
         Checking is done outside of hot loop to reduce overhead. */
      if (ip - candidate > MAX_DISTANCE) goto trawl;

      /* Step 2: Emit the found match together with the literal bytes from
         "next_emit" to the bit stream, and then see if we can find a next match
         immediately afterwards. Repeat until we find no match for the input
         without emitting some literal bytes. */

      {
        /* We have a 5-byte match at ip, and we need to emit bytes in
           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        int distance = (int)(base - candidate);  /* > 0 */
        size_t insert = (size_t)(base - next_emit);
        ip += matched;
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        if (BROTLI_PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             literal_ratio)) {
          /* Restart the meta-block as uncompressed data up to "base". */
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= (size_t)(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          /* Reuse the implicit last-distance code (symbol 64). */
          BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance((size_t)distance, cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      while (IsMatch(ip, candidate)) {
        /* We have a 5-byte match at ip, and no need to emit any literal bytes
           prior to ip. */
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
        if (ip - candidate > MAX_DISTANCE) break;
        ip += matched;
        last_distance = (int)(base - candidate);  /* > 0 */
        BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        /* We could immediately start working at ip now, but to improve
           compression we first update "table" with the hashes of some positions
           within the last copy. */
        {
          uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
          uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
          uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
          table[prev_hash] = (int)(ip - base_ip - 3);
          prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
          table[prev_hash] = (int)(ip - base_ip - 2);
          prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
          table[prev_hash] = (int)(ip - base_ip - 1);

          candidate = base_ip + table[cur_hash];
          table[cur_hash] = (int)(ip - base_ip);
        }
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  BROTLI_DCHECK(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);

  /* Decide if we want to continue this meta-block instead of emitting the
     last insert-only command. */
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    BROTLI_DCHECK(total_block_size > (1 << 16));
    /* Update the size of the current meta-block and continue emitting commands.
       We can do this because the current size and the new size both have 5
       nibbles. */
    total_block_size += block_size;
    UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    goto emit_commands;
  }

  /* Emit the remaining bytes as literals. */
  if (next_emit < ip_end) {
    const size_t insert = (size_t)(ip_end - next_emit);
    if (BROTLI_PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         literal_ratio)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

next_block:
  /* If we have more data, write a new meta-block header and prefix codes and
     then continue emitting commands. */
  if (input_size > 0) {
    metablock_start = input;
    block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    total_block_size = block_size;
    /* Save the bit position of the MLEN field of the meta-block header, so that
       we can update it later if we decide to extend this meta-block. */
    mlen_storage_ix = *storage_ix + 3;
    BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    /* No block splits, no contexts. */
    BrotliWriteBits(13, 0, storage_ix, storage);
    literal_ratio = BuildAndStoreLiteralPrefixCode(
        m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    if (BROTLI_IS_OOM(m)) return;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (!is_last) {
    /* If this is not the last block, update the command and distance prefix
       codes for the next block and store the compressed forms. */
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}

/* Supported hash-table sizes; one specialization is baked per size so that
   "table_bits" (and hence "shift") is a compile-time constant. */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

#define BAKE_METHOD_PARAM_(B)                                              \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(           \
    MemoryManager* m, const uint8_t* input, size_t input_size,             \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],               \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,   \
    size_t* storage_ix, uint8_t* storage) {                                \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,  \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_

/* Public entry point: dispatches to the specialization matching "table_size"
   (must be a power of two with log2 in {9, 11, 13, 15}), handles the empty
   input, and falls back to an uncompressed meta-block if compression expanded
   the data. */
void BrotliCompressFragmentFast(
    MemoryManager* m, const uint8_t* input, size_t input_size,
    BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    size_t* storage_ix, uint8_t* storage) {
  const size_t initial_storage_ix = *storage_ix;
  const size_t table_bits = Log2FloorNonZero(table_size);

  if (input_size == 0) {
    BROTLI_DCHECK(is_last);
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  switch (table_bits) {
#define CASE_(B)                                      \
    case B:                                           \
      BrotliCompressFragmentFastImpl ## B(            \
          m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
          cmd_code_numbits, cmd_code, storage_ix, storage);          \
      break;
    FOR_TABLE_BITS_(CASE_)
#undef CASE_
    default: BROTLI_DCHECK(0); break;
  }

  /* If output is larger than single uncompressed block, rewrite it. */
  if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
                              storage_ix, storage);
  }

  if (is_last) {
    BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}

#undef FOR_TABLE_BITS_

#if defined(__cplusplus) || defined(c_plusplus)
}  /* extern "C" */
#endif