Home | History | Annotate | Download | only in enc
      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3    Distributed under MIT license.
      4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Function for fast encoding of an input fragment, independently from the input
      8    history. This function uses one-pass processing: when we find a backward
      9    match, we immediately emit the corresponding command and literal codes to
     10    the bit stream.
     11 
     12    Adapted from the CompressFragment() function in
     13    https://github.com/google/snappy/blob/master/snappy.cc */
     14 
     15 #include "./compress_fragment.h"
     16 
     17 #include <string.h>  /* memcmp, memcpy, memset */
     18 
     19 #include "../common/constants.h"
     20 #include "../common/platform.h"
     21 #include <brotli/types.h>
     22 #include "./brotli_bit_stream.h"
     23 #include "./entropy_encode.h"
     24 #include "./fast_log.h"
     25 #include "./find_match_length.h"
     26 #include "./memory.h"
     27 #include "./write_bits.h"
     28 
     29 #if defined(__cplusplus) || defined(c_plusplus)
     30 extern "C" {
     31 #endif
     32 
     33 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
     34 
     35 /* kHashMul32 multiplier has these properties:
     36    * The multiplier must be odd. Otherwise we may lose the highest bit.
     37    * No long streaks of ones or zeros.
     38    * There is no effort to ensure that it is a prime, the oddity is enough
     39      for this use.
     40    * The number has been tuned heuristically against compression benchmarks. */
     41 static const uint32_t kHashMul32 = 0x1E35A7BD;
     42 
     43 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
     44   const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
     45   return (uint32_t)(h >> shift);
     46 }
     47 
     48 static BROTLI_INLINE uint32_t HashBytesAtOffset(
     49     uint64_t v, int offset, size_t shift) {
     50   BROTLI_DCHECK(offset >= 0);
     51   BROTLI_DCHECK(offset <= 3);
     52   {
     53     const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
     54     return (uint32_t)(h >> shift);
     55   }
     56 }
     57 
     58 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
     59   return TO_BROTLI_BOOL(
     60       BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
     61       p1[4] == p2[4]);
     62 }
     63 
     64 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
     65    of the "input" string and stores it into the bit stream.
     66    Note that the prefix code here is built from the pre-LZ77 input, therefore
     67    we can only approximate the statistics of the actual literal stream.
     68    Moreover, for long inputs we build a histogram from a sample of the input
     69    and thus have to assign a non-zero depth for each literal.
     70    Returns estimated compression ratio millibytes/char for encoding given input
     71    with generated code. */
     72 static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
     73                                              const uint8_t* input,
     74                                              const size_t input_size,
     75                                              uint8_t depths[256],
     76                                              uint16_t bits[256],
     77                                              size_t* storage_ix,
     78                                              uint8_t* storage) {
     79   uint32_t histogram[256] = { 0 };
     80   size_t histogram_total;
     81   size_t i;
     82   if (input_size < (1 << 15)) {
     83     for (i = 0; i < input_size; ++i) {
     84       ++histogram[input[i]];
     85     }
     86     histogram_total = input_size;
     87     for (i = 0; i < 256; ++i) {
     88       /* We weigh the first 11 samples with weight 3 to account for the
     89          balancing effect of the LZ77 phase on the histogram. */
     90       const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
     91       histogram[i] += adjust;
     92       histogram_total += adjust;
     93     }
     94   } else {
     95     static const size_t kSampleRate = 29;
     96     for (i = 0; i < input_size; i += kSampleRate) {
     97       ++histogram[input[i]];
     98     }
     99     histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    100     for (i = 0; i < 256; ++i) {
    101       /* We add 1 to each population count to avoid 0 bit depths (since this is
    102          only a sample and we don't know if the symbol appears or not), and we
    103          weigh the first 11 samples with weight 3 to account for the balancing
    104          effect of the LZ77 phase on the histogram (more frequent symbols are
    105          more likely to be in backward references instead as literals). */
    106       const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
    107       histogram[i] += adjust;
    108       histogram_total += adjust;
    109     }
    110   }
    111   BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
    112                                      /* max_bits = */ 8,
    113                                      depths, bits, storage_ix, storage);
    114   if (BROTLI_IS_OOM(m)) return 0;
    115   {
    116     size_t literal_ratio = 0;
    117     for (i = 0; i < 256; ++i) {
    118       if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    119     }
    120     /* Estimated encoding ratio, millibytes per symbol. */
    121     return (literal_ratio * 125) / histogram_total;
    122   }
    123 }
    124 
    125 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
    126    "bits" based on "histogram" and stores it into the bit stream. */
    127 static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    128     uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    129     uint8_t* storage) {
    130   /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
    131   HuffmanTree tree[129];
    132   uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
    133   uint16_t cmd_bits[64];
    134 
    135   BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
    136   BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
    137   /* We have to jump through a few hoops here in order to compute
    138      the command bits because the symbols are in a different order than in
    139      the full alphabet. This looks complicated, but having the symbols
    140      in this order in the command bits saves a few branches in the Emit*
    141      functions. */
    142   memcpy(cmd_depth, depth, 24);
    143   memcpy(cmd_depth + 24, depth + 40, 8);
    144   memcpy(cmd_depth + 32, depth + 24, 8);
    145   memcpy(cmd_depth + 40, depth + 48, 8);
    146   memcpy(cmd_depth + 48, depth + 32, 8);
    147   memcpy(cmd_depth + 56, depth + 56, 8);
    148   BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
    149   memcpy(bits, cmd_bits, 48);
    150   memcpy(bits + 24, cmd_bits + 32, 16);
    151   memcpy(bits + 32, cmd_bits + 48, 16);
    152   memcpy(bits + 40, cmd_bits + 24, 16);
    153   memcpy(bits + 48, cmd_bits + 40, 16);
    154   memcpy(bits + 56, cmd_bits + 56, 16);
    155   BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
    156   {
    157     /* Create the bit length array for the full command alphabet. */
    158     size_t i;
    159     memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    160     memcpy(cmd_depth, depth, 8);
    161     memcpy(cmd_depth + 64, depth + 8, 8);
    162     memcpy(cmd_depth + 128, depth + 16, 8);
    163     memcpy(cmd_depth + 192, depth + 24, 8);
    164     memcpy(cmd_depth + 384, depth + 32, 8);
    165     for (i = 0; i < 8; ++i) {
    166       cmd_depth[128 + 8 * i] = depth[40 + i];
    167       cmd_depth[256 + 8 * i] = depth[48 + i];
    168       cmd_depth[448 + 8 * i] = depth[56 + i];
    169     }
    170     BrotliStoreHuffmanTree(
    171         cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
    172   }
    173   BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
    174 }
    175 
    176 /* REQUIRES: insertlen < 6210 */
    177 static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
    178                                         const uint8_t depth[128],
    179                                         const uint16_t bits[128],
    180                                         uint32_t histo[128],
    181                                         size_t* storage_ix,
    182                                         uint8_t* storage) {
    183   if (insertlen < 6) {
    184     const size_t code = insertlen + 40;
    185     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    186     ++histo[code];
    187   } else if (insertlen < 130) {
    188     const size_t tail = insertlen - 2;
    189     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    190     const size_t prefix = tail >> nbits;
    191     const size_t inscode = (nbits << 1) + prefix + 42;
    192     BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    193     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    194     ++histo[inscode];
    195   } else if (insertlen < 2114) {
    196     const size_t tail = insertlen - 66;
    197     const uint32_t nbits = Log2FloorNonZero(tail);
    198     const size_t code = nbits + 50;
    199     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    200     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    201     ++histo[code];
    202   } else {
    203     BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    204     BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    205     ++histo[61];
    206   }
    207 }
    208 
    209 static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
    210                                             const uint8_t depth[128],
    211                                             const uint16_t bits[128],
    212                                             uint32_t histo[128],
    213                                             size_t* storage_ix,
    214                                             uint8_t* storage) {
    215   if (insertlen < 22594) {
    216     BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    217     BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    218     ++histo[62];
    219   } else {
    220     BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    221     BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    222     ++histo[63];
    223   }
    224 }
    225 
    226 static BROTLI_INLINE void EmitCopyLen(size_t copylen,
    227                                       const uint8_t depth[128],
    228                                       const uint16_t bits[128],
    229                                       uint32_t histo[128],
    230                                       size_t* storage_ix,
    231                                       uint8_t* storage) {
    232   if (copylen < 10) {
    233     BrotliWriteBits(
    234         depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    235     ++histo[copylen + 14];
    236   } else if (copylen < 134) {
    237     const size_t tail = copylen - 6;
    238     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    239     const size_t prefix = tail >> nbits;
    240     const size_t code = (nbits << 1) + prefix + 20;
    241     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    242     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    243     ++histo[code];
    244   } else if (copylen < 2118) {
    245     const size_t tail = copylen - 70;
    246     const uint32_t nbits = Log2FloorNonZero(tail);
    247     const size_t code = nbits + 28;
    248     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    249     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    250     ++histo[code];
    251   } else {
    252     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    253     BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    254     ++histo[39];
    255   }
    256 }
    257 
    258 static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
    259                                                   const uint8_t depth[128],
    260                                                   const uint16_t bits[128],
    261                                                   uint32_t histo[128],
    262                                                   size_t* storage_ix,
    263                                                   uint8_t* storage) {
    264   if (copylen < 12) {
    265     BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    266     ++histo[copylen - 4];
    267   } else if (copylen < 72) {
    268     const size_t tail = copylen - 8;
    269     const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    270     const size_t prefix = tail >> nbits;
    271     const size_t code = (nbits << 1) + prefix + 4;
    272     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    273     BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    274     ++histo[code];
    275   } else if (copylen < 136) {
    276     const size_t tail = copylen - 8;
    277     const size_t code = (tail >> 5) + 30;
    278     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    279     BrotliWriteBits(5, tail & 31, storage_ix, storage);
    280     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    281     ++histo[code];
    282     ++histo[64];
    283   } else if (copylen < 2120) {
    284     const size_t tail = copylen - 72;
    285     const uint32_t nbits = Log2FloorNonZero(tail);
    286     const size_t code = nbits + 28;
    287     BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    288     BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    289     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    290     ++histo[code];
    291     ++histo[64];
    292   } else {
    293     BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    294     BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    295     BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    296     ++histo[39];
    297     ++histo[64];
    298   }
    299 }
    300 
    301 static BROTLI_INLINE void EmitDistance(size_t distance,
    302                                        const uint8_t depth[128],
    303                                        const uint16_t bits[128],
    304                                        uint32_t histo[128],
    305                                        size_t* storage_ix, uint8_t* storage) {
    306   const size_t d = distance + 3;
    307   const uint32_t nbits = Log2FloorNonZero(d) - 1u;
    308   const size_t prefix = (d >> nbits) & 1;
    309   const size_t offset = (2 + prefix) << nbits;
    310   const size_t distcode = 2 * (nbits - 1) + prefix + 80;
    311   BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
    312   BrotliWriteBits(nbits, d - offset, storage_ix, storage);
    313   ++histo[distcode];
    314 }
    315 
    316 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
    317                                        const uint8_t depth[256],
    318                                        const uint16_t bits[256],
    319                                        size_t* storage_ix, uint8_t* storage) {
    320   size_t j;
    321   for (j = 0; j < len; j++) {
    322     const uint8_t lit = input[j];
    323     BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
    324   }
    325 }
    326 
    327 /* REQUIRES: len <= 1 << 24. */
    328 static void BrotliStoreMetaBlockHeader(
    329     size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    330     uint8_t* storage) {
    331   size_t nibbles = 6;
    332   /* ISLAST */
    333   BrotliWriteBits(1, 0, storage_ix, storage);
    334   if (len <= (1U << 16)) {
    335     nibbles = 4;
    336   } else if (len <= (1U << 20)) {
    337     nibbles = 5;
    338   }
    339   BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
    340   BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
    341   /* ISUNCOMPRESSED */
    342   BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
    343 }
    344 
    345 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    346     uint8_t* array) {
    347   while (n_bits > 0) {
    348     size_t byte_pos = pos >> 3;
    349     size_t n_unchanged_bits = pos & 7;
    350     size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    351     size_t total_bits = n_unchanged_bits + n_changed_bits;
    352     uint32_t mask =
    353         (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    354     uint32_t unchanged_bits = array[byte_pos] & mask;
    355     uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    356     array[byte_pos] =
    357         (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    358     n_bits -= n_changed_bits;
    359     bits >>= n_changed_bits;
    360     pos += n_changed_bits;
    361   }
    362 }
    363 
    364 static void RewindBitPosition(const size_t new_storage_ix,
    365                               size_t* storage_ix, uint8_t* storage) {
    366   const size_t bitpos = new_storage_ix & 7;
    367   const size_t mask = (1u << bitpos) - 1;
    368   storage[new_storage_ix >> 3] &= (uint8_t)mask;
    369   *storage_ix = new_storage_ix;
    370 }
    371 
    372 static BROTLI_BOOL ShouldMergeBlock(
    373     const uint8_t* data, size_t len, const uint8_t* depths) {
    374   size_t histo[256] = { 0 };
    375   static const size_t kSampleRate = 43;
    376   size_t i;
    377   for (i = 0; i < len; i += kSampleRate) {
    378     ++histo[data[i]];
    379   }
    380   {
    381     const size_t total = (len + kSampleRate - 1) / kSampleRate;
    382     double r = (FastLog2(total) + 0.5) * (double)total + 200;
    383     for (i = 0; i < 256; ++i) {
    384       r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    385     }
    386     return TO_BROTLI_BOOL(r >= 0.0);
    387   }
    388 }
    389 
    390 /* Acceptable loss for uncompressible speedup is 2% */
    391 #define MIN_RATIO 980
    392 
    393 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    394     const uint8_t* metablock_start, const uint8_t* next_emit,
    395     const size_t insertlen, const size_t literal_ratio) {
    396   const size_t compressed = (size_t)(next_emit - metablock_start);
    397   if (compressed * 50 > insertlen) {
    398     return BROTLI_FALSE;
    399   } else {
    400     return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
    401   }
    402 }
    403 
    404 static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
    405                                       const size_t storage_ix_start,
    406                                       size_t* storage_ix, uint8_t* storage) {
    407   const size_t len = (size_t)(end - begin);
    408   RewindBitPosition(storage_ix_start, storage_ix, storage);
    409   BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
    410   *storage_ix = (*storage_ix + 7u) & ~7u;
    411   memcpy(&storage[*storage_ix >> 3], begin, len);
    412   *storage_ix += len << 3;
    413   storage[*storage_ix >> 3] = 0;
    414 }
    415 
    416 static uint32_t kCmdHistoSeed[128] = {
    417   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
    418   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
    419   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    420   0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    421   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    422   1, 1, 1, 1, 0, 0, 0, 0,
    423 };
    424 
    425 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    426     MemoryManager* m, const uint8_t* input, size_t input_size,
    427     BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    428     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    429     size_t* storage_ix, uint8_t* storage) {
    430   uint32_t cmd_histo[128];
    431   const uint8_t* ip_end;
    432 
    433   /* "next_emit" is a pointer to the first byte that is not covered by a
    434      previous copy. Bytes between "next_emit" and the start of the next copy or
    435      the end of the input will be emitted as literal bytes. */
    436   const uint8_t* next_emit = input;
    437   /* Save the start of the first block for position and distance computations.
    438   */
    439   const uint8_t* base_ip = input;
    440 
    441   static const size_t kFirstBlockSize = 3 << 15;
    442   static const size_t kMergeBlockSize = 1 << 16;
    443 
    444   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
    445   const size_t kMinMatchLen = 5;
    446 
    447   const uint8_t* metablock_start = input;
    448   size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    449   size_t total_block_size = block_size;
    450   /* Save the bit position of the MLEN field of the meta-block header, so that
    451      we can update it later if we decide to extend this meta-block. */
    452   size_t mlen_storage_ix = *storage_ix + 3;
    453 
    454   uint8_t lit_depth[256];
    455   uint16_t lit_bits[256];
    456 
    457   size_t literal_ratio;
    458 
    459   const uint8_t* ip;
    460   int last_distance;
    461 
    462   const size_t shift = 64u - table_bits;
    463 
    464   BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    465   /* No block splits, no contexts. */
    466   BrotliWriteBits(13, 0, storage_ix, storage);
    467 
    468   literal_ratio = BuildAndStoreLiteralPrefixCode(
    469       m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    470   if (BROTLI_IS_OOM(m)) return;
    471 
    472   {
    473     /* Store the pre-compressed command and distance prefix codes. */
    474     size_t i;
    475     for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
    476       BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    477     }
    478   }
    479   BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
    480                   storage_ix, storage);
    481 
    482  emit_commands:
    483   /* Initialize the command and distance histograms. We will gather
    484      statistics of command and distance codes during the processing
    485      of this block and use it to update the command and distance
    486      prefix codes for the next block. */
    487   memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
    488 
    489   /* "ip" is the input pointer. */
    490   ip = input;
    491   last_distance = -1;
    492   ip_end = input + block_size;
    493 
    494   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    495     /* For the last block, we need to keep a 16 bytes margin so that we can be
    496        sure that all distances are at most window size - 16.
    497        For all other blocks, we only need to keep a margin of 5 bytes so that
    498        we don't go over the block size with a copy. */
    499     const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
    500                                         input_size - kInputMarginBytes);
    501     const uint8_t* ip_limit = input + len_limit;
    502 
    503     uint32_t next_hash;
    504     for (next_hash = Hash(++ip, shift); ; ) {
    505       /* Step 1: Scan forward in the input looking for a 5-byte-long match.
    506          If we get close to exhausting the input then goto emit_remainder.
    507 
    508          Heuristic match skipping: If 32 bytes are scanned with no matches
    509          found, start looking only at every other byte. If 32 more bytes are
    510          scanned, look at every third byte, etc.. When a match is found,
    511          immediately go back to looking at every byte. This is a small loss
    512          (~5% performance, ~0.1% density) for compressible data due to more
    513          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
    514          win since the compressor quickly "realizes" the data is incompressible
    515          and doesn't bother looking for matches everywhere.
    516 
    517          The "skip" variable keeps track of how many bytes there are since the
    518          last match; dividing it by 32 (i.e. right-shifting by five) gives the
    519          number of bytes to move ahead for each iteration. */
    520       uint32_t skip = 32;
    521 
    522       const uint8_t* next_ip = ip;
    523       const uint8_t* candidate;
    524       BROTLI_DCHECK(next_emit < ip);
    525 trawl:
    526       do {
    527         uint32_t hash = next_hash;
    528         uint32_t bytes_between_hash_lookups = skip++ >> 5;
    529         BROTLI_DCHECK(hash == Hash(next_ip, shift));
    530         ip = next_ip;
    531         next_ip = ip + bytes_between_hash_lookups;
    532         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
    533           goto emit_remainder;
    534         }
    535         next_hash = Hash(next_ip, shift);
    536         candidate = ip - last_distance;
    537         if (IsMatch(ip, candidate)) {
    538           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
    539             table[hash] = (int)(ip - base_ip);
    540             break;
    541           }
    542         }
    543         candidate = base_ip + table[hash];
    544         BROTLI_DCHECK(candidate >= base_ip);
    545         BROTLI_DCHECK(candidate < ip);
    546 
    547         table[hash] = (int)(ip - base_ip);
    548       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
    549 
    550       /* Check copy distance. If candidate is not feasible, continue search.
    551          Checking is done outside of hot loop to reduce overhead. */
    552       if (ip - candidate > MAX_DISTANCE) goto trawl;
    553 
    554       /* Step 2: Emit the found match together with the literal bytes from
    555          "next_emit" to the bit stream, and then see if we can find a next match
    556          immediately afterwards. Repeat until we find no match for the input
    557          without emitting some literal bytes. */
    558 
    559       {
    560         /* We have a 5-byte match at ip, and we need to emit bytes in
    561            [next_emit, ip). */
    562         const uint8_t* base = ip;
    563         size_t matched = 5 + FindMatchLengthWithLimit(
    564             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    565         int distance = (int)(base - candidate);  /* > 0 */
    566         size_t insert = (size_t)(base - next_emit);
    567         ip += matched;
    568         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    569         if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    570           EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    571                         storage_ix, storage);
    572         } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    573                                              literal_ratio)) {
    574           EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
    575                                     storage_ix, storage);
    576           input_size -= (size_t)(base - input);
    577           input = base;
    578           next_emit = input;
    579           goto next_block;
    580         } else {
    581           EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    582                             storage_ix, storage);
    583         }
    584         EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    585                      storage_ix, storage);
    586         if (distance == last_distance) {
    587           BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
    588           ++cmd_histo[64];
    589         } else {
    590           EmitDistance((size_t)distance, cmd_depth, cmd_bits,
    591                        cmd_histo, storage_ix, storage);
    592           last_distance = distance;
    593         }
    594         EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
    595                                 storage_ix, storage);
    596 
    597         next_emit = ip;
    598         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    599           goto emit_remainder;
    600         }
    601         /* We could immediately start working at ip now, but to improve
    602            compression we first update "table" with the hashes of some positions
    603            within the last copy. */
    604         {
    605           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    606           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    607           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    608           table[prev_hash] = (int)(ip - base_ip - 3);
    609           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    610           table[prev_hash] = (int)(ip - base_ip - 2);
    611           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    612           table[prev_hash] = (int)(ip - base_ip - 1);
    613 
    614           candidate = base_ip + table[cur_hash];
    615           table[cur_hash] = (int)(ip - base_ip);
    616         }
    617       }
    618 
    619       while (IsMatch(ip, candidate)) {
    620         /* We have a 5-byte match at ip, and no need to emit any literal bytes
    621            prior to ip. */
    622         const uint8_t* base = ip;
    623         size_t matched = 5 + FindMatchLengthWithLimit(
    624             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    625         if (ip - candidate > MAX_DISTANCE) break;
    626         ip += matched;
    627         last_distance = (int)(base - candidate);  /* > 0 */
    628         BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
    629         EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
    630                     storage_ix, storage);
    631         EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
    632                      cmd_histo, storage_ix, storage);
    633 
    634         next_emit = ip;
    635         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    636           goto emit_remainder;
    637         }
    638         /* We could immediately start working at ip now, but to improve
    639            compression we first update "table" with the hashes of some positions
    640            within the last copy. */
    641         {
    642           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
    643           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    644           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    645           table[prev_hash] = (int)(ip - base_ip - 3);
    646           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    647           table[prev_hash] = (int)(ip - base_ip - 2);
    648           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    649           table[prev_hash] = (int)(ip - base_ip - 1);
    650 
    651           candidate = base_ip + table[cur_hash];
    652           table[cur_hash] = (int)(ip - base_ip);
    653         }
    654       }
    655 
    656       next_hash = Hash(++ip, shift);
    657     }
    658   }
    659 
    660  emit_remainder:
    661   BROTLI_DCHECK(next_emit <= ip_end);
    662   input += block_size;
    663   input_size -= block_size;
    664   block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
    665 
    666   /* Decide if we want to continue this meta-block instead of emitting the
    667      last insert-only command. */
    668   if (input_size > 0 &&
    669       total_block_size + block_size <= (1 << 20) &&
    670       ShouldMergeBlock(input, block_size, lit_depth)) {
    671     BROTLI_DCHECK(total_block_size > (1 << 16));
    672     /* Update the size of the current meta-block and continue emitting commands.
    673        We can do this because the current size and the new size both have 5
    674        nibbles. */
    675     total_block_size += block_size;
    676     UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    677     goto emit_commands;
    678   }
    679 
    680   /* Emit the remaining bytes as literals. */
    681   if (next_emit < ip_end) {
    682     const size_t insert = (size_t)(ip_end - next_emit);
    683     if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    684       EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    685                     storage_ix, storage);
    686       EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    687     } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    688                                          literal_ratio)) {
    689       EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
    690                                 storage_ix, storage);
    691     } else {
    692       EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    693                         storage_ix, storage);
    694       EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    695                    storage_ix, storage);
    696     }
    697   }
    698   next_emit = ip_end;
    699 
    700 next_block:
    701   /* If we have more data, write a new meta-block header and prefix codes and
    702      then continue emitting commands. */
    703   if (input_size > 0) {
    704     metablock_start = input;
    705     block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    706     total_block_size = block_size;
    707     /* Save the bit position of the MLEN field of the meta-block header, so that
    708        we can update it later if we decide to extend this meta-block. */
    709     mlen_storage_ix = *storage_ix + 3;
    710     BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    711     /* No block splits, no contexts. */
    712     BrotliWriteBits(13, 0, storage_ix, storage);
    713     literal_ratio = BuildAndStoreLiteralPrefixCode(
    714         m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    715     if (BROTLI_IS_OOM(m)) return;
    716     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
    717                                    storage_ix, storage);
    718     goto emit_commands;
    719   }
    720 
    721   if (!is_last) {
    722     /* If this is not the last block, update the command and distance prefix
    723        codes for the next block and store the compressed forms. */
    724     cmd_code[0] = 0;
    725     *cmd_code_numbits = 0;
    726     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
    727                                    cmd_code_numbits, cmd_code);
    728   }
    729 }
    730 
    731 #define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
    732 
    733 #define BAKE_METHOD_PARAM_(B) \
    734 static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    735     MemoryManager* m, const uint8_t* input, size_t input_size,               \
    736     BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
    737     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
    738     size_t* storage_ix, uint8_t* storage) {                                  \
    739   BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
    740       cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
    741 }
    742 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
    743 #undef BAKE_METHOD_PARAM_
    744 
    745 void BrotliCompressFragmentFast(
    746     MemoryManager* m, const uint8_t* input, size_t input_size,
    747     BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    748     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    749     size_t* storage_ix, uint8_t* storage) {
    750   const size_t initial_storage_ix = *storage_ix;
    751   const size_t table_bits = Log2FloorNonZero(table_size);
    752 
    753   if (input_size == 0) {
    754     BROTLI_DCHECK(is_last);
    755     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    756     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    757     *storage_ix = (*storage_ix + 7u) & ~7u;
    758     return;
    759   }
    760 
    761   switch (table_bits) {
    762 #define CASE_(B)                                                     \
    763     case B:                                                          \
    764       BrotliCompressFragmentFastImpl ## B(                           \
    765           m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
    766           cmd_code_numbits, cmd_code, storage_ix, storage);          \
    767       break;
    768     FOR_TABLE_BITS_(CASE_)
    769 #undef CASE_
    770     default: BROTLI_DCHECK(0); break;
    771   }
    772 
    773   /* If output is larger than single uncompressed block, rewrite it. */
    774   if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    775     EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
    776                               storage_ix, storage);
    777   }
    778 
    779   if (is_last) {
    780     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    781     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    782     *storage_ix = (*storage_ix + 7u) & ~7u;
    783   }
    784 }
    785 
    786 #undef FOR_TABLE_BITS_
    787 
    788 #if defined(__cplusplus) || defined(c_plusplus)
    789 }  /* extern "C" */
    790 #endif
    791