Home | History | Annotate | Download | only in enc
      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3    Distributed under MIT license.
      4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Function for fast encoding of an input fragment, independently from the input
      8    history. This function uses one-pass processing: when we find a backward
      9    match, we immediately emit the corresponding command and literal codes to
     10    the bit stream.
     11 
     12    Adapted from the CompressFragment() function in
     13    https://github.com/google/snappy/blob/master/snappy.cc */
     14 
     15 #include "./compress_fragment.h"
     16 
     17 #include <string.h>  /* memcmp, memcpy, memset */
     18 
     19 #include "../common/constants.h"
     20 #include <brotli/types.h>
     21 #include "./brotli_bit_stream.h"
     22 #include "./entropy_encode.h"
     23 #include "./fast_log.h"
     24 #include "./find_match_length.h"
     25 #include "./memory.h"
     26 #include "./port.h"
     27 #include "./write_bits.h"
     28 
     29 
     30 #if defined(__cplusplus) || defined(c_plusplus)
     31 extern "C" {
     32 #endif
     33 
     34 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
     35 
     36 /* kHashMul32 multiplier has these properties:
     37    * The multiplier must be odd. Otherwise we may lose the highest bit.
     38    * No long streaks of ones or zeros.
     39    * There is no effort to ensure that it is a prime, the oddity is enough
     40      for this use.
     41    * The number has been tuned heuristically against compression benchmarks. */
     42 static const uint32_t kHashMul32 = 0x1e35a7bd;
     43 
     44 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
     45   const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
     46   return (uint32_t)(h >> shift);
     47 }
     48 
     49 static BROTLI_INLINE uint32_t HashBytesAtOffset(
     50     uint64_t v, int offset, size_t shift) {
     51   assert(offset >= 0);
     52   assert(offset <= 3);
     53   {
     54     const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
     55     return (uint32_t)(h >> shift);
     56   }
     57 }
     58 
     59 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
     60   return TO_BROTLI_BOOL(
     61       BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
     62       p1[4] == p2[4]);
     63 }
     64 
     65 /* Builds a literal prefix code into "depths" and "bits" based on the statistics
     66    of the "input" string and stores it into the bit stream.
     67    Note that the prefix code here is built from the pre-LZ77 input, therefore
     68    we can only approximate the statistics of the actual literal stream.
     69    Moreover, for long inputs we build a histogram from a sample of the input
     70    and thus have to assign a non-zero depth for each literal.
     71    Returns estimated compression ratio millibytes/char for encoding given input
     72    with generated code. */
     73 static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
     74                                              const uint8_t* input,
     75                                              const size_t input_size,
     76                                              uint8_t depths[256],
     77                                              uint16_t bits[256],
     78                                              size_t* storage_ix,
     79                                              uint8_t* storage) {
     80   uint32_t histogram[256] = { 0 };
     81   size_t histogram_total;
     82   size_t i;
     83   if (input_size < (1 << 15)) {
     84     for (i = 0; i < input_size; ++i) {
     85       ++histogram[input[i]];
     86     }
     87     histogram_total = input_size;
     88     for (i = 0; i < 256; ++i) {
     89       /* We weigh the first 11 samples with weight 3 to account for the
     90          balancing effect of the LZ77 phase on the histogram. */
     91       const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
     92       histogram[i] += adjust;
     93       histogram_total += adjust;
     94     }
     95   } else {
     96     static const size_t kSampleRate = 29;
     97     for (i = 0; i < input_size; i += kSampleRate) {
     98       ++histogram[input[i]];
     99     }
    100     histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    101     for (i = 0; i < 256; ++i) {
    102       /* We add 1 to each population count to avoid 0 bit depths (since this is
    103          only a sample and we don't know if the symbol appears or not), and we
    104          weigh the first 11 samples with weight 3 to account for the balancing
    105          effect of the LZ77 phase on the histogram (more frequent symbols are
    106          more likely to be in backward references instead as literals). */
    107       const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
    108       histogram[i] += adjust;
    109       histogram_total += adjust;
    110     }
    111   }
    112   BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
    113                                      /* max_bits = */ 8,
    114                                      depths, bits, storage_ix, storage);
    115   if (BROTLI_IS_OOM(m)) return 0;
    116   {
    117     size_t literal_ratio = 0;
    118     for (i = 0; i < 256; ++i) {
    119       if (histogram[i]) literal_ratio += histogram[i] * depths[i];
    120     }
    121     /* Estimated encoding ratio, millibytes per symbol. */
    122     return (literal_ratio * 125) / histogram_total;
    123   }
    124 }
    125 
    126 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
    127    "bits" based on "histogram" and stores it into the bit stream. */
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream.
   histogram[0..63] covers the (locally reordered) command symbols,
   histogram[64..127] the distance symbols. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
    uint8_t* storage) {
  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
  HuffmanTree tree[129];
  uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
  uint16_t cmd_bits[64];

  /* NOTE(review): the third arguments (15, 14) are presumably the maximum
     code lengths for the two alphabets — confirm in entropy_encode.h. */
  BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
  BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
  /* We have to jump through a few hoops here in order to compute
     the command bits because the symbols are in a different order than in
     the full alphabet. This looks complicated, but having the symbols
     in this order in the command bits saves a few branches in the Emit*
     functions. */
  /* Permute depths from the local (reordered) symbol order into the
     standard command-alphabet order; memcpy sizes are in bytes
     (uint8_t entries). */
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  /* Permute the canonical codes back into the local order; sizes are in
     bytes, so 16 bytes = 8 uint16_t codes. */
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    /* Create the bit length array for the full command alphabet. */
    size_t i;
    memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    /* NOTE(review): the strided positions presumably spread the 8-entry
       groups across the full 704-symbol insert-and-copy alphabet — verify
       against the command-code layout in the format spec. */
    for (i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    BrotliStoreHuffmanTree(
        cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
  }
  BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
    176 
/* Emits the insert-length part of a command to the bit stream, and counts
   the emitted code in "histo" (statistics used to rebuild the command
   prefix code for the next block). Code indices (40, 42.., 50.., 61) are
   in the locally reordered 64-symbol command alphabet produced by
   BuildAndStoreCommandPrefixCode.
   REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
                                        const uint8_t depth[128],
                                        const uint16_t bits[128],
                                        uint32_t histo[128],
                                        size_t* storage_ix,
                                        uint8_t* storage) {
  if (insertlen < 6) {
    /* Shortest inserts: one code each, no extra bits. */
    const size_t code = insertlen + 40;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    /* Two codes per power-of-two range, "nbits" extra bits each. */
    const size_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    /* One code per power-of-two range. */
    const size_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 50;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
    BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
    /* NOTE(review): code 61 is emitted but histo[21] is incremented,
       whereas the branches above update histo at the emitted code index.
       Verify against upstream brotli that this asymmetry is intentional
       (it only affects the statistics used for the next block's code). */
    ++histo[21];
  }
}
    209 
/* Emits an insert length of 6210 or more (codes 62 and 63, with 14 or 24
   extra bits). Counterpart of EmitInsertLen for the long range. */
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  if (insertlen < 22594) {
    BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    /* NOTE(review): histo index (22) differs from the emitted code index
       (62) — same pattern as in EmitInsertLen's last branch; confirm
       against upstream. */
    ++histo[22];
  } else {
    BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[23];
  }
}
    226 
/* Emits the copy-length part of a command (with an explicit distance code
   to follow) and counts the emitted code in "histo". Code indices are in
   the locally reordered command alphabet. */
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    /* Short copies: one code each, no extra bits. */
    BrotliWriteBits(
        depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    /* Two codes per power-of-two range, "nbits" extra bits each. */
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    /* One code per power-of-two range. */
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    /* NOTE(review): code 39 is emitted but histo[47] is incremented —
       confirm against upstream that this is intentional. */
    ++histo[47];
  }
}
    258 
/* Emits the copy-length part of a command that reuses the previous
   distance. The first two branches (copylen < 72) encode the length with
   codes that need no separate distance code; the remaining branches
   additionally emit code 64 (the "distance = last distance" symbol). */
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    /* One code each, no extra bits. */
    BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    /* Two codes per power-of-two range, "nbits" extra bits each. */
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    /* Fixed 5 extra bits; code 64 carries the last-distance marker. */
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    /* One code per power-of-two range, plus the last-distance code 64. */
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    /* NOTE(review): code 39 emitted but histo[47] incremented — same
       pattern as EmitCopyLen's last branch; confirm against upstream. */
    ++histo[47];
    ++histo[64];
  }
}
    301 
/* Emits a distance code for "distance" and counts it in "histo".
   d = distance + 3 gives two prefix codes per power-of-two range
   ("prefix" selects between them), "nbits" extra bits, and a code index
   offset of 80 in the combined 128-entry depth/bits arrays. */
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t d = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(d) - 1u;
  const size_t prefix = (d >> nbits) & 1;
  /* Base value of the range selected by (nbits, prefix). */
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  BrotliWriteBits(nbits, d - offset, storage_ix, storage);
  ++histo[distcode];
}
    316 
    317 static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
    318                                        const uint8_t depth[256],
    319                                        const uint16_t bits[256],
    320                                        size_t* storage_ix, uint8_t* storage) {
    321   size_t j;
    322   for (j = 0; j < len; j++) {
    323     const uint8_t lit = input[j];
    324     BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
    325   }
    326 }
    327 
/* Writes a non-last meta-block header: ISLAST=0, MNIBBLES, MLEN=len-1,
   then the ISUNCOMPRESSED flag.
   REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
    size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    uint8_t* storage) {
  size_t nibbles = 6;
  /* ISLAST */
  BrotliWriteBits(1, 0, storage_ix, storage);
  /* MNIBBLES: smallest of 4/5/6 nibbles that can hold len - 1. */
  if (len <= (1U << 16)) {
    nibbles = 4;
  } else if (len <= (1U << 20)) {
    nibbles = 5;
  }
  BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
  BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
  /* ISUNCOMPRESSED */
  BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
    345 
    346 static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    347     uint8_t *array) {
    348   while (n_bits > 0) {
    349     size_t byte_pos = pos >> 3;
    350     size_t n_unchanged_bits = pos & 7;
    351     size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
    352     size_t total_bits = n_unchanged_bits + n_changed_bits;
    353     uint32_t mask =
    354         (~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
    355     uint32_t unchanged_bits = array[byte_pos] & mask;
    356     uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
    357     array[byte_pos] =
    358         (uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
    359     n_bits -= n_changed_bits;
    360     bits >>= n_changed_bits;
    361     pos += n_changed_bits;
    362   }
    363 }
    364 
    365 static void RewindBitPosition(const size_t new_storage_ix,
    366                               size_t* storage_ix, uint8_t* storage) {
    367   const size_t bitpos = new_storage_ix & 7;
    368   const size_t mask = (1u << bitpos) - 1;
    369   storage[new_storage_ix >> 3] &= (uint8_t)mask;
    370   *storage_ix = new_storage_ix;
    371 }
    372 
    373 static BROTLI_BOOL ShouldMergeBlock(
    374     const uint8_t* data, size_t len, const uint8_t* depths) {
    375   size_t histo[256] = { 0 };
    376   static const size_t kSampleRate = 43;
    377   size_t i;
    378   for (i = 0; i < len; i += kSampleRate) {
    379     ++histo[data[i]];
    380   }
    381   {
    382     const size_t total = (len + kSampleRate - 1) / kSampleRate;
    383     double r = (FastLog2(total) + 0.5) * (double)total + 200;
    384     for (i = 0; i < 256; ++i) {
    385       r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
    386     }
    387     return TO_BROTLI_BOOL(r >= 0.0);
    388   }
    389 }
    390 
    391 /* Acceptable loss for uncompressible speedup is 2% */
    392 #define MIN_RATIO 980
    393 
    394 static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
    395     const uint8_t* metablock_start, const uint8_t* next_emit,
    396     const size_t insertlen, const size_t literal_ratio) {
    397   const size_t compressed = (size_t)(next_emit - metablock_start);
    398   if (compressed * 50 > insertlen) {
    399     return BROTLI_FALSE;
    400   } else {
    401     return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
    402   }
    403 }
    404 
/* Replaces everything emitted since bit position "storage_ix_start" with a
   single uncompressed meta-block holding the bytes [begin, end). */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  /* Discard the partially-emitted compressed data first. */
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Uncompressed payload must be byte-aligned. */
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  /* Zero the byte after the payload — NOTE(review): presumably because
     BrotliWriteBits ORs bits into the current byte; confirm in
     write_bits.h. */
  storage[*storage_ix >> 3] = 0;
}
    416 
/* Seed histogram used to (re)initialize cmd_histo for each block: nearly
   flat, with zeros at a handful of symbol indices — presumably symbols
   this one-pass emitter never produces, so they get no code space (verify
   against the Emit* functions).
   NOTE(review): nothing in the visible code writes to this array (it is
   only memcpy'd from), so it could likely be "static const" — confirm
   against the rest of the file before adding the qualifier. */
static uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
    425 
    426 static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
    427     MemoryManager* m, const uint8_t* input, size_t input_size,
    428     BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
    429     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    430     size_t* storage_ix, uint8_t* storage) {
    431   uint32_t cmd_histo[128];
    432   const uint8_t* ip_end;
    433 
    434   /* "next_emit" is a pointer to the first byte that is not covered by a
    435      previous copy. Bytes between "next_emit" and the start of the next copy or
    436      the end of the input will be emitted as literal bytes. */
    437   const uint8_t* next_emit = input;
    438   /* Save the start of the first block for position and distance computations.
    439   */
    440   const uint8_t* base_ip = input;
    441 
    442   static const size_t kFirstBlockSize = 3 << 15;
    443   static const size_t kMergeBlockSize = 1 << 16;
    444 
    445   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
    446   const size_t kMinMatchLen = 5;
    447 
    448   const uint8_t* metablock_start = input;
    449   size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    450   size_t total_block_size = block_size;
    451   /* Save the bit position of the MLEN field of the meta-block header, so that
    452      we can update it later if we decide to extend this meta-block. */
    453   size_t mlen_storage_ix = *storage_ix + 3;
    454 
    455   uint8_t lit_depth[256];
    456   uint16_t lit_bits[256];
    457 
    458   size_t literal_ratio;
    459 
    460   const uint8_t* ip;
    461   int last_distance;
    462 
    463   const size_t shift = 64u - table_bits;
    464 
    465   BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    466   /* No block splits, no contexts. */
    467   BrotliWriteBits(13, 0, storage_ix, storage);
    468 
    469   literal_ratio = BuildAndStoreLiteralPrefixCode(
    470       m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    471   if (BROTLI_IS_OOM(m)) return;
    472 
    473   {
    474     /* Store the pre-compressed command and distance prefix codes. */
    475     size_t i;
    476     for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
    477       BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
    478     }
    479   }
    480   BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
    481                   storage_ix, storage);
    482 
    483  emit_commands:
    484   /* Initialize the command and distance histograms. We will gather
    485      statistics of command and distance codes during the processing
    486      of this block and use it to update the command and distance
    487      prefix codes for the next block. */
    488   memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
    489 
    490   /* "ip" is the input pointer. */
    491   ip = input;
    492   last_distance = -1;
    493   ip_end = input + block_size;
    494 
    495   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    496     /* For the last block, we need to keep a 16 bytes margin so that we can be
    497        sure that all distances are at most window size - 16.
    498        For all other blocks, we only need to keep a margin of 5 bytes so that
    499        we don't go over the block size with a copy. */
    500     const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
    501                                         input_size - kInputMarginBytes);
    502     const uint8_t* ip_limit = input + len_limit;
    503 
    504     uint32_t next_hash;
    505     for (next_hash = Hash(++ip, shift); ; ) {
    506       /* Step 1: Scan forward in the input looking for a 5-byte-long match.
    507          If we get close to exhausting the input then goto emit_remainder.
    508 
    509          Heuristic match skipping: If 32 bytes are scanned with no matches
    510          found, start looking only at every other byte. If 32 more bytes are
    511          scanned, look at every third byte, etc.. When a match is found,
    512          immediately go back to looking at every byte. This is a small loss
    513          (~5% performance, ~0.1% density) for compressible data due to more
    514          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
    515          win since the compressor quickly "realizes" the data is incompressible
    516          and doesn't bother looking for matches everywhere.
    517 
    518          The "skip" variable keeps track of how many bytes there are since the
    519          last match; dividing it by 32 (i.e. right-shifting by five) gives the
    520          number of bytes to move ahead for each iteration. */
    521       uint32_t skip = 32;
    522 
    523       const uint8_t* next_ip = ip;
    524       const uint8_t* candidate;
    525       assert(next_emit < ip);
    526 trawl:
    527       do {
    528         uint32_t hash = next_hash;
    529         uint32_t bytes_between_hash_lookups = skip++ >> 5;
    530         assert(hash == Hash(next_ip, shift));
    531         ip = next_ip;
    532         next_ip = ip + bytes_between_hash_lookups;
    533         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
    534           goto emit_remainder;
    535         }
    536         next_hash = Hash(next_ip, shift);
    537         candidate = ip - last_distance;
    538         if (IsMatch(ip, candidate)) {
    539           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
    540             table[hash] = (int)(ip - base_ip);
    541             break;
    542           }
    543         }
    544         candidate = base_ip + table[hash];
    545         assert(candidate >= base_ip);
    546         assert(candidate < ip);
    547 
    548         table[hash] = (int)(ip - base_ip);
    549       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
    550 
    551       /* Check copy distance. If candidate is not feasible, continue search.
    552          Checking is done outside of hot loop to reduce overhead. */
    553       if (ip - candidate > MAX_DISTANCE) goto trawl;
    554 
    555       /* Step 2: Emit the found match together with the literal bytes from
    556          "next_emit" to the bit stream, and then see if we can find a next match
    557          immediately afterwards. Repeat until we find no match for the input
    558          without emitting some literal bytes. */
    559 
    560       {
    561         /* We have a 5-byte match at ip, and we need to emit bytes in
    562            [next_emit, ip). */
    563         const uint8_t* base = ip;
    564         size_t matched = 5 + FindMatchLengthWithLimit(
    565             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    566         int distance = (int)(base - candidate);  /* > 0 */
    567         size_t insert = (size_t)(base - next_emit);
    568         ip += matched;
    569         assert(0 == memcmp(base, candidate, matched));
    570         if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    571           EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    572                         storage_ix, storage);
    573         } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    574                                              literal_ratio)) {
    575           EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
    576                                     storage_ix, storage);
    577           input_size -= (size_t)(base - input);
    578           input = base;
    579           next_emit = input;
    580           goto next_block;
    581         } else {
    582           EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    583                             storage_ix, storage);
    584         }
    585         EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    586                      storage_ix, storage);
    587         if (distance == last_distance) {
    588           BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
    589           ++cmd_histo[64];
    590         } else {
    591           EmitDistance((size_t)distance, cmd_depth, cmd_bits,
    592                        cmd_histo, storage_ix, storage);
    593           last_distance = distance;
    594         }
    595         EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
    596                                 storage_ix, storage);
    597 
    598         next_emit = ip;
    599         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    600           goto emit_remainder;
    601         }
    602         /* We could immediately start working at ip now, but to improve
    603            compression we first update "table" with the hashes of some positions
    604            within the last copy. */
    605         {
    606           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
    607           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    608           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    609           table[prev_hash] = (int)(ip - base_ip - 3);
    610           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    611           table[prev_hash] = (int)(ip - base_ip - 2);
    612           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    613           table[prev_hash] = (int)(ip - base_ip - 1);
    614 
    615           candidate = base_ip + table[cur_hash];
    616           table[cur_hash] = (int)(ip - base_ip);
    617         }
    618       }
    619 
    620       while (IsMatch(ip, candidate)) {
    621         /* We have a 5-byte match at ip, and no need to emit any literal bytes
    622            prior to ip. */
    623         const uint8_t* base = ip;
    624         size_t matched = 5 + FindMatchLengthWithLimit(
    625             candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
    626         if (ip - candidate > MAX_DISTANCE) break;
    627         ip += matched;
    628         last_distance = (int)(base - candidate);  /* > 0 */
    629         assert(0 == memcmp(base, candidate, matched));
    630         EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
    631                     storage_ix, storage);
    632         EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
    633                      cmd_histo, storage_ix, storage);
    634 
    635         next_emit = ip;
    636         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    637           goto emit_remainder;
    638         }
    639         /* We could immediately start working at ip now, but to improve
    640            compression we first update "table" with the hashes of some positions
    641            within the last copy. */
    642         {
    643           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
    644           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    645           uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
    646           table[prev_hash] = (int)(ip - base_ip - 3);
    647           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    648           table[prev_hash] = (int)(ip - base_ip - 2);
    649           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    650           table[prev_hash] = (int)(ip - base_ip - 1);
    651 
    652           candidate = base_ip + table[cur_hash];
    653           table[cur_hash] = (int)(ip - base_ip);
    654         }
    655       }
    656 
    657       next_hash = Hash(++ip, shift);
    658     }
    659   }
    660 
    661  emit_remainder:
    662   assert(next_emit <= ip_end);
    663   input += block_size;
    664   input_size -= block_size;
    665   block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
    666 
    667   /* Decide if we want to continue this meta-block instead of emitting the
    668      last insert-only command. */
    669   if (input_size > 0 &&
    670       total_block_size + block_size <= (1 << 20) &&
    671       ShouldMergeBlock(input, block_size, lit_depth)) {
    672     assert(total_block_size > (1 << 16));
    673     /* Update the size of the current meta-block and continue emitting commands.
    674        We can do this because the current size and the new size both have 5
    675        nibbles. */
    676     total_block_size += block_size;
    677     UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
    678     goto emit_commands;
    679   }
    680 
    681   /* Emit the remaining bytes as literals. */
    682   if (next_emit < ip_end) {
    683     const size_t insert = (size_t)(ip_end - next_emit);
    684     if (BROTLI_PREDICT_TRUE(insert < 6210)) {
    685       EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    686                     storage_ix, storage);
    687       EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    688     } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
    689                                          literal_ratio)) {
    690       EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
    691                                 storage_ix, storage);
    692     } else {
    693       EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
    694                         storage_ix, storage);
    695       EmitLiterals(next_emit, insert, lit_depth, lit_bits,
    696                    storage_ix, storage);
    697     }
    698   }
    699   next_emit = ip_end;
    700 
    701 next_block:
    702   /* If we have more data, write a new meta-block header and prefix codes and
    703      then continue emitting commands. */
    704   if (input_size > 0) {
    705     metablock_start = input;
    706     block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
    707     total_block_size = block_size;
    708     /* Save the bit position of the MLEN field of the meta-block header, so that
    709        we can update it later if we decide to extend this meta-block. */
    710     mlen_storage_ix = *storage_ix + 3;
    711     BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    712     /* No block splits, no contexts. */
    713     BrotliWriteBits(13, 0, storage_ix, storage);
    714     literal_ratio = BuildAndStoreLiteralPrefixCode(
    715         m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
    716     if (BROTLI_IS_OOM(m)) return;
    717     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
    718                                    storage_ix, storage);
    719     goto emit_commands;
    720   }
    721 
    722   if (!is_last) {
    723     /* If this is not the last block, update the command and distance prefix
    724        codes for the next block and store the compressed forms. */
    725     cmd_code[0] = 0;
    726     *cmd_code_numbits = 0;
    727     BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
    728                                    cmd_code_numbits, cmd_code);
    729   }
    730 }
    731 
/* The hash-table size exponents for which a specialized compressor is baked.
   Applying FOR_TABLE_BITS_(X) expands X(B) once per supported exponent B. */
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)

/* Stamps out BrotliCompressFragmentFastImpl<B>: a wrapper that forwards to
   the generic implementation with table_bits passed as the compile-time
   literal B, letting the compiler specialize the code for each table size.
   BROTLI_NOINLINE keeps the four instantiations as separate functions. */
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B(             \
    MemoryManager* m, const uint8_t* input, size_t input_size,               \
    BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128],                 \
    uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,     \
    size_t* storage_ix, uint8_t* storage) {                                  \
  BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B,    \
      cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
    745 
    746 void BrotliCompressFragmentFast(
    747     MemoryManager* m, const uint8_t* input, size_t input_size,
    748     BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
    749     uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
    750     size_t* storage_ix, uint8_t* storage) {
    751   const size_t initial_storage_ix = *storage_ix;
    752   const size_t table_bits = Log2FloorNonZero(table_size);
    753 
    754   if (input_size == 0) {
    755     assert(is_last);
    756     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    757     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    758     *storage_ix = (*storage_ix + 7u) & ~7u;
    759     return;
    760   }
    761 
    762   switch (table_bits) {
    763 #define CASE_(B)                                                     \
    764     case B:                                                          \
    765       BrotliCompressFragmentFastImpl ## B(                           \
    766           m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
    767           cmd_code_numbits, cmd_code, storage_ix, storage);          \
    768       break;
    769     FOR_TABLE_BITS_(CASE_)
    770 #undef CASE_
    771     default: assert(0); break;
    772   }
    773 
    774   /* If output is larger than single uncompressed block, rewrite it. */
    775   if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    776     EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
    777                               storage_ix, storage);
    778   }
    779 
    780   if (is_last) {
    781     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    782     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    783     *storage_ix = (*storage_ix + 7u) & ~7u;
    784   }
    785 }
    786 
    787 #undef FOR_TABLE_BITS_
    788 
    789 #if defined(__cplusplus) || defined(c_plusplus)
    790 }  /* extern "C" */
    791 #endif
    792