Home | History | Annotate | Download | only in enc
      1 /* Copyright 2015 Google Inc. All Rights Reserved.
      2 
      3    Distributed under MIT license.
      4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      5 */
      6 
      7 /* Function for fast encoding of an input fragment, independently from the input
      8    history. This function uses two-pass processing: in the first pass we save
      9    the found backward matches and literal bytes into a buffer, and in the
     10    second pass we emit them into the bit stream using prefix codes built based
     11    on the actual command and literal byte histograms. */
     12 
     13 #include "./compress_fragment_two_pass.h"
     14 
     15 #include <string.h>  /* memcmp, memcpy, memset */
     16 
     17 #include "../common/constants.h"
     18 #include <brotli/types.h>
     19 #include "./bit_cost.h"
     20 #include "./brotli_bit_stream.h"
     21 #include "./entropy_encode.h"
     22 #include "./fast_log.h"
     23 #include "./find_match_length.h"
     24 #include "./memory.h"
     25 #include "./port.h"
     26 #include "./write_bits.h"
     27 
     28 
     29 #if defined(__cplusplus) || defined(c_plusplus)
     30 extern "C" {
     31 #endif
     32 
     33 #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
     34 
     35 /* kHashMul32 multiplier has these properties:
     36    * The multiplier must be odd. Otherwise we may lose the highest bit.
     37    * No long streaks of ones or zeros.
     38    * There is no effort to ensure that it is a prime, the oddity is enough
     39      for this use.
     40    * The number has been tuned heuristically against compression benchmarks. */
     41 static const uint32_t kHashMul32 = 0x1e35a7bd;
     42 
     43 static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
     44   const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
     45   return (uint32_t)(h >> shift);
     46 }
     47 
     48 static BROTLI_INLINE uint32_t HashBytesAtOffset(
     49     uint64_t v, int offset, size_t shift) {
     50   assert(offset >= 0);
     51   assert(offset <= 2);
     52   {
     53     const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
     54     return (uint32_t)(h >> shift);
     55   }
     56 }
     57 
     58 static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
     59   return TO_BROTLI_BOOL(
     60       BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
     61       p1[4] == p2[4] &&
     62       p1[5] == p2[5]);
     63 }
     64 
     65 /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
     66    "bits" based on "histogram" and stores it into the bit stream. */
     67 static void BuildAndStoreCommandPrefixCode(
     68     const uint32_t histogram[128],
     69     uint8_t depth[128], uint16_t bits[128],
     70     size_t* storage_ix, uint8_t* storage) {
     71   /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
     72   HuffmanTree tree[129];
     73   uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
     74   uint16_t cmd_bits[64];
     75   BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
     76   BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
     77   /* We have to jump through a few hoops here in order to compute
     78      the command bits because the symbols are in a different order than in
     79      the full alphabet. This looks complicated, but having the symbols
     80      in this order in the command bits saves a few branches in the Emit*
     81      functions. */
     82   memcpy(cmd_depth, depth + 24, 24);
     83   memcpy(cmd_depth + 24, depth, 8);
     84   memcpy(cmd_depth + 32, depth + 48, 8);
     85   memcpy(cmd_depth + 40, depth + 8, 8);
     86   memcpy(cmd_depth + 48, depth + 56, 8);
     87   memcpy(cmd_depth + 56, depth + 16, 8);
     88   BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
     89   memcpy(bits, cmd_bits + 24, 16);
     90   memcpy(bits + 8, cmd_bits + 40, 16);
     91   memcpy(bits + 16, cmd_bits + 56, 16);
     92   memcpy(bits + 24, cmd_bits, 48);
     93   memcpy(bits + 48, cmd_bits + 32, 16);
     94   memcpy(bits + 56, cmd_bits + 48, 16);
     95   BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
     96   {
     97     /* Create the bit length array for the full command alphabet. */
     98     size_t i;
     99     memset(cmd_depth, 0, 64);  /* only 64 first values were used */
    100     memcpy(cmd_depth, depth + 24, 8);
    101     memcpy(cmd_depth + 64, depth + 32, 8);
    102     memcpy(cmd_depth + 128, depth + 40, 8);
    103     memcpy(cmd_depth + 192, depth + 48, 8);
    104     memcpy(cmd_depth + 384, depth + 56, 8);
    105     for (i = 0; i < 8; ++i) {
    106       cmd_depth[128 + 8 * i] = depth[i];
    107       cmd_depth[256 + 8 * i] = depth[8 + i];
    108       cmd_depth[448 + 8 * i] = depth[16 + i];
    109     }
    110     BrotliStoreHuffmanTree(
    111         cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
    112   }
    113   BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
    114 }
    115 
    116 static BROTLI_INLINE void EmitInsertLen(
    117     uint32_t insertlen, uint32_t** commands) {
    118   if (insertlen < 6) {
    119     **commands = insertlen;
    120   } else if (insertlen < 130) {
    121     const uint32_t tail = insertlen - 2;
    122     const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    123     const uint32_t prefix = tail >> nbits;
    124     const uint32_t inscode = (nbits << 1) + prefix + 2;
    125     const uint32_t extra = tail - (prefix << nbits);
    126     **commands = inscode | (extra << 8);
    127   } else if (insertlen < 2114) {
    128     const uint32_t tail = insertlen - 66;
    129     const uint32_t nbits = Log2FloorNonZero(tail);
    130     const uint32_t code = nbits + 10;
    131     const uint32_t extra = tail - (1u << nbits);
    132     **commands = code | (extra << 8);
    133   } else if (insertlen < 6210) {
    134     const uint32_t extra = insertlen - 2114;
    135     **commands = 21 | (extra << 8);
    136   } else if (insertlen < 22594) {
    137     const uint32_t extra = insertlen - 6210;
    138     **commands = 22 | (extra << 8);
    139   } else {
    140     const uint32_t extra = insertlen - 22594;
    141     **commands = 23 | (extra << 8);
    142   }
    143   ++(*commands);
    144 }
    145 
    146 static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
    147   if (copylen < 10) {
    148     **commands = (uint32_t)(copylen + 38);
    149   } else if (copylen < 134) {
    150     const size_t tail = copylen - 6;
    151     const size_t nbits = Log2FloorNonZero(tail) - 1;
    152     const size_t prefix = tail >> nbits;
    153     const size_t code = (nbits << 1) + prefix + 44;
    154     const size_t extra = tail - (prefix << nbits);
    155     **commands = (uint32_t)(code | (extra << 8));
    156   } else if (copylen < 2118) {
    157     const size_t tail = copylen - 70;
    158     const size_t nbits = Log2FloorNonZero(tail);
    159     const size_t code = nbits + 52;
    160     const size_t extra = tail - ((size_t)1 << nbits);
    161     **commands = (uint32_t)(code | (extra << 8));
    162   } else {
    163     const size_t extra = copylen - 2118;
    164     **commands = (uint32_t)(63 | (extra << 8));
    165   }
    166   ++(*commands);
    167 }
    168 
    169 static BROTLI_INLINE void EmitCopyLenLastDistance(
    170     size_t copylen, uint32_t** commands) {
    171   if (copylen < 12) {
    172     **commands = (uint32_t)(copylen + 20);
    173     ++(*commands);
    174   } else if (copylen < 72) {
    175     const size_t tail = copylen - 8;
    176     const size_t nbits = Log2FloorNonZero(tail) - 1;
    177     const size_t prefix = tail >> nbits;
    178     const size_t code = (nbits << 1) + prefix + 28;
    179     const size_t extra = tail - (prefix << nbits);
    180     **commands = (uint32_t)(code | (extra << 8));
    181     ++(*commands);
    182   } else if (copylen < 136) {
    183     const size_t tail = copylen - 8;
    184     const size_t code = (tail >> 5) + 54;
    185     const size_t extra = tail & 31;
    186     **commands = (uint32_t)(code | (extra << 8));
    187     ++(*commands);
    188     **commands = 64;
    189     ++(*commands);
    190   } else if (copylen < 2120) {
    191     const size_t tail = copylen - 72;
    192     const size_t nbits = Log2FloorNonZero(tail);
    193     const size_t code = nbits + 52;
    194     const size_t extra = tail - ((size_t)1 << nbits);
    195     **commands = (uint32_t)(code | (extra << 8));
    196     ++(*commands);
    197     **commands = 64;
    198     ++(*commands);
    199   } else {
    200     const size_t extra = copylen - 2120;
    201     **commands = (uint32_t)(63 | (extra << 8));
    202     ++(*commands);
    203     **commands = 64;
    204     ++(*commands);
    205   }
    206 }
    207 
    208 static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
    209   uint32_t d = distance + 3;
    210   uint32_t nbits = Log2FloorNonZero(d) - 1;
    211   const uint32_t prefix = (d >> nbits) & 1;
    212   const uint32_t offset = (2 + prefix) << nbits;
    213   const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
    214   uint32_t extra = d - offset;
    215   **commands = distcode | (extra << 8);
    216   ++(*commands);
    217 }
    218 
    219 /* REQUIRES: len <= 1 << 24. */
    220 static void BrotliStoreMetaBlockHeader(
    221     size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
    222     uint8_t* storage) {
    223   size_t nibbles = 6;
    224   /* ISLAST */
    225   BrotliWriteBits(1, 0, storage_ix, storage);
    226   if (len <= (1U << 16)) {
    227     nibbles = 4;
    228   } else if (len <= (1U << 20)) {
    229     nibbles = 5;
    230   }
    231   BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
    232   BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
    233   /* ISUNCOMPRESSED */
    234   BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
    235 }
    236 
    237 static BROTLI_INLINE void CreateCommands(const uint8_t* input,
    238     size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
    239     size_t table_bits, uint8_t** literals, uint32_t** commands) {
    240   /* "ip" is the input pointer. */
    241   const uint8_t* ip = input;
    242   const size_t shift = 64u - table_bits;
    243   const uint8_t* ip_end = input + block_size;
    244   /* "next_emit" is a pointer to the first byte that is not covered by a
    245      previous copy. Bytes between "next_emit" and the start of the next copy or
    246      the end of the input will be emitted as literal bytes. */
    247   const uint8_t* next_emit = input;
    248 
    249   int last_distance = -1;
    250   const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
    251   const size_t kMinMatchLen = 6;
    252 
    253   if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    254     /* For the last block, we need to keep a 16 bytes margin so that we can be
    255        sure that all distances are at most window size - 16.
    256        For all other blocks, we only need to keep a margin of 5 bytes so that
    257        we don't go over the block size with a copy. */
    258     const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
    259                                         input_size - kInputMarginBytes);
    260     const uint8_t* ip_limit = input + len_limit;
    261 
    262     uint32_t next_hash;
    263     for (next_hash = Hash(++ip, shift); ; ) {
    264       /* Step 1: Scan forward in the input looking for a 6-byte-long match.
    265          If we get close to exhausting the input then goto emit_remainder.
    266 
    267          Heuristic match skipping: If 32 bytes are scanned with no matches
    268          found, start looking only at every other byte. If 32 more bytes are
    269          scanned, look at every third byte, etc.. When a match is found,
    270          immediately go back to looking at every byte. This is a small loss
    271          (~5% performance, ~0.1% density) for compressible data due to more
    272          bookkeeping, but for non-compressible data (such as JPEG) it's a huge
    273          win since the compressor quickly "realizes" the data is incompressible
    274          and doesn't bother looking for matches everywhere.
    275 
    276          The "skip" variable keeps track of how many bytes there are since the
    277          last match; dividing it by 32 (ie. right-shifting by five) gives the
    278          number of bytes to move ahead for each iteration. */
    279       uint32_t skip = 32;
    280 
    281       const uint8_t* next_ip = ip;
    282       const uint8_t* candidate;
    283 
    284       assert(next_emit < ip);
    285 trawl:
    286       do {
    287         uint32_t hash = next_hash;
    288         uint32_t bytes_between_hash_lookups = skip++ >> 5;
    289         ip = next_ip;
    290         assert(hash == Hash(ip, shift));
    291         next_ip = ip + bytes_between_hash_lookups;
    292         if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
    293           goto emit_remainder;
    294         }
    295         next_hash = Hash(next_ip, shift);
    296         candidate = ip - last_distance;
    297         if (IsMatch(ip, candidate)) {
    298           if (BROTLI_PREDICT_TRUE(candidate < ip)) {
    299             table[hash] = (int)(ip - base_ip);
    300             break;
    301           }
    302         }
    303         candidate = base_ip + table[hash];
    304         assert(candidate >= base_ip);
    305         assert(candidate < ip);
    306 
    307         table[hash] = (int)(ip - base_ip);
    308       } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
    309 
    310       /* Check copy distance. If candidate is not feasible, continue search.
    311          Checking is done outside of hot loop to reduce overhead. */
    312       if (ip - candidate > MAX_DISTANCE) goto trawl;
    313 
    314       /* Step 2: Emit the found match together with the literal bytes from
    315          "next_emit", and then see if we can find a next match immediately
    316          afterwards. Repeat until we find no match for the input
    317          without emitting some literal bytes. */
    318 
    319       {
    320         /* We have a 6-byte match at ip, and we need to emit bytes in
    321            [next_emit, ip). */
    322         const uint8_t* base = ip;
    323         size_t matched = 6 + FindMatchLengthWithLimit(
    324             candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
    325         int distance = (int)(base - candidate);  /* > 0 */
    326         int insert = (int)(base - next_emit);
    327         ip += matched;
    328         assert(0 == memcmp(base, candidate, matched));
    329         EmitInsertLen((uint32_t)insert, commands);
    330         memcpy(*literals, next_emit, (size_t)insert);
    331         *literals += insert;
    332         if (distance == last_distance) {
    333           **commands = 64;
    334           ++(*commands);
    335         } else {
    336           EmitDistance((uint32_t)distance, commands);
    337           last_distance = distance;
    338         }
    339         EmitCopyLenLastDistance(matched, commands);
    340 
    341         next_emit = ip;
    342         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    343           goto emit_remainder;
    344         }
    345         {
    346           /* We could immediately start working at ip now, but to improve
    347              compression we first update "table" with the hashes of some
    348              positions within the last copy. */
    349           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
    350           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    351           uint32_t cur_hash;
    352           table[prev_hash] = (int)(ip - base_ip - 5);
    353           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    354           table[prev_hash] = (int)(ip - base_ip - 4);
    355           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    356           table[prev_hash] = (int)(ip - base_ip - 3);
    357           input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
    358           cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
    359           prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    360           table[prev_hash] = (int)(ip - base_ip - 2);
    361           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    362           table[prev_hash] = (int)(ip - base_ip - 1);
    363 
    364           candidate = base_ip + table[cur_hash];
    365           table[cur_hash] = (int)(ip - base_ip);
    366         }
    367       }
    368 
    369       while (ip - candidate <= MAX_DISTANCE && IsMatch(ip, candidate)) {
    370         /* We have a 6-byte match at ip, and no need to emit any
    371            literal bytes prior to ip. */
    372         const uint8_t* base = ip;
    373         size_t matched = 6 + FindMatchLengthWithLimit(
    374             candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
    375         ip += matched;
    376         last_distance = (int)(base - candidate);  /* > 0 */
    377         assert(0 == memcmp(base, candidate, matched));
    378         EmitCopyLen(matched, commands);
    379         EmitDistance((uint32_t)last_distance, commands);
    380 
    381         next_emit = ip;
    382         if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
    383           goto emit_remainder;
    384         }
    385         {
    386           /* We could immediately start working at ip now, but to improve
    387              compression we first update "table" with the hashes of some
    388              positions within the last copy. */
    389           uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
    390           uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    391           uint32_t cur_hash;
    392           table[prev_hash] = (int)(ip - base_ip - 5);
    393           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    394           table[prev_hash] = (int)(ip - base_ip - 4);
    395           prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
    396           table[prev_hash] = (int)(ip - base_ip - 3);
    397           input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
    398           cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
    399           prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
    400           table[prev_hash] = (int)(ip - base_ip - 2);
    401           prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
    402           table[prev_hash] = (int)(ip - base_ip - 1);
    403 
    404           candidate = base_ip + table[cur_hash];
    405           table[cur_hash] = (int)(ip - base_ip);
    406         }
    407       }
    408 
    409       next_hash = Hash(++ip, shift);
    410     }
    411   }
    412 
    413 emit_remainder:
    414   assert(next_emit <= ip_end);
    415   /* Emit the remaining bytes as literals. */
    416   if (next_emit < ip_end) {
    417     const uint32_t insert = (uint32_t)(ip_end - next_emit);
    418     EmitInsertLen(insert, commands);
    419     memcpy(*literals, next_emit, insert);
    420     *literals += insert;
    421   }
    422 }
    423 
    424 static void StoreCommands(MemoryManager* m,
    425                           const uint8_t* literals, const size_t num_literals,
    426                           const uint32_t* commands, const size_t num_commands,
    427                           size_t* storage_ix, uint8_t* storage) {
    428   static const uint32_t kNumExtraBits[128] = {
    429     0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
    430     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
    431     0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
    432     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    433     1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
    434     9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    435     17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
    436   };
    437   static const uint32_t kInsertOffset[24] = {
    438     0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
    439     1090, 2114, 6210, 22594,
    440   };
    441 
    442   uint8_t lit_depths[256];
    443   uint16_t lit_bits[256];
    444   uint32_t lit_histo[256] = { 0 };
    445   uint8_t cmd_depths[128] = { 0 };
    446   uint16_t cmd_bits[128] = { 0 };
    447   uint32_t cmd_histo[128] = { 0 };
    448   size_t i;
    449   for (i = 0; i < num_literals; ++i) {
    450     ++lit_histo[literals[i]];
    451   }
    452   BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
    453                                      /* max_bits = */ 8,
    454                                      lit_depths, lit_bits,
    455                                      storage_ix, storage);
    456   if (BROTLI_IS_OOM(m)) return;
    457 
    458   for (i = 0; i < num_commands; ++i) {
    459     const uint32_t code = commands[i] & 0xFF;
    460     assert(code < 128);
    461     ++cmd_histo[code];
    462   }
    463   cmd_histo[1] += 1;
    464   cmd_histo[2] += 1;
    465   cmd_histo[64] += 1;
    466   cmd_histo[84] += 1;
    467   BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
    468                                  storage_ix, storage);
    469 
    470   for (i = 0; i < num_commands; ++i) {
    471     const uint32_t cmd = commands[i];
    472     const uint32_t code = cmd & 0xFF;
    473     const uint32_t extra = cmd >> 8;
    474     assert(code < 128);
    475     BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
    476     BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    477     if (code < 24) {
    478       const uint32_t insert = kInsertOffset[code] + extra;
    479       uint32_t j;
    480       for (j = 0; j < insert; ++j) {
    481         const uint8_t lit = *literals;
    482         BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
    483         ++literals;
    484       }
    485     }
    486   }
    487 }
    488 
    489 /* Acceptable loss for uncompressible speedup is 2% */
    490 #define MIN_RATIO 0.98
    491 #define SAMPLE_RATE 43
    492 
    493 static BROTLI_BOOL ShouldCompress(
    494     const uint8_t* input, size_t input_size, size_t num_literals) {
    495   double corpus_size = (double)input_size;
    496   if (num_literals < MIN_RATIO * corpus_size) {
    497     return BROTLI_TRUE;
    498   } else {
    499     uint32_t literal_histo[256] = { 0 };
    500     const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
    501     size_t i;
    502     for (i = 0; i < input_size; i += SAMPLE_RATE) {
    503       ++literal_histo[input[i]];
    504     }
    505     return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
    506   }
    507 }
    508 
    509 static void RewindBitPosition(const size_t new_storage_ix,
    510                               size_t* storage_ix, uint8_t* storage) {
    511   const size_t bitpos = new_storage_ix & 7;
    512   const size_t mask = (1u << bitpos) - 1;
    513   storage[new_storage_ix >> 3] &= (uint8_t)mask;
    514   *storage_ix = new_storage_ix;
    515 }
    516 
    517 static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
    518                                       size_t* storage_ix, uint8_t* storage) {
    519   BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
    520   *storage_ix = (*storage_ix + 7u) & ~7u;
    521   memcpy(&storage[*storage_ix >> 3], input, input_size);
    522   *storage_ix += input_size << 3;
    523   storage[*storage_ix >> 3] = 0;
    524 }
    525 
    526 static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
    527     MemoryManager* m, const uint8_t* input, size_t input_size,
    528     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    529     int* table, size_t table_bits, size_t* storage_ix, uint8_t* storage) {
    530   /* Save the start of the first block for position and distance computations.
    531   */
    532   const uint8_t* base_ip = input;
    533   BROTLI_UNUSED(is_last);
    534 
    535   while (input_size > 0) {
    536     size_t block_size =
    537         BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
    538     uint32_t* commands = command_buf;
    539     uint8_t* literals = literal_buf;
    540     size_t num_literals;
    541     CreateCommands(input, block_size, input_size, base_ip, table, table_bits,
    542                    &literals, &commands);
    543     num_literals = (size_t)(literals - literal_buf);
    544     if (ShouldCompress(input, block_size, num_literals)) {
    545       const size_t num_commands = (size_t)(commands - command_buf);
    546       BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    547       /* No block splits, no contexts. */
    548       BrotliWriteBits(13, 0, storage_ix, storage);
    549       StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
    550                     storage_ix, storage);
    551       if (BROTLI_IS_OOM(m)) return;
    552     } else {
    553       /* Since we did not find many backward references and the entropy of
    554          the data is close to 8 bits, we can simply emit an uncompressed block.
    555          This makes compression speed of uncompressible data about 3x faster. */
    556       EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
    557     }
    558     input += block_size;
    559     input_size -= block_size;
    560   }
    561 }
    562 
    563 #define FOR_TABLE_BITS_(X) \
    564   X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
    565 
    566 #define BAKE_METHOD_PARAM_(B)                                                  \
    567 static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B(            \
    568     MemoryManager* m, const uint8_t* input, size_t input_size,                 \
    569     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,          \
    570     int* table, size_t* storage_ix, uint8_t* storage) {                        \
    571   BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
    572       literal_buf, table, B, storage_ix, storage);                             \
    573 }
    574 FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
    575 #undef BAKE_METHOD_PARAM_
    576 
    577 void BrotliCompressFragmentTwoPass(
    578     MemoryManager* m, const uint8_t* input, size_t input_size,
    579     BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
    580     int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
    581   const size_t initial_storage_ix = *storage_ix;
    582   const size_t table_bits = Log2FloorNonZero(table_size);
    583   switch (table_bits) {
    584 #define CASE_(B)                                      \
    585     case B:                                           \
    586       BrotliCompressFragmentTwoPassImpl ## B(         \
    587           m, input, input_size, is_last, command_buf, \
    588           literal_buf, table, storage_ix, storage);   \
    589       break;
    590     FOR_TABLE_BITS_(CASE_)
    591 #undef CASE_
    592     default: assert(0); break;
    593   }
    594 
    595   /* If output is larger than single uncompressed block, rewrite it. */
    596   if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
    597     RewindBitPosition(initial_storage_ix, storage_ix, storage);
    598     EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
    599   }
    600 
    601   if (is_last) {
    602     BrotliWriteBits(1, 1, storage_ix, storage);  /* islast */
    603     BrotliWriteBits(1, 1, storage_ix, storage);  /* isempty */
    604     *storage_ix = (*storage_ix + 7u) & ~7u;
    605   }
    606 }
    607 
    608 #undef FOR_TABLE_BITS_
    609 
    610 #if defined(__cplusplus) || defined(c_plusplus)
    611 }  /* extern "C" */
    612 #endif
    613