Home | History | Annotate | Download | only in zopfli
      1 /*
      2 Copyright 2011 Google Inc. All Rights Reserved.
      3 
      4 Licensed under the Apache License, Version 2.0 (the "License");
      5 you may not use this file except in compliance with the License.
      6 You may obtain a copy of the License at
      7 
      8     http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 Unless required by applicable law or agreed to in writing, software
     11 distributed under the License is distributed on an "AS IS" BASIS,
     12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 See the License for the specific language governing permissions and
     14 limitations under the License.
     15 
     16 Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
     17 Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
     18 */
     19 
     20 #include "deflate.h"
     21 
     22 #include <assert.h>
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 
     26 #include "blocksplitter.h"
     27 #include "lz77.h"
     28 #include "squeeze.h"
     29 #include "tree.h"
     30 
     31 /*
     32 bp = bitpointer, always in range [0, 7].
     33 The outsize is number of necessary bytes to encode the bits.
     34 Given the value of bp and the amount of bytes, the amount of bits represented
     35 is not simply bytesize * 8 + bp because even representing one bit requires a
     36 whole byte. It is: (bp == 0) ? (bytesize * 8) : ((bytesize - 1) * 8 + bp)
     37 */
     38 static void AddBit(int bit,
     39                    unsigned char* bp, unsigned char** out, size_t* outsize) {
     40   if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
     41   (*out)[*outsize - 1] |= bit << *bp;
     42   *bp = (*bp + 1) & 7;
     43 }
     44 
     45 static void AddBits(unsigned symbol, unsigned length,
     46                     unsigned char* bp, unsigned char** out, size_t* outsize) {
     47   /* TODO(lode): make more efficient (add more bits at once). */
     48   unsigned i;
     49   for (i = 0; i < length; i++) {
     50     unsigned bit = (symbol >> i) & 1;
     51     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
     52     (*out)[*outsize - 1] |= bit << *bp;
     53     *bp = (*bp + 1) & 7;
     54   }
     55 }
     56 
     57 /*
     58 Adds bits, like AddBits, but the order is inverted. The deflate specification
     59 uses both orders in one standard.
     60 */
     61 static void AddHuffmanBits(unsigned symbol, unsigned length,
     62                            unsigned char* bp, unsigned char** out,
     63                            size_t* outsize) {
     64   /* TODO(lode): make more efficient (add more bits at once). */
     65   unsigned i;
     66   for (i = 0; i < length; i++) {
     67     unsigned bit = (symbol >> (length - i - 1)) & 1;
     68     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
     69     (*out)[*outsize - 1] |= bit << *bp;
     70     *bp = (*bp + 1) & 7;
     71   }
     72 }
     73 
     74 /*
     75 Ensures there are at least 2 distance codes to support buggy decoders.
     76 Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
     77 distance code (with length > 0), even though it's valid according to the
     78 deflate spec to have 0 distance codes. On top of that, some mobile phones
     79 require at least two distance codes. To support these decoders too (but
     80 potentially at the cost of a few bytes), add dummy code lengths of 1.
     81 References to this bug can be found in the changelog of
     82 Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
     83 
     84 d_lengths: the 32 lengths of the distance codes.
     85 */
     86 static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) {
     87   int num_dist_codes = 0; /* Amount of non-zero distance codes */
     88   int i;
     89   for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) {
     90     if (d_lengths[i]) num_dist_codes++;
     91     if (num_dist_codes >= 2) return; /* Two or more codes is fine. */
     92   }
     93 
     94   if (num_dist_codes == 0) {
     95     d_lengths[0] = d_lengths[1] = 1;
     96   } else if (num_dist_codes == 1) {
     97     d_lengths[d_lengths[0] ? 1 : 0] = 1;
     98   }
     99 }
    100 
    101 /*
    102 Encodes the Huffman tree and returns how many bits its encoding takes. If out
    103 is a null pointer, only returns the size and runs faster.
    104 */
    105 static size_t EncodeTree(const unsigned* ll_lengths,
    106                          const unsigned* d_lengths,
    107                          int use_16, int use_17, int use_18,
    108                          unsigned char* bp,
    109                          unsigned char** out, size_t* outsize) {
    110   unsigned lld_total;  /* Total amount of literal, length, distance codes. */
    111   /* Runlength encoded version of lengths of litlen and dist trees. */
    112   unsigned* rle = 0;
    113   unsigned* rle_bits = 0;  /* Extra bits for rle values 16, 17 and 18. */
    114   size_t rle_size = 0;  /* Size of rle array. */
    115   size_t rle_bits_size = 0;  /* Should have same value as rle_size. */
    116   unsigned hlit = 29;  /* 286 - 257 */
    117   unsigned hdist = 29;  /* 32 - 1, but gzip does not like hdist > 29.*/
    118   unsigned hclen;
    119   unsigned hlit2;
    120   size_t i, j;
    121   size_t clcounts[19];
    122   unsigned clcl[19];  /* Code length code lengths. */
    123   unsigned clsymbols[19];
    124   /* The order in which code length code lengths are encoded as per deflate. */
    125   static const unsigned order[19] = {
    126     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
    127   };
    128   int size_only = !out;
    129   size_t result_size = 0;
    130 
    131   for(i = 0; i < 19; i++) clcounts[i] = 0;
    132 
    133   /* Trim zeros. */
    134   while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--;
    135   while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--;
    136   hlit2 = hlit + 257;
    137 
    138   lld_total = hlit2 + hdist + 1;
    139 
    140   for (i = 0; i < lld_total; i++) {
    141     /* This is an encoding of a huffman tree, so now the length is a symbol */
    142     unsigned char symbol = i < hlit2 ? ll_lengths[i] : d_lengths[i - hlit2];
    143     unsigned count = 1;
    144     if(use_16 || (symbol == 0 && (use_17 || use_18))) {
    145       for (j = i + 1; j < lld_total && symbol ==
    146           (j < hlit2 ? ll_lengths[j] : d_lengths[j - hlit2]); j++) {
    147         count++;
    148       }
    149     }
    150     i += count - 1;
    151 
    152     /* Repetitions of zeroes */
    153     if (symbol == 0 && count >= 3) {
    154       if (use_18) {
    155         while (count >= 11) {
    156           unsigned count2 = count > 138 ? 138 : count;
    157           if (!size_only) {
    158             ZOPFLI_APPEND_DATA(18, &rle, &rle_size);
    159             ZOPFLI_APPEND_DATA(count2 - 11, &rle_bits, &rle_bits_size);
    160           }
    161           clcounts[18]++;
    162           count -= count2;
    163         }
    164       }
    165       if (use_17) {
    166         while (count >= 3) {
    167           unsigned count2 = count > 10 ? 10 : count;
    168           if (!size_only) {
    169             ZOPFLI_APPEND_DATA(17, &rle, &rle_size);
    170             ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
    171           }
    172           clcounts[17]++;
    173           count -= count2;
    174         }
    175       }
    176     }
    177 
    178     /* Repetitions of any symbol */
    179     if (use_16 && count >= 4) {
    180       count--;  /* Since the first one is hardcoded. */
    181       clcounts[symbol]++;
    182       if (!size_only) {
    183         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
    184         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
    185       }
    186       while (count >= 3) {
    187         unsigned count2 = count > 6 ? 6 : count;
    188         if (!size_only) {
    189           ZOPFLI_APPEND_DATA(16, &rle, &rle_size);
    190           ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
    191         }
    192         clcounts[16]++;
    193         count -= count2;
    194       }
    195     }
    196 
    197     /* No or insufficient repetition */
    198     clcounts[symbol] += count;
    199     while (count > 0) {
    200       if (!size_only) {
    201         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
    202         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
    203       }
    204       count--;
    205     }
    206   }
    207 
    208   ZopfliCalculateBitLengths(clcounts, 19, 7, clcl);
    209   if (!size_only) ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols);
    210 
    211   hclen = 15;
    212   /* Trim zeros. */
    213   while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--;
    214 
    215   if (!size_only) {
    216     AddBits(hlit, 5, bp, out, outsize);
    217     AddBits(hdist, 5, bp, out, outsize);
    218     AddBits(hclen, 4, bp, out, outsize);
    219 
    220     for (i = 0; i < hclen + 4; i++) {
    221       AddBits(clcl[order[i]], 3, bp, out, outsize);
    222     }
    223 
    224     for (i = 0; i < rle_size; i++) {
    225       unsigned symbol = clsymbols[rle[i]];
    226       AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize);
    227       /* Extra bits. */
    228       if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize);
    229       else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize);
    230       else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize);
    231     }
    232   }
    233 
    234   result_size += 14;  /* hlit, hdist, hclen bits */
    235   result_size += (hclen + 4) * 3;  /* clcl bits */
    236   for(i = 0; i < 19; i++) {
    237     result_size += clcl[i] * clcounts[i];
    238   }
    239   /* Extra bits. */
    240   result_size += clcounts[16] * 2;
    241   result_size += clcounts[17] * 3;
    242   result_size += clcounts[18] * 7;
    243 
    244   /* Note: in case of "size_only" these are null pointers so no effect. */
    245   free(rle);
    246   free(rle_bits);
    247 
    248   return result_size;
    249 }
    250 
    251 static void AddDynamicTree(const unsigned* ll_lengths,
    252                            const unsigned* d_lengths,
    253                            unsigned char* bp,
    254                            unsigned char** out, size_t* outsize) {
    255   int i;
    256   int best = 0;
    257   size_t bestsize = 0;
    258 
    259   for(i = 0; i < 8; i++) {
    260     size_t size = EncodeTree(ll_lengths, d_lengths,
    261                              i & 1, i & 2, i & 4,
    262                              0, 0, 0);
    263     if (bestsize == 0 || size < bestsize) {
    264       bestsize = size;
    265       best = i;
    266     }
    267   }
    268 
    269   EncodeTree(ll_lengths, d_lengths,
    270              best & 1, best & 2, best & 4,
    271              bp, out, outsize);
    272 }
    273 
    274 /*
    275 Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
    276 */
    277 static size_t CalculateTreeSize(const unsigned* ll_lengths,
    278                                 const unsigned* d_lengths) {
    279   size_t result = 0;
    280   int i;
    281 
    282   for(i = 0; i < 8; i++) {
    283     size_t size = EncodeTree(ll_lengths, d_lengths,
    284                              i & 1, i & 2, i & 4,
    285                              0, 0, 0);
    286     if (result == 0 || size < result) result = size;
    287   }
    288 
    289   return result;
    290 }
    291 
    292 /*
    293 Adds all lit/len and dist codes from the lists as huffman symbols. Does not add
    294 end code 256. expected_data_size is the uncompressed block size, used for
    295 assert, but you can set it to 0 to not do the assertion.
    296 */
    297 static void AddLZ77Data(const unsigned short* litlens,
    298                         const unsigned short* dists,
    299                         size_t lstart, size_t lend,
    300                         size_t expected_data_size,
    301                         const unsigned* ll_symbols, const unsigned* ll_lengths,
    302                         const unsigned* d_symbols, const unsigned* d_lengths,
    303                         unsigned char* bp,
    304                         unsigned char** out, size_t* outsize) {
    305   size_t testlength = 0;
    306   size_t i;
    307 
    308   for (i = lstart; i < lend; i++) {
    309     unsigned dist = dists[i];
    310     unsigned litlen = litlens[i];
    311     if (dist == 0) {
    312       assert(litlen < 256);
    313       assert(ll_lengths[litlen] > 0);
    314       AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize);
    315       testlength++;
    316     } else {
    317       unsigned lls = ZopfliGetLengthSymbol(litlen);
    318       unsigned ds = ZopfliGetDistSymbol(dist);
    319       assert(litlen >= 3 && litlen <= 288);
    320       assert(ll_lengths[lls] > 0);
    321       assert(d_lengths[ds] > 0);
    322       AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize);
    323       AddBits(ZopfliGetLengthExtraBitsValue(litlen),
    324               ZopfliGetLengthExtraBits(litlen),
    325               bp, out, outsize);
    326       AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize);
    327       AddBits(ZopfliGetDistExtraBitsValue(dist),
    328               ZopfliGetDistExtraBits(dist),
    329               bp, out, outsize);
    330       testlength += litlen;
    331     }
    332   }
    333   assert(expected_data_size == 0 || testlength == expected_data_size);
    334 }
    335 
    336 static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
    337   size_t i;
    338   for (i = 0; i < 144; i++) ll_lengths[i] = 8;
    339   for (i = 144; i < 256; i++) ll_lengths[i] = 9;
    340   for (i = 256; i < 280; i++) ll_lengths[i] = 7;
    341   for (i = 280; i < 288; i++) ll_lengths[i] = 8;
    342   for (i = 0; i < 32; i++) d_lengths[i] = 5;
    343 }
    344 
    345 /*
    346 Calculates size of the part after the header and tree of an LZ77 block, in bits.
    347 */
    348 static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
    349                                        const unsigned* d_lengths,
    350                                        const unsigned short* litlens,
    351                                        const unsigned short* dists,
    352                                        size_t lstart, size_t lend) {
    353   size_t result = 0;
    354   size_t i;
    355   for (i = lstart; i < lend; i++) {
    356     if (dists[i] == 0) {
    357       result += ll_lengths[litlens[i]];
    358     } else {
    359       result += ll_lengths[ZopfliGetLengthSymbol(litlens[i])];
    360       result += d_lengths[ZopfliGetDistSymbol(dists[i])];
    361       result += ZopfliGetLengthExtraBits(litlens[i]);
    362       result += ZopfliGetDistExtraBits(dists[i]);
    363     }
    364   }
    365   result += ll_lengths[256]; /*end symbol*/
    366   return result;
    367 }
    368 
    369 static size_t AbsDiff(size_t x, size_t y) {
    370   if (x > y)
    371     return x - y;
    372   else
    373     return y - x;
    374 }
    375 
    376 /*
    377 Change the population counts in a way that the consequent Hufmann tree
    378 compression, especially its rle-part will be more likely to compress this data
    379 more efficiently. length containts the size of the histogram.
    380 */
    381 void OptimizeHuffmanForRle(int length, size_t* counts) {
    382   int i, k, stride;
    383   size_t symbol, sum, limit;
    384   int* good_for_rle;
    385 
    386   /* 1) We don't want to touch the trailing zeros. We may break the
    387   rules of the format by adding more data in the distance codes. */
    388   for (; length >= 0; --length) {
    389     if (length == 0) {
    390       return;
    391     }
    392     if (counts[length - 1] != 0) {
    393       /* Now counts[0..length - 1] does not have trailing zeros. */
    394       break;
    395     }
    396   }
    397   /* 2) Let's mark all population counts that already can be encoded
    398   with an rle code.*/
    399   good_for_rle = (int*)malloc(length * sizeof(int));
    400   for (i = 0; i < length; ++i) good_for_rle[i] = 0;
    401 
    402   /* Let's not spoil any of the existing good rle codes.
    403   Mark any seq of 0's that is longer than 5 as a good_for_rle.
    404   Mark any seq of non-0's that is longer than 7 as a good_for_rle.*/
    405   symbol = counts[0];
    406   stride = 0;
    407   for (i = 0; i < length + 1; ++i) {
    408     if (i == length || counts[i] != symbol) {
    409       if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
    410         for (k = 0; k < stride; ++k) {
    411           good_for_rle[i - k - 1] = 1;
    412         }
    413       }
    414       stride = 1;
    415       if (i != length) {
    416         symbol = counts[i];
    417       }
    418     } else {
    419       ++stride;
    420     }
    421   }
    422 
    423   /* 3) Let's replace those population counts that lead to more rle codes. */
    424   stride = 0;
    425   limit = counts[0];
    426   sum = 0;
    427   for (i = 0; i < length + 1; ++i) {
    428     if (i == length || good_for_rle[i]
    429         /* Heuristic for selecting the stride ranges to collapse. */
    430         || AbsDiff(counts[i], limit) >= 4) {
    431       if (stride >= 4 || (stride >= 3 && sum == 0)) {
    432         /* The stride must end, collapse what we have, if we have enough (4). */
    433         int count = (sum + stride / 2) / stride;
    434         if (count < 1) count = 1;
    435         if (sum == 0) {
    436           /* Don't make an all zeros stride to be upgraded to ones. */
    437           count = 0;
    438         }
    439         for (k = 0; k < stride; ++k) {
    440           /* We don't want to change value at counts[i],
    441           that is already belonging to the next stride. Thus - 1. */
    442           counts[i - k - 1] = count;
    443         }
    444       }
    445       stride = 0;
    446       sum = 0;
    447       if (i < length - 3) {
    448         /* All interesting strides have a count of at least 4,
    449         at least when non-zeros. */
    450         limit = (counts[i] + counts[i + 1] +
    451                  counts[i + 2] + counts[i + 3] + 2) / 4;
    452       } else if (i < length) {
    453         limit = counts[i];
    454       } else {
    455         limit = 0;
    456       }
    457     }
    458     ++stride;
    459     if (i != length) {
    460       sum += counts[i];
    461     }
    462   }
    463 
    464   free(good_for_rle);
    465 }
    466 
    467 /*
    468 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
    469 lengths that give the smallest size of tree encoding + encoding of all the
    470 symbols to have smallest output size. This are not necessarily the ideal Huffman
    471 bit lengths.
    472 */
    473 static void GetDynamicLengths(const unsigned short* litlens,
    474                               const unsigned short* dists,
    475                               size_t lstart, size_t lend,
    476                               unsigned* ll_lengths, unsigned* d_lengths) {
    477   size_t ll_counts[288];
    478   size_t d_counts[32];
    479 
    480   ZopfliLZ77Counts(litlens, dists, lstart, lend, ll_counts, d_counts);
    481   OptimizeHuffmanForRle(288, ll_counts);
    482   OptimizeHuffmanForRle(32, d_counts);
    483   ZopfliCalculateBitLengths(ll_counts, 288, 15, ll_lengths);
    484   ZopfliCalculateBitLengths(d_counts, 32, 15, d_lengths);
    485   PatchDistanceCodesForBuggyDecoders(d_lengths);
    486 }
    487 
    488 double ZopfliCalculateBlockSize(const unsigned short* litlens,
    489                                 const unsigned short* dists,
    490                                 size_t lstart, size_t lend, int btype) {
    491   unsigned ll_lengths[288];
    492   unsigned d_lengths[32];
    493 
    494   double result = 3; /* bfinal and btype bits */
    495 
    496   assert(btype == 1 || btype == 2); /* This is not for uncompressed blocks. */
    497 
    498   if(btype == 1) {
    499     GetFixedTree(ll_lengths, d_lengths);
    500   } else {
    501     GetDynamicLengths(litlens, dists, lstart, lend, ll_lengths, d_lengths);
    502     result += CalculateTreeSize(ll_lengths, d_lengths);
    503   }
    504 
    505   result += CalculateBlockSymbolSize(
    506       ll_lengths, d_lengths, litlens, dists, lstart, lend);
    507 
    508   return result;
    509 }
    510 
    511 /*
    512 Adds a deflate block with the given LZ77 data to the output.
    513 options: global program options
    514 btype: the block type, must be 1 or 2
    515 final: whether to set the "final" bit on this block, must be the last block
    516 litlens: literal/length array of the LZ77 data, in the same format as in
    517     ZopfliLZ77Store.
    518 dists: distance array of the LZ77 data, in the same format as in
    519     ZopfliLZ77Store.
    520 lstart: where to start in the LZ77 data
    521 lend: where to end in the LZ77 data (not inclusive)
    522 expected_data_size: the uncompressed block size, used for assert, but you can
    523   set it to 0 to not do the assertion.
    524 bp: output bit pointer
    525 out: dynamic output array to append to
    526 outsize: dynamic output array size
    527 */
    528 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
    529                          const unsigned short* litlens,
    530                          const unsigned short* dists,
    531                          size_t lstart, size_t lend,
    532                          size_t expected_data_size,
    533                          unsigned char* bp,
    534                          unsigned char** out, size_t* outsize) {
    535   unsigned ll_lengths[288];
    536   unsigned d_lengths[32];
    537   unsigned ll_symbols[288];
    538   unsigned d_symbols[32];
    539   size_t detect_block_size = *outsize;
    540   size_t compressed_size;
    541   size_t uncompressed_size = 0;
    542   size_t i;
    543 
    544   AddBit(final, bp, out, outsize);
    545   AddBit(btype & 1, bp, out, outsize);
    546   AddBit((btype & 2) >> 1, bp, out, outsize);
    547 
    548   if (btype == 1) {
    549     /* Fixed block. */
    550     GetFixedTree(ll_lengths, d_lengths);
    551   } else {
    552     /* Dynamic block. */
    553     unsigned detect_tree_size;
    554     assert(btype == 2);
    555 
    556     GetDynamicLengths(litlens, dists, lstart, lend, ll_lengths, d_lengths);
    557 
    558     detect_tree_size = *outsize;
    559     AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
    560     if (options->verbose) {
    561       fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size));
    562     }
    563   }
    564 
    565   ZopfliLengthsToSymbols(ll_lengths, 288, 15, ll_symbols);
    566   ZopfliLengthsToSymbols(d_lengths, 32, 15, d_symbols);
    567 
    568   detect_block_size = *outsize;
    569   AddLZ77Data(litlens, dists, lstart, lend, expected_data_size,
    570               ll_symbols, ll_lengths, d_symbols, d_lengths,
    571               bp, out, outsize);
    572   /* End symbol. */
    573   AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
    574 
    575   for (i = lstart; i < lend; i++) {
    576     uncompressed_size += dists[i] == 0 ? 1 : litlens[i];
    577   }
    578   compressed_size = *outsize - detect_block_size;
    579   if (options->verbose) {
    580     fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n",
    581            (int)compressed_size, (int)(compressed_size / 1024),
    582            (int)(uncompressed_size));
    583   }
    584 }
    585 
    586 static void DeflateDynamicBlock(const ZopfliOptions* options, int final,
    587                                 const unsigned char* in,
    588                                 size_t instart, size_t inend,
    589                                 unsigned char* bp,
    590                                 unsigned char** out, size_t* outsize) {
    591   ZopfliBlockState s;
    592   size_t blocksize = inend - instart;
    593   ZopfliLZ77Store store;
    594   int btype = 2;
    595 
    596   ZopfliInitLZ77Store(&store);
    597 
    598   s.options = options;
    599   s.blockstart = instart;
    600   s.blockend = inend;
    601 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    602   s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
    603   ZopfliInitCache(blocksize, s.lmc);
    604 #endif
    605 
    606   ZopfliLZ77Optimal(&s, in, instart, inend, &store);
    607 
    608   /* For small block, encoding with fixed tree can be smaller. For large block,
    609   don't bother doing this expensive test, dynamic tree will be better.*/
    610   if (store.size < 1000) {
    611     double dyncost, fixedcost;
    612     ZopfliLZ77Store fixedstore;
    613     ZopfliInitLZ77Store(&fixedstore);
    614     ZopfliLZ77OptimalFixed(&s, in, instart, inend, &fixedstore);
    615     dyncost = ZopfliCalculateBlockSize(store.litlens, store.dists,
    616         0, store.size, 2);
    617     fixedcost = ZopfliCalculateBlockSize(fixedstore.litlens, fixedstore.dists,
    618         0, fixedstore.size, 1);
    619     if (fixedcost < dyncost) {
    620       btype = 1;
    621       ZopfliCleanLZ77Store(&store);
    622       store = fixedstore;
    623     } else {
    624       ZopfliCleanLZ77Store(&fixedstore);
    625     }
    626   }
    627 
    628   AddLZ77Block(s.options, btype, final,
    629                store.litlens, store.dists, 0, store.size,
    630                blocksize, bp, out, outsize);
    631 
    632 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    633   ZopfliCleanCache(s.lmc);
    634   free(s.lmc);
    635 #endif
    636   ZopfliCleanLZ77Store(&store);
    637 }
    638 
    639 static void DeflateFixedBlock(const ZopfliOptions* options, int final,
    640                               const unsigned char* in,
    641                               size_t instart, size_t inend,
    642                               unsigned char* bp,
    643                               unsigned char** out, size_t* outsize) {
    644   ZopfliBlockState s;
    645   size_t blocksize = inend - instart;
    646   ZopfliLZ77Store store;
    647 
    648   ZopfliInitLZ77Store(&store);
    649 
    650   s.options = options;
    651   s.blockstart = instart;
    652   s.blockend = inend;
    653 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    654   s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
    655   ZopfliInitCache(blocksize, s.lmc);
    656 #endif
    657 
    658   ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
    659 
    660   AddLZ77Block(s.options, 1, final, store.litlens, store.dists, 0, store.size,
    661                blocksize, bp, out, outsize);
    662 
    663 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    664   ZopfliCleanCache(s.lmc);
    665   free(s.lmc);
    666 #endif
    667   ZopfliCleanLZ77Store(&store);
    668 }
    669 
    670 static void DeflateNonCompressedBlock(const ZopfliOptions* options, int final,
    671                                       const unsigned char* in, size_t instart,
    672                                       size_t inend,
    673                                       unsigned char* bp,
    674                                       unsigned char** out, size_t* outsize) {
    675   size_t i;
    676   size_t blocksize = inend - instart;
    677   unsigned short nlen = ~blocksize;
    678 
    679   (void)options;
    680   assert(blocksize < 65536);  /* Non compressed blocks are max this size. */
    681 
    682   AddBit(final, bp, out, outsize);
    683   /* BTYPE 00 */
    684   AddBit(0, bp, out, outsize);
    685   AddBit(0, bp, out, outsize);
    686 
    687   /* Any bits of input up to the next byte boundary are ignored. */
    688   *bp = 0;
    689 
    690   ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
    691   ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
    692   ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
    693   ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
    694 
    695   for (i = instart; i < inend; i++) {
    696     ZOPFLI_APPEND_DATA(in[i], out, outsize);
    697   }
    698 }
    699 
    700 static void DeflateBlock(const ZopfliOptions* options,
    701                          int btype, int final,
    702                          const unsigned char* in, size_t instart, size_t inend,
    703                          unsigned char* bp,
    704                          unsigned char** out, size_t* outsize) {
    705   if (btype == 0) {
    706     DeflateNonCompressedBlock(
    707         options, final, in, instart, inend, bp, out, outsize);
    708   } else if (btype == 1) {
    709      DeflateFixedBlock(options, final, in, instart, inend, bp, out, outsize);
    710   } else {
    711     assert (btype == 2);
    712     DeflateDynamicBlock(options, final, in, instart, inend, bp, out, outsize);
    713   }
    714 }
    715 
    716 /*
    717 Does squeeze strategy where first block splitting is done, then each block is
    718 squeezed.
    719 Parameters: see description of the ZopfliDeflate function.
    720 */
    721 static void DeflateSplittingFirst(const ZopfliOptions* options,
    722                                   int btype, int final,
    723                                   const unsigned char* in,
    724                                   size_t instart, size_t inend,
    725                                   unsigned char* bp,
    726                                   unsigned char** out, size_t* outsize) {
    727   size_t i;
    728   size_t* splitpoints = 0;
    729   size_t npoints = 0;
    730   if (btype == 0) {
    731     ZopfliBlockSplitSimple(in, instart, inend, 65535, &splitpoints, &npoints);
    732   } else if (btype == 1) {
    733     /* If all blocks are fixed tree, splitting into separate blocks only
    734     increases the total size. Leave npoints at 0, this represents 1 block. */
    735   } else {
    736     ZopfliBlockSplit(options, in, instart, inend,
    737                      options->blocksplittingmax, &splitpoints, &npoints);
    738   }
    739 
    740   for (i = 0; i <= npoints; i++) {
    741     size_t start = i == 0 ? instart : splitpoints[i - 1];
    742     size_t end = i == npoints ? inend : splitpoints[i];
    743     DeflateBlock(options, btype, i == npoints && final, in, start, end,
    744                  bp, out, outsize);
    745   }
    746 
    747   free(splitpoints);
    748 }
    749 
    750 /*
    751 Does squeeze strategy where first the best possible lz77 is done, and then based
    752 on that data, block splitting is done.
    753 Parameters: see description of the ZopfliDeflate function.
    754 */
    755 static void DeflateSplittingLast(const ZopfliOptions* options,
    756                                  int btype, int final,
    757                                  const unsigned char* in,
    758                                  size_t instart, size_t inend,
    759                                  unsigned char* bp,
    760                                  unsigned char** out, size_t* outsize) {
    761   size_t i;
    762   ZopfliBlockState s;
    763   ZopfliLZ77Store store;
    764   size_t* splitpoints = 0;
    765   size_t npoints = 0;
    766 
    767   if (btype == 0) {
    768     /* This function only supports LZ77 compression. DeflateSplittingFirst
    769        supports the special case of noncompressed data. Punt it to that one. */
    770     DeflateSplittingFirst(options, btype, final,
    771                           in, instart, inend,
    772                           bp, out, outsize);
    773   }
    774   assert(btype == 1 || btype == 2);
    775 
    776   ZopfliInitLZ77Store(&store);
    777 
    778   s.options = options;
    779   s.blockstart = instart;
    780   s.blockend = inend;
    781 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    782   s.lmc = (ZopfliLongestMatchCache*)malloc(sizeof(ZopfliLongestMatchCache));
    783   ZopfliInitCache(inend - instart, s.lmc);
    784 #endif
    785 
    786   if (btype == 2) {
    787     ZopfliLZ77Optimal(&s, in, instart, inend, &store);
    788   } else {
    789     assert (btype == 1);
    790     ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
    791   }
    792 
    793   if (btype == 1) {
    794     /* If all blocks are fixed tree, splitting into separate blocks only
    795     increases the total size. Leave npoints at 0, this represents 1 block. */
    796   } else {
    797     ZopfliBlockSplitLZ77(options, store.litlens, store.dists, store.size,
    798                          options->blocksplittingmax, &splitpoints, &npoints);
    799   }
    800 
    801   for (i = 0; i <= npoints; i++) {
    802     size_t start = i == 0 ? 0 : splitpoints[i - 1];
    803     size_t end = i == npoints ? store.size : splitpoints[i];
    804     AddLZ77Block(options, btype, i == npoints && final,
    805                  store.litlens, store.dists, start, end, 0,
    806                  bp, out, outsize);
    807   }
    808 
    809 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    810   ZopfliCleanCache(s.lmc);
    811   free(s.lmc);
    812 #endif
    813 
    814   ZopfliCleanLZ77Store(&store);
    815   free(splitpoints);
    816 }
    817 
    818 /*
    819 Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if
    820 needed.
    821 It is possible to call this function multiple times in a row, shifting
    822 instart and inend to next bytes of the data. If instart is larger than 0, then
    823 previous bytes are used as the initial dictionary for LZ77.
    824 This function will usually output multiple deflate blocks. If final is 1, then
    825 the final bit will be set on the last block.
    826 */
    827 void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
    828                        const unsigned char* in, size_t instart, size_t inend,
    829                        unsigned char* bp, unsigned char** out,
    830                        size_t* outsize) {
    831   if (options->blocksplitting) {
    832     if (options->blocksplittinglast) {
    833       DeflateSplittingLast(options, btype, final, in, instart, inend,
    834                            bp, out, outsize);
    835     } else {
    836       DeflateSplittingFirst(options, btype, final, in, instart, inend,
    837                             bp, out, outsize);
    838     }
    839   } else {
    840     DeflateBlock(options, btype, final, in, instart, inend, bp, out, outsize);
    841   }
    842 }
    843 
    844 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
    845                    const unsigned char* in, size_t insize,
    846                    unsigned char* bp, unsigned char** out, size_t* outsize) {
    847 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
    848   ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
    849 #else
    850   size_t i = 0;
    851   while (i < insize) {
    852     int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
    853     int final2 = final && masterfinal;
    854     size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
    855     ZopfliDeflatePart(options, btype, final2,
    856                       in, i, i + size, bp, out, outsize);
    857     i += size;
    858   }
    859 #endif
    860   if (options->verbose) {
    861     fprintf(stderr,
    862             "Original Size: %d, Deflate: %d, Compression: %f%% Removed\n",
    863             (int)insize, (int)*outsize,
    864             100.0 * (double)(insize - *outsize) / (double)insize);
    865   }
    866 }
    867