Home | History | Annotate | Download | only in zopfli
      1 /*
      2 Copyright 2011 Google Inc. All Rights Reserved.
      3 
      4 Licensed under the Apache License, Version 2.0 (the "License");
      5 you may not use this file except in compliance with the License.
      6 You may obtain a copy of the License at
      7 
      8     http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 Unless required by applicable law or agreed to in writing, software
     11 distributed under the License is distributed on an "AS IS" BASIS,
     12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 See the License for the specific language governing permissions and
     14 limitations under the License.
     15 
     16 Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
     17 Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
     18 */
     19 
     20 #include "blocksplitter.h"
     21 
     22 #include <assert.h>
     23 #include <stdio.h>
     24 #include <stdlib.h>
     25 
     26 #include "deflate.h"
     27 #include "lz77.h"
     28 #include "squeeze.h"
     29 #include "tree.h"
     30 #include "util.h"
     31 
     32 /*
     33 The "f" for the FindMinimum function below.
     34 i: the current parameter of f(i)
     35 context: for your implementation
     36 */
     37 typedef double FindMinimumFun(size_t i, void* context);
     38 
     39 /*
     40 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
     41 double, i is in range start-end (excluding end).
     42 */
     43 static size_t FindMinimum(FindMinimumFun f, void* context,
     44                           size_t start, size_t end) {
     45   if (end - start < 1024) {
     46     double best = ZOPFLI_LARGE_FLOAT;
     47     size_t result = start;
     48     size_t i;
     49     for (i = start; i < end; i++) {
     50       double v = f(i, context);
     51       if (v < best) {
     52         best = v;
     53         result = i;
     54       }
     55     }
     56     return result;
     57   } else {
     58     /* Try to find minimum faster by recursively checking multiple points. */
     59 #define NUM 9  /* Good value: 9. */
     60     size_t i;
     61     size_t p[NUM];
     62     double vp[NUM];
     63     size_t besti;
     64     double best;
     65     double lastbest = ZOPFLI_LARGE_FLOAT;
     66     size_t pos = start;
     67 
     68     for (;;) {
     69       if (end - start <= NUM) break;
     70 
     71       for (i = 0; i < NUM; i++) {
     72         p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
     73         vp[i] = f(p[i], context);
     74       }
     75       besti = 0;
     76       best = vp[0];
     77       for (i = 1; i < NUM; i++) {
     78         if (vp[i] < best) {
     79           best = vp[i];
     80           besti = i;
     81         }
     82       }
     83       if (best > lastbest) break;
     84 
     85       start = besti == 0 ? start : p[besti - 1];
     86       end = besti == NUM - 1 ? end : p[besti + 1];
     87 
     88       pos = p[besti];
     89       lastbest = best;
     90     }
     91     return pos;
     92 #undef NUM
     93   }
     94 }
     95 
     96 /*
     97 Returns estimated cost of a block in bits.  It includes the size to encode the
     98 tree and the size to encode all literal, length and distance symbols and their
     99 extra bits.
    100 
    101 litlens: lz77 lit/lengths
    102 dists: ll77 distances
    103 lstart: start of block
    104 lend: end of block (not inclusive)
    105 */
    106 static double EstimateCost(const unsigned short* litlens,
    107                            const unsigned short* dists,
    108                            size_t lstart, size_t lend) {
    109   return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
    110 }
    111 
    112 typedef struct SplitCostContext {
    113   const unsigned short* litlens;
    114   const unsigned short* dists;
    115   size_t llsize;
    116   size_t start;
    117   size_t end;
    118 } SplitCostContext;
    119 
    120 
    121 /*
    122 Gets the cost which is the sum of the cost of the left and the right section
    123 of the data.
    124 type: FindMinimumFun
    125 */
    126 static double SplitCost(size_t i, void* context) {
    127   SplitCostContext* c = (SplitCostContext*)context;
    128   return EstimateCost(c->litlens, c->dists, c->start, i) +
    129       EstimateCost(c->litlens, c->dists, i, c->end);
    130 }
    131 
    132 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
    133   size_t i;
    134   ZOPFLI_APPEND_DATA(value, out, outsize);
    135   for (i = 0; i + 1 < *outsize; i++) {
    136     if ((*out)[i] > value) {
    137       size_t j;
    138       for (j = *outsize - 1; j > i; j--) {
    139         (*out)[j] = (*out)[j - 1];
    140       }
    141       (*out)[i] = value;
    142       break;
    143     }
    144   }
    145 }
    146 
    147 /*
    148 Prints the block split points as decimal and hex values in the terminal.
    149 */
    150 static void PrintBlockSplitPoints(const unsigned short* litlens,
    151                                   const unsigned short* dists,
    152                                   size_t llsize, const size_t* lz77splitpoints,
    153                                   size_t nlz77points) {
    154   size_t* splitpoints = 0;
    155   size_t npoints = 0;
    156   size_t i;
    157   /* The input is given as lz77 indices, but we want to see the uncompressed
    158   index values. */
    159   size_t pos = 0;
    160   if (nlz77points > 0) {
    161     for (i = 0; i < llsize; i++) {
    162       size_t length = dists[i] == 0 ? 1 : litlens[i];
    163       if (lz77splitpoints[npoints] == i) {
    164         ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
    165         if (npoints == nlz77points) break;
    166       }
    167       pos += length;
    168     }
    169   }
    170   assert(npoints == nlz77points);
    171 
    172   fprintf(stderr, "block split points: ");
    173   for (i = 0; i < npoints; i++) {
    174     fprintf(stderr, "%d ", (int)splitpoints[i]);
    175   }
    176   fprintf(stderr, "(hex:");
    177   for (i = 0; i < npoints; i++) {
    178     fprintf(stderr, " %x", (int)splitpoints[i]);
    179   }
    180   fprintf(stderr, ")\n");
    181 
    182   free(splitpoints);
    183 }
    184 
    185 /*
    186 Finds next block to try to split, the largest of the available ones.
    187 The largest is chosen to make sure that if only a limited amount of blocks is
    188 requested, their sizes are spread evenly.
    189 llsize: the size of the LL77 data, which is the size of the done array here.
    190 done: array indicating which blocks starting at that position are no longer
    191     splittable (splitting them increases rather than decreases cost).
    192 splitpoints: the splitpoints found so far.
    193 npoints: the amount of splitpoints found so far.
    194 lstart: output variable, giving start of block.
    195 lend: output variable, giving end of block.
    196 returns 1 if a block was found, 0 if no block found (all are done).
    197 */
    198 static int FindLargestSplittableBlock(
    199     size_t llsize, const unsigned char* done,
    200     const size_t* splitpoints, size_t npoints,
    201     size_t* lstart, size_t* lend) {
    202   size_t longest = 0;
    203   int found = 0;
    204   size_t i;
    205   for (i = 0; i <= npoints; i++) {
    206     size_t start = i == 0 ? 0 : splitpoints[i - 1];
    207     size_t end = i == npoints ? llsize - 1 : splitpoints[i];
    208     if (!done[start] && end - start > longest) {
    209       *lstart = start;
    210       *lend = end;
    211       found = 1;
    212       longest = end - start;
    213     }
    214   }
    215   return found;
    216 }
    217 
    218 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
    219                           const unsigned short* litlens,
    220                           const unsigned short* dists,
    221                           size_t llsize, size_t maxblocks,
    222                           size_t** splitpoints, size_t* npoints) {
    223   size_t lstart, lend;
    224   size_t i;
    225   size_t llpos = 0;
    226   size_t numblocks = 1;
    227   unsigned char* done;
    228   double splitcost, origcost;
    229 
    230   if (llsize < 10) return;  /* This code fails on tiny files. */
    231 
    232   done = (unsigned char*)malloc(llsize);
    233   if (!done) exit(-1); /* Allocation failed. */
    234   for (i = 0; i < llsize; i++) done[i] = 0;
    235 
    236   lstart = 0;
    237   lend = llsize;
    238   for (;;) {
    239     SplitCostContext c;
    240 
    241     if (maxblocks > 0 && numblocks >= maxblocks) {
    242       break;
    243     }
    244 
    245     c.litlens = litlens;
    246     c.dists = dists;
    247     c.llsize = llsize;
    248     c.start = lstart;
    249     c.end = lend;
    250     assert(lstart < lend);
    251     llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
    252 
    253     assert(llpos > lstart);
    254     assert(llpos < lend);
    255 
    256     splitcost = EstimateCost(litlens, dists, lstart, llpos) +
    257         EstimateCost(litlens, dists, llpos, lend);
    258     origcost = EstimateCost(litlens, dists, lstart, lend);
    259 
    260     if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
    261       done[lstart] = 1;
    262     } else {
    263       AddSorted(llpos, splitpoints, npoints);
    264       numblocks++;
    265     }
    266 
    267     if (!FindLargestSplittableBlock(
    268         llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
    269       break;  /* No further split will probably reduce compression. */
    270     }
    271 
    272     if (lend - lstart < 10) {
    273       break;
    274     }
    275   }
    276 
    277   if (options->verbose) {
    278     PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
    279   }
    280 
    281   free(done);
    282 }
    283 
    284 void ZopfliBlockSplit(const ZopfliOptions* options,
    285                       const unsigned char* in, size_t instart, size_t inend,
    286                       size_t maxblocks, size_t** splitpoints, size_t* npoints) {
    287   size_t pos = 0;
    288   size_t i;
    289   ZopfliBlockState s;
    290   size_t* lz77splitpoints = 0;
    291   size_t nlz77points = 0;
    292   ZopfliLZ77Store store;
    293 
    294   ZopfliInitLZ77Store(&store);
    295 
    296   s.options = options;
    297   s.blockstart = instart;
    298   s.blockend = inend;
    299 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
    300   s.lmc = 0;
    301 #endif
    302 
    303   *npoints = 0;
    304   *splitpoints = 0;
    305 
    306   /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
    307   results in better blocks. */
    308   ZopfliLZ77Greedy(&s, in, instart, inend, &store);
    309 
    310   ZopfliBlockSplitLZ77(options,
    311                        store.litlens, store.dists, store.size, maxblocks,
    312                        &lz77splitpoints, &nlz77points);
    313 
    314   /* Convert LZ77 positions to positions in the uncompressed input. */
    315   pos = instart;
    316   if (nlz77points > 0) {
    317     for (i = 0; i < store.size; i++) {
    318       size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
    319       if (lz77splitpoints[*npoints] == i) {
    320         ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
    321         if (*npoints == nlz77points) break;
    322       }
    323       pos += length;
    324     }
    325   }
    326   assert(*npoints == nlz77points);
    327 
    328   free(lz77splitpoints);
    329   ZopfliCleanLZ77Store(&store);
    330 }
    331 
    332 void ZopfliBlockSplitSimple(const unsigned char* in,
    333                             size_t instart, size_t inend,
    334                             size_t blocksize,
    335                             size_t** splitpoints, size_t* npoints) {
    336   size_t i = instart;
    337   while (i < inend) {
    338     ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
    339     i += blocksize;
    340   }
    341   (void)in;
    342 }
    343