Home | History | Annotate | Download | only in zopfli
      1 /*
      2 Copyright 2011 Google Inc. All Rights Reserved.
      3 
      4 Licensed under the Apache License, Version 2.0 (the "License");
      5 you may not use this file except in compliance with the License.
      6 You may obtain a copy of the License at
      7 
      8     http://www.apache.org/licenses/LICENSE-2.0
      9 
     10 Unless required by applicable law or agreed to in writing, software
     11 distributed under the License is distributed on an "AS IS" BASIS,
     12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 See the License for the specific language governing permissions and
     14 limitations under the License.
     15 
     16 Author: lode.vandevenne (at) gmail.com (Lode Vandevenne)
     17 Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala)
     18 */
     19 
     20 /*
     21 Bounded package merge algorithm, based on the paper
     22 "A Fast and Space-Economical Algorithm for Length-Limited Coding
     23 Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
     24 */
     25 
     26 #include "katajainen.h"
     27 #include <assert.h>
     28 #include <stdlib.h>
     29 
     30 typedef struct Node Node;
     31 
     32 /*
     33 Nodes forming chains. Also used to represent leaves.
     34 */
     35 struct Node {
     36   size_t weight;  /* Total weight (symbol count) of this chain. */
     37   Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
     38   int count;  /* Leaf symbol index, or number of leaves before this chain. */
     39   char inuse;  /* Tracking for garbage collection. */
     40 };
     41 
     42 /*
     43 Memory pool for nodes.
     44 */
     45 typedef struct NodePool {
     46   Node* nodes;  /* The pool. */
     47   Node* next;  /* Pointer to a possibly free node in the pool. */
     48   int size;  /* Size of the memory pool. */
     49 } NodePool;
     50 
     51 /*
     52 Initializes a chain node with the given values and marks it as in use.
     53 */
     54 static void InitNode(size_t weight, int count, Node* tail, Node* node) {
     55   node->weight = weight;
     56   node->count = count;
     57   node->tail = tail;
     58   node->inuse = 1;
     59 }
     60 
     61 /*
     62 Finds a free location in the memory pool. Performs garbage collection if needed.
     63 lists: If given, used to mark in-use nodes during garbage collection.
     64 maxbits: Size of lists.
     65 pool: Memory pool to get free node from.
     66 */
     67 static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) {
     68   for (;;) {
     69     if (pool->next >= &pool->nodes[pool->size]) {
     70       /* Garbage collection. */
     71       int i;
     72       for (i = 0; i < pool->size; i++) {
     73         pool->nodes[i].inuse = 0;
     74       }
     75       if (lists) {
     76         for (i = 0; i < maxbits * 2; i++) {
     77           Node* node;
     78           for (node = lists[i / 2][i % 2]; node; node = node->tail) {
     79             node->inuse = 1;
     80           }
     81         }
     82       }
     83       pool->next = &pool->nodes[0];
     84     }
     85     if (!pool->next->inuse) break;  /* Found one. */
     86     pool->next++;
     87   }
     88   return pool->next++;
     89 }
     90 
     91 
     92 /*
     93 Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
     94 new chain is, depending on the weights, a leaf or a combination of two chains
     95 from the previous list.
     96 lists: The lists of chains.
     97 maxbits: Number of lists.
     98 leaves: The leaves, one per symbol.
     99 numsymbols: Number of leaves.
    100 pool: the node memory pool.
    101 index: The index of the list in which a new chain or leaf is required.
    102 final: Whether this is the last time this function is called. If it is then it
    103   is no more needed to recursively call self.
    104 */
    105 static void BoundaryPM(Node* (*lists)[2], int maxbits,
    106     Node* leaves, int numsymbols, NodePool* pool, int index, char final) {
    107   Node* newchain;
    108   Node* oldchain;
    109   int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
    110 
    111   if (index == 0 && lastcount >= numsymbols) return;
    112 
    113   newchain = GetFreeNode(lists, maxbits, pool);
    114   oldchain = lists[index][1];
    115 
    116   /* These are set up before the recursive calls below, so that there is a list
    117   pointing to the new node, to let the garbage collection know it's in use. */
    118   lists[index][0] = oldchain;
    119   lists[index][1] = newchain;
    120 
    121   if (index == 0) {
    122     /* New leaf node in list 0. */
    123     InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
    124   } else {
    125     size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
    126     if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
    127       /* New leaf inserted in list, so count is incremented. */
    128       InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
    129           newchain);
    130     } else {
    131       InitNode(sum, lastcount, lists[index - 1][1], newchain);
    132       if (!final) {
    133         /* Two lookahead chains of previous list used up, create new ones. */
    134         BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
    135         BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
    136       }
    137     }
    138   }
    139 }
    140 
    141 /*
    142 Initializes each list with as lookahead chains the two leaves with lowest
    143 weights.
    144 */
    145 static void InitLists(
    146     NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
    147   int i;
    148   Node* node0 = GetFreeNode(0, maxbits, pool);
    149   Node* node1 = GetFreeNode(0, maxbits, pool);
    150   InitNode(leaves[0].weight, 1, 0, node0);
    151   InitNode(leaves[1].weight, 2, 0, node1);
    152   for (i = 0; i < maxbits; i++) {
    153     lists[i][0] = node0;
    154     lists[i][1] = node1;
    155   }
    156 }
    157 
    158 /*
    159 Converts result of boundary package-merge to the bitlengths. The result in the
    160 last chain of the last list contains the amount of active leaves in each list.
    161 chain: Chain to extract the bit length from (last chain from last list).
    162 */
    163 static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
    164   Node* node;
    165   for (node = chain; node; node = node->tail) {
    166     int i;
    167     for (i = 0; i < node->count; i++) {
    168       bitlengths[leaves[i].count]++;
    169     }
    170   }
    171 }
    172 
    173 /*
    174 Comparator for sorting the leaves. Has the function signature for qsort.
    175 */
    176 static int LeafComparator(const void* a, const void* b) {
    177   return ((const Node*)a)->weight - ((const Node*)b)->weight;
    178 }
    179 
    180 int ZopfliLengthLimitedCodeLengths(
    181     const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
    182   NodePool pool;
    183   int i;
    184   int numsymbols = 0;  /* Amount of symbols with frequency > 0. */
    185   int numBoundaryPMRuns;
    186 
    187   /* Array of lists of chains. Each list requires only two lookahead chains at
    188   a time, so each list is a array of two Node*'s. */
    189   Node* (*lists)[2];
    190 
    191   /* One leaf per symbol. Only numsymbols leaves will be used. */
    192   Node* leaves = (Node*)malloc(n * sizeof(*leaves));
    193 
    194   /* Initialize all bitlengths at 0. */
    195   for (i = 0; i < n; i++) {
    196     bitlengths[i] = 0;
    197   }
    198 
    199   /* Count used symbols and place them in the leaves. */
    200   for (i = 0; i < n; i++) {
    201     if (frequencies[i]) {
    202       leaves[numsymbols].weight = frequencies[i];
    203       leaves[numsymbols].count = i;  /* Index of symbol this leaf represents. */
    204       numsymbols++;
    205     }
    206   }
    207 
    208   /* Check special cases and error conditions. */
    209   if ((1 << maxbits) < numsymbols) {
    210     free(leaves);
    211     return 1;  /* Error, too few maxbits to represent symbols. */
    212   }
    213   if (numsymbols == 0) {
    214     free(leaves);
    215     return 0;  /* No symbols at all. OK. */
    216   }
    217   if (numsymbols == 1) {
    218     bitlengths[leaves[0].count] = 1;
    219     free(leaves);
    220     return 0;  /* Only one symbol, give it bitlength 1, not 0. OK. */
    221   }
    222 
    223   /* Sort the leaves from lightest to heaviest. */
    224   qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
    225 
    226   /* Initialize node memory pool. */
    227   pool.size = 2 * maxbits * (maxbits + 1);
    228   pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes));
    229   pool.next = pool.nodes;
    230   for (i = 0; i < pool.size; i++) {
    231     pool.nodes[i].inuse = 0;
    232   }
    233 
    234   lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
    235   InitLists(&pool, leaves, maxbits, lists);
    236 
    237   /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
    238   are already created in the initialization. Each BoundaryPM run creates one. */
    239   numBoundaryPMRuns = 2 * numsymbols - 4;
    240   for (i = 0; i < numBoundaryPMRuns; i++) {
    241     char final = i == numBoundaryPMRuns - 1;
    242     BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final);
    243   }
    244 
    245   ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
    246 
    247   free(lists);
    248   free(leaves);
    249   free(pool.nodes);
    250   return 0;  /* OK. */
    251 }
    252