1 /* 2 Copyright 2011 Google Inc. All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 16 Author: lode.vandevenne (at) gmail.com (Lode Vandevenne) 17 Author: jyrki.alakuijala (at) gmail.com (Jyrki Alakuijala) 18 */ 19 20 /* 21 Bounded package merge algorithm, based on the paper 22 "A Fast and Space-Economical Algorithm for Length-Limited Coding 23 Jyrki Katajainen, Alistair Moffat, Andrew Turpin". 24 */ 25 26 #include "katajainen.h" 27 #include <assert.h> 28 #include <stdlib.h> 29 30 typedef struct Node Node; 31 32 /* 33 Nodes forming chains. Also used to represent leaves. 34 */ 35 struct Node { 36 size_t weight; /* Total weight (symbol count) of this chain. */ 37 Node* tail; /* Previous node(s) of this chain, or 0 if none. */ 38 int count; /* Leaf symbol index, or number of leaves before this chain. */ 39 char inuse; /* Tracking for garbage collection. */ 40 }; 41 42 /* 43 Memory pool for nodes. 44 */ 45 typedef struct NodePool { 46 Node* nodes; /* The pool. */ 47 Node* next; /* Pointer to a possibly free node in the pool. */ 48 int size; /* Size of the memory pool. */ 49 } NodePool; 50 51 /* 52 Initializes a chain node with the given values and marks it as in use. 53 */ 54 static void InitNode(size_t weight, int count, Node* tail, Node* node) { 55 node->weight = weight; 56 node->count = count; 57 node->tail = tail; 58 node->inuse = 1; 59 } 60 61 /* 62 Finds a free location in the memory pool. Performs garbage collection if needed. 63 lists: If given, used to mark in-use nodes during garbage collection. 64 maxbits: Size of lists. 65 pool: Memory pool to get free node from. 66 */ 67 static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) { 68 for (;;) { 69 if (pool->next >= &pool->nodes[pool->size]) { 70 /* Garbage collection. */ 71 int i; 72 for (i = 0; i < pool->size; i++) { 73 pool->nodes[i].inuse = 0; 74 } 75 if (lists) { 76 for (i = 0; i < maxbits * 2; i++) { 77 Node* node; 78 for (node = lists[i / 2][i % 2]; node; node = node->tail) { 79 node->inuse = 1; 80 } 81 } 82 } 83 pool->next = &pool->nodes[0]; 84 } 85 if (!pool->next->inuse) break; /* Found one. */ 86 pool->next++; 87 } 88 return pool->next++; 89 } 90 91 92 /* 93 Performs a Boundary Package-Merge step. Puts a new chain in the given list. The 94 new chain is, depending on the weights, a leaf or a combination of two chains 95 from the previous list. 96 lists: The lists of chains. 97 maxbits: Number of lists. 98 leaves: The leaves, one per symbol. 99 numsymbols: Number of leaves. 100 pool: the node memory pool. 101 index: The index of the list in which a new chain or leaf is required. 102 final: Whether this is the last time this function is called. If it is then it 103 is no more needed to recursively call self. 104 */ 105 static void BoundaryPM(Node* (*lists)[2], int maxbits, 106 Node* leaves, int numsymbols, NodePool* pool, int index, char final) { 107 Node* newchain; 108 Node* oldchain; 109 int lastcount = lists[index][1]->count; /* Count of last chain of list. */ 110 111 if (index == 0 && lastcount >= numsymbols) return; 112 113 newchain = GetFreeNode(lists, maxbits, pool); 114 oldchain = lists[index][1]; 115 116 /* These are set up before the recursive calls below, so that there is a list 117 pointing to the new node, to let the garbage collection know it's in use. */ 118 lists[index][0] = oldchain; 119 lists[index][1] = newchain; 120 121 if (index == 0) { 122 /* New leaf node in list 0. */ 123 InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain); 124 } else { 125 size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight; 126 if (lastcount < numsymbols && sum > leaves[lastcount].weight) { 127 /* New leaf inserted in list, so count is incremented. */ 128 InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail, 129 newchain); 130 } else { 131 InitNode(sum, lastcount, lists[index - 1][1], newchain); 132 if (!final) { 133 /* Two lookahead chains of previous list used up, create new ones. */ 134 BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); 135 BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0); 136 } 137 } 138 } 139 } 140 141 /* 142 Initializes each list with as lookahead chains the two leaves with lowest 143 weights. 144 */ 145 static void InitLists( 146 NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) { 147 int i; 148 Node* node0 = GetFreeNode(0, maxbits, pool); 149 Node* node1 = GetFreeNode(0, maxbits, pool); 150 InitNode(leaves[0].weight, 1, 0, node0); 151 InitNode(leaves[1].weight, 2, 0, node1); 152 for (i = 0; i < maxbits; i++) { 153 lists[i][0] = node0; 154 lists[i][1] = node1; 155 } 156 } 157 158 /* 159 Converts result of boundary package-merge to the bitlengths. The result in the 160 last chain of the last list contains the amount of active leaves in each list. 161 chain: Chain to extract the bit length from (last chain from last list). 162 */ 163 static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) { 164 Node* node; 165 for (node = chain; node; node = node->tail) { 166 int i; 167 for (i = 0; i < node->count; i++) { 168 bitlengths[leaves[i].count]++; 169 } 170 } 171 } 172 173 /* 174 Comparator for sorting the leaves. Has the function signature for qsort. 175 */ 176 static int LeafComparator(const void* a, const void* b) { 177 return ((const Node*)a)->weight - ((const Node*)b)->weight; 178 } 179 180 int ZopfliLengthLimitedCodeLengths( 181 const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) { 182 NodePool pool; 183 int i; 184 int numsymbols = 0; /* Amount of symbols with frequency > 0. */ 185 int numBoundaryPMRuns; 186 187 /* Array of lists of chains. Each list requires only two lookahead chains at 188 a time, so each list is a array of two Node*'s. */ 189 Node* (*lists)[2]; 190 191 /* One leaf per symbol. Only numsymbols leaves will be used. */ 192 Node* leaves = (Node*)malloc(n * sizeof(*leaves)); 193 194 /* Initialize all bitlengths at 0. */ 195 for (i = 0; i < n; i++) { 196 bitlengths[i] = 0; 197 } 198 199 /* Count used symbols and place them in the leaves. */ 200 for (i = 0; i < n; i++) { 201 if (frequencies[i]) { 202 leaves[numsymbols].weight = frequencies[i]; 203 leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */ 204 numsymbols++; 205 } 206 } 207 208 /* Check special cases and error conditions. */ 209 if ((1 << maxbits) < numsymbols) { 210 free(leaves); 211 return 1; /* Error, too few maxbits to represent symbols. */ 212 } 213 if (numsymbols == 0) { 214 free(leaves); 215 return 0; /* No symbols at all. OK. */ 216 } 217 if (numsymbols == 1) { 218 bitlengths[leaves[0].count] = 1; 219 free(leaves); 220 return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */ 221 } 222 223 /* Sort the leaves from lightest to heaviest. */ 224 qsort(leaves, numsymbols, sizeof(Node), LeafComparator); 225 226 /* Initialize node memory pool. */ 227 pool.size = 2 * maxbits * (maxbits + 1); 228 pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes)); 229 pool.next = pool.nodes; 230 for (i = 0; i < pool.size; i++) { 231 pool.nodes[i].inuse = 0; 232 } 233 234 lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists)); 235 InitLists(&pool, leaves, maxbits, lists); 236 237 /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two 238 are already created in the initialization. Each BoundaryPM run creates one. */ 239 numBoundaryPMRuns = 2 * numsymbols - 4; 240 for (i = 0; i < numBoundaryPMRuns; i++) { 241 char final = i == numBoundaryPMRuns - 1; 242 BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final); 243 } 244 245 ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths); 246 247 free(lists); 248 free(leaves); 249 free(pool.nodes); 250 return 0; /* OK. */ 251 } 252