Home | History | Annotate | Download | only in m_initimg
      1 /*************************************************************************
      2 * Name:        huffman.c
      3 * Author:      Marcus Geelnard
      4 * Description: Huffman coder/decoder implementation.
      5 * Reentrant:   Yes
      6 * $Id: huffman.c,v 1.6 2004/12/14 18:59:40 marcus256 Exp $
      7 *
      8 * This is a very straight forward implementation of a Huffman coder and
      9 * decoder.
     10 *
     11 * Primary flaws with this primitive implementation are:
     12 *  - Slow bit stream implementation
     13 *  - Fairly slow decoding (slower than encoding)
     14 *  - Maximum tree depth of 32 (the coder aborts if any code exceeds a
     15 *    size of 32 bits). If I'm not mistaking, this should not be possible
     16 *    unless the input buffer is larger than 2^32 bytes, which is not
     17 *    supported by the coder anyway (max 2^32-1 bytes can be specified with
     18 *    an unsigned 32-bit integer).
     19 *
     20 * On the other hand, there are a few advantages of this implementation:
     21 *  - The Huffman tree is stored in a very compact form, requiring only
     22 *    12 bits per symbol (for 8 bit symbols), meaning a maximum of 384
     23 *    bytes overhead.
     24 *  - The Huffman coder does quite well in situations where the data is
     25 *    noisy, in which case most dictionary based coders run into problems.
     26 *
     27 * Possible improvements (probably not worth it):
     28 *  - Partition the input data stream into blocks, where each block has
     29 *    its own Huffman tree. With variable block sizes, it should be
     30 *    possible to find locally optimal Huffman trees, which in turn could
     31 *    reduce the total size.
     32 *  - Allow for a few different predefined Huffman trees, which could
     33 *    reduce the size of a block even further.
     34 *-------------------------------------------------------------------------
     35 * Copyright (c) 2003-2004 Marcus Geelnard
     36 *
     37 * This software is provided 'as-is', without any express or implied
     38 * warranty. In no event will the authors be held liable for any damages
     39 * arising from the use of this software.
     40 *
     41 * Permission is granted to anyone to use this software for any purpose,
     42 * including commercial applications, and to alter it and redistribute it
     43 * freely, subject to the following restrictions:
     44 *
     45 * 1. The origin of this software must not be misrepresented; you must not
     46 *    claim that you wrote the original software. If you use this software
     47 *    in a product, an acknowledgment in the product documentation would
     48 *    be appreciated but is not required.
     49 *
     50 * 2. Altered source versions must be plainly marked as such, and must not
     51 *    be misrepresented as being the original software.
     52 *
     53 * 3. This notice may not be removed or altered from any source
     54 *    distribution.
     55 *
     56 * Marcus Geelnard
     57 * marcus.geelnard at home.se
     58 *************************************************************************/
     59 
     60 /* Modified May 06 by Julian Seward for use in Valgrind.
     61    - changed integral types to V's versions (UInt, UChar etc)
     62    - added initialisation in _Huffman_WriteBits, as described in
     63      comment in that function.
     64 */
     65 
     66 /*************************************************************************
     67 * Types used for Huffman coding
     68 *************************************************************************/
     69 
     70 typedef struct {
     71     UInt Symbol;
     72     UInt Count;
     73     UInt Code;
     74     UInt Bits;
     75 } huff_sym_t;
     76 
     77 typedef struct {
     78     UChar *BytePtr;
     79     UInt  BitPos;
     80 } huff_bitstream_t;
     81 
     82 
     83 
     84 /*************************************************************************
     85 *                           INTERNAL FUNCTIONS                           *
     86 *************************************************************************/
     87 
     88 
     89 /*************************************************************************
     90 * _Huffman_InitBitstream() - Initialize a bitstream.
     91 *************************************************************************/
     92 
     93 static void _Huffman_InitBitstream( huff_bitstream_t *stream,
     94     UChar *buf )
     95 {
     96     stream->BytePtr  = buf;
     97     stream->BitPos   = 0;
     98 }
     99 
    100 
    101 /*************************************************************************
    102 * _Huffman_ReadBits() - Read bits from a bitstream.
    103 *************************************************************************/
    104 
    105 static UInt _Huffman_ReadBits( huff_bitstream_t *stream,
    106     UInt bits )
    107 {
    108     UInt  x, bit, count;
    109     UChar *buf;
    110 
    111     /* Get current stream state */
    112     buf = stream->BytePtr;
    113     bit = stream->BitPos;
    114 
    115     /* Extract bits */
    116     x = 0;
    117     for( count = 0; count < bits; ++ count )
    118     {
    119         x = (x<<1) + (*buf & (1<<(7-bit)) ? 1 : 0);
    120         bit = (bit+1) & 7;
    121         if( !bit )
    122         {
    123             ++ buf;
    124         }
    125     }
    126 
    127     /* Store new stream state */
    128     stream->BytePtr = buf;
    129     stream->BitPos  = bit;
    130 
    131     return x;
    132 }
    133 
    134 
    135 /*************************************************************************
    136 * _Huffman_WriteBits() - Write bits to a bitstream.
    137 *************************************************************************/
    138 
    139 static void _Huffman_WriteBits( huff_bitstream_t *stream, UInt x,
    140     UInt bits )
    141 {
    142     UInt  bit, count;
    143     UChar *buf;
    144     UInt  mask;
    145 
    146     /* Get current stream state */
    147     buf = stream->BytePtr;
    148     bit = stream->BitPos;
    149 
    150     /* Append bits */
    151     mask = 1 << (bits-1);
    152     for( count = 0; count < bits; ++ count )
    153     {
    154         /* If we're starting a new byte, zero it out, so that the
    155            resulting byte sequence looks completely defined from
    156            Valgrind's point of view.  If this doesn't happen then the
    157            last byte in the stream may look partially undefined. */
    158         if (bit == 0)
    159            *buf = 0;
    160         *buf = (*buf & (0xff^(1<<(7-bit)))) +
    161                ((x & mask ? 1 : 0) << (7-bit));
    162         x <<= 1;
    163         bit = (bit+1) & 7;
    164         if( !bit )
    165         {
    166             ++ buf;
    167         }
    168     }
    169 
    170     /* Store new stream state */
    171     stream->BytePtr = buf;
    172     stream->BitPos  = bit;
    173 }
    174 
    175 
    176 /*************************************************************************
    177 * _Huffman_Hist() - Calculate (sorted) histogram for a block of data.
    178 *************************************************************************/
    179 
    180 static void _Huffman_Hist( UChar *in, huff_sym_t *sym,
    181     UInt size )
    182 {
    183     Int k, swaps;
    184     huff_sym_t tmp;
    185 
    186     /* Clear/init histogram */
    187     for( k = 0; k < 256; ++ k )
    188     {
    189         sym[k].Symbol = k;
    190         sym[k].Count  = 0;
    191         sym[k].Code   = 0;
    192         sym[k].Bits   = 0;
    193     }
    194 
    195     /* Build histogram */
    196     for( k = size; k; -- k )
    197     {
    198         sym[ *in ++ ].Count ++;
    199     }
    200 
    201     /* Sort histogram - most frequent symbol first (bubble sort) */
    202     do
    203     {
    204         swaps = 0;
    205         for( k = 0; k < 255; ++ k )
    206         {
    207             if( sym[k].Count < sym[k+1].Count )
    208             {
    209                 tmp      = sym[k];
    210                 sym[k]   = sym[k+1];
    211                 sym[k+1] = tmp;
    212                 swaps    = 1;
    213             }
    214         }
    215     }
    216     while( swaps );
    217 }
    218 
    219 
    220 /*************************************************************************
    221 * _Huffman_MakeTree() - Generate a Huffman tree.
    222 *************************************************************************/
    223 
    224 static void _Huffman_MakeTree( huff_sym_t *sym, huff_bitstream_t *stream,
    225     UInt code, UInt bits, UInt first,
    226     UInt last )
    227 {
    228     UInt k, size, size_a, size_b, last_a, first_b;
    229 
    230     /* Is this a leaf node? */
    231     if( first == last )
    232     {
    233         /* Append symbol to tree description */
    234         _Huffman_WriteBits( stream, 1, 1 );
    235         _Huffman_WriteBits( stream, sym[first].Symbol, 8 );
    236 
    237         /* Store code info in symbol array */
    238         sym[first].Code = code;
    239         sym[first].Bits = bits;
    240         return;
    241     }
    242     else
    243     {
    244         /* This was not a leaf node */
    245         _Huffman_WriteBits( stream, 0, 1 );
    246     }
    247 
    248     /* Total size of interval */
    249     size = 0;
    250     for( k = first; k <= last; ++ k )
    251     {
    252         size += sym[k].Count;
    253     }
    254 
    255     /* Find size of branch a */
    256     size_a = 0;
    257     for( k = first; size_a < ((size+1)>>1) && k < last; ++ k )
    258     {
    259         size_a += sym[k].Count;
    260     }
    261 
    262     /* Non-empty branch? */
    263     if( size_a > 0 )
    264     {
    265         /* Continue branching */
    266         _Huffman_WriteBits( stream, 1, 1 );
    267 
    268         /* Branch a cut in histogram */
    269         last_a  = k-1;
    270 
    271         /* Create branch a */
    272         _Huffman_MakeTree( sym, stream, (code<<1)+0, bits+1,
    273                                first, last_a );
    274     }
    275     else
    276     {
    277         /* This was an empty branch */
    278         _Huffman_WriteBits( stream, 0, 1 );
    279     }
    280 
    281     /* Size of branch b */
    282     size_b = size - size_a;
    283 
    284     /* Non-empty branch? */
    285     if( size_b > 0 )
    286     {
    287         /* Continue branching */
    288         _Huffman_WriteBits( stream, 1, 1 );
    289 
    290         /* Branch b cut in histogram */
    291         first_b = k;
    292 
    293         /* Create branch b */
    294         _Huffman_MakeTree( sym, stream, (code<<1)+1, bits+1,
    295                                first_b, last );
    296     }
    297     else
    298     {
    299         /* This was an empty branch */
    300         _Huffman_WriteBits( stream, 0, 1 );
    301     }
    302 }
    303 
    304 
    305 /*************************************************************************
    306 * _Huffman_RecoverTree() - Recover a Huffman tree from a bitstream.
    307 *************************************************************************/
    308 
    309 static void _Huffman_RecoverTree( huff_sym_t *sym,
    310     huff_bitstream_t *stream, UInt code, UInt bits,
    311     UInt *symnum )
    312 {
    313     UInt symbol;
    314 
    315     /* Is this a leaf node? */
    316     if( _Huffman_ReadBits( stream, 1 ) )
    317     {
    318         /* Get symbol from tree description */
    319         symbol = _Huffman_ReadBits( stream, 8 );
    320 
    321         /* Store code info in symbol array */
    322         sym[*symnum].Symbol = symbol;
    323         sym[*symnum].Code   = code;
    324         sym[*symnum].Bits   = bits;
    325 
    326         /* Increase symbol counter */
    327         *symnum = *symnum + 1;
    328 
    329         return;
    330     }
    331 
    332     /* Non-empty branch? */
    333     if( _Huffman_ReadBits( stream, 1 ) )
    334     {
    335         /* Create branch a */
    336         _Huffman_RecoverTree( sym, stream, (code<<1)+0, bits+1,
    337                               symnum );
    338     }
    339 
    340     /* Non-empty branch? */
    341     if( _Huffman_ReadBits( stream, 1 ) )
    342     {
    343         /* Create branch b */
    344         _Huffman_RecoverTree( sym, stream, (code<<1)+1, bits+1,
    345                               symnum );
    346     }
    347 }
    348 
    349 
    350 
    351 
    352 /*************************************************************************
    353 *                            PUBLIC FUNCTIONS                            *
    354 *************************************************************************/
    355 
    356 
    357 /*************************************************************************
    358 * Huffman_Compress() - Compress a block of data using a Huffman coder.
    359 *  in     - Input (uncompressed) buffer.
    360 *  out    - Output (compressed) buffer. This buffer must be 384 bytes
    361 *           larger than the input buffer.
    362 *  insize - Number of input bytes.
    363 * The function returns the size of the compressed data.
    364 *************************************************************************/
    365 static
    366 Int Huffman_Compress( UChar *in, UChar *out,
    367     UInt insize )
    368 {
    369     huff_sym_t       sym[ 256 ], tmp;
    370     huff_bitstream_t stream;
    371     UInt     k, total_bytes, swaps, symbol, last_symbol;
    372 
    373     /* Do we have anything to compress? */
    374     if( insize < 1 ) return 0;
    375 
    376     /* Initialize bitstream */
    377     _Huffman_InitBitstream( &stream, out );
    378 
    379     /* Calculate and sort histogram for input data */
    380     _Huffman_Hist( in, sym, insize );
    381 
    382     /* Find number of used symbols */
    383     for( last_symbol = 255; sym[last_symbol].Count == 0; -- last_symbol );
    384 
    385     /* Special case: In order to build a correct tree, we need at least
    386        two symbols (otherwise we get zero-bit representations). */
    387     if( last_symbol == 0 ) ++ last_symbol;
    388 
    389     /* Build Huffman tree */
    390     _Huffman_MakeTree( sym, &stream, 0, 0, 0, last_symbol );
    391 
    392     /* Was any code > 32 bits? (we do not handle that at present) */
    393     for( k = 0; k < 255; ++ k )
    394     {
    395         if( sym[k].Bits > 32 )
    396         {
    397             return 0;
    398         }
    399     }
    400 
    401     /* Sort histogram - first symbol first (bubble sort) */
    402     do
    403     {
    404         swaps = 0;
    405         for( k = 0; k < 255; ++ k )
    406         {
    407             if( sym[k].Symbol > sym[k+1].Symbol )
    408             {
    409                 tmp      = sym[k];
    410                 sym[k]   = sym[k+1];
    411                 sym[k+1] = tmp;
    412                 swaps    = 1;
    413             }
    414         }
    415     }
    416     while( swaps );
    417 
    418     /* Encode input stream */
    419     for( k = 0; k < insize; ++ k )
    420     {
    421         symbol = in[ k ];
    422         _Huffman_WriteBits( &stream, sym[symbol].Code,
    423                             sym[symbol].Bits );
    424     }
    425 
    426     /* Calculate size of output data */
    427     total_bytes = (Int)(stream.BytePtr - out);
    428     if( stream.BitPos > 0 )
    429     {
    430         ++ total_bytes;
    431     }
    432 
    433     return total_bytes;
    434 }
    435 
    436 
    437 
    438 /*************************************************************************
    439 * Huffman_Uncompress() - Uncompress a block of data using a Huffman
    440 * decoder.
    441 *  in      - Input (compressed) buffer.
    442 *  out     - Output (uncompressed) buffer. This buffer must be large
    443 *            enough to hold the uncompressed data.
    444 *  insize  - Number of input bytes.
    445 *  outsize - Number of output bytes.
    446 *************************************************************************/
    447 static
    448 void Huffman_Uncompress( UChar *in, UChar *out,
    449     UInt insize, UInt outsize )
    450 {
    451     huff_sym_t       sym[ 256 ], tmp;
    452     huff_bitstream_t stream;
    453     UInt     k, m, symbol_count, swaps;
    454     UChar    *buf;
    455     UInt     bits, delta_bits, new_bits, code;
    456 
    457     /* Do we have anything to decompress? */
    458     if( insize < 1 ) return;
    459 
    460     /* Initialize bitstream */
    461     _Huffman_InitBitstream( &stream, in );
    462 
    463     /* Clear tree/histogram */
    464     for( k = 0; k < 256; ++ k )
    465     {
    466         sym[k].Bits = 0x7fffffff;
    467     }
    468 
    469     /* Recover Huffman tree */
    470     symbol_count = 0;
    471     _Huffman_RecoverTree( sym, &stream, 0, 0, &symbol_count );
    472 
    473     /* Sort histogram - shortest code first (bubble sort) */
    474     do
    475     {
    476         swaps = 0;
    477         for( k = 0; k < symbol_count-1; ++ k )
    478         {
    479             if( sym[k].Bits > sym[k+1].Bits )
    480             {
    481                 tmp      = sym[k];
    482                 sym[k]   = sym[k+1];
    483                 sym[k+1] = tmp;
    484                 swaps    = 1;
    485             }
    486         }
    487     }
    488     while( swaps );
    489 
    490     /* Decode input stream */
    491     buf = out;
    492     for( k = 0; k < outsize; ++ k )
    493     {
    494         /* Search tree for matching code */
    495         bits = 0;
    496         code = 0;
    497         for( m = 0; m < symbol_count; ++ m )
    498         {
    499             delta_bits = sym[m].Bits - bits;
    500             if( delta_bits )
    501             {
    502                 new_bits = _Huffman_ReadBits( &stream, delta_bits );
    503                 code = code | (new_bits << (32-bits-delta_bits));
    504                 bits = sym[m].Bits;
    505             }
    506             if( code == (sym[m].Code << (32-sym[m].Bits)) )
    507             {
    508                 *buf ++ = (UChar) sym[m].Symbol;
    509                 break;
    510             }
    511         }
    512     }
    513 }
    514