Home | History | Annotate | Download | only in examples
      1 /* fitblk.c: example of fitting compressed output to a specified size
      2    Not copyrighted -- provided to the public domain
      3    Version 1.1  25 November 2004  Mark Adler */
      4 
      5 /* Version history:
      6    1.0  24 Nov 2004  First version
      7    1.1  25 Nov 2004  Change deflateInit2() to deflateInit()
      8                      Use fixed-size, stack-allocated raw buffers
      9                      Simplify code moving compression to subroutines
     10                      Use assert() for internal errors
     11                      Add detailed description of approach
     12  */
     13 
     14 /* Approach to just fitting a requested compressed size:
     15 
     16    fitblk performs three compression passes on a portion of the input
     17    data in order to determine how much of that input will compress to
     18    nearly the requested output block size.  The first pass generates
     19    enough deflate blocks to produce output to fill the requested
     20    output size plus a specified excess amount (see the EXCESS define
     21    below).  The last deflate block may go quite a bit past that, but
     22    is discarded.  The second pass decompresses and recompresses just
     23    the compressed data that fit in the requested plus excess sized
     24    buffer.  The deflate process is terminated after that amount of
     25    input, which is less than the amount consumed on the first pass.
     26    The last deflate block of the result will be of a comparable size
     27    to the final product, so that the header for that deflate block and
     28    the compression ratio for that block will be about the same as in
     29    the final product.  The third compression pass decompresses the
     30    result of the second step, but only the compressed data up to the
     31    requested size minus an amount to allow the compressed stream to
     32    complete (see the MARGIN define below).  That will result in a
     33    final compressed stream whose length is less than or equal to the
     34    requested size.  Assuming sufficient input and a requested size
     35    greater than a few hundred bytes, the shortfall will typically be
     36    less than ten bytes.
     37 
     38    If the input is short enough that the first compression completes
     39    before filling the requested output size, then that compressed
     40    stream is returned with no recompression.
     41 
     42    EXCESS is chosen to be just greater than the shortfall seen in a
     43    two pass approach similar to the above.  That shortfall is due to
     44    the last deflate block compressing more efficiently with a smaller
     45    header on the second pass.  EXCESS is set to be large enough so
     46    that there is enough uncompressed data for the second pass to fill
     47    out the requested size, and small enough so that the final deflate
     48    block of the second pass will be close in size to the final deflate
     49    block of the third and final pass.  MARGIN is chosen to be just
     50    large enough to assure that the final compression has enough room
     51    to complete in all cases.
     52  */
     53 
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zlib.h"
     58 
     59 #define local static
     60 
     61 /* print nastygram and leave */
     62 local void quit(char *why)
     63 {
     64     fprintf(stderr, "fitblk abort: %s\n", why);
     65     exit(1);
     66 }
     67 
     68 #define RAWLEN 4096    /* intermediate uncompressed buffer size */
     69 
     70 /* compress from file to def until provided buffer is full or end of
     71    input reached; return last deflate() return value, or Z_ERRNO if
     72    there was read error on the file */
     73 local int partcompress(FILE *in, z_streamp def)
     74 {
     75     int ret, flush;
     76     unsigned char raw[RAWLEN];
     77 
     78     flush = Z_NO_FLUSH;
     79     do {
     80         def->avail_in = fread(raw, 1, RAWLEN, in);
     81         if (ferror(in))
     82             return Z_ERRNO;
     83         def->next_in = raw;
     84         if (feof(in))
     85             flush = Z_FINISH;
     86         ret = deflate(def, flush);
     87         assert(ret != Z_STREAM_ERROR);
     88     } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
     89     return ret;
     90 }
     91 
     92 /* recompress from inf's input to def's output; the input for inf and
     93    the output for def are set in those structures before calling;
     94    return last deflate() return value, or Z_MEM_ERROR if inflate()
     95    was not able to allocate enough memory when it needed to */
     96 local int recompress(z_streamp inf, z_streamp def)
     97 {
     98     int ret, flush;
     99     unsigned char raw[RAWLEN];
    100 
    101     flush = Z_NO_FLUSH;
    102     do {
    103         /* decompress */
    104         inf->avail_out = RAWLEN;
    105         inf->next_out = raw;
    106         ret = inflate(inf, Z_NO_FLUSH);
    107         assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
    108                ret != Z_NEED_DICT);
    109         if (ret == Z_MEM_ERROR)
    110             return ret;
    111 
    112         /* compress what was decompresed until done or no room */
    113         def->avail_in = RAWLEN - inf->avail_out;
    114         def->next_in = raw;
    115         if (inf->avail_out != 0)
    116             flush = Z_FINISH;
    117         ret = deflate(def, flush);
    118         assert(ret != Z_STREAM_ERROR);
    119     } while (ret != Z_STREAM_END && def->avail_out != 0);
    120     return ret;
    121 }
    122 
    123 #define EXCESS 256      /* empirically determined stream overage */
    124 #define MARGIN 8        /* amount to back off for completion */
    125 
    126 /* compress from stdin to fixed-size block on stdout */
    127 int main(int argc, char **argv)
    128 {
    129     int ret;                /* return code */
    130     unsigned size;          /* requested fixed output block size */
    131     unsigned have;          /* bytes written by deflate() call */
    132     unsigned char *blk;     /* intermediate and final stream */
    133     unsigned char *tmp;     /* close to desired size stream */
    134     z_stream def, inf;      /* zlib deflate and inflate states */
    135 
    136     /* get requested output size */
    137     if (argc != 2)
    138         quit("need one argument: size of output block");
    139     ret = strtol(argv[1], argv + 1, 10);
    140     if (argv[1][0] != 0)
    141         quit("argument must be a number");
    142     if (ret < 8)            /* 8 is minimum zlib stream size */
    143         quit("need positive size of 8 or greater");
    144     size = (unsigned)ret;
    145 
    146     /* allocate memory for buffers and compression engine */
    147     blk = malloc(size + EXCESS);
    148     def.zalloc = Z_NULL;
    149     def.zfree = Z_NULL;
    150     def.opaque = Z_NULL;
    151     ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
    152     if (ret != Z_OK || blk == NULL)
    153         quit("out of memory");
    154 
    155     /* compress from stdin until output full, or no more input */
    156     def.avail_out = size + EXCESS;
    157     def.next_out = blk;
    158     ret = partcompress(stdin, &def);
    159     if (ret == Z_ERRNO)
    160         quit("error reading input");
    161 
    162     /* if it all fit, then size was undersubscribed -- done! */
    163     if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
    164         /* write block to stdout */
    165         have = size + EXCESS - def.avail_out;
    166         if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
    167             quit("error writing output");
    168 
    169         /* clean up and print results to stderr */
    170         ret = deflateEnd(&def);
    171         assert(ret != Z_STREAM_ERROR);
    172         free(blk);
    173         fprintf(stderr,
    174                 "%u bytes unused out of %u requested (all input)\n",
    175                 size - have, size);
    176         return 0;
    177     }
    178 
    179     /* it didn't all fit -- set up for recompression */
    180     inf.zalloc = Z_NULL;
    181     inf.zfree = Z_NULL;
    182     inf.opaque = Z_NULL;
    183     inf.avail_in = 0;
    184     inf.next_in = Z_NULL;
    185     ret = inflateInit(&inf);
    186     tmp = malloc(size + EXCESS);
    187     if (ret != Z_OK || tmp == NULL)
    188         quit("out of memory");
    189     ret = deflateReset(&def);
    190     assert(ret != Z_STREAM_ERROR);
    191 
    192     /* do first recompression close to the right amount */
    193     inf.avail_in = size + EXCESS;
    194     inf.next_in = blk;
    195     def.avail_out = size + EXCESS;
    196     def.next_out = tmp;
    197     ret = recompress(&inf, &def);
    198     if (ret == Z_MEM_ERROR)
    199         quit("out of memory");
    200 
    201     /* set up for next reocmpression */
    202     ret = inflateReset(&inf);
    203     assert(ret != Z_STREAM_ERROR);
    204     ret = deflateReset(&def);
    205     assert(ret != Z_STREAM_ERROR);
    206 
    207     /* do second and final recompression (third compression) */
    208     inf.avail_in = size - MARGIN;   /* assure stream will complete */
    209     inf.next_in = tmp;
    210     def.avail_out = size;
    211     def.next_out = blk;
    212     ret = recompress(&inf, &def);
    213     if (ret == Z_MEM_ERROR)
    214         quit("out of memory");
    215     assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */
    216 
    217     /* done -- write block to stdout */
    218     have = size - def.avail_out;
    219     if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
    220         quit("error writing output");
    221 
    222     /* clean up and print results to stderr */
    223     free(tmp);
    224     ret = inflateEnd(&inf);
    225     assert(ret != Z_STREAM_ERROR);
    226     ret = deflateEnd(&def);
    227     assert(ret != Z_STREAM_ERROR);
    228     free(blk);
    229     fprintf(stderr,
    230             "%u bytes unused out of %u requested (%lu input)\n",
    231             size - have, size, def.total_in);
    232     return 0;
    233 }
    234