Home | History | Annotate | Download | only in src
      1 /* gzread.c -- zlib functions for reading gzip files
      2  * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler
      3  * For conditions of distribution and use, see copyright notice in zlib.h
      4  */
      5 
      6 #include "gzguts.h"
      7 
/* Local functions -- private helpers of the reading code, all defined later
   in this file.  Each returns 0 on success and -1 on error. */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
                                    /* loop on read() to fill a buffer */
local int gz_avail OF((gz_statep)); /* refill the inflate input buffer */
local int gz_look OF((gz_statep));  /* look for a gzip header, pick a mode */
local int gz_decomp OF((gz_statep));    /* inflate into next_out/avail_out */
local int gz_fetch OF((gz_statep)); /* produce data in the output buffer */
local int gz_skip OF((gz_statep, z_off64_t));   /* discard output bytes */
     15 
     16 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
     17    state->fd, and update state->eof, state->err, and state->msg as appropriate.
     18    This function needs to loop on read(), since read() is not guaranteed to
     19    read the number of bytes requested, depending on the type of descriptor. */
     20 local int gz_load(state, buf, len, have)
     21     gz_statep state;
     22     unsigned char *buf;
     23     unsigned len;
     24     unsigned *have;
     25 {
     26     int ret;
     27 
     28     *have = 0;
     29     do {
     30         ret = read(state->fd, buf + *have, len - *have);
     31         if (ret <= 0)
     32             break;
     33         *have += ret;
     34     } while (*have < len);
     35     if (ret < 0) {
     36         gz_error(state, Z_ERRNO, zstrerror());
     37         return -1;
     38     }
     39     if (ret == 0)
     40         state->eof = 1;
     41     return 0;
     42 }
     43 
     44 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
     45    error, 0 otherwise.  Note that the eof flag is set when the end of the input
     46    file is reached, even though there may be unused data in the buffer.  Once
     47    that data has been used, no more attempts will be made to read the file.
     48    If strm->avail_in != 0, then the current data is moved to the beginning of
     49    the input buffer, and then the remainder of the buffer is loaded with the
     50    available data from the input file. */
     51 local int gz_avail(state)
     52     gz_statep state;
     53 {
     54     unsigned got;
     55     z_streamp strm = &(state->strm);
     56 
     57     if (state->err != Z_OK && state->err != Z_BUF_ERROR)
     58         return -1;
     59     if (state->eof == 0) {
     60         if (strm->avail_in) {       /* copy what's there to the start */
     61             unsigned char *p = state->in, *q = strm->next_in;
     62             unsigned n = strm->avail_in;
     63             do {
     64                 *p++ = *q++;
     65             } while (--n);
     66         }
     67         if (gz_load(state, state->in + strm->avail_in,
     68                     state->size - strm->avail_in, &got) == -1)
     69             return -1;
     70         strm->avail_in += got;
     71         strm->next_in = state->in;
     72     }
     73     return 0;
     74 }
     75 
     76 /* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
     77    If this is the first time in, allocate required memory.  state->how will be
     78    left unchanged if there is no more input data available, will be set to COPY
     79    if there is no gzip header and direct copying will be performed, or it will
     80    be set to GZIP for decompression.  If direct copying, then leftover input
     81    data from the input buffer will be copied to the output buffer.  In that
     82    case, all further file reads will be directly to either the output buffer or
     83    a user buffer.  If decompressing, the inflate state will be initialized.
     84    gz_look() will return 0 on success or -1 on failure. */
     85 local int gz_look(state)
     86     gz_statep state;
     87 {
     88     z_streamp strm = &(state->strm);
     89 
     90     /* allocate read buffers and inflate memory */
     91     if (state->size == 0) {
     92         /* allocate buffers */
     93         state->in = malloc(state->want);
     94         state->out = malloc(state->want << 1);
     95         if (state->in == NULL || state->out == NULL) {
     96             if (state->out != NULL)
     97                 free(state->out);
     98             if (state->in != NULL)
     99                 free(state->in);
    100             gz_error(state, Z_MEM_ERROR, "out of memory");
    101             return -1;
    102         }
    103         state->size = state->want;
    104 
    105         /* allocate inflate memory */
    106         state->strm.zalloc = Z_NULL;
    107         state->strm.zfree = Z_NULL;
    108         state->strm.opaque = Z_NULL;
    109         state->strm.avail_in = 0;
    110         state->strm.next_in = Z_NULL;
    111         if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
    112             free(state->out);
    113             free(state->in);
    114             state->size = 0;
    115             gz_error(state, Z_MEM_ERROR, "out of memory");
    116             return -1;
    117         }
    118     }
    119 
    120     /* get at least the magic bytes in the input buffer */
    121     if (strm->avail_in < 2) {
    122         if (gz_avail(state) == -1)
    123             return -1;
    124         if (strm->avail_in == 0)
    125             return 0;
    126     }
    127 
    128     /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
    129        a logical dilemma here when considering the case of a partially written
    130        gzip file, to wit, if a single 31 byte is written, then we cannot tell
    131        whether this is a single-byte file, or just a partially written gzip
    132        file -- for here we assume that if a gzip file is being written, then
    133        the header will be written in a single operation, so that reading a
    134        single byte is sufficient indication that it is not a gzip file) */
    135     if (strm->avail_in > 1 &&
    136             strm->next_in[0] == 31 && strm->next_in[1] == 139) {
    137         inflateReset(strm);
    138         state->how = GZIP;
    139         state->direct = 0;
    140         return 0;
    141     }
    142 
    143     /* no gzip header -- if we were decoding gzip before, then this is trailing
    144        garbage.  Ignore the trailing garbage and finish. */
    145     if (state->direct == 0) {
    146         strm->avail_in = 0;
    147         state->eof = 1;
    148         state->x.have = 0;
    149         return 0;
    150     }
    151 
    152     /* doing raw i/o, copy any leftover input to output -- this assumes that
    153        the output buffer is larger than the input buffer, which also assures
    154        space for gzungetc() */
    155     state->x.next = state->out;
    156     if (strm->avail_in) {
    157         memcpy(state->x.next, strm->next_in, strm->avail_in);
    158         state->x.have = strm->avail_in;
    159         strm->avail_in = 0;
    160     }
    161     state->how = COPY;
    162     state->direct = 1;
    163     return 0;
    164 }
    165 
    166 /* Decompress from input to the provided next_out and avail_out in the state.
    167    On return, state->x.have and state->x.next point to the just decompressed
    168    data.  If the gzip stream completes, state->how is reset to LOOK to look for
    169    the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
    170    on success, -1 on failure. */
    171 local int gz_decomp(state)
    172     gz_statep state;
    173 {
    174     int ret = Z_OK;
    175     unsigned had;
    176     z_streamp strm = &(state->strm);
    177 
    178     /* fill output buffer up to end of deflate stream */
    179     had = strm->avail_out;
    180     do {
    181         /* get more input for inflate() */
    182         if (strm->avail_in == 0 && gz_avail(state) == -1)
    183             return -1;
    184         if (strm->avail_in == 0) {
    185             gz_error(state, Z_BUF_ERROR, "unexpected end of file");
    186             break;
    187         }
    188 
    189         /* decompress and handle errors */
    190         ret = inflate(strm, Z_NO_FLUSH);
    191         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
    192             gz_error(state, Z_STREAM_ERROR,
    193                      "internal error: inflate stream corrupt");
    194             return -1;
    195         }
    196         if (ret == Z_MEM_ERROR) {
    197             gz_error(state, Z_MEM_ERROR, "out of memory");
    198             return -1;
    199         }
    200         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
    201             gz_error(state, Z_DATA_ERROR,
    202                      strm->msg == NULL ? "compressed data error" : strm->msg);
    203             return -1;
    204         }
    205     } while (strm->avail_out && ret != Z_STREAM_END);
    206 
    207     /* update available output */
    208     state->x.have = had - strm->avail_out;
    209     state->x.next = strm->next_out - state->x.have;
    210 
    211     /* if the gzip stream completed successfully, look for another */
    212     if (ret == Z_STREAM_END)
    213         state->how = LOOK;
    214 
    215     /* good decompression */
    216     return 0;
    217 }
    218 
    219 /* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
    220    Data is either copied from the input file or decompressed from the input
    221    file depending on state->how.  If state->how is LOOK, then a gzip header is
    222    looked for to determine whether to copy or decompress.  Returns -1 on error,
    223    otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
    224    end of the input file has been reached and all data has been processed.  */
    225 local int gz_fetch(state)
    226     gz_statep state;
    227 {
    228     z_streamp strm = &(state->strm);
    229 
    230     do {
    231         switch(state->how) {
    232         case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
    233             if (gz_look(state) == -1)
    234                 return -1;
    235             if (state->how == LOOK)
    236                 return 0;
    237             break;
    238         case COPY:      /* -> COPY */
    239             if (gz_load(state, state->out, state->size << 1, &(state->x.have))
    240                     == -1)
    241                 return -1;
    242             state->x.next = state->out;
    243             return 0;
    244         case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
    245             strm->avail_out = state->size << 1;
    246             strm->next_out = state->out;
    247             if (gz_decomp(state) == -1)
    248                 return -1;
    249         }
    250     } while (state->x.have == 0 && (!state->eof || strm->avail_in));
    251     return 0;
    252 }
    253 
    254 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
    255 local int gz_skip(state, len)
    256     gz_statep state;
    257     z_off64_t len;
    258 {
    259     unsigned n;
    260 
    261     /* skip over len bytes or reach end-of-file, whichever comes first */
    262     while (len)
    263         /* skip over whatever is in output buffer */
    264         if (state->x.have) {
    265             n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
    266                 (unsigned)len : state->x.have;
    267             state->x.have -= n;
    268             state->x.next += n;
    269             state->x.pos += n;
    270             len -= n;
    271         }
    272 
    273         /* output buffer empty -- return if we're at the end of the input */
    274         else if (state->eof && state->strm.avail_in == 0)
    275             break;
    276 
    277         /* need more data to skip -- load up output buffer */
    278         else {
    279             /* get more output, looking for header if required */
    280             if (gz_fetch(state) == -1)
    281                 return -1;
    282         }
    283     return 0;
    284 }
    285 
    286 /* -- see zlib.h -- */
    287 int ZEXPORT gzread(file, buf, len)
    288     gzFile file;
    289     voidp buf;
    290     unsigned len;
    291 {
    292     unsigned got, n;
    293     gz_statep state;
    294     z_streamp strm;
    295 
    296     /* get internal structure */
    297     if (file == NULL)
    298         return -1;
    299     state = (gz_statep)file;
    300     strm = &(state->strm);
    301 
    302     /* check that we're reading and that there's no (serious) error */
    303     if (state->mode != GZ_READ ||
    304             (state->err != Z_OK && state->err != Z_BUF_ERROR))
    305         return -1;
    306 
    307     /* since an int is returned, make sure len fits in one, otherwise return
    308        with an error (this avoids the flaw in the interface) */
    309     if ((int)len < 0) {
    310         gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
    311         return -1;
    312     }
    313 
    314     /* if len is zero, avoid unnecessary operations */
    315     if (len == 0)
    316         return 0;
    317 
    318     /* process a skip request */
    319     if (state->seek) {
    320         state->seek = 0;
    321         if (gz_skip(state, state->skip) == -1)
    322             return -1;
    323     }
    324 
    325     /* get len bytes to buf, or less than len if at the end */
    326     got = 0;
    327     do {
    328         /* first just try copying data from the output buffer */
    329         if (state->x.have) {
    330             n = state->x.have > len ? len : state->x.have;
    331             memcpy(buf, state->x.next, n);
    332             state->x.next += n;
    333             state->x.have -= n;
    334         }
    335 
    336         /* output buffer empty -- return if we're at the end of the input */
    337         else if (state->eof && strm->avail_in == 0) {
    338             state->past = 1;        /* tried to read past end */
    339             break;
    340         }
    341 
    342         /* need output data -- for small len or new stream load up our output
    343            buffer */
    344         else if (state->how == LOOK || len < (state->size << 1)) {
    345             /* get more output, looking for header if required */
    346             if (gz_fetch(state) == -1)
    347                 return -1;
    348             continue;       /* no progress yet -- go back to copy above */
    349             /* the copy above assures that we will leave with space in the
    350                output buffer, allowing at least one gzungetc() to succeed */
    351         }
    352 
    353         /* large len -- read directly into user buffer */
    354         else if (state->how == COPY) {      /* read directly */
    355             if (gz_load(state, buf, len, &n) == -1)
    356                 return -1;
    357         }
    358 
    359         /* large len -- decompress directly into user buffer */
    360         else {  /* state->how == GZIP */
    361             strm->avail_out = len;
    362             strm->next_out = buf;
    363             if (gz_decomp(state) == -1)
    364                 return -1;
    365             n = state->x.have;
    366             state->x.have = 0;
    367         }
    368 
    369         /* update progress */
    370         len -= n;
    371         buf = (char *)buf + n;
    372         got += n;
    373         state->x.pos += n;
    374     } while (len);
    375 
    376     /* return number of bytes read into user buffer (will fit in int) */
    377     return (int)got;
    378 }
    379 
    380 /* -- see zlib.h -- */
    381 #undef gzgetc
    382 int ZEXPORT gzgetc(file)
    383     gzFile file;
    384 {
    385     int ret;
    386     unsigned char buf[1];
    387     gz_statep state;
    388 
    389     /* get internal structure */
    390     if (file == NULL)
    391         return -1;
    392     state = (gz_statep)file;
    393 
    394     /* check that we're reading and that there's no (serious) error */
    395     if (state->mode != GZ_READ ||
    396         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    397         return -1;
    398 
    399     /* try output buffer (no need to check for skip request) */
    400     if (state->x.have) {
    401         state->x.have--;
    402         state->x.pos++;
    403         return *(state->x.next)++;
    404     }
    405 
    406     /* nothing there -- try gzread() */
    407     ret = gzread(file, buf, 1);
    408     return ret < 1 ? -1 : buf[0];
    409 }
    410 
    411 int ZEXPORT gzgetc_(file)
    412 gzFile file;
    413 {
    414     return gzgetc(file);
    415 }
    416 
    417 /* -- see zlib.h -- */
    418 int ZEXPORT gzungetc(c, file)
    419     int c;
    420     gzFile file;
    421 {
    422     gz_statep state;
    423 
    424     /* get internal structure */
    425     if (file == NULL)
    426         return -1;
    427     state = (gz_statep)file;
    428 
    429     /* check that we're reading and that there's no (serious) error */
    430     if (state->mode != GZ_READ ||
    431         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    432         return -1;
    433 
    434     /* process a skip request */
    435     if (state->seek) {
    436         state->seek = 0;
    437         if (gz_skip(state, state->skip) == -1)
    438             return -1;
    439     }
    440 
    441     /* can't push EOF */
    442     if (c < 0)
    443         return -1;
    444 
    445     /* if output buffer empty, put byte at end (allows more pushing) */
    446     if (state->x.have == 0) {
    447         state->x.have = 1;
    448         state->x.next = state->out + (state->size << 1) - 1;
    449         state->x.next[0] = c;
    450         state->x.pos--;
    451         state->past = 0;
    452         return c;
    453     }
    454 
    455     /* if no room, give up (must have already done a gzungetc()) */
    456     if (state->x.have == (state->size << 1)) {
    457         gz_error(state, Z_DATA_ERROR, "out of room to push characters");
    458         return -1;
    459     }
    460 
    461     /* slide output data if needed and insert byte before existing data */
    462     if (state->x.next == state->out) {
    463         unsigned char *src = state->out + state->x.have;
    464         unsigned char *dest = state->out + (state->size << 1);
    465         while (src > state->out)
    466             *--dest = *--src;
    467         state->x.next = dest;
    468     }
    469     state->x.have++;
    470     state->x.next--;
    471     state->x.next[0] = c;
    472     state->x.pos--;
    473     state->past = 0;
    474     return c;
    475 }
    476 
    477 /* -- see zlib.h -- */
    478 char * ZEXPORT gzgets(file, buf, len)
    479     gzFile file;
    480     char *buf;
    481     int len;
    482 {
    483     unsigned left, n;
    484     char *str;
    485     unsigned char *eol;
    486     gz_statep state;
    487 
    488     /* check parameters and get internal structure */
    489     if (file == NULL || buf == NULL || len < 1)
    490         return NULL;
    491     state = (gz_statep)file;
    492 
    493     /* check that we're reading and that there's no (serious) error */
    494     if (state->mode != GZ_READ ||
    495         (state->err != Z_OK && state->err != Z_BUF_ERROR))
    496         return NULL;
    497 
    498     /* process a skip request */
    499     if (state->seek) {
    500         state->seek = 0;
    501         if (gz_skip(state, state->skip) == -1)
    502             return NULL;
    503     }
    504 
    505     /* copy output bytes up to new line or len - 1, whichever comes first --
    506        append a terminating zero to the string (we don't check for a zero in
    507        the contents, let the user worry about that) */
    508     str = buf;
    509     left = (unsigned)len - 1;
    510     if (left) do {
    511         /* assure that something is in the output buffer */
    512         if (state->x.have == 0 && gz_fetch(state) == -1)
    513             return NULL;                /* error */
    514         if (state->x.have == 0) {       /* end of file */
    515             state->past = 1;            /* read past end */
    516             break;                      /* return what we have */
    517         }
    518 
    519         /* look for end-of-line in current output buffer */
    520         n = state->x.have > left ? left : state->x.have;
    521         eol = memchr(state->x.next, '\n', n);
    522         if (eol != NULL)
    523             n = (unsigned)(eol - state->x.next) + 1;
    524 
    525         /* copy through end-of-line, or remainder if not found */
    526         memcpy(buf, state->x.next, n);
    527         state->x.have -= n;
    528         state->x.next += n;
    529         state->x.pos += n;
    530         left -= n;
    531         buf += n;
    532     } while (left && eol == NULL);
    533 
    534     /* return terminated string, or if nothing, end of file */
    535     if (buf == str)
    536         return NULL;
    537     buf[0] = 0;
    538     return str;
    539 }
    540 
    541 /* -- see zlib.h -- */
    542 int ZEXPORT gzdirect(file)
    543     gzFile file;
    544 {
    545     gz_statep state;
    546 
    547     /* get internal structure */
    548     if (file == NULL)
    549         return 0;
    550     state = (gz_statep)file;
    551 
    552     /* if the state is not known, but we can find out, then do so (this is
    553        mainly for right after a gzopen() or gzdopen()) */
    554     if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
    555         (void)gz_look(state);
    556 
    557     /* return 1 if transparent, 0 if processing a gzip stream */
    558     return state->direct;
    559 }
    560 
    561 /* -- see zlib.h -- */
    562 int ZEXPORT gzclose_r(file)
    563     gzFile file;
    564 {
    565     int ret, err;
    566     gz_statep state;
    567 
    568     /* get internal structure */
    569     if (file == NULL)
    570         return Z_STREAM_ERROR;
    571     state = (gz_statep)file;
    572 
    573     /* check that we're reading */
    574     if (state->mode != GZ_READ)
    575         return Z_STREAM_ERROR;
    576 
    577     /* free memory and close file */
    578     if (state->size) {
    579         inflateEnd(&(state->strm));
    580         free(state->out);
    581         free(state->in);
    582     }
    583     err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
    584     gz_error(state, Z_OK, NULL);
    585     free(state->path);
    586     ret = close(state->fd);
    587     free(state);
    588     return ret ? Z_ERRNO : err;
    589 }
    590