1 /* gzread.c -- zlib functions for reading gzip files 2 * Copyright (C) 2004, 2005, 2010, 2011, 2012 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6 #include "gzguts.h" 7 8 /* Local functions */ 9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); 10 local int gz_avail OF((gz_statep)); 11 local int gz_look OF((gz_statep)); 12 local int gz_decomp OF((gz_statep)); 13 local int gz_fetch OF((gz_statep)); 14 local int gz_skip OF((gz_statep, z_off64_t)); 15 16 /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from 17 state->fd, and update state->eof, state->err, and state->msg as appropriate. 18 This function needs to loop on read(), since read() is not guaranteed to 19 read the number of bytes requested, depending on the type of descriptor. */ 20 local int gz_load(state, buf, len, have) 21 gz_statep state; 22 unsigned char *buf; 23 unsigned len; 24 unsigned *have; 25 { 26 int ret; 27 28 *have = 0; 29 do { 30 ret = read(state->fd, buf + *have, len - *have); 31 if (ret <= 0) 32 break; 33 *have += ret; 34 } while (*have < len); 35 if (ret < 0) { 36 gz_error(state, Z_ERRNO, zstrerror()); 37 return -1; 38 } 39 if (ret == 0) 40 state->eof = 1; 41 return 0; 42 } 43 44 /* Load up input buffer and set eof flag if last data loaded -- return -1 on 45 error, 0 otherwise. Note that the eof flag is set when the end of the input 46 file is reached, even though there may be unused data in the buffer. Once 47 that data has been used, no more attempts will be made to read the file. 48 If strm->avail_in != 0, then the current data is moved to the beginning of 49 the input buffer, and then the remainder of the buffer is loaded with the 50 available data from the input file. */ 51 local int gz_avail(state) 52 gz_statep state; 53 { 54 unsigned got; 55 z_streamp strm = &(state->strm); 56 57 if (state->err != Z_OK && state->err != Z_BUF_ERROR) 58 return -1; 59 if (state->eof == 0) { 60 if (strm->avail_in) { /* copy what's there to the start */ 61 unsigned char *p = state->in, *q = strm->next_in; 62 unsigned n = strm->avail_in; 63 do { 64 *p++ = *q++; 65 } while (--n); 66 } 67 if (gz_load(state, state->in + strm->avail_in, 68 state->size - strm->avail_in, &got) == -1) 69 return -1; 70 strm->avail_in += got; 71 strm->next_in = state->in; 72 } 73 return 0; 74 } 75 76 /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. 77 If this is the first time in, allocate required memory. state->how will be 78 left unchanged if there is no more input data available, will be set to COPY 79 if there is no gzip header and direct copying will be performed, or it will 80 be set to GZIP for decompression. If direct copying, then leftover input 81 data from the input buffer will be copied to the output buffer. In that 82 case, all further file reads will be directly to either the output buffer or 83 a user buffer. If decompressing, the inflate state will be initialized. 84 gz_look() will return 0 on success or -1 on failure. */ 85 local int gz_look(state) 86 gz_statep state; 87 { 88 z_streamp strm = &(state->strm); 89 90 /* allocate read buffers and inflate memory */ 91 if (state->size == 0) { 92 /* allocate buffers */ 93 state->in = malloc(state->want); 94 state->out = malloc(state->want << 1); 95 if (state->in == NULL || state->out == NULL) { 96 if (state->out != NULL) 97 free(state->out); 98 if (state->in != NULL) 99 free(state->in); 100 gz_error(state, Z_MEM_ERROR, "out of memory"); 101 return -1; 102 } 103 state->size = state->want; 104 105 /* allocate inflate memory */ 106 state->strm.zalloc = Z_NULL; 107 state->strm.zfree = Z_NULL; 108 state->strm.opaque = Z_NULL; 109 state->strm.avail_in = 0; 110 state->strm.next_in = Z_NULL; 111 if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ 112 free(state->out); 113 free(state->in); 114 state->size = 0; 115 gz_error(state, Z_MEM_ERROR, "out of memory"); 116 return -1; 117 } 118 } 119 120 /* get at least the magic bytes in the input buffer */ 121 if (strm->avail_in < 2) { 122 if (gz_avail(state) == -1) 123 return -1; 124 if (strm->avail_in == 0) 125 return 0; 126 } 127 128 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is 129 a logical dilemma here when considering the case of a partially written 130 gzip file, to wit, if a single 31 byte is written, then we cannot tell 131 whether this is a single-byte file, or just a partially written gzip 132 file -- for here we assume that if a gzip file is being written, then 133 the header will be written in a single operation, so that reading a 134 single byte is sufficient indication that it is not a gzip file) */ 135 if (strm->avail_in > 1 && 136 strm->next_in[0] == 31 && strm->next_in[1] == 139) { 137 inflateReset(strm); 138 state->how = GZIP; 139 state->direct = 0; 140 return 0; 141 } 142 143 /* no gzip header -- if we were decoding gzip before, then this is trailing 144 garbage. Ignore the trailing garbage and finish. */ 145 if (state->direct == 0) { 146 strm->avail_in = 0; 147 state->eof = 1; 148 state->x.have = 0; 149 return 0; 150 } 151 152 /* doing raw i/o, copy any leftover input to output -- this assumes that 153 the output buffer is larger than the input buffer, which also assures 154 space for gzungetc() */ 155 state->x.next = state->out; 156 if (strm->avail_in) { 157 memcpy(state->x.next, strm->next_in, strm->avail_in); 158 state->x.have = strm->avail_in; 159 strm->avail_in = 0; 160 } 161 state->how = COPY; 162 state->direct = 1; 163 return 0; 164 } 165 166 /* Decompress from input to the provided next_out and avail_out in the state. 167 On return, state->x.have and state->x.next point to the just decompressed 168 data. If the gzip stream completes, state->how is reset to LOOK to look for 169 the next gzip stream or raw data, once state->x.have is depleted. Returns 0 170 on success, -1 on failure. */ 171 local int gz_decomp(state) 172 gz_statep state; 173 { 174 int ret = Z_OK; 175 unsigned had; 176 z_streamp strm = &(state->strm); 177 178 /* fill output buffer up to end of deflate stream */ 179 had = strm->avail_out; 180 do { 181 /* get more input for inflate() */ 182 if (strm->avail_in == 0 && gz_avail(state) == -1) 183 return -1; 184 if (strm->avail_in == 0) { 185 gz_error(state, Z_BUF_ERROR, "unexpected end of file"); 186 break; 187 } 188 189 /* decompress and handle errors */ 190 ret = inflate(strm, Z_NO_FLUSH); 191 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { 192 gz_error(state, Z_STREAM_ERROR, 193 "internal error: inflate stream corrupt"); 194 return -1; 195 } 196 if (ret == Z_MEM_ERROR) { 197 gz_error(state, Z_MEM_ERROR, "out of memory"); 198 return -1; 199 } 200 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ 201 gz_error(state, Z_DATA_ERROR, 202 strm->msg == NULL ? "compressed data error" : strm->msg); 203 return -1; 204 } 205 } while (strm->avail_out && ret != Z_STREAM_END); 206 207 /* update available output */ 208 state->x.have = had - strm->avail_out; 209 state->x.next = strm->next_out - state->x.have; 210 211 /* if the gzip stream completed successfully, look for another */ 212 if (ret == Z_STREAM_END) 213 state->how = LOOK; 214 215 /* good decompression */ 216 return 0; 217 } 218 219 /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. 220 Data is either copied from the input file or decompressed from the input 221 file depending on state->how. If state->how is LOOK, then a gzip header is 222 looked for to determine whether to copy or decompress. Returns -1 on error, 223 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the 224 end of the input file has been reached and all data has been processed. */ 225 local int gz_fetch(state) 226 gz_statep state; 227 { 228 z_streamp strm = &(state->strm); 229 230 do { 231 switch(state->how) { 232 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ 233 if (gz_look(state) == -1) 234 return -1; 235 if (state->how == LOOK) 236 return 0; 237 break; 238 case COPY: /* -> COPY */ 239 if (gz_load(state, state->out, state->size << 1, &(state->x.have)) 240 == -1) 241 return -1; 242 state->x.next = state->out; 243 return 0; 244 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ 245 strm->avail_out = state->size << 1; 246 strm->next_out = state->out; 247 if (gz_decomp(state) == -1) 248 return -1; 249 } 250 } while (state->x.have == 0 && (!state->eof || strm->avail_in)); 251 return 0; 252 } 253 254 /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ 255 local int gz_skip(state, len) 256 gz_statep state; 257 z_off64_t len; 258 { 259 unsigned n; 260 261 /* skip over len bytes or reach end-of-file, whichever comes first */ 262 while (len) 263 /* skip over whatever is in output buffer */ 264 if (state->x.have) { 265 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? 266 (unsigned)len : state->x.have; 267 state->x.have -= n; 268 state->x.next += n; 269 state->x.pos += n; 270 len -= n; 271 } 272 273 /* output buffer empty -- return if we're at the end of the input */ 274 else if (state->eof && state->strm.avail_in == 0) 275 break; 276 277 /* need more data to skip -- load up output buffer */ 278 else { 279 /* get more output, looking for header if required */ 280 if (gz_fetch(state) == -1) 281 return -1; 282 } 283 return 0; 284 } 285 286 /* -- see zlib.h -- */ 287 int ZEXPORT gzread(file, buf, len) 288 gzFile file; 289 voidp buf; 290 unsigned len; 291 { 292 unsigned got, n; 293 gz_statep state; 294 z_streamp strm; 295 296 /* get internal structure */ 297 if (file == NULL) 298 return -1; 299 state = (gz_statep)file; 300 strm = &(state->strm); 301 302 /* check that we're reading and that there's no (serious) error */ 303 if (state->mode != GZ_READ || 304 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 305 return -1; 306 307 /* since an int is returned, make sure len fits in one, otherwise return 308 with an error (this avoids the flaw in the interface) */ 309 if ((int)len < 0) { 310 gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); 311 return -1; 312 } 313 314 /* if len is zero, avoid unnecessary operations */ 315 if (len == 0) 316 return 0; 317 318 /* process a skip request */ 319 if (state->seek) { 320 state->seek = 0; 321 if (gz_skip(state, state->skip) == -1) 322 return -1; 323 } 324 325 /* get len bytes to buf, or less than len if at the end */ 326 got = 0; 327 do { 328 /* first just try copying data from the output buffer */ 329 if (state->x.have) { 330 n = state->x.have > len ? len : state->x.have; 331 memcpy(buf, state->x.next, n); 332 state->x.next += n; 333 state->x.have -= n; 334 } 335 336 /* output buffer empty -- return if we're at the end of the input */ 337 else if (state->eof && strm->avail_in == 0) { 338 state->past = 1; /* tried to read past end */ 339 break; 340 } 341 342 /* need output data -- for small len or new stream load up our output 343 buffer */ 344 else if (state->how == LOOK || len < (state->size << 1)) { 345 /* get more output, looking for header if required */ 346 if (gz_fetch(state) == -1) 347 return -1; 348 continue; /* no progress yet -- go back to copy above */ 349 /* the copy above assures that we will leave with space in the 350 output buffer, allowing at least one gzungetc() to succeed */ 351 } 352 353 /* large len -- read directly into user buffer */ 354 else if (state->how == COPY) { /* read directly */ 355 if (gz_load(state, buf, len, &n) == -1) 356 return -1; 357 } 358 359 /* large len -- decompress directly into user buffer */ 360 else { /* state->how == GZIP */ 361 strm->avail_out = len; 362 strm->next_out = buf; 363 if (gz_decomp(state) == -1) 364 return -1; 365 n = state->x.have; 366 state->x.have = 0; 367 } 368 369 /* update progress */ 370 len -= n; 371 buf = (char *)buf + n; 372 got += n; 373 state->x.pos += n; 374 } while (len); 375 376 /* return number of bytes read into user buffer (will fit in int) */ 377 return (int)got; 378 } 379 380 /* -- see zlib.h -- */ 381 #undef gzgetc 382 int ZEXPORT gzgetc(file) 383 gzFile file; 384 { 385 int ret; 386 unsigned char buf[1]; 387 gz_statep state; 388 389 /* get internal structure */ 390 if (file == NULL) 391 return -1; 392 state = (gz_statep)file; 393 394 /* check that we're reading and that there's no (serious) error */ 395 if (state->mode != GZ_READ || 396 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 397 return -1; 398 399 /* try output buffer (no need to check for skip request) */ 400 if (state->x.have) { 401 state->x.have--; 402 state->x.pos++; 403 return *(state->x.next)++; 404 } 405 406 /* nothing there -- try gzread() */ 407 ret = gzread(file, buf, 1); 408 return ret < 1 ? -1 : buf[0]; 409 } 410 411 int ZEXPORT gzgetc_(file) 412 gzFile file; 413 { 414 return gzgetc(file); 415 } 416 417 /* -- see zlib.h -- */ 418 int ZEXPORT gzungetc(c, file) 419 int c; 420 gzFile file; 421 { 422 gz_statep state; 423 424 /* get internal structure */ 425 if (file == NULL) 426 return -1; 427 state = (gz_statep)file; 428 429 /* check that we're reading and that there's no (serious) error */ 430 if (state->mode != GZ_READ || 431 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 432 return -1; 433 434 /* process a skip request */ 435 if (state->seek) { 436 state->seek = 0; 437 if (gz_skip(state, state->skip) == -1) 438 return -1; 439 } 440 441 /* can't push EOF */ 442 if (c < 0) 443 return -1; 444 445 /* if output buffer empty, put byte at end (allows more pushing) */ 446 if (state->x.have == 0) { 447 state->x.have = 1; 448 state->x.next = state->out + (state->size << 1) - 1; 449 state->x.next[0] = c; 450 state->x.pos--; 451 state->past = 0; 452 return c; 453 } 454 455 /* if no room, give up (must have already done a gzungetc()) */ 456 if (state->x.have == (state->size << 1)) { 457 gz_error(state, Z_DATA_ERROR, "out of room to push characters"); 458 return -1; 459 } 460 461 /* slide output data if needed and insert byte before existing data */ 462 if (state->x.next == state->out) { 463 unsigned char *src = state->out + state->x.have; 464 unsigned char *dest = state->out + (state->size << 1); 465 while (src > state->out) 466 *--dest = *--src; 467 state->x.next = dest; 468 } 469 state->x.have++; 470 state->x.next--; 471 state->x.next[0] = c; 472 state->x.pos--; 473 state->past = 0; 474 return c; 475 } 476 477 /* -- see zlib.h -- */ 478 char * ZEXPORT gzgets(file, buf, len) 479 gzFile file; 480 char *buf; 481 int len; 482 { 483 unsigned left, n; 484 char *str; 485 unsigned char *eol; 486 gz_statep state; 487 488 /* check parameters and get internal structure */ 489 if (file == NULL || buf == NULL || len < 1) 490 return NULL; 491 state = (gz_statep)file; 492 493 /* check that we're reading and that there's no (serious) error */ 494 if (state->mode != GZ_READ || 495 (state->err != Z_OK && state->err != Z_BUF_ERROR)) 496 return NULL; 497 498 /* process a skip request */ 499 if (state->seek) { 500 state->seek = 0; 501 if (gz_skip(state, state->skip) == -1) 502 return NULL; 503 } 504 505 /* copy output bytes up to new line or len - 1, whichever comes first -- 506 append a terminating zero to the string (we don't check for a zero in 507 the contents, let the user worry about that) */ 508 str = buf; 509 left = (unsigned)len - 1; 510 if (left) do { 511 /* assure that something is in the output buffer */ 512 if (state->x.have == 0 && gz_fetch(state) == -1) 513 return NULL; /* error */ 514 if (state->x.have == 0) { /* end of file */ 515 state->past = 1; /* read past end */ 516 break; /* return what we have */ 517 } 518 519 /* look for end-of-line in current output buffer */ 520 n = state->x.have > left ? left : state->x.have; 521 eol = memchr(state->x.next, '\n', n); 522 if (eol != NULL) 523 n = (unsigned)(eol - state->x.next) + 1; 524 525 /* copy through end-of-line, or remainder if not found */ 526 memcpy(buf, state->x.next, n); 527 state->x.have -= n; 528 state->x.next += n; 529 state->x.pos += n; 530 left -= n; 531 buf += n; 532 } while (left && eol == NULL); 533 534 /* return terminated string, or if nothing, end of file */ 535 if (buf == str) 536 return NULL; 537 buf[0] = 0; 538 return str; 539 } 540 541 /* -- see zlib.h -- */ 542 int ZEXPORT gzdirect(file) 543 gzFile file; 544 { 545 gz_statep state; 546 547 /* get internal structure */ 548 if (file == NULL) 549 return 0; 550 state = (gz_statep)file; 551 552 /* if the state is not known, but we can find out, then do so (this is 553 mainly for right after a gzopen() or gzdopen()) */ 554 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) 555 (void)gz_look(state); 556 557 /* return 1 if transparent, 0 if processing a gzip stream */ 558 return state->direct; 559 } 560 561 /* -- see zlib.h -- */ 562 int ZEXPORT gzclose_r(file) 563 gzFile file; 564 { 565 int ret, err; 566 gz_statep state; 567 568 /* get internal structure */ 569 if (file == NULL) 570 return Z_STREAM_ERROR; 571 state = (gz_statep)file; 572 573 /* check that we're reading */ 574 if (state->mode != GZ_READ) 575 return Z_STREAM_ERROR; 576 577 /* free memory and close file */ 578 if (state->size) { 579 inflateEnd(&(state->strm)); 580 free(state->out); 581 free(state->in); 582 } 583 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; 584 gz_error(state, Z_OK, NULL); 585 free(state->path); 586 ret = close(state->fd); 587 free(state); 588 return ret ? Z_ERRNO : err; 589 } 590