1 /* 2 * .xz Stream decoder 3 * 4 * Author: Lasse Collin <lasse.collin (at) tukaani.org> 5 * 6 * This file has been put into the public domain. 7 * You can do whatever you want with this file. 8 */ 9 10 #include "xz_private.h" 11 #include "xz_stream.h" 12 13 #ifdef XZ_USE_CRC64 14 # define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64) 15 #else 16 # define IS_CRC64(check_type) false 17 #endif 18 19 /* Hash used to validate the Index field */ 20 struct xz_dec_hash { 21 vli_type unpadded; 22 vli_type uncompressed; 23 uint32_t crc32; 24 }; 25 26 struct xz_dec { 27 /* Position in dec_main() */ 28 enum { 29 SEQ_STREAM_HEADER, 30 SEQ_BLOCK_START, 31 SEQ_BLOCK_HEADER, 32 SEQ_BLOCK_UNCOMPRESS, 33 SEQ_BLOCK_PADDING, 34 SEQ_BLOCK_CHECK, 35 SEQ_INDEX, 36 SEQ_INDEX_PADDING, 37 SEQ_INDEX_CRC32, 38 SEQ_STREAM_FOOTER 39 } sequence; 40 41 /* Position in variable-length integers and Check fields */ 42 uint32_t pos; 43 44 /* Variable-length integer decoded by dec_vli() */ 45 vli_type vli; 46 47 /* Saved in_pos and out_pos */ 48 size_t in_start; 49 size_t out_start; 50 51 #ifdef XZ_USE_CRC64 52 /* CRC32 or CRC64 value in Block or CRC32 value in Index */ 53 uint64_t crc; 54 #else 55 /* CRC32 value in Block or Index */ 56 uint32_t crc; 57 #endif 58 59 /* Type of the integrity check calculated from uncompressed data */ 60 enum xz_check check_type; 61 62 /* Operation mode */ 63 enum xz_mode mode; 64 65 /* 66 * True if the next call to xz_dec_run() is allowed to return 67 * XZ_BUF_ERROR. 68 */ 69 bool allow_buf_error; 70 71 /* Information stored in Block Header */ 72 struct { 73 /* 74 * Value stored in the Compressed Size field, or 75 * VLI_UNKNOWN if Compressed Size is not present. 76 */ 77 vli_type compressed; 78 79 /* 80 * Value stored in the Uncompressed Size field, or 81 * VLI_UNKNOWN if Uncompressed Size is not present. 82 */ 83 vli_type uncompressed; 84 85 /* Size of the Block Header field */ 86 uint32_t size; 87 } block_header; 88 89 /* Information collected when decoding Blocks */ 90 struct { 91 /* Observed compressed size of the current Block */ 92 vli_type compressed; 93 94 /* Observed uncompressed size of the current Block */ 95 vli_type uncompressed; 96 97 /* Number of Blocks decoded so far */ 98 vli_type count; 99 100 /* 101 * Hash calculated from the Block sizes. This is used to 102 * validate the Index field. 103 */ 104 struct xz_dec_hash hash; 105 } block; 106 107 /* Variables needed when verifying the Index field */ 108 struct { 109 /* Position in dec_index() */ 110 enum { 111 SEQ_INDEX_COUNT, 112 SEQ_INDEX_UNPADDED, 113 SEQ_INDEX_UNCOMPRESSED 114 } sequence; 115 116 /* Size of the Index in bytes */ 117 vli_type size; 118 119 /* Number of Records (matches block.count in valid files) */ 120 vli_type count; 121 122 /* 123 * Hash calculated from the Records (matches block.hash in 124 * valid files). 125 */ 126 struct xz_dec_hash hash; 127 } index; 128 129 /* 130 * Temporary buffer needed to hold Stream Header, Block Header, 131 * and Stream Footer. The Block Header is the biggest (1 KiB) 132 * so we reserve space according to that. buf[] has to be aligned 133 * to a multiple of four bytes; the size_t variables before it 134 * should guarantee this. 135 */ 136 struct { 137 size_t pos; 138 size_t size; 139 uint8_t buf[1024]; 140 } temp; 141 142 struct xz_dec_lzma2 *lzma2; 143 144 #ifdef XZ_DEC_BCJ 145 struct xz_dec_bcj *bcj; 146 bool bcj_active; 147 #endif 148 }; 149 150 #ifdef XZ_DEC_ANY_CHECK 151 /* Sizes of the Check field with different Check IDs */ 152 static const uint8_t check_sizes[16] = { 153 0, 154 4, 4, 4, 155 8, 8, 8, 156 16, 16, 16, 157 32, 32, 32, 158 64, 64, 64 159 }; 160 #endif 161 162 /* 163 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller 164 * must have set s->temp.pos to indicate how much data we are supposed 165 * to copy into s->temp.buf. Return true once s->temp.pos has reached 166 * s->temp.size. 167 */ 168 static bool fill_temp(struct xz_dec *s, struct xz_buf *b) 169 { 170 size_t copy_size = min_t(size_t, 171 b->in_size - b->in_pos, s->temp.size - s->temp.pos); 172 173 memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); 174 b->in_pos += copy_size; 175 s->temp.pos += copy_size; 176 177 if (s->temp.pos == s->temp.size) { 178 s->temp.pos = 0; 179 return true; 180 } 181 182 return false; 183 } 184 185 /* Decode a variable-length integer (little-endian base-128 encoding) */ 186 static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, 187 size_t *in_pos, size_t in_size) 188 { 189 uint8_t byte; 190 191 if (s->pos == 0) 192 s->vli = 0; 193 194 while (*in_pos < in_size) { 195 byte = in[*in_pos]; 196 ++*in_pos; 197 198 s->vli |= (vli_type)(byte & 0x7F) << s->pos; 199 200 if ((byte & 0x80) == 0) { 201 /* Don't allow non-minimal encodings. */ 202 if (byte == 0 && s->pos != 0) 203 return XZ_DATA_ERROR; 204 205 s->pos = 0; 206 return XZ_STREAM_END; 207 } 208 209 s->pos += 7; 210 if (s->pos == 7 * VLI_BYTES_MAX) 211 return XZ_DATA_ERROR; 212 } 213 214 return XZ_OK; 215 } 216 217 /* 218 * Decode the Compressed Data field from a Block. Update and validate 219 * the observed compressed and uncompressed sizes of the Block so that 220 * they don't exceed the values possibly stored in the Block Header 221 * (validation assumes that no integer overflow occurs, since vli_type 222 * is normally uint64_t). Update the CRC32 or CRC64 value if presence of 223 * the CRC32 or CRC64 field was indicated in Stream Header. 224 * 225 * Once the decoding is finished, validate that the observed sizes match 226 * the sizes possibly stored in the Block Header. Update the hash and 227 * Block count, which are later used to validate the Index field. 228 */ 229 static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) 230 { 231 enum xz_ret ret; 232 233 s->in_start = b->in_pos; 234 s->out_start = b->out_pos; 235 236 #ifdef XZ_DEC_BCJ 237 if (s->bcj_active) 238 ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); 239 else 240 #endif 241 ret = xz_dec_lzma2_run(s->lzma2, b); 242 243 s->block.compressed += b->in_pos - s->in_start; 244 s->block.uncompressed += b->out_pos - s->out_start; 245 246 /* 247 * There is no need to separately check for VLI_UNKNOWN, since 248 * the observed sizes are always smaller than VLI_UNKNOWN. 249 */ 250 if (s->block.compressed > s->block_header.compressed 251 || s->block.uncompressed 252 > s->block_header.uncompressed) 253 return XZ_DATA_ERROR; 254 255 if (s->check_type == XZ_CHECK_CRC32) 256 s->crc = xz_crc32(b->out + s->out_start, 257 b->out_pos - s->out_start, s->crc); 258 #ifdef XZ_USE_CRC64 259 else if (s->check_type == XZ_CHECK_CRC64) 260 s->crc = xz_crc64(b->out + s->out_start, 261 b->out_pos - s->out_start, s->crc); 262 #endif 263 264 if (ret == XZ_STREAM_END) { 265 if (s->block_header.compressed != VLI_UNKNOWN 266 && s->block_header.compressed 267 != s->block.compressed) 268 return XZ_DATA_ERROR; 269 270 if (s->block_header.uncompressed != VLI_UNKNOWN 271 && s->block_header.uncompressed 272 != s->block.uncompressed) 273 return XZ_DATA_ERROR; 274 275 s->block.hash.unpadded += s->block_header.size 276 + s->block.compressed; 277 278 #ifdef XZ_DEC_ANY_CHECK 279 s->block.hash.unpadded += check_sizes[s->check_type]; 280 #else 281 if (s->check_type == XZ_CHECK_CRC32) 282 s->block.hash.unpadded += 4; 283 else if (IS_CRC64(s->check_type)) 284 s->block.hash.unpadded += 8; 285 #endif 286 287 s->block.hash.uncompressed += s->block.uncompressed; 288 s->block.hash.crc32 = xz_crc32( 289 (const uint8_t *)&s->block.hash, 290 sizeof(s->block.hash), s->block.hash.crc32); 291 292 ++s->block.count; 293 } 294 295 return ret; 296 } 297 298 /* Update the Index size and the CRC32 value. */ 299 static void index_update(struct xz_dec *s, const struct xz_buf *b) 300 { 301 size_t in_used = b->in_pos - s->in_start; 302 s->index.size += in_used; 303 s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc); 304 } 305 306 /* 307 * Decode the Number of Records, Unpadded Size, and Uncompressed Size 308 * fields from the Index field. That is, Index Padding and CRC32 are not 309 * decoded by this function. 310 * 311 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything 312 * successfully decoded), or XZ_DATA_ERROR (input is corrupt). 313 */ 314 static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) 315 { 316 enum xz_ret ret; 317 318 do { 319 ret = dec_vli(s, b->in, &b->in_pos, b->in_size); 320 if (ret != XZ_STREAM_END) { 321 index_update(s, b); 322 return ret; 323 } 324 325 switch (s->index.sequence) { 326 case SEQ_INDEX_COUNT: 327 s->index.count = s->vli; 328 329 /* 330 * Validate that the Number of Records field 331 * indicates the same number of Records as 332 * there were Blocks in the Stream. 333 */ 334 if (s->index.count != s->block.count) 335 return XZ_DATA_ERROR; 336 337 s->index.sequence = SEQ_INDEX_UNPADDED; 338 break; 339 340 case SEQ_INDEX_UNPADDED: 341 s->index.hash.unpadded += s->vli; 342 s->index.sequence = SEQ_INDEX_UNCOMPRESSED; 343 break; 344 345 case SEQ_INDEX_UNCOMPRESSED: 346 s->index.hash.uncompressed += s->vli; 347 s->index.hash.crc32 = xz_crc32( 348 (const uint8_t *)&s->index.hash, 349 sizeof(s->index.hash), 350 s->index.hash.crc32); 351 --s->index.count; 352 s->index.sequence = SEQ_INDEX_UNPADDED; 353 break; 354 } 355 } while (s->index.count > 0); 356 357 return XZ_STREAM_END; 358 } 359 360 /* 361 * Validate that the next four or eight input bytes match the value 362 * of s->crc. s->pos must be zero when starting to validate the first byte. 363 * The "bits" argument allows using the same code for both CRC32 and CRC64. 364 */ 365 static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b, 366 uint32_t bits) 367 { 368 do { 369 if (b->in_pos == b->in_size) 370 return XZ_OK; 371 372 if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++]) 373 return XZ_DATA_ERROR; 374 375 s->pos += 8; 376 377 } while (s->pos < bits); 378 379 s->crc = 0; 380 s->pos = 0; 381 382 return XZ_STREAM_END; 383 } 384 385 #ifdef XZ_DEC_ANY_CHECK 386 /* 387 * Skip over the Check field when the Check ID is not supported. 388 * Returns true once the whole Check field has been skipped over. 389 */ 390 static bool check_skip(struct xz_dec *s, struct xz_buf *b) 391 { 392 while (s->pos < check_sizes[s->check_type]) { 393 if (b->in_pos == b->in_size) 394 return false; 395 396 ++b->in_pos; 397 ++s->pos; 398 } 399 400 s->pos = 0; 401 402 return true; 403 } 404 #endif 405 406 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ 407 static enum xz_ret dec_stream_header(struct xz_dec *s) 408 { 409 if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) 410 return XZ_FORMAT_ERROR; 411 412 if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) 413 != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) 414 return XZ_DATA_ERROR; 415 416 if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) 417 return XZ_OPTIONS_ERROR; 418 419 /* 420 * Of integrity checks, we support none (Check ID = 0), 421 * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4). 422 * However, if XZ_DEC_ANY_CHECK is defined, we will accept other 423 * check types too, but then the check won't be verified and 424 * a warning (XZ_UNSUPPORTED_CHECK) will be given. 425 */ 426 s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; 427 428 #ifdef XZ_DEC_ANY_CHECK 429 if (s->check_type > XZ_CHECK_MAX) 430 return XZ_OPTIONS_ERROR; 431 432 if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) 433 return XZ_UNSUPPORTED_CHECK; 434 #else 435 if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type)) 436 return XZ_OPTIONS_ERROR; 437 #endif 438 439 return XZ_OK; 440 } 441 442 /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ 443 static enum xz_ret dec_stream_footer(struct xz_dec *s) 444 { 445 if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) 446 return XZ_DATA_ERROR; 447 448 if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) 449 return XZ_DATA_ERROR; 450 451 /* 452 * Validate Backward Size. Note that we never added the size of the 453 * Index CRC32 field to s->index.size, thus we use s->index.size / 4 454 * instead of s->index.size / 4 - 1. 455 */ 456 if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) 457 return XZ_DATA_ERROR; 458 459 if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) 460 return XZ_DATA_ERROR; 461 462 /* 463 * Use XZ_STREAM_END instead of XZ_OK to be more convenient 464 * for the caller. 465 */ 466 return XZ_STREAM_END; 467 } 468 469 /* Decode the Block Header and initialize the filter chain. */ 470 static enum xz_ret dec_block_header(struct xz_dec *s) 471 { 472 enum xz_ret ret; 473 474 /* 475 * Validate the CRC32. We know that the temp buffer is at least 476 * eight bytes so this is safe. 477 */ 478 s->temp.size -= 4; 479 if (xz_crc32(s->temp.buf, s->temp.size, 0) 480 != get_le32(s->temp.buf + s->temp.size)) 481 return XZ_DATA_ERROR; 482 483 s->temp.pos = 2; 484 485 /* 486 * Catch unsupported Block Flags. We support only one or two filters 487 * in the chain, so we catch that with the same test. 488 */ 489 #ifdef XZ_DEC_BCJ 490 if (s->temp.buf[1] & 0x3E) 491 #else 492 if (s->temp.buf[1] & 0x3F) 493 #endif 494 return XZ_OPTIONS_ERROR; 495 496 /* Compressed Size */ 497 if (s->temp.buf[1] & 0x40) { 498 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 499 != XZ_STREAM_END) 500 return XZ_DATA_ERROR; 501 502 s->block_header.compressed = s->vli; 503 } else { 504 s->block_header.compressed = VLI_UNKNOWN; 505 } 506 507 /* Uncompressed Size */ 508 if (s->temp.buf[1] & 0x80) { 509 if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) 510 != XZ_STREAM_END) 511 return XZ_DATA_ERROR; 512 513 s->block_header.uncompressed = s->vli; 514 } else { 515 s->block_header.uncompressed = VLI_UNKNOWN; 516 } 517 518 #ifdef XZ_DEC_BCJ 519 /* If there are two filters, the first one must be a BCJ filter. */ 520 s->bcj_active = s->temp.buf[1] & 0x01; 521 if (s->bcj_active) { 522 if (s->temp.size - s->temp.pos < 2) 523 return XZ_OPTIONS_ERROR; 524 525 ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); 526 if (ret != XZ_OK) 527 return ret; 528 529 /* 530 * We don't support custom start offset, 531 * so Size of Properties must be zero. 532 */ 533 if (s->temp.buf[s->temp.pos++] != 0x00) 534 return XZ_OPTIONS_ERROR; 535 } 536 #endif 537 538 /* Valid Filter Flags always take at least two bytes. */ 539 if (s->temp.size - s->temp.pos < 2) 540 return XZ_DATA_ERROR; 541 542 /* Filter ID = LZMA2 */ 543 if (s->temp.buf[s->temp.pos++] != 0x21) 544 return XZ_OPTIONS_ERROR; 545 546 /* Size of Properties = 1-byte Filter Properties */ 547 if (s->temp.buf[s->temp.pos++] != 0x01) 548 return XZ_OPTIONS_ERROR; 549 550 /* Filter Properties contains LZMA2 dictionary size. */ 551 if (s->temp.size - s->temp.pos < 1) 552 return XZ_DATA_ERROR; 553 554 ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); 555 if (ret != XZ_OK) 556 return ret; 557 558 /* The rest must be Header Padding. */ 559 while (s->temp.pos < s->temp.size) 560 if (s->temp.buf[s->temp.pos++] != 0x00) 561 return XZ_OPTIONS_ERROR; 562 563 s->temp.pos = 0; 564 s->block.compressed = 0; 565 s->block.uncompressed = 0; 566 567 return XZ_OK; 568 } 569 570 static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b) 571 { 572 enum xz_ret ret; 573 574 /* 575 * Store the start position for the case when we are in the middle 576 * of the Index field. 577 */ 578 s->in_start = b->in_pos; 579 580 while (true) { 581 switch (s->sequence) { 582 case SEQ_STREAM_HEADER: 583 /* 584 * Stream Header is copied to s->temp, and then 585 * decoded from there. This way if the caller 586 * gives us only little input at a time, we can 587 * still keep the Stream Header decoding code 588 * simple. Similar approach is used in many places 589 * in this file. 590 */ 591 if (!fill_temp(s, b)) 592 return XZ_OK; 593 594 /* 595 * If dec_stream_header() returns 596 * XZ_UNSUPPORTED_CHECK, it is still possible 597 * to continue decoding if working in multi-call 598 * mode. Thus, update s->sequence before calling 599 * dec_stream_header(). 600 */ 601 s->sequence = SEQ_BLOCK_START; 602 603 ret = dec_stream_header(s); 604 if (ret != XZ_OK) 605 return ret; 606 607 /* Fall through */ 608 609 case SEQ_BLOCK_START: 610 /* We need one byte of input to continue. */ 611 if (b->in_pos == b->in_size) 612 return XZ_OK; 613 614 /* See if this is the beginning of the Index field. */ 615 if (b->in[b->in_pos] == 0) { 616 s->in_start = b->in_pos++; 617 s->sequence = SEQ_INDEX; 618 break; 619 } 620 621 /* 622 * Calculate the size of the Block Header and 623 * prepare to decode it. 624 */ 625 s->block_header.size 626 = ((uint32_t)b->in[b->in_pos] + 1) * 4; 627 628 s->temp.size = s->block_header.size; 629 s->temp.pos = 0; 630 s->sequence = SEQ_BLOCK_HEADER; 631 632 /* Fall through */ 633 634 case SEQ_BLOCK_HEADER: 635 if (!fill_temp(s, b)) 636 return XZ_OK; 637 638 ret = dec_block_header(s); 639 if (ret != XZ_OK) 640 return ret; 641 642 s->sequence = SEQ_BLOCK_UNCOMPRESS; 643 644 /* Fall through */ 645 646 case SEQ_BLOCK_UNCOMPRESS: 647 ret = dec_block(s, b); 648 if (ret != XZ_STREAM_END) 649 return ret; 650 651 s->sequence = SEQ_BLOCK_PADDING; 652 653 /* Fall through */ 654 655 case SEQ_BLOCK_PADDING: 656 /* 657 * Size of Compressed Data + Block Padding 658 * must be a multiple of four. We don't need 659 * s->block.compressed for anything else 660 * anymore, so we use it here to test the size 661 * of the Block Padding field. 662 */ 663 while (s->block.compressed & 3) { 664 if (b->in_pos == b->in_size) 665 return XZ_OK; 666 667 if (b->in[b->in_pos++] != 0) 668 return XZ_DATA_ERROR; 669 670 ++s->block.compressed; 671 } 672 673 s->sequence = SEQ_BLOCK_CHECK; 674 675 /* Fall through */ 676 677 case SEQ_BLOCK_CHECK: 678 if (s->check_type == XZ_CHECK_CRC32) { 679 ret = crc_validate(s, b, 32); 680 if (ret != XZ_STREAM_END) 681 return ret; 682 } 683 else if (IS_CRC64(s->check_type)) { 684 ret = crc_validate(s, b, 64); 685 if (ret != XZ_STREAM_END) 686 return ret; 687 } 688 #ifdef XZ_DEC_ANY_CHECK 689 else if (!check_skip(s, b)) { 690 return XZ_OK; 691 } 692 #endif 693 694 s->sequence = SEQ_BLOCK_START; 695 break; 696 697 case SEQ_INDEX: 698 ret = dec_index(s, b); 699 if (ret != XZ_STREAM_END) 700 return ret; 701 702 s->sequence = SEQ_INDEX_PADDING; 703 704 /* Fall through */ 705 706 case SEQ_INDEX_PADDING: 707 while ((s->index.size + (b->in_pos - s->in_start)) 708 & 3) { 709 if (b->in_pos == b->in_size) { 710 index_update(s, b); 711 return XZ_OK; 712 } 713 714 if (b->in[b->in_pos++] != 0) 715 return XZ_DATA_ERROR; 716 } 717 718 /* Finish the CRC32 value and Index size. */ 719 index_update(s, b); 720 721 /* Compare the hashes to validate the Index field. */ 722 if (!memeq(&s->block.hash, &s->index.hash, 723 sizeof(s->block.hash))) 724 return XZ_DATA_ERROR; 725 726 s->sequence = SEQ_INDEX_CRC32; 727 728 /* Fall through */ 729 730 case SEQ_INDEX_CRC32: 731 ret = crc_validate(s, b, 32); 732 if (ret != XZ_STREAM_END) 733 return ret; 734 735 s->temp.size = STREAM_HEADER_SIZE; 736 s->sequence = SEQ_STREAM_FOOTER; 737 738 /* Fall through */ 739 740 case SEQ_STREAM_FOOTER: 741 if (!fill_temp(s, b)) 742 return XZ_OK; 743 744 return dec_stream_footer(s); 745 } 746 } 747 748 /* Never reached */ 749 } 750 751 /* 752 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in 753 * multi-call and single-call decoding. 754 * 755 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we 756 * are not going to make any progress anymore. This is to prevent the caller 757 * from calling us infinitely when the input file is truncated or otherwise 758 * corrupt. Since zlib-style API allows that the caller fills the input buffer 759 * only when the decoder doesn't produce any new output, we have to be careful 760 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only 761 * after the second consecutive call to xz_dec_run() that makes no progress. 762 * 763 * In single-call mode, if we couldn't decode everything and no error 764 * occurred, either the input is truncated or the output buffer is too small. 765 * Since we know that the last input byte never produces any output, we know 766 * that if all the input was consumed and decoding wasn't finished, the file 767 * must be corrupt. Otherwise the output buffer has to be too small or the 768 * file is corrupt in a way that decoding it produces too big output. 769 * 770 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to 771 * their original values. This is because with some filter chains there won't 772 * be any valid uncompressed data in the output buffer unless the decoding 773 * actually succeeds (that's the price to pay of using the output buffer as 774 * the workspace). 775 */ 776 XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) 777 { 778 size_t in_start; 779 size_t out_start; 780 enum xz_ret ret; 781 782 if (DEC_IS_SINGLE(s->mode)) 783 xz_dec_reset(s); 784 785 in_start = b->in_pos; 786 out_start = b->out_pos; 787 ret = dec_main(s, b); 788 789 if (DEC_IS_SINGLE(s->mode)) { 790 if (ret == XZ_OK) 791 ret = b->in_pos == b->in_size 792 ? XZ_DATA_ERROR : XZ_BUF_ERROR; 793 794 if (ret != XZ_STREAM_END) { 795 b->in_pos = in_start; 796 b->out_pos = out_start; 797 } 798 799 } else if (ret == XZ_OK && in_start == b->in_pos 800 && out_start == b->out_pos) { 801 if (s->allow_buf_error) 802 ret = XZ_BUF_ERROR; 803 804 s->allow_buf_error = true; 805 } else { 806 s->allow_buf_error = false; 807 } 808 809 return ret; 810 } 811 812 XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) 813 { 814 struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); 815 if (s == NULL) 816 return NULL; 817 818 s->mode = mode; 819 820 #ifdef XZ_DEC_BCJ 821 s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); 822 if (s->bcj == NULL) 823 goto error_bcj; 824 #endif 825 826 s->lzma2 = xz_dec_lzma2_create(mode, dict_max); 827 if (s->lzma2 == NULL) 828 goto error_lzma2; 829 830 xz_dec_reset(s); 831 return s; 832 833 error_lzma2: 834 #ifdef XZ_DEC_BCJ 835 xz_dec_bcj_end(s->bcj); 836 error_bcj: 837 #endif 838 kfree(s); 839 return NULL; 840 } 841 842 XZ_EXTERN void xz_dec_reset(struct xz_dec *s) 843 { 844 s->sequence = SEQ_STREAM_HEADER; 845 s->allow_buf_error = false; 846 s->pos = 0; 847 s->crc = 0; 848 memzero(&s->block, sizeof(s->block)); 849 memzero(&s->index, sizeof(s->index)); 850 s->temp.pos = 0; 851 s->temp.size = STREAM_HEADER_SIZE; 852 } 853 854 XZ_EXTERN void xz_dec_end(struct xz_dec *s) 855 { 856 if (s != NULL) { 857 xz_dec_lzma2_end(s->lzma2); 858 #ifdef XZ_DEC_BCJ 859 xz_dec_bcj_end(s->bcj); 860 #endif 861 kfree(s); 862 } 863 } 864