1 2 #define BZ_NO_STDIO 3 4 5 /*-------------------------------------------------------------*/ 6 /*--- Private header file for the library. ---*/ 7 /*--- bzlib_private.h ---*/ 8 /*-------------------------------------------------------------*/ 9 10 /*-- 11 This file is a part of bzip2 and/or libbzip2, a program and 12 library for lossless, block-sorting data compression. 13 14 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions 18 are met: 19 20 1. Redistributions of source code must retain the above copyright 21 notice, this list of conditions and the following disclaimer. 22 23 2. The origin of this software must not be misrepresented; you must 24 not claim that you wrote the original software. If you use this 25 software in a product, an acknowledgment in the product 26 documentation would be appreciated but is not required. 27 28 3. Altered source versions must be plainly marked as such, and must 29 not be misrepresented as being the original software. 30 31 4. The name of the author may not be used to endorse or promote 32 products derived from this software without specific prior written 33 permission. 34 35 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 36 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 37 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 38 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 39 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 40 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 41 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 42 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 43 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 44 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 45 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 47 Julian Seward, Cambridge, UK. 48 jseward (at) bzip.org 49 bzip2/libbzip2 version 1.0 of 21 March 2000 50 51 This program is based on (at least) the work of: 52 Mike Burrows 53 David Wheeler 54 Peter Fenwick 55 Alistair Moffat 56 Radford Neal 57 Ian H. Witten 58 Robert Sedgewick 59 Jon L. Bentley 60 61 For more information on these sources, see the manual. 62 --*/ 63 64 65 #ifndef _BZLIB_PRIVATE_H 66 #define _BZLIB_PRIVATE_H 67 68 #include <stdlib.h> 69 70 #ifndef BZ_NO_STDIO 71 #include <stdio.h> 72 #include <ctype.h> 73 #include <string.h> 74 #endif 75 76 77 /*-------------------------------------------------------------*/ 78 /*--- Public header file for the library. ---*/ 79 /*--- bzlib.h ---*/ 80 /*-------------------------------------------------------------*/ 81 82 /*-- 83 This file is a part of bzip2 and/or libbzip2, a program and 84 library for lossless, block-sorting data compression. 85 86 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 87 88 Redistribution and use in source and binary forms, with or without 89 modification, are permitted provided that the following conditions 90 are met: 91 92 1. Redistributions of source code must retain the above copyright 93 notice, this list of conditions and the following disclaimer. 94 95 2. The origin of this software must not be misrepresented; you must 96 not claim that you wrote the original software. If you use this 97 software in a product, an acknowledgment in the product 98 documentation would be appreciated but is not required. 99 100 3. Altered source versions must be plainly marked as such, and must 101 not be misrepresented as being the original software. 102 103 4. The name of the author may not be used to endorse or promote 104 products derived from this software without specific prior written 105 permission. 106 107 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 108 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 109 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 110 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 111 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 112 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 113 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 114 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 115 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 116 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 117 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 118 119 Julian Seward, Cambridge, UK. 120 jseward (at) bzip.org 121 bzip2/libbzip2 version 1.0 of 21 March 2000 122 123 This program is based on (at least) the work of: 124 Mike Burrows 125 David Wheeler 126 Peter Fenwick 127 Alistair Moffat 128 Radford Neal 129 Ian H. Witten 130 Robert Sedgewick 131 Jon L. Bentley 132 133 For more information on these sources, see the manual. 134 --*/ 135 136 137 #ifndef _BZLIB_H 138 #define _BZLIB_H 139 140 #ifdef __cplusplus 141 extern "C" { 142 #endif 143 144 #define BZ_RUN 0 145 #define BZ_FLUSH 1 146 #define BZ_FINISH 2 147 148 #define BZ_OK 0 149 #define BZ_RUN_OK 1 150 #define BZ_FLUSH_OK 2 151 #define BZ_FINISH_OK 3 152 #define BZ_STREAM_END 4 153 #define BZ_SEQUENCE_ERROR (-1) 154 #define BZ_PARAM_ERROR (-2) 155 #define BZ_MEM_ERROR (-3) 156 #define BZ_DATA_ERROR (-4) 157 #define BZ_DATA_ERROR_MAGIC (-5) 158 #define BZ_IO_ERROR (-6) 159 #define BZ_UNEXPECTED_EOF (-7) 160 #define BZ_OUTBUFF_FULL (-8) 161 #define BZ_CONFIG_ERROR (-9) 162 163 typedef 164 struct { 165 char *next_in; 166 unsigned int avail_in; 167 unsigned int total_in_lo32; 168 unsigned int total_in_hi32; 169 170 char *next_out; 171 unsigned int avail_out; 172 unsigned int total_out_lo32; 173 unsigned int total_out_hi32; 174 175 void *state; 176 177 void *(*bzalloc)(void *,int,int); 178 void (*bzfree)(void *,void *); 179 void *opaque; 180 } 181 bz_stream; 182 183 184 #ifndef BZ_IMPORT 185 #define BZ_EXPORT 186 #endif 187 188 #ifndef BZ_NO_STDIO 189 /* Need a definitition for FILE */ 190 #include <stdio.h> 191 #endif 192 193 #ifdef _WIN32 194 # include <windows.h> 195 # ifdef small 196 /* windows.h define small to char */ 197 # undef small 198 # endif 199 # ifdef BZ_EXPORT 200 # define BZ_API(func) WINAPI func 201 # define BZ_EXTERN extern 202 # else 203 /* import windows dll dynamically */ 204 # define BZ_API(func) (WINAPI * func) 205 # define BZ_EXTERN 206 # endif 207 #else 208 # define BZ_API(func) func 209 # define BZ_EXTERN extern 210 #endif 211 212 213 /*-- Core (low-level) library functions --*/ 214 215 BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( 216 bz_stream* strm, 217 int blockSize100k, 218 int verbosity, 219 int workFactor 220 ); 221 222 BZ_EXTERN int BZ_API(BZ2_bzCompress) ( 223 bz_stream* strm, 224 int action 225 ); 226 227 BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( 228 bz_stream* strm 229 ); 230 231 BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( 232 bz_stream *strm, 233 int verbosity, 234 int small 235 ); 236 237 BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( 238 bz_stream* strm 239 ); 240 241 BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( 242 bz_stream *strm 243 ); 244 245 246 247 /*-- High(er) level library functions --*/ 248 249 #ifndef BZ_NO_STDIO 250 #define BZ_MAX_UNUSED 5000 251 252 typedef void BZFILE; 253 254 BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( 255 int* bzerror, 256 FILE* f, 257 int verbosity, 258 int small, 259 void* unused, 260 int nUnused 261 ); 262 263 BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( 264 int* bzerror, 265 BZFILE* b 266 ); 267 268 BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( 269 int* bzerror, 270 BZFILE* b, 271 void** unused, 272 int* nUnused 273 ); 274 275 BZ_EXTERN int BZ_API(BZ2_bzRead) ( 276 int* bzerror, 277 BZFILE* b, 278 void* buf, 279 int len 280 ); 281 282 BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( 283 int* bzerror, 284 FILE* f, 285 int blockSize100k, 286 int verbosity, 287 int workFactor 288 ); 289 290 BZ_EXTERN void BZ_API(BZ2_bzWrite) ( 291 int* bzerror, 292 BZFILE* b, 293 void* buf, 294 int len 295 ); 296 297 BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( 298 int* bzerror, 299 BZFILE* b, 300 int abandon, 301 unsigned int* nbytes_in, 302 unsigned int* nbytes_out 303 ); 304 305 BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( 306 int* bzerror, 307 BZFILE* b, 308 int abandon, 309 unsigned int* nbytes_in_lo32, 310 unsigned int* nbytes_in_hi32, 311 unsigned int* nbytes_out_lo32, 312 unsigned int* nbytes_out_hi32 313 ); 314 #endif 315 316 317 /*-- Utility functions --*/ 318 319 BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( 320 char* dest, 321 unsigned int* destLen, 322 char* source, 323 unsigned int sourceLen, 324 int blockSize100k, 325 int verbosity, 326 int workFactor 327 ); 328 329 BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( 330 char* dest, 331 unsigned int* destLen, 332 char* source, 333 unsigned int sourceLen, 334 int small, 335 int verbosity 336 ); 337 338 339 /*-- 340 Code contributed by Yoshioka Tsuneo 341 (QWF00133 (at) niftyserve.or.jp/tsuneo-y (at) is.aist-nara.ac.jp), 342 to support better zlib compatibility. 343 This code is not _officially_ part of libbzip2 (yet); 344 I haven't tested it, documented it, or considered the 345 threading-safeness of it. 346 If this code breaks, please contact both Yoshioka and me. 347 --*/ 348 349 BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( 350 void 351 ); 352 353 #ifndef BZ_NO_STDIO 354 BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( 355 const char *path, 356 const char *mode 357 ); 358 359 BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( 360 int fd, 361 const char *mode 362 ); 363 364 BZ_EXTERN int BZ_API(BZ2_bzread) ( 365 BZFILE* b, 366 void* buf, 367 int len 368 ); 369 370 BZ_EXTERN int BZ_API(BZ2_bzwrite) ( 371 BZFILE* b, 372 void* buf, 373 int len 374 ); 375 376 BZ_EXTERN int BZ_API(BZ2_bzflush) ( 377 BZFILE* b 378 ); 379 380 BZ_EXTERN void BZ_API(BZ2_bzclose) ( 381 BZFILE* b 382 ); 383 384 BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( 385 BZFILE *b, 386 int *errnum 387 ); 388 #endif 389 390 #ifdef __cplusplus 391 } 392 #endif 393 394 #endif 395 396 /*-------------------------------------------------------------*/ 397 /*--- end bzlib.h ---*/ 398 /*-------------------------------------------------------------*/ 399 400 401 402 403 /*-- General stuff. --*/ 404 405 #define BZ_VERSION "1.0.3, 17-Oct-2004" 406 407 typedef char Char; 408 typedef unsigned char Bool; 409 typedef unsigned char UChar; 410 typedef int Int32; 411 typedef unsigned int UInt32; 412 typedef short Int16; 413 typedef unsigned short UInt16; 414 415 #define True ((Bool)1) 416 #define False ((Bool)0) 417 418 #ifndef __GNUC__ 419 #define __inline__ /* */ 420 #endif 421 422 #ifndef BZ_NO_STDIO 423 extern void BZ2_bz__AssertH__fail ( int errcode ); 424 #define AssertH(cond,errcode) \ 425 { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } 426 #if BZ_DEBUG 427 #define AssertD(cond,msg) \ 428 { if (!(cond)) { \ 429 fprintf ( stderr, \ 430 "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ 431 exit(1); \ 432 }} 433 #else 434 #define AssertD(cond,msg) /* */ 435 #endif 436 #define VPrintf0(zf) \ 437 fprintf(stderr,zf) 438 #define VPrintf1(zf,za1) \ 439 fprintf(stderr,zf,za1) 440 #define VPrintf2(zf,za1,za2) \ 441 fprintf(stderr,zf,za1,za2) 442 #define VPrintf3(zf,za1,za2,za3) \ 443 fprintf(stderr,zf,za1,za2,za3) 444 #define VPrintf4(zf,za1,za2,za3,za4) \ 445 fprintf(stderr,zf,za1,za2,za3,za4) 446 #define VPrintf5(zf,za1,za2,za3,za4,za5) \ 447 fprintf(stderr,zf,za1,za2,za3,za4,za5) 448 #else 449 extern void bz_internal_error ( int errcode ); 450 #define AssertH(cond,errcode) \ 451 { if (!(cond)) bz_internal_error ( errcode ); } 452 #define AssertD(cond,msg) /* */ 453 #define VPrintf0(zf) \ 454 vexxx_printf(zf) 455 #define VPrintf1(zf,za1) \ 456 vexxx_printf(zf,za1) 457 #define VPrintf2(zf,za1,za2) \ 458 vexxx_printf(zf,za1,za2) 459 #define VPrintf3(zf,za1,za2,za3) \ 460 vexxx_printf(zf,za1,za2,za3) 461 #define VPrintf4(zf,za1,za2,za3,za4) \ 462 vexxx_printf(zf,za1,za2,za3,za4) 463 #define VPrintf5(zf,za1,za2,za3,za4,za5) \ 464 vexxx_printf(zf,za1,za2,za3,za4,za5) 465 #endif 466 467 468 #define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) 469 #define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) 470 471 472 /*-- Header bytes. --*/ 473 474 #define BZ_HDR_B 0x42 /* 'B' */ 475 #define BZ_HDR_Z 0x5a /* 'Z' */ 476 #define BZ_HDR_h 0x68 /* 'h' */ 477 #define BZ_HDR_0 0x30 /* '0' */ 478 479 /*-- Constants for the back end. --*/ 480 481 #define BZ_MAX_ALPHA_SIZE 258 482 #define BZ_MAX_CODE_LEN 23 483 484 #define BZ_RUNA 0 485 #define BZ_RUNB 1 486 487 #define BZ_N_GROUPS 6 488 #define BZ_G_SIZE 50 489 #define BZ_N_ITERS 4 490 491 #define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) 492 493 494 495 /*-- Stuff for randomising repetitive blocks. --*/ 496 497 extern Int32 BZ2_rNums[512]; 498 499 #define BZ_RAND_DECLS \ 500 Int32 rNToGo; \ 501 Int32 rTPos \ 502 503 #define BZ_RAND_INIT_MASK \ 504 s->rNToGo = 0; \ 505 s->rTPos = 0 \ 506 507 #define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) 508 509 #define BZ_RAND_UPD_MASK \ 510 if (s->rNToGo == 0) { \ 511 s->rNToGo = BZ2_rNums[s->rTPos]; \ 512 s->rTPos++; \ 513 if (s->rTPos == 512) s->rTPos = 0; \ 514 } \ 515 s->rNToGo--; 516 517 518 519 /*-- Stuff for doing CRCs. --*/ 520 521 extern UInt32 BZ2_crc32Table[256]; 522 523 #define BZ_INITIALISE_CRC(crcVar) \ 524 { \ 525 crcVar = 0xffffffffL; \ 526 } 527 528 #define BZ_FINALISE_CRC(crcVar) \ 529 { \ 530 crcVar = ~(crcVar); \ 531 } 532 533 #define BZ_UPDATE_CRC(crcVar,cha) \ 534 { \ 535 crcVar = (crcVar << 8) ^ \ 536 BZ2_crc32Table[(crcVar >> 24) ^ \ 537 ((UChar)cha)]; \ 538 } 539 540 541 542 /*-- States and modes for compression. --*/ 543 544 #define BZ_M_IDLE 1 545 #define BZ_M_RUNNING 2 546 #define BZ_M_FLUSHING 3 547 #define BZ_M_FINISHING 4 548 549 #define BZ_S_OUTPUT 1 550 #define BZ_S_INPUT 2 551 552 #define BZ_N_RADIX 2 553 #define BZ_N_QSORT 12 554 #define BZ_N_SHELL 18 555 #define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) 556 557 558 559 560 /*-- Structure holding all the compression-side stuff. --*/ 561 562 typedef 563 struct { 564 /* pointer back to the struct bz_stream */ 565 bz_stream* strm; 566 567 /* mode this stream is in, and whether inputting */ 568 /* or outputting data */ 569 Int32 mode; 570 Int32 state; 571 572 /* remembers avail_in when flush/finish requested */ 573 UInt32 avail_in_expect; 574 575 /* for doing the block sorting */ 576 UInt32* arr1; 577 UInt32* arr2; 578 UInt32* ftab; 579 Int32 origPtr; 580 581 /* aliases for arr1 and arr2 */ 582 UInt32* ptr; 583 UChar* block; 584 UInt16* mtfv; 585 UChar* zbits; 586 587 /* for deciding when to use the fallback sorting algorithm */ 588 Int32 workFactor; 589 590 /* run-length-encoding of the input */ 591 UInt32 state_in_ch; 592 Int32 state_in_len; 593 BZ_RAND_DECLS; 594 595 /* input and output limits and current posns */ 596 Int32 nblock; 597 Int32 nblockMAX; 598 Int32 numZ; 599 Int32 state_out_pos; 600 601 /* map of bytes used in block */ 602 Int32 nInUse; 603 Bool inUse[256]; 604 UChar unseqToSeq[256]; 605 606 /* the buffer for bit stream creation */ 607 UInt32 bsBuff; 608 Int32 bsLive; 609 610 /* block and combined CRCs */ 611 UInt32 blockCRC; 612 UInt32 combinedCRC; 613 614 /* misc administratium */ 615 Int32 verbosity; 616 Int32 blockNo; 617 Int32 blockSize100k; 618 619 /* stuff for coding the MTF values */ 620 Int32 nMTF; 621 Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; 622 UChar selector [BZ_MAX_SELECTORS]; 623 UChar selectorMtf[BZ_MAX_SELECTORS]; 624 625 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 626 Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 627 Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 628 /* second dimension: only 3 needed; 4 makes index calculations faster */ 629 UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; 630 631 } 632 EState; 633 634 635 636 /*-- externs for compression. --*/ 637 638 extern void 639 BZ2_blockSort ( EState* ); 640 641 extern void 642 BZ2_compressBlock ( EState*, Bool ); 643 644 extern void 645 BZ2_bsInitWrite ( EState* ); 646 647 extern void 648 BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); 649 650 extern void 651 BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); 652 653 654 655 /*-- states for decompression. --*/ 656 657 #define BZ_X_IDLE 1 658 #define BZ_X_OUTPUT 2 659 660 #define BZ_X_MAGIC_1 10 661 #define BZ_X_MAGIC_2 11 662 #define BZ_X_MAGIC_3 12 663 #define BZ_X_MAGIC_4 13 664 #define BZ_X_BLKHDR_1 14 665 #define BZ_X_BLKHDR_2 15 666 #define BZ_X_BLKHDR_3 16 667 #define BZ_X_BLKHDR_4 17 668 #define BZ_X_BLKHDR_5 18 669 #define BZ_X_BLKHDR_6 19 670 #define BZ_X_BCRC_1 20 671 #define BZ_X_BCRC_2 21 672 #define BZ_X_BCRC_3 22 673 #define BZ_X_BCRC_4 23 674 #define BZ_X_RANDBIT 24 675 #define BZ_X_ORIGPTR_1 25 676 #define BZ_X_ORIGPTR_2 26 677 #define BZ_X_ORIGPTR_3 27 678 #define BZ_X_MAPPING_1 28 679 #define BZ_X_MAPPING_2 29 680 #define BZ_X_SELECTOR_1 30 681 #define BZ_X_SELECTOR_2 31 682 #define BZ_X_SELECTOR_3 32 683 #define BZ_X_CODING_1 33 684 #define BZ_X_CODING_2 34 685 #define BZ_X_CODING_3 35 686 #define BZ_X_MTF_1 36 687 #define BZ_X_MTF_2 37 688 #define BZ_X_MTF_3 38 689 #define BZ_X_MTF_4 39 690 #define BZ_X_MTF_5 40 691 #define BZ_X_MTF_6 41 692 #define BZ_X_ENDHDR_2 42 693 #define BZ_X_ENDHDR_3 43 694 #define BZ_X_ENDHDR_4 44 695 #define BZ_X_ENDHDR_5 45 696 #define BZ_X_ENDHDR_6 46 697 #define BZ_X_CCRC_1 47 698 #define BZ_X_CCRC_2 48 699 #define BZ_X_CCRC_3 49 700 #define BZ_X_CCRC_4 50 701 702 703 704 /*-- Constants for the fast MTF decoder. --*/ 705 706 #define MTFA_SIZE 4096 707 #define MTFL_SIZE 16 708 709 710 711 /*-- Structure holding all the decompression-side stuff. --*/ 712 713 typedef 714 struct { 715 /* pointer back to the struct bz_stream */ 716 bz_stream* strm; 717 718 /* state indicator for this stream */ 719 Int32 state; 720 721 /* for doing the final run-length decoding */ 722 UChar state_out_ch; 723 Int32 state_out_len; 724 Bool blockRandomised; 725 BZ_RAND_DECLS; 726 727 /* the buffer for bit stream reading */ 728 UInt32 bsBuff; 729 Int32 bsLive; 730 731 /* misc administratium */ 732 Int32 blockSize100k; 733 Bool smallDecompress; 734 Int32 currBlockNo; 735 Int32 verbosity; 736 737 /* for undoing the Burrows-Wheeler transform */ 738 Int32 origPtr; 739 UInt32 tPos; 740 Int32 k0; 741 Int32 unzftab[256]; 742 Int32 nblock_used; 743 Int32 cftab[257]; 744 Int32 cftabCopy[257]; 745 746 /* for undoing the Burrows-Wheeler transform (FAST) */ 747 UInt32 *tt; 748 749 /* for undoing the Burrows-Wheeler transform (SMALL) */ 750 UInt16 *ll16; 751 UChar *ll4; 752 753 /* stored and calculated CRCs */ 754 UInt32 storedBlockCRC; 755 UInt32 storedCombinedCRC; 756 UInt32 calculatedBlockCRC; 757 UInt32 calculatedCombinedCRC; 758 759 /* map of bytes used in block */ 760 Int32 nInUse; 761 Bool inUse[256]; 762 Bool inUse16[16]; 763 UChar seqToUnseq[256]; 764 765 /* for decoding the MTF values */ 766 UChar mtfa [MTFA_SIZE]; 767 Int32 mtfbase[256 / MTFL_SIZE]; 768 UChar selector [BZ_MAX_SELECTORS]; 769 UChar selectorMtf[BZ_MAX_SELECTORS]; 770 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 771 772 Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 773 Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 774 Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 775 Int32 minLens[BZ_N_GROUPS]; 776 777 /* save area for scalars in the main decompress code */ 778 Int32 save_i; 779 Int32 save_j; 780 Int32 save_t; 781 Int32 save_alphaSize; 782 Int32 save_nGroups; 783 Int32 save_nSelectors; 784 Int32 save_EOB; 785 Int32 save_groupNo; 786 Int32 save_groupPos; 787 Int32 save_nextSym; 788 Int32 save_nblockMAX; 789 Int32 save_nblock; 790 Int32 save_es; 791 Int32 save_N; 792 Int32 save_curr; 793 Int32 save_zt; 794 Int32 save_zn; 795 Int32 save_zvec; 796 Int32 save_zj; 797 Int32 save_gSel; 798 Int32 save_gMinlen; 799 Int32* save_gLimit; 800 Int32* save_gBase; 801 Int32* save_gPerm; 802 803 } 804 DState; 805 806 807 808 /*-- Macros for decompression. --*/ 809 810 #define BZ_GET_FAST(cccc) \ 811 s->tPos = s->tt[s->tPos]; \ 812 cccc = (UChar)(s->tPos & 0xff); \ 813 s->tPos >>= 8; 814 815 #define BZ_GET_FAST_C(cccc) \ 816 c_tPos = c_tt[c_tPos]; \ 817 cccc = (UChar)(c_tPos & 0xff); \ 818 c_tPos >>= 8; 819 820 #define SET_LL4(i,n) \ 821 { if (((i) & 0x1) == 0) \ 822 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ 823 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ 824 } 825 826 #define GET_LL4(i) \ 827 ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) 828 829 #define SET_LL(i,n) \ 830 { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ 831 SET_LL4(i, n >> 16); \ 832 } 833 834 #define GET_LL(i) \ 835 (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) 836 837 #define BZ_GET_SMALL(cccc) \ 838 cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ 839 s->tPos = GET_LL(s->tPos); 840 841 842 /*-- externs for decompression. --*/ 843 844 extern Int32 845 BZ2_indexIntoF ( Int32, Int32* ); 846 847 extern Int32 848 BZ2_decompress ( DState* ); 849 850 extern void 851 BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, 852 Int32, Int32, Int32 ); 853 854 855 #endif 856 857 858 /*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/ 859 860 #ifdef BZ_NO_STDIO 861 #ifndef NULL 862 #define NULL 0 863 #endif 864 #endif 865 866 867 /*-------------------------------------------------------------*/ 868 /*--- end bzlib_private.h ---*/ 869 /*-------------------------------------------------------------*/ 870 871 872 /* Something which has the same size as void* on the host. That is, 873 it is 32 bits on a 32-bit host and 64 bits on a 64-bit host, and so 874 it can safely be coerced to and from a pointer type on the host 875 machine. */ 876 typedef unsigned long HWord; 877 typedef char HChar; 878 typedef signed int Int; 879 typedef unsigned int UInt; 880 881 typedef signed long long int Long; 882 typedef unsigned long long int ULong; 883 884 885 ///////////////////////////////////////////////////////////////////// 886 ///////////////////////////////////////////////////////////////////// 887 888 //#include "/home/sewardj/VEX/trunk/pub/libvex_basictypes.h" 889 890 static HWord (*serviceFn)(HWord,HWord) = 0; 891 892 893 static char* my_strcpy ( char* dest, const char* src ) 894 { 895 char* dest_orig = dest; 896 while (*src) *dest++ = *src++; 897 *dest = 0; 898 return dest_orig; 899 } 900 901 static void* my_memcpy ( void *dest, const void *src, int sz ) 902 { 903 const char *s = (const char *)src; 904 char *d = (char *)dest; 905 906 while (sz--) 907 *d++ = *s++; 908 909 return dest; 910 } 911 912 static void* my_memmove( void *dst, const void *src, unsigned int len ) 913 { 914 register char *d; 915 register char *s; 916 if ( dst > src ) { 917 d = (char *)dst + len - 1; 918 s = (char *)src + len - 1; 919 while ( len >= 4 ) { 920 *d-- = *s--; 921 *d-- = *s--; 922 *d-- = *s--; 923 *d-- = *s--; 924 len -= 4; 925 } 926 while ( len-- ) { 927 *d-- = *s--; 928 } 929 } else if ( dst < src ) { 930 d = (char *)dst; 931 s = (char *)src; 932 while ( len >= 4 ) { 933 *d++ = *s++; 934 *d++ = *s++; 935 *d++ = *s++; 936 *d++ = *s++; 937 len -= 4; 938 } 939 while ( len-- ) { 940 *d++ = *s++; 941 } 942 } 943 return dst; 944 } 945 946 char* my_strcat ( char* dest, const char* src ) 947 { 948 char* dest_orig = dest; 949 while (*dest) dest++; 950 while (*src) *dest++ = *src++; 951 *dest = 0; 952 return dest_orig; 953 } 954 955 956 ///////////////////////////////////////////////////////////////////// 957 958 static void vexxx_log_bytes ( char* p, int n ) 959 { 960 int i; 961 for (i = 0; i < n; i++) 962 (*serviceFn)( 1, (int)p[i] ); 963 } 964 965 /*---------------------------------------------------------*/ 966 /*--- vexxx_printf ---*/ 967 /*---------------------------------------------------------*/ 968 969 /* This should be the only <...> include in the entire VEX library. 970 New code for vexxx_util.c should go above this point. */ 971 #include <stdarg.h> 972 973 static HChar vexxx_toupper ( HChar c ) 974 { 975 if (c >= 'a' && c <= 'z') 976 return c + ('A' - 'a'); 977 else 978 return c; 979 } 980 981 static Int vexxx_strlen ( const HChar* str ) 982 { 983 Int i = 0; 984 while (str[i] != 0) i++; 985 return i; 986 } 987 988 Bool vexxx_streq ( const HChar* s1, const HChar* s2 ) 989 { 990 while (True) { 991 if (*s1 == 0 && *s2 == 0) 992 return True; 993 if (*s1 != *s2) 994 return False; 995 s1++; 996 s2++; 997 } 998 } 999 1000 /* Some flags. */ 1001 #define VG_MSG_SIGNED 1 /* The value is signed. */ 1002 #define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. */ 1003 #define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */ 1004 #define VG_MSG_PAREN 8 /* Parenthesize if present (for %y) */ 1005 #define VG_MSG_COMMA 16 /* Add commas to numbers (for %d, %u) */ 1006 1007 /* Copy a string into the buffer. */ 1008 static UInt 1009 myvprintf_str ( void(*send)(HChar), Int flags, Int width, HChar* str, 1010 Bool capitalise ) 1011 { 1012 # define MAYBE_TOUPPER(ch) (capitalise ? vexxx_toupper(ch) : (ch)) 1013 UInt ret = 0; 1014 Int i, extra; 1015 Int len = vexxx_strlen(str); 1016 1017 if (width == 0) { 1018 ret += len; 1019 for (i = 0; i < len; i++) 1020 send(MAYBE_TOUPPER(str[i])); 1021 return ret; 1022 } 1023 1024 if (len > width) { 1025 ret += width; 1026 for (i = 0; i < width; i++) 1027 send(MAYBE_TOUPPER(str[i])); 1028 return ret; 1029 } 1030 1031 extra = width - len; 1032 if (flags & VG_MSG_LJUSTIFY) { 1033 ret += extra; 1034 for (i = 0; i < extra; i++) 1035 send(' '); 1036 } 1037 ret += len; 1038 for (i = 0; i < len; i++) 1039 send(MAYBE_TOUPPER(str[i])); 1040 if (!(flags & VG_MSG_LJUSTIFY)) { 1041 ret += extra; 1042 for (i = 0; i < extra; i++) 1043 send(' '); 1044 } 1045 1046 # undef MAYBE_TOUPPER 1047 1048 return ret; 1049 } 1050 1051 /* Write P into the buffer according to these args: 1052 * If SIGN is true, p is a signed. 1053 * BASE is the base. 1054 * If WITH_ZERO is true, '0' must be added. 1055 * WIDTH is the width of the field. 1056 */ 1057 static UInt 1058 myvprintf_int64 ( void(*send)(HChar), Int flags, Int base, Int width, ULong pL) 1059 { 1060 HChar buf[40]; 1061 Int ind = 0; 1062 Int i, nc = 0; 1063 Bool neg = False; 1064 HChar *digits = "0123456789ABCDEF"; 1065 UInt ret = 0; 1066 UInt p = (UInt)pL; 1067 1068 if (base < 2 || base > 16) 1069 return ret; 1070 1071 if ((flags & VG_MSG_SIGNED) && (Int)p < 0) { 1072 p = - (Int)p; 1073 neg = True; 1074 } 1075 1076 if (p == 0) 1077 buf[ind++] = '0'; 1078 else { 1079 while (p > 0) { 1080 if ((flags & VG_MSG_COMMA) && 10 == base && 1081 0 == (ind-nc) % 3 && 0 != ind) 1082 { 1083 buf[ind++] = ','; 1084 nc++; 1085 } 1086 buf[ind++] = digits[p % base]; 1087 p /= base; 1088 } 1089 } 1090 1091 if (neg) 1092 buf[ind++] = '-'; 1093 1094 if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) { 1095 for(; ind < width; ind++) { 1096 //vassert(ind < 39); 1097 buf[ind] = ((flags & VG_MSG_ZJUSTIFY) ? '0': ' '); 1098 } 1099 } 1100 1101 /* Reverse copy to buffer. */ 1102 ret += ind; 1103 for (i = ind -1; i >= 0; i--) { 1104 send(buf[i]); 1105 } 1106 if (width > 0 && (flags & VG_MSG_LJUSTIFY)) { 1107 for(; ind < width; ind++) { 1108 ret++; 1109 send(' '); // Never pad with zeroes on RHS -- changes the value! 1110 } 1111 } 1112 return ret; 1113 } 1114 1115 1116 /* A simple vprintf(). */ 1117 static 1118 UInt vprintf_wrk ( void(*send)(HChar), const HChar *format, va_list vargs ) 1119 { 1120 UInt ret = 0; 1121 int i; 1122 int flags; 1123 int width; 1124 Bool is_long; 1125 1126 /* We assume that vargs has already been initialised by the 1127 caller, using va_start, and that the caller will similarly 1128 clean up with va_end. 1129 */ 1130 1131 for (i = 0; format[i] != 0; i++) { 1132 if (format[i] != '%') { 1133 send(format[i]); 1134 ret++; 1135 continue; 1136 } 1137 i++; 1138 /* A '%' has been found. Ignore a trailing %. */ 1139 if (format[i] == 0) 1140 break; 1141 if (format[i] == '%') { 1142 /* `%%' is replaced by `%'. */ 1143 send('%'); 1144 ret++; 1145 continue; 1146 } 1147 flags = 0; 1148 is_long = False; 1149 width = 0; /* length of the field. */ 1150 if (format[i] == '(') { 1151 flags |= VG_MSG_PAREN; 1152 i++; 1153 } 1154 /* If ',' follows '%', commas will be inserted. */ 1155 if (format[i] == ',') { 1156 flags |= VG_MSG_COMMA; 1157 i++; 1158 } 1159 /* If '-' follows '%', justify on the left. */ 1160 if (format[i] == '-') { 1161 flags |= VG_MSG_LJUSTIFY; 1162 i++; 1163 } 1164 /* If '0' follows '%', pads will be inserted. */ 1165 if (format[i] == '0') { 1166 flags |= VG_MSG_ZJUSTIFY; 1167 i++; 1168 } 1169 /* Compute the field length. */ 1170 while (format[i] >= '0' && format[i] <= '9') { 1171 width *= 10; 1172 width += format[i++] - '0'; 1173 } 1174 while (format[i] == 'l') { 1175 i++; 1176 is_long = True; 1177 } 1178 1179 switch (format[i]) { 1180 case 'd': /* %d */ 1181 flags |= VG_MSG_SIGNED; 1182 if (is_long) 1183 ret += myvprintf_int64(send, flags, 10, width, 1184 (ULong)(va_arg (vargs, Long))); 1185 else 1186 ret += myvprintf_int64(send, flags, 10, width, 1187 (ULong)(va_arg (vargs, Int))); 1188 break; 1189 case 'u': /* %u */ 1190 if (is_long) 1191 ret += myvprintf_int64(send, flags, 10, width, 1192 (ULong)(va_arg (vargs, ULong))); 1193 else 1194 ret += myvprintf_int64(send, flags, 10, width, 1195 (ULong)(va_arg (vargs, UInt))); 1196 break; 1197 case 'p': /* %p */ 1198 ret += 2; 1199 send('0'); 1200 send('x'); 1201 ret += myvprintf_int64(send, flags, 16, width, 1202 (ULong)((HWord)va_arg (vargs, void *))); 1203 break; 1204 case 'x': /* %x */ 1205 if (is_long) 1206 ret += myvprintf_int64(send, flags, 16, width, 1207 (ULong)(va_arg (vargs, ULong))); 1208 else 1209 ret += myvprintf_int64(send, flags, 16, width, 1210 (ULong)(va_arg (vargs, UInt))); 1211 break; 1212 case 'c': /* %c */ 1213 ret++; 1214 send((va_arg (vargs, int))); 1215 break; 1216 case 's': case 'S': { /* %s */ 1217 char *str = va_arg (vargs, char *); 1218 if (str == (char*) 0) str = "(null)"; 1219 ret += myvprintf_str(send, flags, width, str, 1220 (format[i]=='S')); 1221 break; 1222 } 1223 # if 0 1224 case 'y': { /* %y - print symbol */ 1225 Char buf[100]; 1226 Char *cp = buf; 1227 Addr a = va_arg(vargs, Addr); 1228 1229 if (flags & VG_MSG_PAREN) 1230 *cp++ = '('; 1231 if (VG_(get_fnname_w_offset)(a, cp, sizeof(buf)-4)) { 1232 if (flags & VG_MSG_PAREN) { 1233 cp += VG_(strlen)(cp); 1234 *cp++ = ')'; 1235 *cp = '\0'; 1236 } 1237 ret += myvprintf_str(send, flags, width, buf, 0); 1238 } 1239 break; 1240 } 1241 # endif 1242 default: 1243 break; 1244 } 1245 } 1246 return ret; 1247 } 1248 1249 1250 /* A general replacement for printf(). Note that only low-level 1251 debugging info should be sent via here. The official route is to 1252 to use vg_message(). This interface is deprecated. 1253 */ 1254 static HChar myprintf_buf[1000]; 1255 static Int n_myprintf_buf; 1256 1257 static void add_to_myprintf_buf ( HChar c ) 1258 { 1259 if (c == '\n' || n_myprintf_buf >= 1000-10 /*paranoia*/ ) { 1260 (*vexxx_log_bytes)( myprintf_buf, vexxx_strlen(myprintf_buf) ); 1261 n_myprintf_buf = 0; 1262 myprintf_buf[n_myprintf_buf] = 0; 1263 } 1264 myprintf_buf[n_myprintf_buf++] = c; 1265 myprintf_buf[n_myprintf_buf] = 0; 1266 } 1267 1268 static UInt vexxx_printf ( const char *format, ... ) 1269 { 1270 UInt ret; 1271 va_list vargs; 1272 va_start(vargs,format); 1273 1274 n_myprintf_buf = 0; 1275 myprintf_buf[n_myprintf_buf] = 0; 1276 ret = vprintf_wrk ( add_to_myprintf_buf, format, vargs ); 1277 1278 if (n_myprintf_buf > 0) { 1279 (*vexxx_log_bytes)( myprintf_buf, n_myprintf_buf ); 1280 } 1281 1282 va_end(vargs); 1283 1284 return ret; 1285 } 1286 1287 /*---------------------------------------------------------------*/ 1288 /*--- end vexxx_util.c ---*/ 1289 /*---------------------------------------------------------------*/ 1290 1291 1292 ///////////////////////////////////////////////////////////////////// 1293 ///////////////////////////////////////////////////////////////////// 1294 ///////////////////////////////////////////////////////////////////// 1295 ///////////////////////////////////////////////////////////////////// 1296 1297 1298 /*-------------------------------------------------------------*/ 1299 /*--- Decompression machinery ---*/ 1300 /*--- decompress.c ---*/ 1301 /*-------------------------------------------------------------*/ 1302 1303 /*-- 1304 This file is a part of bzip2 and/or libbzip2, a program and 1305 library for lossless, block-sorting data compression. 1306 1307 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 1308 1309 Redistribution and use in source and binary forms, with or without 1310 modification, are permitted provided that the following conditions 1311 are met: 1312 1313 1. Redistributions of source code must retain the above copyright 1314 notice, this list of conditions and the following disclaimer. 1315 1316 2. The origin of this software must not be misrepresented; you must 1317 not claim that you wrote the original software. If you use this 1318 software in a product, an acknowledgment in the product 1319 documentation would be appreciated but is not required. 1320 1321 3. Altered source versions must be plainly marked as such, and must 1322 not be misrepresented as being the original software. 1323 1324 4. The name of the author may not be used to endorse or promote 1325 products derived from this software without specific prior written 1326 permission. 1327 1328 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 1329 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 1330 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1331 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 1332 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1333 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 1334 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1335 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 1336 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 1337 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1338 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1339 1340 Julian Seward, Cambridge, UK. 1341 jseward (at) bzip.org 1342 bzip2/libbzip2 version 1.0 of 21 March 2000 1343 1344 This program is based on (at least) the work of: 1345 Mike Burrows 1346 David Wheeler 1347 Peter Fenwick 1348 Alistair Moffat 1349 Radford Neal 1350 Ian H. Witten 1351 Robert Sedgewick 1352 Jon L. Bentley 1353 1354 For more information on these sources, see the manual. 1355 --*/ 1356 1357 1358 1359 1360 /*---------------------------------------------------*/ 1361 static 1362 void makeMaps_d ( DState* s ) 1363 { 1364 Int32 i; 1365 s->nInUse = 0; 1366 for (i = 0; i < 256; i++) 1367 if (s->inUse[i]) { 1368 s->seqToUnseq[s->nInUse] = i; 1369 s->nInUse++; 1370 } 1371 } 1372 1373 1374 /*---------------------------------------------------*/ 1375 #define RETURN(rrr) \ 1376 { retVal = rrr; goto save_state_and_return; }; 1377 1378 #define GET_BITS(lll,vvv,nnn) \ 1379 case lll: s->state = lll; \ 1380 while (True) { \ 1381 if (s->bsLive >= nnn) { \ 1382 UInt32 v; \ 1383 v = (s->bsBuff >> \ 1384 (s->bsLive-nnn)) & ((1 << nnn)-1); \ 1385 s->bsLive -= nnn; \ 1386 vvv = v; \ 1387 break; \ 1388 } \ 1389 if (s->strm->avail_in == 0) RETURN(BZ_OK); \ 1390 s->bsBuff \ 1391 = (s->bsBuff << 8) | \ 1392 ((UInt32) \ 1393 (*((UChar*)(s->strm->next_in)))); \ 1394 s->bsLive += 8; \ 1395 s->strm->next_in++; \ 1396 s->strm->avail_in--; \ 1397 s->strm->total_in_lo32++; \ 1398 if (s->strm->total_in_lo32 == 0) \ 1399 s->strm->total_in_hi32++; \ 1400 } 1401 1402 #define GET_UCHAR(lll,uuu) \ 1403 GET_BITS(lll,uuu,8) 1404 1405 #define GET_BIT(lll,uuu) \ 1406 GET_BITS(lll,uuu,1) 1407 1408 /*---------------------------------------------------*/ 1409 #define GET_MTF_VAL(label1,label2,lval) \ 1410 { \ 1411 if (groupPos == 0) { \ 1412 groupNo++; \ 1413 if (groupNo >= nSelectors) \ 1414 RETURN(BZ_DATA_ERROR); \ 1415 groupPos = BZ_G_SIZE; \ 1416 gSel = s->selector[groupNo]; \ 1417 gMinlen = s->minLens[gSel]; \ 1418 gLimit = &(s->limit[gSel][0]); \ 1419 gPerm = &(s->perm[gSel][0]); \ 1420 gBase = &(s->base[gSel][0]); \ 1421 } \ 1422 groupPos--; \ 1423 zn = gMinlen; \ 1424 GET_BITS(label1, zvec, zn); \ 1425 while (1) { \ 1426 if (zn > 20 /* the longest code */) \ 1427 RETURN(BZ_DATA_ERROR); \ 1428 if (zvec <= gLimit[zn]) break; \ 1429 zn++; \ 1430 GET_BIT(label2, zj); \ 1431 zvec = (zvec << 1) | zj; \ 1432 }; \ 1433 if (zvec - gBase[zn] < 0 \ 1434 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ 1435 RETURN(BZ_DATA_ERROR); \ 1436 lval = gPerm[zvec - gBase[zn]]; \ 1437 } 1438 1439 1440 1441 /*---------------------------------------------------*/ 1442 __inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) 1443 { 1444 Int32 nb, na, mid; 1445 nb = 0; 1446 na = 256; 1447 do { 1448 mid = (nb + na) >> 1; 1449 if (indx >= cftab[mid]) nb = mid; else na = mid; 1450 } 1451 while (na - nb != 1); 1452 return nb; 1453 } 1454 1455 /*---------------------------------------------------*/ 1456 Int32 BZ2_decompress ( DState* s ) 1457 { 1458 UChar uc; 1459 Int32 retVal; 1460 Int32 minLen, maxLen; 1461 bz_stream* strm = s->strm; 1462 1463 /* stuff that needs to be saved/restored */ 1464 Int32 i; 1465 Int32 j; 1466 Int32 t; 1467 Int32 alphaSize; 1468 Int32 nGroups; 1469 Int32 nSelectors; 1470 Int32 EOB; 1471 Int32 groupNo; 1472 Int32 groupPos; 1473 Int32 nextSym; 1474 Int32 nblockMAX; 1475 Int32 nblock; 1476 Int32 es; 1477 Int32 N; 1478 Int32 curr; 1479 Int32 zt; 1480 Int32 zn; 1481 Int32 zvec; 1482 Int32 zj; 1483 Int32 gSel; 1484 Int32 gMinlen; 1485 Int32* gLimit; 1486 Int32* gBase; 1487 Int32* gPerm; 1488 1489 if (s->state == BZ_X_MAGIC_1) { 1490 /*initialise the save area*/ 1491 s->save_i = 0; 1492 s->save_j = 0; 1493 s->save_t = 0; 1494 s->save_alphaSize = 0; 1495 s->save_nGroups = 0; 1496 s->save_nSelectors = 0; 1497 s->save_EOB = 0; 1498 s->save_groupNo = 0; 1499 s->save_groupPos = 0; 1500 s->save_nextSym = 0; 1501 s->save_nblockMAX = 0; 1502 s->save_nblock = 0; 1503 s->save_es = 0; 1504 s->save_N = 0; 1505 s->save_curr = 0; 1506 s->save_zt = 0; 1507 s->save_zn = 0; 1508 s->save_zvec = 0; 1509 s->save_zj = 0; 1510 s->save_gSel = 0; 1511 s->save_gMinlen = 0; 1512 s->save_gLimit = NULL; 1513 s->save_gBase = NULL; 1514 s->save_gPerm = NULL; 1515 } 1516 1517 /*restore from the save area*/ 1518 i = s->save_i; 1519 j = s->save_j; 1520 t = s->save_t; 1521 alphaSize = s->save_alphaSize; 1522 nGroups = s->save_nGroups; 1523 nSelectors = s->save_nSelectors; 1524 EOB = s->save_EOB; 1525 groupNo = s->save_groupNo; 1526 groupPos = s->save_groupPos; 1527 nextSym = s->save_nextSym; 1528 nblockMAX = s->save_nblockMAX; 1529 nblock = s->save_nblock; 1530 es = s->save_es; 1531 N = s->save_N; 1532 curr = s->save_curr; 1533 zt = s->save_zt; 1534 zn = s->save_zn; 1535 zvec = s->save_zvec; 1536 zj = s->save_zj; 1537 gSel = s->save_gSel; 1538 gMinlen = s->save_gMinlen; 1539 gLimit = s->save_gLimit; 1540 gBase = s->save_gBase; 1541 gPerm = s->save_gPerm; 1542 1543 retVal = BZ_OK; 1544 1545 switch (s->state) { 1546 1547 GET_UCHAR(BZ_X_MAGIC_1, uc); 1548 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); 1549 1550 GET_UCHAR(BZ_X_MAGIC_2, uc); 1551 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); 1552 1553 GET_UCHAR(BZ_X_MAGIC_3, uc) 1554 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); 1555 1556 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) 1557 if (s->blockSize100k < (BZ_HDR_0 + 1) || 1558 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); 1559 s->blockSize100k -= BZ_HDR_0; 1560 1561 if (s->smallDecompress) { 1562 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); 1563 s->ll4 = BZALLOC( 1564 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 1565 ); 1566 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); 1567 } else { 1568 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); 1569 if (s->tt == NULL) RETURN(BZ_MEM_ERROR); 1570 } 1571 1572 GET_UCHAR(BZ_X_BLKHDR_1, uc); 1573 1574 if (uc == 0x17) goto endhdr_2; 1575 if (uc != 0x31) RETURN(BZ_DATA_ERROR); 1576 GET_UCHAR(BZ_X_BLKHDR_2, uc); 1577 if (uc != 0x41) RETURN(BZ_DATA_ERROR); 1578 GET_UCHAR(BZ_X_BLKHDR_3, uc); 1579 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 1580 GET_UCHAR(BZ_X_BLKHDR_4, uc); 1581 if (uc != 0x26) RETURN(BZ_DATA_ERROR); 1582 GET_UCHAR(BZ_X_BLKHDR_5, uc); 1583 if (uc != 0x53) RETURN(BZ_DATA_ERROR); 1584 GET_UCHAR(BZ_X_BLKHDR_6, uc); 1585 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 1586 1587 s->currBlockNo++; 1588 if (s->verbosity >= 2) 1589 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); 1590 1591 s->storedBlockCRC = 0; 1592 GET_UCHAR(BZ_X_BCRC_1, uc); 1593 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1594 GET_UCHAR(BZ_X_BCRC_2, uc); 1595 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1596 GET_UCHAR(BZ_X_BCRC_3, uc); 1597 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1598 GET_UCHAR(BZ_X_BCRC_4, uc); 1599 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1600 1601 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); 1602 1603 s->origPtr = 0; 1604 GET_UCHAR(BZ_X_ORIGPTR_1, uc); 1605 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1606 GET_UCHAR(BZ_X_ORIGPTR_2, uc); 1607 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1608 GET_UCHAR(BZ_X_ORIGPTR_3, uc); 1609 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1610 1611 if (s->origPtr < 0) 1612 RETURN(BZ_DATA_ERROR); 1613 if (s->origPtr > 10 + 100000*s->blockSize100k) 1614 RETURN(BZ_DATA_ERROR); 1615 1616 /*--- Receive the mapping table ---*/ 1617 for (i = 0; i < 16; i++) { 1618 GET_BIT(BZ_X_MAPPING_1, uc); 1619 if (uc == 1) 1620 s->inUse16[i] = True; else 1621 s->inUse16[i] = False; 1622 } 1623 1624 for (i = 0; i < 256; i++) s->inUse[i] = False; 1625 1626 for (i = 0; i < 16; i++) 1627 if (s->inUse16[i]) 1628 for (j = 0; j < 16; j++) { 1629 GET_BIT(BZ_X_MAPPING_2, uc); 1630 if (uc == 1) s->inUse[i * 16 + j] = True; 1631 } 1632 makeMaps_d ( s ); 1633 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); 1634 alphaSize = s->nInUse+2; 1635 1636 /*--- Now the selectors ---*/ 1637 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); 1638 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR); 1639 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); 1640 if (nSelectors < 1) RETURN(BZ_DATA_ERROR); 1641 for (i = 0; i < nSelectors; i++) { 1642 j = 0; 1643 while (True) { 1644 GET_BIT(BZ_X_SELECTOR_3, uc); 1645 if (uc == 0) break; 1646 j++; 1647 if (j >= nGroups) RETURN(BZ_DATA_ERROR); 1648 } 1649 s->selectorMtf[i] = j; 1650 } 1651 1652 /*--- Undo the MTF values for the selectors. ---*/ 1653 { 1654 UChar pos[BZ_N_GROUPS], tmp, v; 1655 for (v = 0; v < nGroups; v++) pos[v] = v; 1656 1657 for (i = 0; i < nSelectors; i++) { 1658 v = s->selectorMtf[i]; 1659 tmp = pos[v]; 1660 while (v > 0) { pos[v] = pos[v-1]; v--; } 1661 pos[0] = tmp; 1662 s->selector[i] = tmp; 1663 } 1664 } 1665 1666 /*--- Now the coding tables ---*/ 1667 for (t = 0; t < nGroups; t++) { 1668 GET_BITS(BZ_X_CODING_1, curr, 5); 1669 for (i = 0; i < alphaSize; i++) { 1670 while (True) { 1671 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); 1672 GET_BIT(BZ_X_CODING_2, uc); 1673 if (uc == 0) break; 1674 GET_BIT(BZ_X_CODING_3, uc); 1675 if (uc == 0) curr++; else curr--; 1676 } 1677 s->len[t][i] = curr; 1678 } 1679 } 1680 1681 /*--- Create the Huffman decoding tables ---*/ 1682 for (t = 0; t < nGroups; t++) { 1683 minLen = 32; 1684 maxLen = 0; 1685 for (i = 0; i < alphaSize; i++) { 1686 if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 1687 if (s->len[t][i] < minLen) minLen = s->len[t][i]; 1688 } 1689 BZ2_hbCreateDecodeTables ( 1690 &(s->limit[t][0]), 1691 &(s->base[t][0]), 1692 &(s->perm[t][0]), 1693 &(s->len[t][0]), 1694 minLen, maxLen, alphaSize 1695 ); 1696 s->minLens[t] = minLen; 1697 } 1698 1699 /*--- Now the MTF values ---*/ 1700 1701 EOB = s->nInUse+1; 1702 nblockMAX = 100000 * s->blockSize100k; 1703 groupNo = -1; 1704 groupPos = 0; 1705 1706 for (i = 0; i <= 255; i++) s->unzftab[i] = 0; 1707 1708 /*-- MTF init --*/ 1709 { 1710 Int32 ii, jj, kk; 1711 kk = MTFA_SIZE-1; 1712 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { 1713 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 1714 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); 1715 kk--; 1716 } 1717 s->mtfbase[ii] = kk + 1; 1718 } 1719 } 1720 /*-- end MTF init --*/ 1721 1722 nblock = 0; 1723 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); 1724 1725 while (True) { 1726 1727 if (nextSym == EOB) break; 1728 1729 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { 1730 1731 es = -1; 1732 N = 1; 1733 do { 1734 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else 1735 if (nextSym == BZ_RUNB) es = es + (1+1) * N; 1736 N = N * 2; 1737 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); 1738 } 1739 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); 1740 1741 es++; 1742 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; 1743 s->unzftab[uc] += es; 1744 1745 if (s->smallDecompress) 1746 while (es > 0) { 1747 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1748 s->ll16[nblock] = (UInt16)uc; 1749 nblock++; 1750 es--; 1751 } 1752 else 1753 while (es > 0) { 1754 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1755 s->tt[nblock] = (UInt32)uc; 1756 nblock++; 1757 es--; 1758 }; 1759 1760 continue; 1761 1762 } else { 1763 1764 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1765 1766 /*-- uc = MTF ( nextSym-1 ) --*/ 1767 { 1768 Int32 ii, jj, kk, pp, lno, off; 1769 UInt32 nn; 1770 nn = (UInt32)(nextSym - 1); 1771 1772 if (nn < MTFL_SIZE) { 1773 /* avoid general-case expense */ 1774 pp = s->mtfbase[0]; 1775 uc = s->mtfa[pp+nn]; 1776 while (nn > 3) { 1777 Int32 z = pp+nn; 1778 s->mtfa[(z) ] = s->mtfa[(z)-1]; 1779 s->mtfa[(z)-1] = s->mtfa[(z)-2]; 1780 s->mtfa[(z)-2] = s->mtfa[(z)-3]; 1781 s->mtfa[(z)-3] = s->mtfa[(z)-4]; 1782 nn -= 4; 1783 } 1784 while (nn > 0) { 1785 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 1786 }; 1787 s->mtfa[pp] = uc; 1788 } else { 1789 /* general case */ 1790 lno = nn / MTFL_SIZE; 1791 off = nn % MTFL_SIZE; 1792 pp = s->mtfbase[lno] + off; 1793 uc = s->mtfa[pp]; 1794 while (pp > s->mtfbase[lno]) { 1795 s->mtfa[pp] = s->mtfa[pp-1]; pp--; 1796 }; 1797 s->mtfbase[lno]++; 1798 while (lno > 0) { 1799 s->mtfbase[lno]--; 1800 s->mtfa[s->mtfbase[lno]] 1801 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; 1802 lno--; 1803 } 1804 s->mtfbase[0]--; 1805 s->mtfa[s->mtfbase[0]] = uc; 1806 if (s->mtfbase[0] == 0) { 1807 kk = MTFA_SIZE-1; 1808 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { 1809 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 1810 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; 1811 kk--; 1812 } 1813 s->mtfbase[ii] = kk + 1; 1814 } 1815 } 1816 } 1817 } 1818 /*-- end uc = MTF ( nextSym-1 ) --*/ 1819 1820 s->unzftab[s->seqToUnseq[uc]]++; 1821 if (s->smallDecompress) 1822 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else 1823 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); 1824 nblock++; 1825 1826 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); 1827 continue; 1828 } 1829 } 1830 1831 /* Now we know what nblock is, we can do a better sanity 1832 check on s->origPtr. 1833 */ 1834 if (s->origPtr < 0 || s->origPtr >= nblock) 1835 RETURN(BZ_DATA_ERROR); 1836 1837 /*-- Set up cftab to facilitate generation of T^(-1) --*/ 1838 s->cftab[0] = 0; 1839 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; 1840 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; 1841 for (i = 0; i <= 256; i++) { 1842 if (s->cftab[i] < 0 || s->cftab[i] > nblock) { 1843 /* s->cftab[i] can legitimately be == nblock */ 1844 RETURN(BZ_DATA_ERROR); 1845 } 1846 } 1847 1848 s->state_out_len = 0; 1849 s->state_out_ch = 0; 1850 BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); 1851 s->state = BZ_X_OUTPUT; 1852 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); 1853 1854 if (s->smallDecompress) { 1855 1856 /*-- Make a copy of cftab, used in generation of T --*/ 1857 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; 1858 1859 /*-- compute the T vector --*/ 1860 for (i = 0; i < nblock; i++) { 1861 uc = (UChar)(s->ll16[i]); 1862 SET_LL(i, s->cftabCopy[uc]); 1863 s->cftabCopy[uc]++; 1864 } 1865 1866 /*-- Compute T^(-1) by pointer reversal on T --*/ 1867 i = s->origPtr; 1868 j = GET_LL(i); 1869 do { 1870 Int32 tmp = GET_LL(j); 1871 SET_LL(j, i); 1872 i = j; 1873 j = tmp; 1874 } 1875 while (i != s->origPtr); 1876 1877 s->tPos = s->origPtr; 1878 s->nblock_used = 0; 1879 if (s->blockRandomised) { 1880 BZ_RAND_INIT_MASK; 1881 BZ_GET_SMALL(s->k0); s->nblock_used++; 1882 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 1883 } else { 1884 BZ_GET_SMALL(s->k0); s->nblock_used++; 1885 } 1886 1887 } else { 1888 1889 /*-- compute the T^(-1) vector --*/ 1890 for (i = 0; i < nblock; i++) { 1891 uc = (UChar)(s->tt[i] & 0xff); 1892 s->tt[s->cftab[uc]] |= (i << 8); 1893 s->cftab[uc]++; 1894 } 1895 1896 s->tPos = s->tt[s->origPtr] >> 8; 1897 s->nblock_used = 0; 1898 if (s->blockRandomised) { 1899 BZ_RAND_INIT_MASK; 1900 BZ_GET_FAST(s->k0); s->nblock_used++; 1901 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 1902 } else { 1903 BZ_GET_FAST(s->k0); s->nblock_used++; 1904 } 1905 1906 } 1907 1908 RETURN(BZ_OK); 1909 1910 1911 1912 endhdr_2: 1913 1914 GET_UCHAR(BZ_X_ENDHDR_2, uc); 1915 if (uc != 0x72) RETURN(BZ_DATA_ERROR); 1916 GET_UCHAR(BZ_X_ENDHDR_3, uc); 1917 if (uc != 0x45) RETURN(BZ_DATA_ERROR); 1918 GET_UCHAR(BZ_X_ENDHDR_4, uc); 1919 if (uc != 0x38) RETURN(BZ_DATA_ERROR); 1920 GET_UCHAR(BZ_X_ENDHDR_5, uc); 1921 if (uc != 0x50) RETURN(BZ_DATA_ERROR); 1922 GET_UCHAR(BZ_X_ENDHDR_6, uc); 1923 if (uc != 0x90) RETURN(BZ_DATA_ERROR); 1924 1925 s->storedCombinedCRC = 0; 1926 GET_UCHAR(BZ_X_CCRC_1, uc); 1927 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1928 GET_UCHAR(BZ_X_CCRC_2, uc); 1929 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1930 GET_UCHAR(BZ_X_CCRC_3, uc); 1931 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1932 GET_UCHAR(BZ_X_CCRC_4, uc); 1933 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1934 1935 s->state = BZ_X_IDLE; 1936 RETURN(BZ_STREAM_END); 1937 1938 default: AssertH ( False, 4001 ); 1939 } 1940 1941 AssertH ( False, 4002 ); 1942 1943 save_state_and_return: 1944 1945 s->save_i = i; 1946 s->save_j = j; 1947 s->save_t = t; 1948 s->save_alphaSize = alphaSize; 1949 s->save_nGroups = nGroups; 1950 s->save_nSelectors = nSelectors; 1951 s->save_EOB = EOB; 1952 s->save_groupNo = groupNo; 1953 s->save_groupPos = groupPos; 1954 s->save_nextSym = nextSym; 1955 s->save_nblockMAX = nblockMAX; 1956 s->save_nblock = nblock; 1957 s->save_es = es; 1958 s->save_N = N; 1959 s->save_curr = curr; 1960 s->save_zt = zt; 1961 s->save_zn = zn; 1962 s->save_zvec = zvec; 1963 s->save_zj = zj; 1964 s->save_gSel = gSel; 1965 s->save_gMinlen = gMinlen; 1966 s->save_gLimit = gLimit; 1967 s->save_gBase = gBase; 1968 s->save_gPerm = gPerm; 1969 1970 return retVal; 1971 } 1972 1973 1974 /*-------------------------------------------------------------*/ 1975 /*--- end decompress.c ---*/ 1976 /*-------------------------------------------------------------*/ 1977 1978 /*-------------------------------------------------------------*/ 1979 /*--- Block sorting machinery ---*/ 1980 /*--- blocksort.c ---*/ 1981 /*-------------------------------------------------------------*/ 1982 1983 /*-- 1984 This file is a part of bzip2 and/or libbzip2, a program and 1985 library for lossless, block-sorting data compression. 1986 1987 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 1988 1989 Redistribution and use in source and binary forms, with or without 1990 modification, are permitted provided that the following conditions 1991 are met: 1992 1993 1. Redistributions of source code must retain the above copyright 1994 notice, this list of conditions and the following disclaimer. 1995 1996 2. The origin of this software must not be misrepresented; you must 1997 not claim that you wrote the original software. If you use this 1998 software in a product, an acknowledgment in the product 1999 documentation would be appreciated but is not required. 2000 2001 3. Altered source versions must be plainly marked as such, and must 2002 not be misrepresented as being the original software. 2003 2004 4. The name of the author may not be used to endorse or promote 2005 products derived from this software without specific prior written 2006 permission. 2007 2008 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 2009 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 2010 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2011 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 2012 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2013 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 2014 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2015 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 2016 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 2017 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 2018 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2019 2020 Julian Seward, Cambridge, UK. 2021 jseward (at) bzip.org 2022 bzip2/libbzip2 version 1.0 of 21 March 2000 2023 2024 This program is based on (at least) the work of: 2025 Mike Burrows 2026 David Wheeler 2027 Peter Fenwick 2028 Alistair Moffat 2029 Radford Neal 2030 Ian H. Witten 2031 Robert Sedgewick 2032 Jon L. Bentley 2033 2034 For more information on these sources, see the manual. 2035 2036 To get some idea how the block sorting algorithms in this file 2037 work, read my paper 2038 On the Performance of BWT Sorting Algorithms 2039 in Proceedings of the IEEE Data Compression Conference 2000, 2040 Snowbird, Utah, USA, 27-30 March 2000. The main sort in this 2041 file implements the algorithm called cache in the paper. 2042 --*/ 2043 2044 2045 2046 /*---------------------------------------------*/ 2047 /*--- Fallback O(N log(N)^2) sorting ---*/ 2048 /*--- algorithm, for repetitive blocks ---*/ 2049 /*---------------------------------------------*/ 2050 2051 /*---------------------------------------------*/ 2052 static 2053 __inline__ 2054 void fallbackSimpleSort ( UInt32* fmap, 2055 UInt32* eclass, 2056 Int32 lo, 2057 Int32 hi ) 2058 { 2059 Int32 i, j, tmp; 2060 UInt32 ec_tmp; 2061 2062 if (lo == hi) return; 2063 2064 if (hi - lo > 3) { 2065 for ( i = hi-4; i >= lo; i-- ) { 2066 tmp = fmap[i]; 2067 ec_tmp = eclass[tmp]; 2068 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 2069 fmap[j-4] = fmap[j]; 2070 fmap[j-4] = tmp; 2071 } 2072 } 2073 2074 for ( i = hi-1; i >= lo; i-- ) { 2075 tmp = fmap[i]; 2076 ec_tmp = eclass[tmp]; 2077 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 2078 fmap[j-1] = fmap[j]; 2079 fmap[j-1] = tmp; 2080 } 2081 } 2082 2083 2084 /*---------------------------------------------*/ 2085 #define fswap(zz1, zz2) \ 2086 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 2087 2088 #define fvswap(zzp1, zzp2, zzn) \ 2089 { \ 2090 Int32 yyp1 = (zzp1); \ 2091 Int32 yyp2 = (zzp2); \ 2092 Int32 yyn = (zzn); \ 2093 while (yyn > 0) { \ 2094 fswap(fmap[yyp1], fmap[yyp2]); \ 2095 yyp1++; yyp2++; yyn--; \ 2096 } \ 2097 } 2098 2099 2100 #define fmin(a,b) ((a) < (b)) ? (a) : (b) 2101 2102 #define fpush(lz,hz) { stackLo[sp] = lz; \ 2103 stackHi[sp] = hz; \ 2104 sp++; } 2105 2106 #define fpop(lz,hz) { sp--; \ 2107 lz = stackLo[sp]; \ 2108 hz = stackHi[sp]; } 2109 2110 #define FALLBACK_QSORT_SMALL_THRESH 10 2111 #define FALLBACK_QSORT_STACK_SIZE 100 2112 2113 2114 static 2115 void fallbackQSort3 ( UInt32* fmap, 2116 UInt32* eclass, 2117 Int32 loSt, 2118 Int32 hiSt ) 2119 { 2120 Int32 unLo, unHi, ltLo, gtHi, n, m; 2121 Int32 sp, lo, hi; 2122 UInt32 med, r, r3; 2123 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 2124 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 2125 2126 r = 0; 2127 2128 sp = 0; 2129 fpush ( loSt, hiSt ); 2130 2131 while (sp > 0) { 2132 2133 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); 2134 2135 fpop ( lo, hi ); 2136 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 2137 fallbackSimpleSort ( fmap, eclass, lo, hi ); 2138 continue; 2139 } 2140 2141 /* Random partitioning. Median of 3 sometimes fails to 2142 avoid bad cases. Median of 9 seems to help but 2143 looks rather expensive. This too seems to work but 2144 is cheaper. Guidance for the magic constants 2145 7621 and 32768 is taken from Sedgewick's algorithms 2146 book, chapter 35. 2147 */ 2148 r = ((r * 7621) + 1) % 32768; 2149 r3 = r % 3; 2150 if (r3 == 0) med = eclass[fmap[lo]]; else 2151 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 2152 med = eclass[fmap[hi]]; 2153 2154 unLo = ltLo = lo; 2155 unHi = gtHi = hi; 2156 2157 while (1) { 2158 while (1) { 2159 if (unLo > unHi) break; 2160 n = (Int32)eclass[fmap[unLo]] - (Int32)med; 2161 if (n == 0) { 2162 fswap(fmap[unLo], fmap[ltLo]); 2163 ltLo++; unLo++; 2164 continue; 2165 }; 2166 if (n > 0) break; 2167 unLo++; 2168 } 2169 while (1) { 2170 if (unLo > unHi) break; 2171 n = (Int32)eclass[fmap[unHi]] - (Int32)med; 2172 if (n == 0) { 2173 fswap(fmap[unHi], fmap[gtHi]); 2174 gtHi--; unHi--; 2175 continue; 2176 }; 2177 if (n < 0) break; 2178 unHi--; 2179 } 2180 if (unLo > unHi) break; 2181 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 2182 } 2183 2184 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 2185 2186 if (gtHi < ltLo) continue; 2187 2188 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 2189 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 2190 2191 n = lo + unLo - ltLo - 1; 2192 m = hi - (gtHi - unHi) + 1; 2193 2194 if (n - lo > hi - m) { 2195 fpush ( lo, n ); 2196 fpush ( m, hi ); 2197 } else { 2198 fpush ( m, hi ); 2199 fpush ( lo, n ); 2200 } 2201 } 2202 } 2203 2204 #undef fmin 2205 #undef fpush 2206 #undef fpop 2207 #undef fswap 2208 #undef fvswap 2209 #undef FALLBACK_QSORT_SMALL_THRESH 2210 #undef FALLBACK_QSORT_STACK_SIZE 2211 2212 2213 /*---------------------------------------------*/ 2214 /* Pre: 2215 nblock > 0 2216 eclass exists for [0 .. nblock-1] 2217 ((UChar*)eclass) [0 .. nblock-1] holds block 2218 ptr exists for [0 .. nblock-1] 2219 2220 Post: 2221 ((UChar*)eclass) [0 .. nblock-1] holds block 2222 All other areas of eclass destroyed 2223 fmap [0 .. nblock-1] holds sorted order 2224 bhtab [ 0 .. 2+(nblock/32) ] destroyed 2225 */ 2226 2227 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 2228 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 2229 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 2230 #define WORD_BH(zz) bhtab[(zz) >> 5] 2231 #define UNALIGNED_BH(zz) ((zz) & 0x01f) 2232 2233 static 2234 void fallbackSort ( UInt32* fmap, 2235 UInt32* eclass, 2236 UInt32* bhtab, 2237 Int32 nblock, 2238 Int32 verb ) 2239 { 2240 Int32 ftab[257]; 2241 Int32 ftabCopy[256]; 2242 Int32 H, i, j, k, l, r, cc, cc1; 2243 Int32 nNotDone; 2244 Int32 nBhtab; 2245 UChar* eclass8 = (UChar*)eclass; 2246 2247 /*-- 2248 Initial 1-char radix sort to generate 2249 initial fmap and initial BH bits. 2250 --*/ 2251 if (verb >= 4) 2252 VPrintf0 ( " bucket sorting ...\n" ); 2253 for (i = 0; i < 257; i++) ftab[i] = 0; 2254 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 2255 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 2256 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 2257 2258 for (i = 0; i < nblock; i++) { 2259 j = eclass8[i]; 2260 k = ftab[j] - 1; 2261 ftab[j] = k; 2262 fmap[k] = i; 2263 } 2264 2265 nBhtab = 2 + (nblock / 32); 2266 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 2267 for (i = 0; i < 256; i++) SET_BH(ftab[i]); 2268 2269 /*-- 2270 Inductively refine the buckets. Kind-of an 2271 "exponential radix sort" (!), inspired by the 2272 Manber-Myers suffix array construction algorithm. 2273 --*/ 2274 2275 /*-- set sentinel bits for block-end detection --*/ 2276 for (i = 0; i < 32; i++) { 2277 SET_BH(nblock + 2*i); 2278 CLEAR_BH(nblock + 2*i + 1); 2279 } 2280 2281 /*-- the log(N) loop --*/ 2282 H = 1; 2283 while (1) { 2284 2285 if (verb >= 4) 2286 VPrintf1 ( " depth %6d has ", H ); 2287 2288 j = 0; 2289 for (i = 0; i < nblock; i++) { 2290 if (ISSET_BH(i)) j = i; 2291 k = fmap[i] - H; if (k < 0) k += nblock; 2292 eclass[k] = j; 2293 } 2294 2295 nNotDone = 0; 2296 r = -1; 2297 while (1) { 2298 2299 /*-- find the next non-singleton bucket --*/ 2300 k = r + 1; 2301 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 2302 if (ISSET_BH(k)) { 2303 while (WORD_BH(k) == 0xffffffff) k += 32; 2304 while (ISSET_BH(k)) k++; 2305 } 2306 l = k - 1; 2307 if (l >= nblock) break; 2308 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 2309 if (!ISSET_BH(k)) { 2310 while (WORD_BH(k) == 0x00000000) k += 32; 2311 while (!ISSET_BH(k)) k++; 2312 } 2313 r = k - 1; 2314 if (r >= nblock) break; 2315 2316 /*-- now [l, r] bracket current bucket --*/ 2317 if (r > l) { 2318 nNotDone += (r - l + 1); 2319 fallbackQSort3 ( fmap, eclass, l, r ); 2320 2321 /*-- scan bucket and generate header bits-- */ 2322 cc = -1; 2323 for (i = l; i <= r; i++) { 2324 cc1 = eclass[fmap[i]]; 2325 if (cc != cc1) { SET_BH(i); cc = cc1; }; 2326 } 2327 } 2328 } 2329 2330 if (verb >= 4) 2331 VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 2332 2333 H *= 2; 2334 if (H > nblock || nNotDone == 0) break; 2335 } 2336 2337 /*-- 2338 Reconstruct the original block in 2339 eclass8 [0 .. nblock-1], since the 2340 previous phase destroyed it. 2341 --*/ 2342 if (verb >= 4) 2343 VPrintf0 ( " reconstructing block ...\n" ); 2344 j = 0; 2345 for (i = 0; i < nblock; i++) { 2346 while (ftabCopy[j] == 0) j++; 2347 ftabCopy[j]--; 2348 eclass8[fmap[i]] = (UChar)j; 2349 } 2350 AssertH ( j < 256, 1005 ); 2351 } 2352 2353 #undef SET_BH 2354 #undef CLEAR_BH 2355 #undef ISSET_BH 2356 #undef WORD_BH 2357 #undef UNALIGNED_BH 2358 2359 2360 /*---------------------------------------------*/ 2361 /*--- The main, O(N^2 log(N)) sorting ---*/ 2362 /*--- algorithm. Faster for "normal" ---*/ 2363 /*--- non-repetitive blocks. ---*/ 2364 /*---------------------------------------------*/ 2365 2366 /*---------------------------------------------*/ 2367 static 2368 __inline__ 2369 Bool mainGtU ( UInt32 i1, 2370 UInt32 i2, 2371 UChar* block, 2372 UInt16* quadrant, 2373 UInt32 nblock, 2374 Int32* budget ) 2375 { 2376 Int32 k; 2377 UChar c1, c2; 2378 UInt16 s1, s2; 2379 2380 AssertD ( i1 != i2, "mainGtU" ); 2381 /* 1 */ 2382 c1 = block[i1]; c2 = block[i2]; 2383 if (c1 != c2) return (c1 > c2); 2384 i1++; i2++; 2385 /* 2 */ 2386 c1 = block[i1]; c2 = block[i2]; 2387 if (c1 != c2) return (c1 > c2); 2388 i1++; i2++; 2389 /* 3 */ 2390 c1 = block[i1]; c2 = block[i2]; 2391 if (c1 != c2) return (c1 > c2); 2392 i1++; i2++; 2393 /* 4 */ 2394 c1 = block[i1]; c2 = block[i2]; 2395 if (c1 != c2) return (c1 > c2); 2396 i1++; i2++; 2397 /* 5 */ 2398 c1 = block[i1]; c2 = block[i2]; 2399 if (c1 != c2) return (c1 > c2); 2400 i1++; i2++; 2401 /* 6 */ 2402 c1 = block[i1]; c2 = block[i2]; 2403 if (c1 != c2) return (c1 > c2); 2404 i1++; i2++; 2405 /* 7 */ 2406 c1 = block[i1]; c2 = block[i2]; 2407 if (c1 != c2) return (c1 > c2); 2408 i1++; i2++; 2409 /* 8 */ 2410 c1 = block[i1]; c2 = block[i2]; 2411 if (c1 != c2) return (c1 > c2); 2412 i1++; i2++; 2413 /* 9 */ 2414 c1 = block[i1]; c2 = block[i2]; 2415 if (c1 != c2) return (c1 > c2); 2416 i1++; i2++; 2417 /* 10 */ 2418 c1 = block[i1]; c2 = block[i2]; 2419 if (c1 != c2) return (c1 > c2); 2420 i1++; i2++; 2421 /* 11 */ 2422 c1 = block[i1]; c2 = block[i2]; 2423 if (c1 != c2) return (c1 > c2); 2424 i1++; i2++; 2425 /* 12 */ 2426 c1 = block[i1]; c2 = block[i2]; 2427 if (c1 != c2) return (c1 > c2); 2428 i1++; i2++; 2429 2430 k = nblock + 8; 2431 2432 do { 2433 /* 1 */ 2434 c1 = block[i1]; c2 = block[i2]; 2435 if (c1 != c2) return (c1 > c2); 2436 s1 = quadrant[i1]; s2 = quadrant[i2]; 2437 if (s1 != s2) return (s1 > s2); 2438 i1++; i2++; 2439 /* 2 */ 2440 c1 = block[i1]; c2 = block[i2]; 2441 if (c1 != c2) return (c1 > c2); 2442 s1 = quadrant[i1]; s2 = quadrant[i2]; 2443 if (s1 != s2) return (s1 > s2); 2444 i1++; i2++; 2445 /* 3 */ 2446 c1 = block[i1]; c2 = block[i2]; 2447 if (c1 != c2) return (c1 > c2); 2448 s1 = quadrant[i1]; s2 = quadrant[i2]; 2449 if (s1 != s2) return (s1 > s2); 2450 i1++; i2++; 2451 /* 4 */ 2452 c1 = block[i1]; c2 = block[i2]; 2453 if (c1 != c2) return (c1 > c2); 2454 s1 = quadrant[i1]; s2 = quadrant[i2]; 2455 if (s1 != s2) return (s1 > s2); 2456 i1++; i2++; 2457 /* 5 */ 2458 c1 = block[i1]; c2 = block[i2]; 2459 if (c1 != c2) return (c1 > c2); 2460 s1 = quadrant[i1]; s2 = quadrant[i2]; 2461 if (s1 != s2) return (s1 > s2); 2462 i1++; i2++; 2463 /* 6 */ 2464 c1 = block[i1]; c2 = block[i2]; 2465 if (c1 != c2) return (c1 > c2); 2466 s1 = quadrant[i1]; s2 = quadrant[i2]; 2467 if (s1 != s2) return (s1 > s2); 2468 i1++; i2++; 2469 /* 7 */ 2470 c1 = block[i1]; c2 = block[i2]; 2471 if (c1 != c2) return (c1 > c2); 2472 s1 = quadrant[i1]; s2 = quadrant[i2]; 2473 if (s1 != s2) return (s1 > s2); 2474 i1++; i2++; 2475 /* 8 */ 2476 c1 = block[i1]; c2 = block[i2]; 2477 if (c1 != c2) return (c1 > c2); 2478 s1 = quadrant[i1]; s2 = quadrant[i2]; 2479 if (s1 != s2) return (s1 > s2); 2480 i1++; i2++; 2481 2482 if (i1 >= nblock) i1 -= nblock; 2483 if (i2 >= nblock) i2 -= nblock; 2484 2485 k -= 8; 2486 (*budget)--; 2487 } 2488 while (k >= 0); 2489 2490 return False; 2491 } 2492 2493 2494 /*---------------------------------------------*/ 2495 /*-- 2496 Knuth's increments seem to work better 2497 than Incerpi-Sedgewick here. Possibly 2498 because the number of elems to sort is 2499 usually small, typically <= 20. 2500 --*/ 2501 static 2502 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 2503 9841, 29524, 88573, 265720, 2504 797161, 2391484 }; 2505 2506 static 2507 void mainSimpleSort ( UInt32* ptr, 2508 UChar* block, 2509 UInt16* quadrant, 2510 Int32 nblock, 2511 Int32 lo, 2512 Int32 hi, 2513 Int32 d, 2514 Int32* budget ) 2515 { 2516 Int32 i, j, h, bigN, hp; 2517 UInt32 v; 2518 2519 bigN = hi - lo + 1; 2520 if (bigN < 2) return; 2521 2522 hp = 0; 2523 while (incs[hp] < bigN) hp++; 2524 hp--; 2525 2526 for (; hp >= 0; hp--) { 2527 h = incs[hp]; 2528 2529 i = lo + h; 2530 while (True) { 2531 2532 /*-- copy 1 --*/ 2533 if (i > hi) break; 2534 v = ptr[i]; 2535 j = i; 2536 while ( mainGtU ( 2537 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2538 ) ) { 2539 ptr[j] = ptr[j-h]; 2540 j = j - h; 2541 if (j <= (lo + h - 1)) break; 2542 } 2543 ptr[j] = v; 2544 i++; 2545 2546 /*-- copy 2 --*/ 2547 if (i > hi) break; 2548 v = ptr[i]; 2549 j = i; 2550 while ( mainGtU ( 2551 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2552 ) ) { 2553 ptr[j] = ptr[j-h]; 2554 j = j - h; 2555 if (j <= (lo + h - 1)) break; 2556 } 2557 ptr[j] = v; 2558 i++; 2559 2560 /*-- copy 3 --*/ 2561 if (i > hi) break; 2562 v = ptr[i]; 2563 j = i; 2564 while ( mainGtU ( 2565 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2566 ) ) { 2567 ptr[j] = ptr[j-h]; 2568 j = j - h; 2569 if (j <= (lo + h - 1)) break; 2570 } 2571 ptr[j] = v; 2572 i++; 2573 2574 if (*budget < 0) return; 2575 } 2576 } 2577 } 2578 2579 2580 /*---------------------------------------------*/ 2581 /*-- 2582 The following is an implementation of 2583 an elegant 3-way quicksort for strings, 2584 described in a paper "Fast Algorithms for 2585 Sorting and Searching Strings", by Robert 2586 Sedgewick and Jon L. Bentley. 2587 --*/ 2588 2589 #define mswap(zz1, zz2) \ 2590 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 2591 2592 #define mvswap(zzp1, zzp2, zzn) \ 2593 { \ 2594 Int32 yyp1 = (zzp1); \ 2595 Int32 yyp2 = (zzp2); \ 2596 Int32 yyn = (zzn); \ 2597 while (yyn > 0) { \ 2598 mswap(ptr[yyp1], ptr[yyp2]); \ 2599 yyp1++; yyp2++; yyn--; \ 2600 } \ 2601 } 2602 2603 static 2604 __inline__ 2605 UChar mmed3 ( UChar a, UChar b, UChar c ) 2606 { 2607 UChar t; 2608 if (a > b) { t = a; a = b; b = t; }; 2609 if (b > c) { 2610 b = c; 2611 if (a > b) b = a; 2612 } 2613 return b; 2614 } 2615 2616 #define mmin(a,b) ((a) < (b)) ? (a) : (b) 2617 2618 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 2619 stackHi[sp] = hz; \ 2620 stackD [sp] = dz; \ 2621 sp++; } 2622 2623 #define mpop(lz,hz,dz) { sp--; \ 2624 lz = stackLo[sp]; \ 2625 hz = stackHi[sp]; \ 2626 dz = stackD [sp]; } 2627 2628 2629 #define mnextsize(az) (nextHi[az]-nextLo[az]) 2630 2631 #define mnextswap(az,bz) \ 2632 { Int32 tz; \ 2633 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 2634 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 2635 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 2636 2637 2638 #define MAIN_QSORT_SMALL_THRESH 20 2639 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 2640 #define MAIN_QSORT_STACK_SIZE 100 2641 2642 static 2643 void mainQSort3 ( UInt32* ptr, 2644 UChar* block, 2645 UInt16* quadrant, 2646 Int32 nblock, 2647 Int32 loSt, 2648 Int32 hiSt, 2649 Int32 dSt, 2650 Int32* budget ) 2651 { 2652 Int32 unLo, unHi, ltLo, gtHi, n, m, med; 2653 Int32 sp, lo, hi, d; 2654 2655 Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 2656 Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 2657 Int32 stackD [MAIN_QSORT_STACK_SIZE]; 2658 2659 Int32 nextLo[3]; 2660 Int32 nextHi[3]; 2661 Int32 nextD [3]; 2662 2663 sp = 0; 2664 mpush ( loSt, hiSt, dSt ); 2665 2666 while (sp > 0) { 2667 2668 AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); 2669 2670 mpop ( lo, hi, d ); 2671 if (hi - lo < MAIN_QSORT_SMALL_THRESH || 2672 d > MAIN_QSORT_DEPTH_THRESH) { 2673 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 2674 if (*budget < 0) return; 2675 continue; 2676 } 2677 2678 med = (Int32) 2679 mmed3 ( block[ptr[ lo ]+d], 2680 block[ptr[ hi ]+d], 2681 block[ptr[ (lo+hi)>>1 ]+d] ); 2682 2683 unLo = ltLo = lo; 2684 unHi = gtHi = hi; 2685 2686 while (True) { 2687 while (True) { 2688 if (unLo > unHi) break; 2689 n = ((Int32)block[ptr[unLo]+d]) - med; 2690 if (n == 0) { 2691 mswap(ptr[unLo], ptr[ltLo]); 2692 ltLo++; unLo++; continue; 2693 }; 2694 if (n > 0) break; 2695 unLo++; 2696 } 2697 while (True) { 2698 if (unLo > unHi) break; 2699 n = ((Int32)block[ptr[unHi]+d]) - med; 2700 if (n == 0) { 2701 mswap(ptr[unHi], ptr[gtHi]); 2702 gtHi--; unHi--; continue; 2703 }; 2704 if (n < 0) break; 2705 unHi--; 2706 } 2707 if (unLo > unHi) break; 2708 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 2709 } 2710 2711 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 2712 2713 if (gtHi < ltLo) { 2714 mpush(lo, hi, d+1 ); 2715 continue; 2716 } 2717 2718 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 2719 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 2720 2721 n = lo + unLo - ltLo - 1; 2722 m = hi - (gtHi - unHi) + 1; 2723 2724 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 2725 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 2726 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 2727 2728 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 2729 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 2730 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 2731 2732 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 2733 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 2734 2735 mpush (nextLo[0], nextHi[0], nextD[0]); 2736 mpush (nextLo[1], nextHi[1], nextD[1]); 2737 mpush (nextLo[2], nextHi[2], nextD[2]); 2738 } 2739 } 2740 2741 #undef mswap 2742 #undef mvswap 2743 #undef mpush 2744 #undef mpop 2745 #undef mmin 2746 #undef mnextsize 2747 #undef mnextswap 2748 #undef MAIN_QSORT_SMALL_THRESH 2749 #undef MAIN_QSORT_DEPTH_THRESH 2750 #undef MAIN_QSORT_STACK_SIZE 2751 2752 2753 /*---------------------------------------------*/ 2754 /* Pre: 2755 nblock > N_OVERSHOOT 2756 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 2757 ((UChar*)block32) [0 .. nblock-1] holds block 2758 ptr exists for [0 .. nblock-1] 2759 2760 Post: 2761 ((UChar*)block32) [0 .. nblock-1] holds block 2762 All other areas of block32 destroyed 2763 ftab [0 .. 65536 ] destroyed 2764 ptr [0 .. nblock-1] holds sorted order 2765 if (*budget < 0), sorting was abandoned 2766 */ 2767 2768 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 2769 #define SETMASK (1 << 21) 2770 #define CLEARMASK (~(SETMASK)) 2771 2772 static 2773 void mainSort ( UInt32* ptr, 2774 UChar* block, 2775 UInt16* quadrant, 2776 UInt32* ftab, 2777 Int32 nblock, 2778 Int32 verb, 2779 Int32* budget ) 2780 { 2781 Int32 i, j, k, ss, sb; 2782 Int32 runningOrder[256]; 2783 Bool bigDone[256]; 2784 Int32 copyStart[256]; 2785 Int32 copyEnd [256]; 2786 UChar c1; 2787 Int32 numQSorted; 2788 UInt16 s; 2789 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 2790 2791 /*-- set up the 2-byte frequency table --*/ 2792 for (i = 65536; i >= 0; i--) ftab[i] = 0; 2793 2794 j = block[0] << 8; 2795 i = nblock-1; 2796 for (; i >= 3; i -= 4) { 2797 quadrant[i] = 0; 2798 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 2799 ftab[j]++; 2800 quadrant[i-1] = 0; 2801 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 2802 ftab[j]++; 2803 quadrant[i-2] = 0; 2804 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 2805 ftab[j]++; 2806 quadrant[i-3] = 0; 2807 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 2808 ftab[j]++; 2809 } 2810 for (; i >= 0; i--) { 2811 quadrant[i] = 0; 2812 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 2813 ftab[j]++; 2814 } 2815 2816 /*-- (emphasises close relationship of block & quadrant) --*/ 2817 for (i = 0; i < BZ_N_OVERSHOOT; i++) { 2818 block [nblock+i] = block[i]; 2819 quadrant[nblock+i] = 0; 2820 } 2821 2822 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 2823 2824 /*-- Complete the initial radix sort --*/ 2825 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 2826 2827 s = block[0] << 8; 2828 i = nblock-1; 2829 for (; i >= 3; i -= 4) { 2830 s = (s >> 8) | (block[i] << 8); 2831 j = ftab[s] -1; 2832 ftab[s] = j; 2833 ptr[j] = i; 2834 s = (s >> 8) | (block[i-1] << 8); 2835 j = ftab[s] -1; 2836 ftab[s] = j; 2837 ptr[j] = i-1; 2838 s = (s >> 8) | (block[i-2] << 8); 2839 j = ftab[s] -1; 2840 ftab[s] = j; 2841 ptr[j] = i-2; 2842 s = (s >> 8) | (block[i-3] << 8); 2843 j = ftab[s] -1; 2844 ftab[s] = j; 2845 ptr[j] = i-3; 2846 } 2847 for (; i >= 0; i--) { 2848 s = (s >> 8) | (block[i] << 8); 2849 j = ftab[s] -1; 2850 ftab[s] = j; 2851 ptr[j] = i; 2852 } 2853 2854 /*-- 2855 Now ftab contains the first loc of every small bucket. 2856 Calculate the running order, from smallest to largest 2857 big bucket. 2858 --*/ 2859 for (i = 0; i <= 255; i++) { 2860 bigDone [i] = False; 2861 runningOrder[i] = i; 2862 } 2863 2864 { 2865 Int32 vv; 2866 Int32 h = 1; 2867 do h = 3 * h + 1; while (h <= 256); 2868 do { 2869 h = h / 3; 2870 for (i = h; i <= 255; i++) { 2871 vv = runningOrder[i]; 2872 j = i; 2873 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 2874 runningOrder[j] = runningOrder[j-h]; 2875 j = j - h; 2876 if (j <= (h - 1)) goto zero; 2877 } 2878 zero: 2879 runningOrder[j] = vv; 2880 } 2881 } while (h != 1); 2882 } 2883 2884 /*-- 2885 The main sorting loop. 2886 --*/ 2887 2888 numQSorted = 0; 2889 2890 for (i = 0; i <= 255; i++) { 2891 2892 /*-- 2893 Process big buckets, starting with the least full. 2894 Basically this is a 3-step process in which we call 2895 mainQSort3 to sort the small buckets [ss, j], but 2896 also make a big effort to avoid the calls if we can. 2897 --*/ 2898 ss = runningOrder[i]; 2899 2900 /*-- 2901 Step 1: 2902 Complete the big bucket [ss] by quicksorting 2903 any unsorted small buckets [ss, j], for j != ss. 2904 Hopefully previous pointer-scanning phases have already 2905 completed many of the small buckets [ss, j], so 2906 we don't have to sort them at all. 2907 --*/ 2908 for (j = 0; j <= 255; j++) { 2909 if (j != ss) { 2910 sb = (ss << 8) + j; 2911 if ( ! (ftab[sb] & SETMASK) ) { 2912 Int32 lo = ftab[sb] & CLEARMASK; 2913 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 2914 if (hi > lo) { 2915 if (verb >= 4) 2916 VPrintf4 ( " qsort [0x%x, 0x%x] " 2917 "done %d this %d\n", 2918 ss, j, numQSorted, hi - lo + 1 ); 2919 mainQSort3 ( 2920 ptr, block, quadrant, nblock, 2921 lo, hi, BZ_N_RADIX, budget 2922 ); 2923 numQSorted += (hi - lo + 1); 2924 if (*budget < 0) return; 2925 } 2926 } 2927 ftab[sb] |= SETMASK; 2928 } 2929 } 2930 2931 AssertH ( !bigDone[ss], 1006 ); 2932 2933 /*-- 2934 Step 2: 2935 Now scan this big bucket [ss] so as to synthesise the 2936 sorted order for small buckets [t, ss] for all t, 2937 including, magically, the bucket [ss,ss] too. 2938 This will avoid doing Real Work in subsequent Step 1's. 2939 --*/ 2940 { 2941 for (j = 0; j <= 255; j++) { 2942 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 2943 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 2944 } 2945 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 2946 k = ptr[j]-1; if (k < 0) k += nblock; 2947 c1 = block[k]; 2948 if (!bigDone[c1]) 2949 ptr[ copyStart[c1]++ ] = k; 2950 } 2951 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 2952 k = ptr[j]-1; if (k < 0) k += nblock; 2953 c1 = block[k]; 2954 if (!bigDone[c1]) 2955 ptr[ copyEnd[c1]-- ] = k; 2956 } 2957 } 2958 2959 AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 2960 || 2961 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 2962 Necessity for this case is demonstrated by compressing 2963 a sequence of approximately 48.5 million of character 2964 251; 1.0.0/1.0.1 will then die here. */ 2965 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 2966 1007 ) 2967 2968 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 2969 2970 /*-- 2971 Step 3: 2972 The [ss] big bucket is now done. Record this fact, 2973 and update the quadrant descriptors. Remember to 2974 update quadrants in the overshoot area too, if 2975 necessary. The "if (i < 255)" test merely skips 2976 this updating for the last bucket processed, since 2977 updating for the last bucket is pointless. 2978 2979 The quadrant array provides a way to incrementally 2980 cache sort orderings, as they appear, so as to 2981 make subsequent comparisons in fullGtU() complete 2982 faster. For repetitive blocks this makes a big 2983 difference (but not big enough to be able to avoid 2984 the fallback sorting mechanism, exponential radix sort). 2985 2986 The precise meaning is: at all times: 2987 2988 for 0 <= i < nblock and 0 <= j <= nblock 2989 2990 if block[i] != block[j], 2991 2992 then the relative values of quadrant[i] and 2993 quadrant[j] are meaningless. 2994 2995 else { 2996 if quadrant[i] < quadrant[j] 2997 then the string starting at i lexicographically 2998 precedes the string starting at j 2999 3000 else if quadrant[i] > quadrant[j] 3001 then the string starting at j lexicographically 3002 precedes the string starting at i 3003 3004 else 3005 the relative ordering of the strings starting 3006 at i and j has not yet been determined. 3007 } 3008 --*/ 3009 bigDone[ss] = True; 3010 3011 if (i < 255) { 3012 Int32 bbStart = ftab[ss << 8] & CLEARMASK; 3013 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 3014 Int32 shifts = 0; 3015 3016 while ((bbSize >> shifts) > 65534) shifts++; 3017 3018 for (j = bbSize-1; j >= 0; j--) { 3019 Int32 a2update = ptr[bbStart + j]; 3020 UInt16 qVal = (UInt16)(j >> shifts); 3021 quadrant[a2update] = qVal; 3022 if (a2update < BZ_N_OVERSHOOT) 3023 quadrant[a2update + nblock] = qVal; 3024 } 3025 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 3026 } 3027 3028 } 3029 3030 if (verb >= 4) 3031 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 3032 nblock, numQSorted, nblock - numQSorted ); 3033 } 3034 3035 #undef BIGFREQ 3036 #undef SETMASK 3037 #undef CLEARMASK 3038 3039 3040 /*---------------------------------------------*/ 3041 /* Pre: 3042 nblock > 0 3043 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 3044 ((UChar*)arr2) [0 .. nblock-1] holds block 3045 arr1 exists for [0 .. nblock-1] 3046 3047 Post: 3048 ((UChar*)arr2) [0 .. nblock-1] holds block 3049 All other areas of block destroyed 3050 ftab [ 0 .. 65536 ] destroyed 3051 arr1 [0 .. nblock-1] holds sorted order 3052 */ 3053 void BZ2_blockSort ( EState* s ) 3054 { 3055 UInt32* ptr = s->ptr; 3056 UChar* block = s->block; 3057 UInt32* ftab = s->ftab; 3058 Int32 nblock = s->nblock; 3059 Int32 verb = s->verbosity; 3060 Int32 wfact = s->workFactor; 3061 UInt16* quadrant; 3062 Int32 budget; 3063 Int32 budgetInit; 3064 Int32 i; 3065 3066 if (nblock < /* 10000 */1000 ) { 3067 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 3068 } else { 3069 /* Calculate the location for quadrant, remembering to get 3070 the alignment right. Assumes that &(block[0]) is at least 3071 2-byte aligned -- this should be ok since block is really 3072 the first section of arr2. 3073 */ 3074 i = nblock+BZ_N_OVERSHOOT; 3075 if (i & 1) i++; 3076 quadrant = (UInt16*)(&(block[i])); 3077 3078 /* (wfact-1) / 3 puts the default-factor-30 3079 transition point at very roughly the same place as 3080 with v0.1 and v0.9.0. 3081 Not that it particularly matters any more, since the 3082 resulting compressed stream is now the same regardless 3083 of whether or not we use the main sort or fallback sort. 3084 */ 3085 if (wfact < 1 ) wfact = 1; 3086 if (wfact > 100) wfact = 100; 3087 budgetInit = nblock * ((wfact-1) / 3); 3088 budget = budgetInit; 3089 3090 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 3091 if (0 && verb >= 3) 3092 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 3093 budgetInit - budget, 3094 nblock, 3095 (float)(budgetInit - budget) / 3096 (float)(nblock==0 ? 1 : nblock) ); 3097 if (budget < 0) { 3098 if (verb >= 2) 3099 VPrintf0 ( " too repetitive; using fallback" 3100 " sorting algorithm\n" ); 3101 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 3102 } 3103 } 3104 3105 s->origPtr = -1; 3106 for (i = 0; i < s->nblock; i++) 3107 if (ptr[i] == 0) 3108 { s->origPtr = i; break; }; 3109 3110 AssertH( s->origPtr != -1, 1003 ); 3111 } 3112 3113 3114 /*-------------------------------------------------------------*/ 3115 /*--- end blocksort.c ---*/ 3116 /*-------------------------------------------------------------*/ 3117 3118 /*-------------------------------------------------------------*/ 3119 /*--- Huffman coding low-level stuff ---*/ 3120 /*--- huffman.c ---*/ 3121 /*-------------------------------------------------------------*/ 3122 3123 /*-- 3124 This file is a part of bzip2 and/or libbzip2, a program and 3125 library for lossless, block-sorting data compression. 3126 3127 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 3128 3129 Redistribution and use in source and binary forms, with or without 3130 modification, are permitted provided that the following conditions 3131 are met: 3132 3133 1. Redistributions of source code must retain the above copyright 3134 notice, this list of conditions and the following disclaimer. 3135 3136 2. The origin of this software must not be misrepresented; you must 3137 not claim that you wrote the original software. If you use this 3138 software in a product, an acknowledgment in the product 3139 documentation would be appreciated but is not required. 3140 3141 3. Altered source versions must be plainly marked as such, and must 3142 not be misrepresented as being the original software. 3143 3144 4. The name of the author may not be used to endorse or promote 3145 products derived from this software without specific prior written 3146 permission. 3147 3148 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3149 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3150 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3151 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3152 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3153 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3154 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3155 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 3156 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 3157 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 3158 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3159 3160 Julian Seward, Cambridge, UK. 3161 jseward (at) bzip.org 3162 bzip2/libbzip2 version 1.0 of 21 March 2000 3163 3164 This program is based on (at least) the work of: 3165 Mike Burrows 3166 David Wheeler 3167 Peter Fenwick 3168 Alistair Moffat 3169 Radford Neal 3170 Ian H. Witten 3171 Robert Sedgewick 3172 Jon L. Bentley 3173 3174 For more information on these sources, see the manual. 3175 --*/ 3176 3177 3178 3179 /*---------------------------------------------------*/ 3180 #define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 3181 #define DEPTHOF(zz1) ((zz1) & 0x000000ff) 3182 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 3183 3184 #define ADDWEIGHTS(zw1,zw2) \ 3185 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 3186 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 3187 3188 #define UPHEAP(z) \ 3189 { \ 3190 Int32 zz, tmp; \ 3191 zz = z; tmp = heap[zz]; \ 3192 while (weight[tmp] < weight[heap[zz >> 1]]) { \ 3193 heap[zz] = heap[zz >> 1]; \ 3194 zz >>= 1; \ 3195 } \ 3196 heap[zz] = tmp; \ 3197 } 3198 3199 #define DOWNHEAP(z) \ 3200 { \ 3201 Int32 zz, yy, tmp; \ 3202 zz = z; tmp = heap[zz]; \ 3203 while (True) { \ 3204 yy = zz << 1; \ 3205 if (yy > nHeap) break; \ 3206 if (yy < nHeap && \ 3207 weight[heap[yy+1]] < weight[heap[yy]]) \ 3208 yy++; \ 3209 if (weight[tmp] < weight[heap[yy]]) break; \ 3210 heap[zz] = heap[yy]; \ 3211 zz = yy; \ 3212 } \ 3213 heap[zz] = tmp; \ 3214 } 3215 3216 3217 /*---------------------------------------------------*/ 3218 void BZ2_hbMakeCodeLengths ( UChar *len, 3219 Int32 *freq, 3220 Int32 alphaSize, 3221 Int32 maxLen ) 3222 { 3223 /*-- 3224 Nodes and heap entries run from 1. Entry 0 3225 for both the heap and nodes is a sentinel. 3226 --*/ 3227 Int32 nNodes, nHeap, n1, n2, i, j, k; 3228 Bool tooLong; 3229 3230 Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 3231 Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 3232 Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 3233 3234 for (i = 0; i < alphaSize; i++) 3235 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 3236 3237 while (True) { 3238 3239 nNodes = alphaSize; 3240 nHeap = 0; 3241 3242 heap[0] = 0; 3243 weight[0] = 0; 3244 parent[0] = -2; 3245 3246 for (i = 1; i <= alphaSize; i++) { 3247 parent[i] = -1; 3248 nHeap++; 3249 heap[nHeap] = i; 3250 UPHEAP(nHeap); 3251 } 3252 3253 AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 3254 3255 while (nHeap > 1) { 3256 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 3257 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 3258 nNodes++; 3259 parent[n1] = parent[n2] = nNodes; 3260 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 3261 parent[nNodes] = -1; 3262 nHeap++; 3263 heap[nHeap] = nNodes; 3264 UPHEAP(nHeap); 3265 } 3266 3267 AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 3268 3269 tooLong = False; 3270 for (i = 1; i <= alphaSize; i++) { 3271 j = 0; 3272 k = i; 3273 while (parent[k] >= 0) { k = parent[k]; j++; } 3274 len[i-1] = j; 3275 if (j > maxLen) tooLong = True; 3276 } 3277 3278 if (! tooLong) break; 3279 3280 /* 17 Oct 04: keep-going condition for the following loop used 3281 to be 'i < alphaSize', which missed the last element, 3282 theoretically leading to the possibility of the compressor 3283 looping. However, this count-scaling step is only needed if 3284 one of the generated Huffman code words is longer than 3285 maxLen, which up to and including version 1.0.2 was 20 bits, 3286 which is extremely unlikely. In version 1.0.3 maxLen was 3287 changed to 17 bits, which has minimal effect on compression 3288 ratio, but does mean this scaling step is used from time to 3289 time, enough to verify that it works. 3290 3291 This means that bzip2-1.0.3 and later will only produce 3292 Huffman codes with a maximum length of 17 bits. However, in 3293 order to preserve backwards compatibility with bitstreams 3294 produced by versions pre-1.0.3, the decompressor must still 3295 handle lengths of up to 20. */ 3296 3297 for (i = 1; i <= alphaSize; i++) { 3298 j = weight[i] >> 8; 3299 j = 1 + (j / 2); 3300 weight[i] = j << 8; 3301 } 3302 } 3303 } 3304 3305 3306 /*---------------------------------------------------*/ 3307 void BZ2_hbAssignCodes ( Int32 *code, 3308 UChar *length, 3309 Int32 minLen, 3310 Int32 maxLen, 3311 Int32 alphaSize ) 3312 { 3313 Int32 n, vec, i; 3314 3315 vec = 0; 3316 for (n = minLen; n <= maxLen; n++) { 3317 for (i = 0; i < alphaSize; i++) 3318 if (length[i] == n) { code[i] = vec; vec++; }; 3319 vec <<= 1; 3320 } 3321 } 3322 3323 3324 /*---------------------------------------------------*/ 3325 void BZ2_hbCreateDecodeTables ( Int32 *limit, 3326 Int32 *base, 3327 Int32 *perm, 3328 UChar *length, 3329 Int32 minLen, 3330 Int32 maxLen, 3331 Int32 alphaSize ) 3332 { 3333 Int32 pp, i, j, vec; 3334 3335 pp = 0; 3336 for (i = minLen; i <= maxLen; i++) 3337 for (j = 0; j < alphaSize; j++) 3338 if (length[j] == i) { perm[pp] = j; pp++; }; 3339 3340 for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 3341 for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 3342 3343 for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 3344 3345 for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 3346 vec = 0; 3347 3348 for (i = minLen; i <= maxLen; i++) { 3349 vec += (base[i+1] - base[i]); 3350 limit[i] = vec-1; 3351 vec <<= 1; 3352 } 3353 for (i = minLen + 1; i <= maxLen; i++) 3354 base[i] = ((limit[i-1] + 1) << 1) - base[i]; 3355 } 3356 3357 3358 /*-------------------------------------------------------------*/ 3359 /*--- end huffman.c ---*/ 3360 /*-------------------------------------------------------------*/ 3361 3362 /*-------------------------------------------------------------*/ 3363 /*--- Compression machinery (not incl block sorting) ---*/ 3364 /*--- compress.c ---*/ 3365 /*-------------------------------------------------------------*/ 3366 3367 /*-- 3368 This file is a part of bzip2 and/or libbzip2, a program and 3369 library for lossless, block-sorting data compression. 3370 3371 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 3372 3373 Redistribution and use in source and binary forms, with or without 3374 modification, are permitted provided that the following conditions 3375 are met: 3376 3377 1. Redistributions of source code must retain the above copyright 3378 notice, this list of conditions and the following disclaimer. 3379 3380 2. The origin of this software must not be misrepresented; you must 3381 not claim that you wrote the original software. If you use this 3382 software in a product, an acknowledgment in the product 3383 documentation would be appreciated but is not required. 3384 3385 3. Altered source versions must be plainly marked as such, and must 3386 not be misrepresented as being the original software. 3387 3388 4. The name of the author may not be used to endorse or promote 3389 products derived from this software without specific prior written 3390 permission. 3391 3392 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3393 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3394 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3395 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3396 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3397 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3398 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3399 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 3400 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 3401 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 3402 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3403 3404 Julian Seward, Cambridge, UK. 3405 jseward (at) bzip.org 3406 bzip2/libbzip2 version 1.0 of 21 March 2000 3407 3408 This program is based on (at least) the work of: 3409 Mike Burrows 3410 David Wheeler 3411 Peter Fenwick 3412 Alistair Moffat 3413 Radford Neal 3414 Ian H. Witten 3415 Robert Sedgewick 3416 Jon L. Bentley 3417 3418 For more information on these sources, see the manual. 3419 --*/ 3420 3421 /*-- 3422 CHANGES 3423 ~~~~~~~ 3424 0.9.0 -- original version. 3425 3426 0.9.0a/b -- no changes in this file. 3427 3428 0.9.0c 3429 * changed setting of nGroups in sendMTFValues() so as to 3430 do a bit better on small files 3431 --*/ 3432 3433 3434 3435 /*---------------------------------------------------*/ 3436 /*--- Bit stream I/O ---*/ 3437 /*---------------------------------------------------*/ 3438 3439 /*---------------------------------------------------*/ 3440 void BZ2_bsInitWrite ( EState* s ) 3441 { 3442 s->bsLive = 0; 3443 s->bsBuff = 0; 3444 } 3445 3446 3447 /*---------------------------------------------------*/ 3448 static 3449 void bsFinishWrite ( EState* s ) 3450 { 3451 while (s->bsLive > 0) { 3452 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); 3453 s->numZ++; 3454 s->bsBuff <<= 8; 3455 s->bsLive -= 8; 3456 } 3457 } 3458 3459 3460 /*---------------------------------------------------*/ 3461 #define bsNEEDW(nz) \ 3462 { \ 3463 while (s->bsLive >= 8) { \ 3464 s->zbits[s->numZ] \ 3465 = (UChar)(s->bsBuff >> 24); \ 3466 s->numZ++; \ 3467 s->bsBuff <<= 8; \ 3468 s->bsLive -= 8; \ 3469 } \ 3470 } 3471 3472 3473 /*---------------------------------------------------*/ 3474 static 3475 __inline__ 3476 void bsW ( EState* s, Int32 n, UInt32 v ) 3477 { 3478 bsNEEDW ( n ); 3479 s->bsBuff |= (v << (32 - s->bsLive - n)); 3480 s->bsLive += n; 3481 } 3482 3483 3484 /*---------------------------------------------------*/ 3485 static 3486 void bsPutUInt32 ( EState* s, UInt32 u ) 3487 { 3488 bsW ( s, 8, (u >> 24) & 0xffL ); 3489 bsW ( s, 8, (u >> 16) & 0xffL ); 3490 bsW ( s, 8, (u >> 8) & 0xffL ); 3491 bsW ( s, 8, u & 0xffL ); 3492 } 3493 3494 3495 /*---------------------------------------------------*/ 3496 static 3497 void bsPutUChar ( EState* s, UChar c ) 3498 { 3499 bsW( s, 8, (UInt32)c ); 3500 } 3501 3502 3503 /*---------------------------------------------------*/ 3504 /*--- The back end proper ---*/ 3505 /*---------------------------------------------------*/ 3506 3507 /*---------------------------------------------------*/ 3508 static 3509 void makeMaps_e ( EState* s ) 3510 { 3511 Int32 i; 3512 s->nInUse = 0; 3513 for (i = 0; i < 256; i++) 3514 if (s->inUse[i]) { 3515 s->unseqToSeq[i] = s->nInUse; 3516 s->nInUse++; 3517 } 3518 } 3519 3520 3521 /*---------------------------------------------------*/ 3522 static 3523 void generateMTFValues ( EState* s ) 3524 { 3525 UChar yy[256]; 3526 Int32 i, j; 3527 Int32 zPend; 3528 Int32 wr; 3529 Int32 EOB; 3530 3531 /* 3532 After sorting (eg, here), 3533 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, 3534 and 3535 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 3536 holds the original block data. 3537 3538 The first thing to do is generate the MTF values, 3539 and put them in 3540 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. 3541 Because there are strictly fewer or equal MTF values 3542 than block values, ptr values in this area are overwritten 3543 with MTF values only when they are no longer needed. 3544 3545 The final compressed bitstream is generated into the 3546 area starting at 3547 (UChar*) (&((UChar*)s->arr2)[s->nblock]) 3548 3549 These storage aliases are set up in bzCompressInit(), 3550 except for the last one, which is arranged in 3551 compressBlock(). 3552 */ 3553 UInt32* ptr = s->ptr; 3554 UChar* block = s->block; 3555 UInt16* mtfv = s->mtfv; 3556 3557 makeMaps_e ( s ); 3558 EOB = s->nInUse+1; 3559 3560 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; 3561 3562 wr = 0; 3563 zPend = 0; 3564 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; 3565 3566 for (i = 0; i < s->nblock; i++) { 3567 UChar ll_i; 3568 AssertD ( wr <= i, "generateMTFValues(1)" ); 3569 j = ptr[i]-1; if (j < 0) j += s->nblock; 3570 ll_i = s->unseqToSeq[block[j]]; 3571 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 3572 3573 if (yy[0] == ll_i) { 3574 zPend++; 3575 } else { 3576 3577 if (zPend > 0) { 3578 zPend--; 3579 while (True) { 3580 if (zPend & 1) { 3581 mtfv[wr] = BZ_RUNB; wr++; 3582 s->mtfFreq[BZ_RUNB]++; 3583 } else { 3584 mtfv[wr] = BZ_RUNA; wr++; 3585 s->mtfFreq[BZ_RUNA]++; 3586 } 3587 if (zPend < 2) break; 3588 zPend = (zPend - 2) / 2; 3589 }; 3590 zPend = 0; 3591 } 3592 { 3593 register UChar rtmp; 3594 register UChar* ryy_j; 3595 register UChar rll_i; 3596 rtmp = yy[1]; 3597 yy[1] = yy[0]; 3598 ryy_j = &(yy[1]); 3599 rll_i = ll_i; 3600 while ( rll_i != rtmp ) { 3601 register UChar rtmp2; 3602 ryy_j++; 3603 rtmp2 = rtmp; 3604 rtmp = *ryy_j; 3605 *ryy_j = rtmp2; 3606 }; 3607 yy[0] = rtmp; 3608 j = ryy_j - &(yy[0]); 3609 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; 3610 } 3611 3612 } 3613 } 3614 3615 if (zPend > 0) { 3616 zPend--; 3617 while (True) { 3618 if (zPend & 1) { 3619 mtfv[wr] = BZ_RUNB; wr++; 3620 s->mtfFreq[BZ_RUNB]++; 3621 } else { 3622 mtfv[wr] = BZ_RUNA; wr++; 3623 s->mtfFreq[BZ_RUNA]++; 3624 } 3625 if (zPend < 2) break; 3626 zPend = (zPend - 2) / 2; 3627 }; 3628 zPend = 0; 3629 } 3630 3631 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; 3632 3633 s->nMTF = wr; 3634 } 3635 3636 3637 /*---------------------------------------------------*/ 3638 #define BZ_LESSER_ICOST 0 3639 #define BZ_GREATER_ICOST 15 3640 3641 static 3642 void sendMTFValues ( EState* s ) 3643 { 3644 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 3645 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 3646 Int32 nGroups, nBytes; 3647 3648 /*-- 3649 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3650 is a global since the decoder also needs it. 3651 3652 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3653 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3654 are also globals only used in this proc. 3655 Made global to keep stack frame size small. 3656 --*/ 3657 3658 3659 UInt16 cost[BZ_N_GROUPS]; 3660 Int32 fave[BZ_N_GROUPS]; 3661 3662 UInt16* mtfv = s->mtfv; 3663 3664 if (s->verbosity >= 3) 3665 VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 3666 "%d+2 syms in use\n", 3667 s->nblock, s->nMTF, s->nInUse ); 3668 3669 alphaSize = s->nInUse+2; 3670 for (t = 0; t < BZ_N_GROUPS; t++) 3671 for (v = 0; v < alphaSize; v++) 3672 s->len[t][v] = BZ_GREATER_ICOST; 3673 3674 /*--- Decide how many coding tables to use ---*/ 3675 AssertH ( s->nMTF > 0, 3001 ); 3676 if (s->nMTF < 200) nGroups = 2; else 3677 if (s->nMTF < 600) nGroups = 3; else 3678 if (s->nMTF < 1200) nGroups = 4; else 3679 if (s->nMTF < 2400) nGroups = 5; else 3680 nGroups = 6; 3681 3682 /*--- Generate an initial set of coding tables ---*/ 3683 { 3684 Int32 nPart, remF, tFreq, aFreq; 3685 3686 nPart = nGroups; 3687 remF = s->nMTF; 3688 gs = 0; 3689 while (nPart > 0) { 3690 tFreq = remF / nPart; 3691 ge = gs-1; 3692 aFreq = 0; 3693 while (aFreq < tFreq && ge < alphaSize-1) { 3694 ge++; 3695 aFreq += s->mtfFreq[ge]; 3696 } 3697 3698 if (ge > gs 3699 && nPart != nGroups && nPart != 1 3700 && ((nGroups-nPart) % 2 == 1)) { 3701 aFreq -= s->mtfFreq[ge]; 3702 ge--; 3703 } 3704 3705 if (0 && s->verbosity >= 3) 3706 VPrintf5( " initial group %d, [%d .. %d], " 3707 "has %d syms (%4.1f%%)\n", 3708 nPart, gs, ge, aFreq, 3709 (100.0 * (float)aFreq) / (float)(s->nMTF) ); 3710 3711 for (v = 0; v < alphaSize; v++) 3712 if (v >= gs && v <= ge) 3713 s->len[nPart-1][v] = BZ_LESSER_ICOST; else 3714 s->len[nPart-1][v] = BZ_GREATER_ICOST; 3715 3716 nPart--; 3717 gs = ge+1; 3718 remF -= aFreq; 3719 } 3720 } 3721 3722 /*--- 3723 Iterate up to BZ_N_ITERS times to improve the tables. 3724 ---*/ 3725 for (iter = 0; iter < BZ_N_ITERS; iter++) { 3726 3727 for (t = 0; t < nGroups; t++) fave[t] = 0; 3728 3729 for (t = 0; t < nGroups; t++) 3730 for (v = 0; v < alphaSize; v++) 3731 s->rfreq[t][v] = 0; 3732 3733 /*--- 3734 Set up an auxiliary length table which is used to fast-track 3735 the common case (nGroups == 6). 3736 ---*/ 3737 if (nGroups == 6) { 3738 for (v = 0; v < alphaSize; v++) { 3739 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; 3740 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; 3741 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; 3742 } 3743 } 3744 3745 nSelectors = 0; 3746 totc = 0; 3747 gs = 0; 3748 while (True) { 3749 3750 /*--- Set group start & end marks. --*/ 3751 if (gs >= s->nMTF) break; 3752 ge = gs + BZ_G_SIZE - 1; 3753 if (ge >= s->nMTF) ge = s->nMTF-1; 3754 3755 /*-- 3756 Calculate the cost of this group as coded 3757 by each of the coding tables. 3758 --*/ 3759 for (t = 0; t < nGroups; t++) cost[t] = 0; 3760 3761 if (nGroups == 6 && 50 == ge-gs+1) { 3762 /*--- fast track the common case ---*/ 3763 register UInt32 cost01, cost23, cost45; 3764 register UInt16 icv; 3765 cost01 = cost23 = cost45 = 0; 3766 3767 # define BZ_ITER(nn) \ 3768 icv = mtfv[gs+(nn)]; \ 3769 cost01 += s->len_pack[icv][0]; \ 3770 cost23 += s->len_pack[icv][1]; \ 3771 cost45 += s->len_pack[icv][2]; \ 3772 3773 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); 3774 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); 3775 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); 3776 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); 3777 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); 3778 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); 3779 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); 3780 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); 3781 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); 3782 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); 3783 3784 # undef BZ_ITER 3785 3786 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; 3787 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; 3788 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; 3789 3790 } else { 3791 /*--- slow version which correctly handles all situations ---*/ 3792 for (i = gs; i <= ge; i++) { 3793 UInt16 icv = mtfv[i]; 3794 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 3795 } 3796 } 3797 3798 /*-- 3799 Find the coding table which is best for this group, 3800 and record its identity in the selector table. 3801 --*/ 3802 bc = 999999999; bt = -1; 3803 for (t = 0; t < nGroups; t++) 3804 if (cost[t] < bc) { bc = cost[t]; bt = t; }; 3805 totc += bc; 3806 fave[bt]++; 3807 s->selector[nSelectors] = bt; 3808 nSelectors++; 3809 3810 /*-- 3811 Increment the symbol frequencies for the selected table. 3812 --*/ 3813 if (nGroups == 6 && 50 == ge-gs+1) { 3814 /*--- fast track the common case ---*/ 3815 3816 # define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ 3817 3818 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); 3819 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); 3820 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); 3821 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); 3822 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); 3823 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); 3824 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); 3825 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); 3826 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); 3827 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); 3828 3829 # undef BZ_ITUR 3830 3831 } else { 3832 /*--- slow version which correctly handles all situations ---*/ 3833 for (i = gs; i <= ge; i++) 3834 s->rfreq[bt][ mtfv[i] ]++; 3835 } 3836 3837 gs = ge+1; 3838 } 3839 if (s->verbosity >= 3) { 3840 VPrintf2 ( " pass %d: size is %d, grp uses are ", 3841 iter+1, totc/8 ); 3842 for (t = 0; t < nGroups; t++) 3843 VPrintf1 ( "%d ", fave[t] ); 3844 VPrintf0 ( "\n" ); 3845 } 3846 3847 /*-- 3848 Recompute the tables based on the accumulated frequencies. 3849 --*/ 3850 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See 3851 comment in huffman.c for details. */ 3852 for (t = 0; t < nGroups; t++) 3853 BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 3854 alphaSize, 17 /*20*/ ); 3855 } 3856 3857 3858 AssertH( nGroups < 8, 3002 ); 3859 AssertH( nSelectors < 32768 && 3860 nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3861 3003 ); 3862 3863 3864 /*--- Compute MTF values for the selectors. ---*/ 3865 { 3866 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; 3867 for (i = 0; i < nGroups; i++) pos[i] = i; 3868 for (i = 0; i < nSelectors; i++) { 3869 ll_i = s->selector[i]; 3870 j = 0; 3871 tmp = pos[j]; 3872 while ( ll_i != tmp ) { 3873 j++; 3874 tmp2 = tmp; 3875 tmp = pos[j]; 3876 pos[j] = tmp2; 3877 }; 3878 pos[0] = tmp; 3879 s->selectorMtf[i] = j; 3880 } 3881 }; 3882 3883 /*--- Assign actual codes for the tables. --*/ 3884 for (t = 0; t < nGroups; t++) { 3885 minLen = 32; 3886 maxLen = 0; 3887 for (i = 0; i < alphaSize; i++) { 3888 if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 3889 if (s->len[t][i] < minLen) minLen = s->len[t][i]; 3890 } 3891 AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); 3892 AssertH ( !(minLen < 1), 3005 ); 3893 BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 3894 minLen, maxLen, alphaSize ); 3895 } 3896 3897 /*--- Transmit the mapping table. ---*/ 3898 { 3899 Bool inUse16[16]; 3900 for (i = 0; i < 16; i++) { 3901 inUse16[i] = False; 3902 for (j = 0; j < 16; j++) 3903 if (s->inUse[i * 16 + j]) inUse16[i] = True; 3904 } 3905 3906 nBytes = s->numZ; 3907 for (i = 0; i < 16; i++) 3908 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); 3909 3910 for (i = 0; i < 16; i++) 3911 if (inUse16[i]) 3912 for (j = 0; j < 16; j++) { 3913 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); 3914 } 3915 3916 if (s->verbosity >= 3) 3917 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); 3918 } 3919 3920 /*--- Now the selectors. ---*/ 3921 nBytes = s->numZ; 3922 bsW ( s, 3, nGroups ); 3923 bsW ( s, 15, nSelectors ); 3924 for (i = 0; i < nSelectors; i++) { 3925 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); 3926 bsW(s,1,0); 3927 } 3928 if (s->verbosity >= 3) 3929 VPrintf1( "selectors %d, ", s->numZ-nBytes ); 3930 3931 /*--- Now the coding tables. ---*/ 3932 nBytes = s->numZ; 3933 3934 for (t = 0; t < nGroups; t++) { 3935 Int32 curr = s->len[t][0]; 3936 bsW ( s, 5, curr ); 3937 for (i = 0; i < alphaSize; i++) { 3938 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; 3939 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; 3940 bsW ( s, 1, 0 ); 3941 } 3942 } 3943 3944 if (s->verbosity >= 3) 3945 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); 3946 3947 /*--- And finally, the block data proper ---*/ 3948 nBytes = s->numZ; 3949 selCtr = 0; 3950 gs = 0; 3951 while (True) { 3952 if (gs >= s->nMTF) break; 3953 ge = gs + BZ_G_SIZE - 1; 3954 if (ge >= s->nMTF) ge = s->nMTF-1; 3955 AssertH ( s->selector[selCtr] < nGroups, 3006 ); 3956 3957 if (nGroups == 6 && 50 == ge-gs+1) { 3958 /*--- fast track the common case ---*/ 3959 UInt16 mtfv_i; 3960 UChar* s_len_sel_selCtr 3961 = &(s->len[s->selector[selCtr]][0]); 3962 Int32* s_code_sel_selCtr 3963 = &(s->code[s->selector[selCtr]][0]); 3964 3965 # define BZ_ITAH(nn) \ 3966 mtfv_i = mtfv[gs+(nn)]; \ 3967 bsW ( s, \ 3968 s_len_sel_selCtr[mtfv_i], \ 3969 s_code_sel_selCtr[mtfv_i] ) 3970 3971 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); 3972 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); 3973 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); 3974 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); 3975 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); 3976 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); 3977 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); 3978 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); 3979 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); 3980 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); 3981 3982 # undef BZ_ITAH 3983 3984 } else { 3985 /*--- slow version which correctly handles all situations ---*/ 3986 for (i = gs; i <= ge; i++) { 3987 bsW ( s, 3988 s->len [s->selector[selCtr]] [mtfv[i]], 3989 s->code [s->selector[selCtr]] [mtfv[i]] ); 3990 } 3991 } 3992 3993 3994 gs = ge+1; 3995 selCtr++; 3996 } 3997 AssertH( selCtr == nSelectors, 3007 ); 3998 3999 if (s->verbosity >= 3) 4000 VPrintf1( "codes %d\n", s->numZ-nBytes ); 4001 } 4002 4003 4004 /*---------------------------------------------------*/ 4005 void BZ2_compressBlock ( EState* s, Bool is_last_block ) 4006 { 4007 if (s->nblock > 0) { 4008 4009 BZ_FINALISE_CRC ( s->blockCRC ); 4010 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); 4011 s->combinedCRC ^= s->blockCRC; 4012 if (s->blockNo > 1) s->numZ = 0; 4013 4014 if (s->verbosity >= 2) 4015 VPrintf4( " block %d: crc = 0x%08x, " 4016 "combined CRC = 0x%08x, size = %d\n", 4017 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); 4018 4019 BZ2_blockSort ( s ); 4020 } 4021 4022 s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); 4023 4024 /*-- If this is the first block, create the stream header. --*/ 4025 if (s->blockNo == 1) { 4026 BZ2_bsInitWrite ( s ); 4027 bsPutUChar ( s, BZ_HDR_B ); 4028 bsPutUChar ( s, BZ_HDR_Z ); 4029 bsPutUChar ( s, BZ_HDR_h ); 4030 bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); 4031 } 4032 4033 if (s->nblock > 0) { 4034 4035 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); 4036 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); 4037 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); 4038 4039 /*-- Now the block's CRC, so it is in a known place. --*/ 4040 bsPutUInt32 ( s, s->blockCRC ); 4041 4042 /*-- 4043 Now a single bit indicating (non-)randomisation. 4044 As of version 0.9.5, we use a better sorting algorithm 4045 which makes randomisation unnecessary. So always set 4046 the randomised bit to 'no'. Of course, the decoder 4047 still needs to be able to handle randomised blocks 4048 so as to maintain backwards compatibility with 4049 older versions of bzip2. 4050 --*/ 4051 bsW(s,1,0); 4052 4053 bsW ( s, 24, s->origPtr ); 4054 generateMTFValues ( s ); 4055 sendMTFValues ( s ); 4056 } 4057 4058 4059 /*-- If this is the last block, add the stream trailer. --*/ 4060 if (is_last_block) { 4061 4062 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 4063 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 4064 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 4065 bsPutUInt32 ( s, s->combinedCRC ); 4066 if (s->verbosity >= 2) 4067 VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); 4068 bsFinishWrite ( s ); 4069 } 4070 } 4071 4072 4073 /*-------------------------------------------------------------*/ 4074 /*--- end compress.c ---*/ 4075 /*-------------------------------------------------------------*/ 4076 4077 4078 /*-------------------------------------------------------------*/ 4079 /*--- Table for randomising repetitive blocks ---*/ 4080 /*--- randtable.c ---*/ 4081 /*-------------------------------------------------------------*/ 4082 4083 /*-- 4084 This file is a part of bzip2 and/or libbzip2, a program and 4085 library for lossless, block-sorting data compression. 4086 4087 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4088 4089 Redistribution and use in source and binary forms, with or without 4090 modification, are permitted provided that the following conditions 4091 are met: 4092 4093 1. Redistributions of source code must retain the above copyright 4094 notice, this list of conditions and the following disclaimer. 4095 4096 2. The origin of this software must not be misrepresented; you must 4097 not claim that you wrote the original software. If you use this 4098 software in a product, an acknowledgment in the product 4099 documentation would be appreciated but is not required. 4100 4101 3. Altered source versions must be plainly marked as such, and must 4102 not be misrepresented as being the original software. 4103 4104 4. The name of the author may not be used to endorse or promote 4105 products derived from this software without specific prior written 4106 permission. 4107 4108 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4109 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4110 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4111 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4112 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4113 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4114 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4115 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4116 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4117 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4118 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4119 4120 Julian Seward, Cambridge, UK. 4121 jseward (at) bzip.org 4122 bzip2/libbzip2 version 1.0 of 21 March 2000 4123 4124 This program is based on (at least) the work of: 4125 Mike Burrows 4126 David Wheeler 4127 Peter Fenwick 4128 Alistair Moffat 4129 Radford Neal 4130 Ian H. Witten 4131 Robert Sedgewick 4132 Jon L. Bentley 4133 4134 For more information on these sources, see the manual. 4135 --*/ 4136 4137 4138 4139 4140 /*---------------------------------------------*/ 4141 Int32 BZ2_rNums[512] = { 4142 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 4143 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 4144 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 4145 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 4146 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 4147 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 4148 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 4149 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 4150 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 4151 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 4152 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 4153 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 4154 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 4155 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 4156 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 4157 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 4158 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 4159 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 4160 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 4161 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 4162 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 4163 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 4164 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 4165 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 4166 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 4167 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 4168 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 4169 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 4170 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 4171 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 4172 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 4173 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 4174 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 4175 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 4176 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 4177 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 4178 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 4179 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 4180 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 4181 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 4182 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 4183 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 4184 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 4185 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 4186 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 4187 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 4188 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 4189 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 4190 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 4191 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 4192 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 4193 936, 638 4194 }; 4195 4196 4197 /*-------------------------------------------------------------*/ 4198 /*--- end randtable.c ---*/ 4199 /*-------------------------------------------------------------*/ 4200 4201 /*-------------------------------------------------------------*/ 4202 /*--- Table for doing CRCs ---*/ 4203 /*--- crctable.c ---*/ 4204 /*-------------------------------------------------------------*/ 4205 4206 /*-- 4207 This file is a part of bzip2 and/or libbzip2, a program and 4208 library for lossless, block-sorting data compression. 4209 4210 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4211 4212 Redistribution and use in source and binary forms, with or without 4213 modification, are permitted provided that the following conditions 4214 are met: 4215 4216 1. Redistributions of source code must retain the above copyright 4217 notice, this list of conditions and the following disclaimer. 4218 4219 2. The origin of this software must not be misrepresented; you must 4220 not claim that you wrote the original software. If you use this 4221 software in a product, an acknowledgment in the product 4222 documentation would be appreciated but is not required. 4223 4224 3. Altered source versions must be plainly marked as such, and must 4225 not be misrepresented as being the original software. 4226 4227 4. The name of the author may not be used to endorse or promote 4228 products derived from this software without specific prior written 4229 permission. 4230 4231 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4232 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4233 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4234 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4235 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4236 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4237 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4238 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4239 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4240 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4241 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4242 4243 Julian Seward, Cambridge, UK. 4244 jseward (at) bzip.org 4245 bzip2/libbzip2 version 1.0 of 21 March 2000 4246 4247 This program is based on (at least) the work of: 4248 Mike Burrows 4249 David Wheeler 4250 Peter Fenwick 4251 Alistair Moffat 4252 Radford Neal 4253 Ian H. Witten 4254 Robert Sedgewick 4255 Jon L. Bentley 4256 4257 For more information on these sources, see the manual. 4258 --*/ 4259 4260 4261 4262 4263 4264 /*-- 4265 I think this is an implementation of the AUTODIN-II, 4266 Ethernet & FDDI 32-bit CRC standard. Vaguely derived 4267 from code by Rob Warnock, in Section 51 of the 4268 comp.compression FAQ. 4269 --*/ 4270 4271 UInt32 BZ2_crc32Table[256] = { 4272 4273 /*-- Ugly, innit? --*/ 4274 4275 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, 4276 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, 4277 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, 4278 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, 4279 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, 4280 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, 4281 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, 4282 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, 4283 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, 4284 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, 4285 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, 4286 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, 4287 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, 4288 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, 4289 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, 4290 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, 4291 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, 4292 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, 4293 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, 4294 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, 4295 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, 4296 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, 4297 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, 4298 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, 4299 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, 4300 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, 4301 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, 4302 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, 4303 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, 4304 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, 4305 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, 4306 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, 4307 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, 4308 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, 4309 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, 4310 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, 4311 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, 4312 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, 4313 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, 4314 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, 4315 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, 4316 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, 4317 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, 4318 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, 4319 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, 4320 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, 4321 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, 4322 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, 4323 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, 4324 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, 4325 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, 4326 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, 4327 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, 4328 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, 4329 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, 4330 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, 4331 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, 4332 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, 4333 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, 4334 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, 4335 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, 4336 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, 4337 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, 4338 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L 4339 }; 4340 4341 4342 /*-------------------------------------------------------------*/ 4343 /*--- end crctable.c ---*/ 4344 /*-------------------------------------------------------------*/ 4345 4346 /*-------------------------------------------------------------*/ 4347 /*--- Library top-level functions. ---*/ 4348 /*--- bzlib.c ---*/ 4349 /*-------------------------------------------------------------*/ 4350 4351 /*-- 4352 This file is a part of bzip2 and/or libbzip2, a program and 4353 library for lossless, block-sorting data compression. 4354 4355 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4356 4357 Redistribution and use in source and binary forms, with or without 4358 modification, are permitted provided that the following conditions 4359 are met: 4360 4361 1. Redistributions of source code must retain the above copyright 4362 notice, this list of conditions and the following disclaimer. 4363 4364 2. The origin of this software must not be misrepresented; you must 4365 not claim that you wrote the original software. If you use this 4366 software in a product, an acknowledgment in the product 4367 documentation would be appreciated but is not required. 4368 4369 3. Altered source versions must be plainly marked as such, and must 4370 not be misrepresented as being the original software. 4371 4372 4. The name of the author may not be used to endorse or promote 4373 products derived from this software without specific prior written 4374 permission. 4375 4376 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4377 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4378 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4379 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4380 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4381 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4382 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4383 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4384 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4385 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4386 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4387 4388 Julian Seward, Cambridge, UK. 4389 jseward (at) bzip.org 4390 bzip2/libbzip2 version 1.0 of 21 March 2000 4391 4392 This program is based on (at least) the work of: 4393 Mike Burrows 4394 David Wheeler 4395 Peter Fenwick 4396 Alistair Moffat 4397 Radford Neal 4398 Ian H. Witten 4399 Robert Sedgewick 4400 Jon L. Bentley 4401 4402 For more information on these sources, see the manual. 4403 --*/ 4404 4405 /*-- 4406 CHANGES 4407 ~~~~~~~ 4408 0.9.0 -- original version. 4409 4410 0.9.0a/b -- no changes in this file. 4411 4412 0.9.0c 4413 * made zero-length BZ_FLUSH work correctly in bzCompress(). 4414 * fixed bzWrite/bzRead to ignore zero-length requests. 4415 * fixed bzread to correctly handle read requests after EOF. 4416 * wrong parameter order in call to bzDecompressInit in 4417 bzBuffToBuffDecompress. Fixed. 4418 --*/ 4419 4420 4421 4422 /*---------------------------------------------------*/ 4423 /*--- Compression stuff ---*/ 4424 /*---------------------------------------------------*/ 4425 4426 4427 /*---------------------------------------------------*/ 4428 void BZ2_bz__AssertH__fail ( int errcode ) 4429 { 4430 vexxx_printf("BZ2_bz__AssertH__fail(%d) called, exiting\n", errcode); 4431 (*serviceFn)(0,0); 4432 } 4433 4434 void bz_internal_error ( int errcode ) 4435 { 4436 vexxx_printf("bz_internal_error called, exiting\n", errcode); 4437 (*serviceFn)(0,0); 4438 } 4439 4440 /*---------------------------------------------------*/ 4441 static 4442 int bz_config_ok ( void ) 4443 { 4444 if (sizeof(int) != 4) return 0; 4445 if (sizeof(short) != 2) return 0; 4446 if (sizeof(char) != 1) return 0; 4447 return 1; 4448 } 4449 4450 4451 /*---------------------------------------------------*/ 4452 static 4453 void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) 4454 { 4455 void* v = (void*) (*serviceFn)(2, items * size ); 4456 return v; 4457 } 4458 4459 static 4460 void default_bzfree ( void* opaque, void* addr ) 4461 { 4462 if (addr != NULL) (*serviceFn)( 3, (HWord)addr ); 4463 } 4464 4465 4466 /*---------------------------------------------------*/ 4467 static 4468 void prepare_new_block ( EState* s ) 4469 { 4470 Int32 i; 4471 s->nblock = 0; 4472 s->numZ = 0; 4473 s->state_out_pos = 0; 4474 BZ_INITIALISE_CRC ( s->blockCRC ); 4475 for (i = 0; i < 256; i++) s->inUse[i] = False; 4476 s->blockNo++; 4477 } 4478 4479 4480 /*---------------------------------------------------*/ 4481 static 4482 void init_RL ( EState* s ) 4483 { 4484 s->state_in_ch = 256; 4485 s->state_in_len = 0; 4486 } 4487 4488 4489 static 4490 Bool isempty_RL ( EState* s ) 4491 { 4492 if (s->state_in_ch < 256 && s->state_in_len > 0) 4493 return False; else 4494 return True; 4495 } 4496 4497 4498 /*---------------------------------------------------*/ 4499 int BZ_API(BZ2_bzCompressInit) 4500 ( bz_stream* strm, 4501 int blockSize100k, 4502 int verbosity, 4503 int workFactor ) 4504 { 4505 Int32 n; 4506 EState* s; 4507 4508 if (!bz_config_ok()) return BZ_CONFIG_ERROR; 4509 4510 if (strm == NULL || 4511 blockSize100k < 1 || blockSize100k > 9 || 4512 workFactor < 0 || workFactor > 250) 4513 return BZ_PARAM_ERROR; 4514 4515 if (workFactor == 0) workFactor = 30; 4516 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; 4517 if (strm->bzfree == NULL) strm->bzfree = default_bzfree; 4518 4519 s = BZALLOC( sizeof(EState) ); 4520 if (s == NULL) return BZ_MEM_ERROR; 4521 s->strm = strm; 4522 4523 s->arr1 = NULL; 4524 s->arr2 = NULL; 4525 s->ftab = NULL; 4526 4527 n = 100000 * blockSize100k; 4528 s->arr1 = BZALLOC( n * sizeof(UInt32) ); 4529 s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); 4530 s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); 4531 4532 if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { 4533 if (s->arr1 != NULL) BZFREE(s->arr1); 4534 if (s->arr2 != NULL) BZFREE(s->arr2); 4535 if (s->ftab != NULL) BZFREE(s->ftab); 4536 if (s != NULL) BZFREE(s); 4537 return BZ_MEM_ERROR; 4538 } 4539 4540 s->blockNo = 0; 4541 s->state = BZ_S_INPUT; 4542 s->mode = BZ_M_RUNNING; 4543 s->combinedCRC = 0; 4544 s->blockSize100k = blockSize100k; 4545 s->nblockMAX = 100000 * blockSize100k - 19; 4546 s->verbosity = verbosity; 4547 s->workFactor = workFactor; 4548 4549 s->block = (UChar*)s->arr2; 4550 s->mtfv = (UInt16*)s->arr1; 4551 s->zbits = NULL; 4552 s->ptr = (UInt32*)s->arr1; 4553 4554 strm->state = s; 4555 strm->total_in_lo32 = 0; 4556 strm->total_in_hi32 = 0; 4557 strm->total_out_lo32 = 0; 4558 strm->total_out_hi32 = 0; 4559 init_RL ( s ); 4560 prepare_new_block ( s ); 4561 return BZ_OK; 4562 } 4563 4564 4565 /*---------------------------------------------------*/ 4566 static 4567 void add_pair_to_block ( EState* s ) 4568 { 4569 Int32 i; 4570 UChar ch = (UChar)(s->state_in_ch); 4571 for (i = 0; i < s->state_in_len; i++) { 4572 BZ_UPDATE_CRC( s->blockCRC, ch ); 4573 } 4574 s->inUse[s->state_in_ch] = True; 4575 switch (s->state_in_len) { 4576 case 1: 4577 s->block[s->nblock] = (UChar)ch; s->nblock++; 4578 break; 4579 case 2: 4580 s->block[s->nblock] = (UChar)ch; s->nblock++; 4581 s->block[s->nblock] = (UChar)ch; s->nblock++; 4582 break; 4583 case 3: 4584 s->block[s->nblock] = (UChar)ch; s->nblock++; 4585 s->block[s->nblock] = (UChar)ch; s->nblock++; 4586 s->block[s->nblock] = (UChar)ch; s->nblock++; 4587 break; 4588 default: 4589 s->inUse[s->state_in_len-4] = True; 4590 s->block[s->nblock] = (UChar)ch; s->nblock++; 4591 s->block[s->nblock] = (UChar)ch; s->nblock++; 4592 s->block[s->nblock] = (UChar)ch; s->nblock++; 4593 s->block[s->nblock] = (UChar)ch; s->nblock++; 4594 s->block[s->nblock] = ((UChar)(s->state_in_len-4)); 4595 s->nblock++; 4596 break; 4597 } 4598 } 4599 4600 4601 /*---------------------------------------------------*/ 4602 static 4603 void flush_RL ( EState* s ) 4604 { 4605 if (s->state_in_ch < 256) add_pair_to_block ( s ); 4606 init_RL ( s ); 4607 } 4608 4609 4610 /*---------------------------------------------------*/ 4611 #define ADD_CHAR_TO_BLOCK(zs,zchh0) \ 4612 { \ 4613 UInt32 zchh = (UInt32)(zchh0); \ 4614 /*-- fast track the common case --*/ \ 4615 if (zchh != zs->state_in_ch && \ 4616 zs->state_in_len == 1) { \ 4617 UChar ch = (UChar)(zs->state_in_ch); \ 4618 BZ_UPDATE_CRC( zs->blockCRC, ch ); \ 4619 zs->inUse[zs->state_in_ch] = True; \ 4620 zs->block[zs->nblock] = (UChar)ch; \ 4621 zs->nblock++; \ 4622 zs->state_in_ch = zchh; \ 4623 } \ 4624 else \ 4625 /*-- general, uncommon cases --*/ \ 4626 if (zchh != zs->state_in_ch || \ 4627 zs->state_in_len == 255) { \ 4628 if (zs->state_in_ch < 256) \ 4629 add_pair_to_block ( zs ); \ 4630 zs->state_in_ch = zchh; \ 4631 zs->state_in_len = 1; \ 4632 } else { \ 4633 zs->state_in_len++; \ 4634 } \ 4635 } 4636 4637 4638 /*---------------------------------------------------*/ 4639 static 4640 Bool copy_input_until_stop ( EState* s ) 4641 { 4642 Bool progress_in = False; 4643 4644 if (s->mode == BZ_M_RUNNING) { 4645 4646 /*-- fast track the common case --*/ 4647 while (True) { 4648 /*-- block full? --*/ 4649 if (s->nblock >= s->nblockMAX) break; 4650 /*-- no input? --*/ 4651 if (s->strm->avail_in == 0) break; 4652 progress_in = True; 4653 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 4654 s->strm->next_in++; 4655 s->strm->avail_in--; 4656 s->strm->total_in_lo32++; 4657 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; 4658 } 4659 4660 } else { 4661 4662 /*-- general, uncommon case --*/ 4663 while (True) { 4664 /*-- block full? --*/ 4665 if (s->nblock >= s->nblockMAX) break; 4666 /*-- no input? --*/ 4667 if (s->strm->avail_in == 0) break; 4668 /*-- flush/finish end? --*/ 4669 if (s->avail_in_expect == 0) break; 4670 progress_in = True; 4671 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 4672 s->strm->next_in++; 4673 s->strm->avail_in--; 4674 s->strm->total_in_lo32++; 4675 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; 4676 s->avail_in_expect--; 4677 } 4678 } 4679 return progress_in; 4680 } 4681 4682 4683 /*---------------------------------------------------*/ 4684 static 4685 Bool copy_output_until_stop ( EState* s ) 4686 { 4687 Bool progress_out = False; 4688 4689 while (True) { 4690 4691 /*-- no output space? --*/ 4692 if (s->strm->avail_out == 0) break; 4693 4694 /*-- block done? --*/ 4695 if (s->state_out_pos >= s->numZ) break; 4696 4697 progress_out = True; 4698 *(s->strm->next_out) = s->zbits[s->state_out_pos]; 4699 s->state_out_pos++; 4700 s->strm->avail_out--; 4701 s->strm->next_out++; 4702 s->strm->total_out_lo32++; 4703 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 4704 } 4705 4706 return progress_out; 4707 } 4708 4709 4710 /*---------------------------------------------------*/ 4711 static 4712 Bool handle_compress ( bz_stream* strm ) 4713 { 4714 Bool progress_in = False; 4715 Bool progress_out = False; 4716 EState* s = strm->state; 4717 4718 while (True) { 4719 4720 if (s->state == BZ_S_OUTPUT) { 4721 progress_out |= copy_output_until_stop ( s ); 4722 if (s->state_out_pos < s->numZ) break; 4723 if (s->mode == BZ_M_FINISHING && 4724 s->avail_in_expect == 0 && 4725 isempty_RL(s)) break; 4726 prepare_new_block ( s ); 4727 s->state = BZ_S_INPUT; 4728 if (s->mode == BZ_M_FLUSHING && 4729 s->avail_in_expect == 0 && 4730 isempty_RL(s)) break; 4731 } 4732 4733 if (s->state == BZ_S_INPUT) { 4734 progress_in |= copy_input_until_stop ( s ); 4735 if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { 4736 flush_RL ( s ); 4737 BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); 4738 s->state = BZ_S_OUTPUT; 4739 } 4740 else 4741 if (s->nblock >= s->nblockMAX) { 4742 BZ2_compressBlock ( s, False ); 4743 s->state = BZ_S_OUTPUT; 4744 } 4745 else 4746 if (s->strm->avail_in == 0) { 4747 break; 4748 } 4749 } 4750 4751 } 4752 4753 return progress_in || progress_out; 4754 } 4755 4756 4757 /*---------------------------------------------------*/ 4758 int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) 4759 { 4760 Bool progress; 4761 EState* s; 4762 if (strm == NULL) return BZ_PARAM_ERROR; 4763 s = strm->state; 4764 if (s == NULL) return BZ_PARAM_ERROR; 4765 if (s->strm != strm) return BZ_PARAM_ERROR; 4766 4767 preswitch: 4768 switch (s->mode) { 4769 4770 case BZ_M_IDLE: 4771 return BZ_SEQUENCE_ERROR; 4772 4773 case BZ_M_RUNNING: 4774 if (action == BZ_RUN) { 4775 progress = handle_compress ( strm ); 4776 return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; 4777 } 4778 else 4779 if (action == BZ_FLUSH) { 4780 s->avail_in_expect = strm->avail_in; 4781 s->mode = BZ_M_FLUSHING; 4782 goto preswitch; 4783 } 4784 else 4785 if (action == BZ_FINISH) { 4786 s->avail_in_expect = strm->avail_in; 4787 s->mode = BZ_M_FINISHING; 4788 goto preswitch; 4789 } 4790 else 4791 return BZ_PARAM_ERROR; 4792 4793 case BZ_M_FLUSHING: 4794 if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; 4795 if (s->avail_in_expect != s->strm->avail_in) 4796 return BZ_SEQUENCE_ERROR; 4797 progress = handle_compress ( strm ); 4798 if (s->avail_in_expect > 0 || !isempty_RL(s) || 4799 s->state_out_pos < s->numZ) return BZ_FLUSH_OK; 4800 s->mode = BZ_M_RUNNING; 4801 return BZ_RUN_OK; 4802 4803 case BZ_M_FINISHING: 4804 if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; 4805 if (s->avail_in_expect != s->strm->avail_in) 4806 return BZ_SEQUENCE_ERROR; 4807 progress = handle_compress ( strm ); 4808 if (!progress) return BZ_SEQUENCE_ERROR; 4809 if (s->avail_in_expect > 0 || !isempty_RL(s) || 4810 s->state_out_pos < s->numZ) return BZ_FINISH_OK; 4811 s->mode = BZ_M_IDLE; 4812 return BZ_STREAM_END; 4813 } 4814 return BZ_OK; /*--not reached--*/ 4815 } 4816 4817 4818 /*---------------------------------------------------*/ 4819 int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) 4820 { 4821 EState* s; 4822 if (strm == NULL) return BZ_PARAM_ERROR; 4823 s = strm->state; 4824 if (s == NULL) return BZ_PARAM_ERROR; 4825 if (s->strm != strm) return BZ_PARAM_ERROR; 4826 4827 if (s->arr1 != NULL) BZFREE(s->arr1); 4828 if (s->arr2 != NULL) BZFREE(s->arr2); 4829 if (s->ftab != NULL) BZFREE(s->ftab); 4830 BZFREE(strm->state); 4831 4832 strm->state = NULL; 4833 4834 return BZ_OK; 4835 } 4836 4837 4838 /*---------------------------------------------------*/ 4839 /*--- Decompression stuff ---*/ 4840 /*---------------------------------------------------*/ 4841 4842 /*---------------------------------------------------*/ 4843 int BZ_API(BZ2_bzDecompressInit) 4844 ( bz_stream* strm, 4845 int verbosity, 4846 int small ) 4847 { 4848 DState* s; 4849 4850 if (!bz_config_ok()) return BZ_CONFIG_ERROR; 4851 4852 if (strm == NULL) return BZ_PARAM_ERROR; 4853 if (small != 0 && small != 1) return BZ_PARAM_ERROR; 4854 if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; 4855 4856 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; 4857 if (strm->bzfree == NULL) strm->bzfree = default_bzfree; 4858 4859 s = BZALLOC( sizeof(DState) ); 4860 if (s == NULL) return BZ_MEM_ERROR; 4861 s->strm = strm; 4862 strm->state = s; 4863 s->state = BZ_X_MAGIC_1; 4864 s->bsLive = 0; 4865 s->bsBuff = 0; 4866 s->calculatedCombinedCRC = 0; 4867 strm->total_in_lo32 = 0; 4868 strm->total_in_hi32 = 0; 4869 strm->total_out_lo32 = 0; 4870 strm->total_out_hi32 = 0; 4871 s->smallDecompress = (Bool)small; 4872 s->ll4 = NULL; 4873 s->ll16 = NULL; 4874 s->tt = NULL; 4875 s->currBlockNo = 0; 4876 s->verbosity = verbosity; 4877 4878 return BZ_OK; 4879 } 4880 4881 4882 /*---------------------------------------------------*/ 4883 /* Return True iff data corruption is discovered. 4884 Returns False if there is no problem. 4885 */ 4886 static 4887 Bool unRLE_obuf_to_output_FAST ( DState* s ) 4888 { 4889 UChar k1; 4890 4891 if (s->blockRandomised) { 4892 4893 while (True) { 4894 /* try to finish existing run */ 4895 while (True) { 4896 if (s->strm->avail_out == 0) return False; 4897 if (s->state_out_len == 0) break; 4898 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 4899 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 4900 s->state_out_len--; 4901 s->strm->next_out++; 4902 s->strm->avail_out--; 4903 s->strm->total_out_lo32++; 4904 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 4905 } 4906 4907 /* can a new run be started? */ 4908 if (s->nblock_used == s->save_nblock+1) return False; 4909 4910 /* Only caused by corrupt data stream? */ 4911 if (s->nblock_used > s->save_nblock+1) 4912 return True; 4913 4914 s->state_out_len = 1; 4915 s->state_out_ch = s->k0; 4916 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4917 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4918 if (s->nblock_used == s->save_nblock+1) continue; 4919 if (k1 != s->k0) { s->k0 = k1; continue; }; 4920 4921 s->state_out_len = 2; 4922 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4923 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4924 if (s->nblock_used == s->save_nblock+1) continue; 4925 if (k1 != s->k0) { s->k0 = k1; continue; }; 4926 4927 s->state_out_len = 3; 4928 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4929 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4930 if (s->nblock_used == s->save_nblock+1) continue; 4931 if (k1 != s->k0) { s->k0 = k1; continue; }; 4932 4933 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4934 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4935 s->state_out_len = ((Int32)k1) + 4; 4936 BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; 4937 s->k0 ^= BZ_RAND_MASK; s->nblock_used++; 4938 } 4939 4940 } else { 4941 4942 /* restore */ 4943 UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC; 4944 UChar c_state_out_ch = s->state_out_ch; 4945 Int32 c_state_out_len = s->state_out_len; 4946 Int32 c_nblock_used = s->nblock_used; 4947 Int32 c_k0 = s->k0; 4948 UInt32* c_tt = s->tt; 4949 UInt32 c_tPos = s->tPos; 4950 char* cs_next_out = s->strm->next_out; 4951 unsigned int cs_avail_out = s->strm->avail_out; 4952 /* end restore */ 4953 4954 UInt32 avail_out_INIT = cs_avail_out; 4955 Int32 s_save_nblockPP = s->save_nblock+1; 4956 unsigned int total_out_lo32_old; 4957 4958 while (True) { 4959 4960 /* try to finish existing run */ 4961 if (c_state_out_len > 0) { 4962 while (True) { 4963 if (cs_avail_out == 0) goto return_notr; 4964 if (c_state_out_len == 1) break; 4965 *( (UChar*)(cs_next_out) ) = c_state_out_ch; 4966 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); 4967 c_state_out_len--; 4968 cs_next_out++; 4969 cs_avail_out--; 4970 } 4971 s_state_out_len_eq_one: 4972 { 4973 if (cs_avail_out == 0) { 4974 c_state_out_len = 1; goto return_notr; 4975 }; 4976 *( (UChar*)(cs_next_out) ) = c_state_out_ch; 4977 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); 4978 cs_next_out++; 4979 cs_avail_out--; 4980 } 4981 } 4982 /* Only caused by corrupt data stream? */ 4983 if (c_nblock_used > s_save_nblockPP) 4984 return True; 4985 4986 /* can a new run be started? */ 4987 if (c_nblock_used == s_save_nblockPP) { 4988 c_state_out_len = 0; goto return_notr; 4989 }; 4990 c_state_out_ch = c_k0; 4991 BZ_GET_FAST_C(k1); c_nblock_used++; 4992 if (k1 != c_k0) { 4993 c_k0 = k1; goto s_state_out_len_eq_one; 4994 }; 4995 if (c_nblock_used == s_save_nblockPP) 4996 goto s_state_out_len_eq_one; 4997 4998 c_state_out_len = 2; 4999 BZ_GET_FAST_C(k1); c_nblock_used++; 5000 if (c_nblock_used == s_save_nblockPP) continue; 5001 if (k1 != c_k0) { c_k0 = k1; continue; }; 5002 5003 c_state_out_len = 3; 5004 BZ_GET_FAST_C(k1); c_nblock_used++; 5005 if (c_nblock_used == s_save_nblockPP) continue; 5006 if (k1 != c_k0) { c_k0 = k1; continue; }; 5007 5008 BZ_GET_FAST_C(k1); c_nblock_used++; 5009 c_state_out_len = ((Int32)k1) + 4; 5010 BZ_GET_FAST_C(c_k0); c_nblock_used++; 5011 } 5012 5013 return_notr: 5014 total_out_lo32_old = s->strm->total_out_lo32; 5015 s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); 5016 if (s->strm->total_out_lo32 < total_out_lo32_old) 5017 s->strm->total_out_hi32++; 5018 5019 /* save */ 5020 s->calculatedBlockCRC = c_calculatedBlockCRC; 5021 s->state_out_ch = c_state_out_ch; 5022 s->state_out_len = c_state_out_len; 5023 s->nblock_used = c_nblock_used; 5024 s->k0 = c_k0; 5025 s->tt = c_tt; 5026 s->tPos = c_tPos; 5027 s->strm->next_out = cs_next_out; 5028 s->strm->avail_out = cs_avail_out; 5029 /* end save */ 5030 } 5031 return False; 5032 } 5033 5034 5035 5036 /*---------------------------------------------------*/ 5037 /* Return True iff data corruption is discovered. 5038 Returns False if there is no problem. 5039 */ 5040 static 5041 Bool unRLE_obuf_to_output_SMALL ( DState* s ) 5042 { 5043 UChar k1; 5044 5045 if (s->blockRandomised) { 5046 5047 while (True) { 5048 /* try to finish existing run */ 5049 while (True) { 5050 if (s->strm->avail_out == 0) return False; 5051 if (s->state_out_len == 0) break; 5052 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 5053 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 5054 s->state_out_len--; 5055 s->strm->next_out++; 5056 s->strm->avail_out--; 5057 s->strm->total_out_lo32++; 5058 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 5059 } 5060 5061 /* can a new run be started? */ 5062 if (s->nblock_used == s->save_nblock+1) return False; 5063 5064 /* Only caused by corrupt data stream? */ 5065 if (s->nblock_used > s->save_nblock+1) 5066 return True; 5067 5068 s->state_out_len = 1; 5069 s->state_out_ch = s->k0; 5070 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5071 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5072 if (s->nblock_used == s->save_nblock+1) continue; 5073 if (k1 != s->k0) { s->k0 = k1; continue; }; 5074 5075 s->state_out_len = 2; 5076 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5077 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5078 if (s->nblock_used == s->save_nblock+1) continue; 5079 if (k1 != s->k0) { s->k0 = k1; continue; }; 5080 5081 s->state_out_len = 3; 5082 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5083 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5084 if (s->nblock_used == s->save_nblock+1) continue; 5085 if (k1 != s->k0) { s->k0 = k1; continue; }; 5086 5087 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5088 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5089 s->state_out_len = ((Int32)k1) + 4; 5090 BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; 5091 s->k0 ^= BZ_RAND_MASK; s->nblock_used++; 5092 } 5093 5094 } else { 5095 5096 while (True) { 5097 /* try to finish existing run */ 5098 while (True) { 5099 if (s->strm->avail_out == 0) return False; 5100 if (s->state_out_len == 0) break; 5101 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 5102 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 5103 s->state_out_len--; 5104 s->strm->next_out++; 5105 s->strm->avail_out--; 5106 s->strm->total_out_lo32++; 5107 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 5108 } 5109 5110 /* can a new run be started? */ 5111 if (s->nblock_used == s->save_nblock+1) return False; 5112 5113 /* Only caused by corrupt data stream? */ 5114 if (s->nblock_used > s->save_nblock+1) 5115 return True; 5116 5117 s->state_out_len = 1; 5118 s->state_out_ch = s->k0; 5119 BZ_GET_SMALL(k1); s->nblock_used++; 5120 if (s->nblock_used == s->save_nblock+1) continue; 5121 if (k1 != s->k0) { s->k0 = k1; continue; }; 5122 5123 s->state_out_len = 2; 5124 BZ_GET_SMALL(k1); s->nblock_used++; 5125 if (s->nblock_used == s->save_nblock+1) continue; 5126 if (k1 != s->k0) { s->k0 = k1; continue; }; 5127 5128 s->state_out_len = 3; 5129 BZ_GET_SMALL(k1); s->nblock_used++; 5130 if (s->nblock_used == s->save_nblock+1) continue; 5131 if (k1 != s->k0) { s->k0 = k1; continue; }; 5132 5133 BZ_GET_SMALL(k1); s->nblock_used++; 5134 s->state_out_len = ((Int32)k1) + 4; 5135 BZ_GET_SMALL(s->k0); s->nblock_used++; 5136 } 5137 5138 } 5139 } 5140 5141 5142 /*---------------------------------------------------*/ 5143 int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) 5144 { 5145 Bool corrupt; 5146 DState* s; 5147 if (strm == NULL) return BZ_PARAM_ERROR; 5148 s = strm->state; 5149 if (s == NULL) return BZ_PARAM_ERROR; 5150 if (s->strm != strm) return BZ_PARAM_ERROR; 5151 5152 while (True) { 5153 if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; 5154 if (s->state == BZ_X_OUTPUT) { 5155 if (s->smallDecompress) 5156 corrupt = unRLE_obuf_to_output_SMALL ( s ); else 5157 corrupt = unRLE_obuf_to_output_FAST ( s ); 5158 if (corrupt) return BZ_DATA_ERROR; 5159 if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { 5160 BZ_FINALISE_CRC ( s->calculatedBlockCRC ); 5161 if (s->verbosity >= 3) 5162 VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, 5163 s->calculatedBlockCRC ); 5164 if (s->verbosity >= 2) VPrintf0 ( "]" ); 5165 if (s->calculatedBlockCRC != s->storedBlockCRC) 5166 return BZ_DATA_ERROR; 5167 s->calculatedCombinedCRC 5168 = (s->calculatedCombinedCRC << 1) | 5169 (s->calculatedCombinedCRC >> 31); 5170 s->calculatedCombinedCRC ^= s->calculatedBlockCRC; 5171 s->state = BZ_X_BLKHDR_1; 5172 } else { 5173 return BZ_OK; 5174 } 5175 } 5176 if (s->state >= BZ_X_MAGIC_1) { 5177 Int32 r = BZ2_decompress ( s ); 5178 if (r == BZ_STREAM_END) { 5179 if (s->verbosity >= 3) 5180 VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", 5181 s->storedCombinedCRC, s->calculatedCombinedCRC ); 5182 if (s->calculatedCombinedCRC != s->storedCombinedCRC) 5183 return BZ_DATA_ERROR; 5184 return r; 5185 } 5186 if (s->state != BZ_X_OUTPUT) return r; 5187 } 5188 } 5189 5190 AssertH ( 0, 6001 ); 5191 5192 return 0; /*NOTREACHED*/ 5193 } 5194 5195 5196 /*---------------------------------------------------*/ 5197 int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ) 5198 { 5199 DState* s; 5200 if (strm == NULL) return BZ_PARAM_ERROR; 5201 s = strm->state; 5202 if (s == NULL) return BZ_PARAM_ERROR; 5203 if (s->strm != strm) return BZ_PARAM_ERROR; 5204 5205 if (s->tt != NULL) BZFREE(s->tt); 5206 if (s->ll16 != NULL) BZFREE(s->ll16); 5207 if (s->ll4 != NULL) BZFREE(s->ll4); 5208 5209 BZFREE(strm->state); 5210 strm->state = NULL; 5211 5212 return BZ_OK; 5213 } 5214 5215 5216 #ifndef BZ_NO_STDIO 5217 /*---------------------------------------------------*/ 5218 /*--- File I/O stuff ---*/ 5219 /*---------------------------------------------------*/ 5220 5221 #define BZ_SETERR(eee) \ 5222 { \ 5223 if (bzerror != NULL) *bzerror = eee; \ 5224 if (bzf != NULL) bzf->lastErr = eee; \ 5225 } 5226 5227 typedef 5228 struct { 5229 FILE* handle; 5230 Char buf[BZ_MAX_UNUSED]; 5231 Int32 bufN; 5232 Bool writing; 5233 bz_stream strm; 5234 Int32 lastErr; 5235 Bool initialisedOk; 5236 } 5237 bzFile; 5238 5239 5240 /*---------------------------------------------*/ 5241 static Bool myfeof ( FILE* f ) 5242 { 5243 Int32 c = fgetc ( f ); 5244 if (c == EOF) return True; 5245 ungetc ( c, f ); 5246 return False; 5247 } 5248 5249 5250 /*---------------------------------------------------*/ 5251 BZFILE* BZ_API(BZ2_bzWriteOpen) 5252 ( int* bzerror, 5253 FILE* f, 5254 int blockSize100k, 5255 int verbosity, 5256 int workFactor ) 5257 { 5258 Int32 ret; 5259 bzFile* bzf = NULL; 5260 5261 BZ_SETERR(BZ_OK); 5262 5263 if (f == NULL || 5264 (blockSize100k < 1 || blockSize100k > 9) || 5265 (workFactor < 0 || workFactor > 250) || 5266 (verbosity < 0 || verbosity > 4)) 5267 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; 5268 5269 if (ferror(f)) 5270 { BZ_SETERR(BZ_IO_ERROR); return NULL; }; 5271 5272 bzf = malloc ( sizeof(bzFile) ); 5273 if (bzf == NULL) 5274 { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; 5275 5276 BZ_SETERR(BZ_OK); 5277 bzf->initialisedOk = False; 5278 bzf->bufN = 0; 5279 bzf->handle = f; 5280 bzf->writing = True; 5281 bzf->strm.bzalloc = NULL; 5282 bzf->strm.bzfree = NULL; 5283 bzf->strm.opaque = NULL; 5284 5285 if (workFactor == 0) workFactor = 30; 5286 ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, 5287 verbosity, workFactor ); 5288 if (ret != BZ_OK) 5289 { BZ_SETERR(ret); free(bzf); return NULL; }; 5290 5291 bzf->strm.avail_in = 0; 5292 bzf->initialisedOk = True; 5293 return bzf; 5294 } 5295 5296 5297 5298 /*---------------------------------------------------*/ 5299 void BZ_API(BZ2_bzWrite) 5300 ( int* bzerror, 5301 BZFILE* b, 5302 void* buf, 5303 int len ) 5304 { 5305 Int32 n, n2, ret; 5306 bzFile* bzf = (bzFile*)b; 5307 5308 BZ_SETERR(BZ_OK); 5309 if (bzf == NULL || buf == NULL || len < 0) 5310 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5311 if (!(bzf->writing)) 5312 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5313 if (ferror(bzf->handle)) 5314 { BZ_SETERR(BZ_IO_ERROR); return; }; 5315 5316 if (len == 0) 5317 { BZ_SETERR(BZ_OK); return; }; 5318 5319 bzf->strm.avail_in = len; 5320 bzf->strm.next_in = buf; 5321 5322 while (True) { 5323 bzf->strm.avail_out = BZ_MAX_UNUSED; 5324 bzf->strm.next_out = bzf->buf; 5325 ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); 5326 if (ret != BZ_RUN_OK) 5327 { BZ_SETERR(ret); return; }; 5328 5329 if (bzf->strm.avail_out < BZ_MAX_UNUSED) { 5330 n = BZ_MAX_UNUSED - bzf->strm.avail_out; 5331 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 5332 n, bzf->handle ); 5333 if (n != n2 || ferror(bzf->handle)) 5334 { BZ_SETERR(BZ_IO_ERROR); return; }; 5335 } 5336 5337 if (bzf->strm.avail_in == 0) 5338 { BZ_SETERR(BZ_OK); return; }; 5339 } 5340 } 5341 5342 5343 /*---------------------------------------------------*/ 5344 void BZ_API(BZ2_bzWriteClose) 5345 ( int* bzerror, 5346 BZFILE* b, 5347 int abandon, 5348 unsigned int* nbytes_in, 5349 unsigned int* nbytes_out ) 5350 { 5351 BZ2_bzWriteClose64 ( bzerror, b, abandon, 5352 nbytes_in, NULL, nbytes_out, NULL ); 5353 } 5354 5355 5356 void BZ_API(BZ2_bzWriteClose64) 5357 ( int* bzerror, 5358 BZFILE* b, 5359 int abandon, 5360 unsigned int* nbytes_in_lo32, 5361 unsigned int* nbytes_in_hi32, 5362 unsigned int* nbytes_out_lo32, 5363 unsigned int* nbytes_out_hi32 ) 5364 { 5365 Int32 n, n2, ret; 5366 bzFile* bzf = (bzFile*)b; 5367 5368 if (bzf == NULL) 5369 { BZ_SETERR(BZ_OK); return; }; 5370 if (!(bzf->writing)) 5371 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5372 if (ferror(bzf->handle)) 5373 { BZ_SETERR(BZ_IO_ERROR); return; }; 5374 5375 if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; 5376 if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; 5377 if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; 5378 if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; 5379 5380 if ((!abandon) && bzf->lastErr == BZ_OK) { 5381 while (True) { 5382 bzf->strm.avail_out = BZ_MAX_UNUSED; 5383 bzf->strm.next_out = bzf->buf; 5384 ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); 5385 if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) 5386 { BZ_SETERR(ret); return; }; 5387 5388 if (bzf->strm.avail_out < BZ_MAX_UNUSED) { 5389 n = BZ_MAX_UNUSED - bzf->strm.avail_out; 5390 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 5391 n, bzf->handle ); 5392 if (n != n2 || ferror(bzf->handle)) 5393 { BZ_SETERR(BZ_IO_ERROR); return; }; 5394 } 5395 5396 if (ret == BZ_STREAM_END) break; 5397 } 5398 } 5399 5400 if ( !abandon && !ferror ( bzf->handle ) ) { 5401 fflush ( bzf->handle ); 5402 if (ferror(bzf->handle)) 5403 { BZ_SETERR(BZ_IO_ERROR); return; }; 5404 } 5405 5406 if (nbytes_in_lo32 != NULL) 5407 *nbytes_in_lo32 = bzf->strm.total_in_lo32; 5408 if (nbytes_in_hi32 != NULL) 5409 *nbytes_in_hi32 = bzf->strm.total_in_hi32; 5410 if (nbytes_out_lo32 != NULL) 5411 *nbytes_out_lo32 = bzf->strm.total_out_lo32; 5412 if (nbytes_out_hi32 != NULL) 5413 *nbytes_out_hi32 = bzf->strm.total_out_hi32; 5414 5415 BZ_SETERR(BZ_OK); 5416 BZ2_bzCompressEnd ( &(bzf->strm) ); 5417 free ( bzf ); 5418 } 5419 5420 5421 /*---------------------------------------------------*/ 5422 BZFILE* BZ_API(BZ2_bzReadOpen) 5423 ( int* bzerror, 5424 FILE* f, 5425 int verbosity, 5426 int small, 5427 void* unused, 5428 int nUnused ) 5429 { 5430 bzFile* bzf = NULL; 5431 int ret; 5432 5433 BZ_SETERR(BZ_OK); 5434 5435 if (f == NULL || 5436 (small != 0 && small != 1) || 5437 (verbosity < 0 || verbosity > 4) || 5438 (unused == NULL && nUnused != 0) || 5439 (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) 5440 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; 5441 5442 if (ferror(f)) 5443 { BZ_SETERR(BZ_IO_ERROR); return NULL; }; 5444 5445 bzf = malloc ( sizeof(bzFile) ); 5446 if (bzf == NULL) 5447 { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; 5448 5449 BZ_SETERR(BZ_OK); 5450 5451 bzf->initialisedOk = False; 5452 bzf->handle = f; 5453 bzf->bufN = 0; 5454 bzf->writing = False; 5455 bzf->strm.bzalloc = NULL; 5456 bzf->strm.bzfree = NULL; 5457 bzf->strm.opaque = NULL; 5458 5459 while (nUnused > 0) { 5460 bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++; 5461 unused = ((void*)( 1 + ((UChar*)(unused)) )); 5462 nUnused--; 5463 } 5464 5465 ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); 5466 if (ret != BZ_OK) 5467 { BZ_SETERR(ret); free(bzf); return NULL; }; 5468 5469 bzf->strm.avail_in = bzf->bufN; 5470 bzf->strm.next_in = bzf->buf; 5471 5472 bzf->initialisedOk = True; 5473 return bzf; 5474 } 5475 5476 5477 /*---------------------------------------------------*/ 5478 void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) 5479 { 5480 bzFile* bzf = (bzFile*)b; 5481 5482 BZ_SETERR(BZ_OK); 5483 if (bzf == NULL) 5484 { BZ_SETERR(BZ_OK); return; }; 5485 5486 if (bzf->writing) 5487 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5488 5489 if (bzf->initialisedOk) 5490 (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); 5491 free ( bzf ); 5492 } 5493 5494 5495 /*---------------------------------------------------*/ 5496 int BZ_API(BZ2_bzRead) 5497 ( int* bzerror, 5498 BZFILE* b, 5499 void* buf, 5500 int len ) 5501 { 5502 Int32 n, ret; 5503 bzFile* bzf = (bzFile*)b; 5504 5505 BZ_SETERR(BZ_OK); 5506 5507 if (bzf == NULL || buf == NULL || len < 0) 5508 { BZ_SETERR(BZ_PARAM_ERROR); return 0; }; 5509 5510 if (bzf->writing) 5511 { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; }; 5512 5513 if (len == 0) 5514 { BZ_SETERR(BZ_OK); return 0; }; 5515 5516 bzf->strm.avail_out = len; 5517 bzf->strm.next_out = buf; 5518 5519 while (True) { 5520 5521 if (ferror(bzf->handle)) 5522 { BZ_SETERR(BZ_IO_ERROR); return 0; }; 5523 5524 if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { 5525 n = fread ( bzf->buf, sizeof(UChar), 5526 BZ_MAX_UNUSED, bzf->handle ); 5527 if (ferror(bzf->handle)) 5528 { BZ_SETERR(BZ_IO_ERROR); return 0; }; 5529 bzf->bufN = n; 5530 bzf->strm.avail_in = bzf->bufN; 5531 bzf->strm.next_in = bzf->buf; 5532 } 5533 5534 ret = BZ2_bzDecompress ( &(bzf->strm) ); 5535 5536 if (ret != BZ_OK && ret != BZ_STREAM_END) 5537 { BZ_SETERR(ret); return 0; }; 5538 5539 if (ret == BZ_OK && myfeof(bzf->handle) && 5540 bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) 5541 { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; 5542 5543 if (ret == BZ_STREAM_END) 5544 { BZ_SETERR(BZ_STREAM_END); 5545 return len - bzf->strm.avail_out; }; 5546 if (bzf->strm.avail_out == 0) 5547 { BZ_SETERR(BZ_OK); return len; }; 5548 5549 } 5550 5551 return 0; /*not reached*/ 5552 } 5553 5554 5555 /*---------------------------------------------------*/ 5556 void BZ_API(BZ2_bzReadGetUnused) 5557 ( int* bzerror, 5558 BZFILE* b, 5559 void** unused, 5560 int* nUnused ) 5561 { 5562 bzFile* bzf = (bzFile*)b; 5563 if (bzf == NULL) 5564 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5565 if (bzf->lastErr != BZ_STREAM_END) 5566 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5567 if (unused == NULL || nUnused == NULL) 5568 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5569 5570 BZ_SETERR(BZ_OK); 5571 *nUnused = bzf->strm.avail_in; 5572 *unused = bzf->strm.next_in; 5573 } 5574 #endif 5575 5576 5577 /*---------------------------------------------------*/ 5578 /*--- Misc convenience stuff ---*/ 5579 /*---------------------------------------------------*/ 5580 5581 /*---------------------------------------------------*/ 5582 int BZ_API(BZ2_bzBuffToBuffCompress) 5583 ( char* dest, 5584 unsigned int* destLen, 5585 char* source, 5586 unsigned int sourceLen, 5587 int blockSize100k, 5588 int verbosity, 5589 int workFactor ) 5590 { 5591 bz_stream strm; 5592 int ret; 5593 5594 if (dest == NULL || destLen == NULL || 5595 source == NULL || 5596 blockSize100k < 1 || blockSize100k > 9 || 5597 verbosity < 0 || verbosity > 4 || 5598 workFactor < 0 || workFactor > 250) 5599 return BZ_PARAM_ERROR; 5600 5601 if (workFactor == 0) workFactor = 30; 5602 strm.bzalloc = NULL; 5603 strm.bzfree = NULL; 5604 strm.opaque = NULL; 5605 ret = BZ2_bzCompressInit ( &strm, blockSize100k, 5606 verbosity, workFactor ); 5607 if (ret != BZ_OK) return ret; 5608 5609 strm.next_in = source; 5610 strm.next_out = dest; 5611 strm.avail_in = sourceLen; 5612 strm.avail_out = *destLen; 5613 5614 ret = BZ2_bzCompress ( &strm, BZ_FINISH ); 5615 if (ret == BZ_FINISH_OK) goto output_overflow; 5616 if (ret != BZ_STREAM_END) goto errhandler; 5617 5618 /* normal termination */ 5619 *destLen -= strm.avail_out; 5620 BZ2_bzCompressEnd ( &strm ); 5621 return BZ_OK; 5622 5623 output_overflow: 5624 BZ2_bzCompressEnd ( &strm ); 5625 return BZ_OUTBUFF_FULL; 5626 5627 errhandler: 5628 BZ2_bzCompressEnd ( &strm ); 5629 return ret; 5630 } 5631 5632 5633 /*---------------------------------------------------*/ 5634 int BZ_API(BZ2_bzBuffToBuffDecompress) 5635 ( char* dest, 5636 unsigned int* destLen, 5637 char* source, 5638 unsigned int sourceLen, 5639 int small, 5640 int verbosity ) 5641 { 5642 bz_stream strm; 5643 int ret; 5644 5645 if (dest == NULL || destLen == NULL || 5646 source == NULL || 5647 (small != 0 && small != 1) || 5648 verbosity < 0 || verbosity > 4) 5649 return BZ_PARAM_ERROR; 5650 5651 strm.bzalloc = NULL; 5652 strm.bzfree = NULL; 5653 strm.opaque = NULL; 5654 ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); 5655 if (ret != BZ_OK) return ret; 5656 5657 strm.next_in = source; 5658 strm.next_out = dest; 5659 strm.avail_in = sourceLen; 5660 strm.avail_out = *destLen; 5661 5662 ret = BZ2_bzDecompress ( &strm ); 5663 if (ret == BZ_OK) goto output_overflow_or_eof; 5664 if (ret != BZ_STREAM_END) goto errhandler; 5665 5666 /* normal termination */ 5667 *destLen -= strm.avail_out; 5668 BZ2_bzDecompressEnd ( &strm ); 5669 return BZ_OK; 5670 5671 output_overflow_or_eof: 5672 if (strm.avail_out > 0) { 5673 BZ2_bzDecompressEnd ( &strm ); 5674 return BZ_UNEXPECTED_EOF; 5675 } else { 5676 BZ2_bzDecompressEnd ( &strm ); 5677 return BZ_OUTBUFF_FULL; 5678 }; 5679 5680 errhandler: 5681 BZ2_bzDecompressEnd ( &strm ); 5682 return ret; 5683 } 5684 5685 5686 /*---------------------------------------------------*/ 5687 /*-- 5688 Code contributed by Yoshioka Tsuneo 5689 (QWF00133 (at) niftyserve.or.jp/tsuneo-y (at) is.aist-nara.ac.jp), 5690 to support better zlib compatibility. 5691 This code is not _officially_ part of libbzip2 (yet); 5692 I haven't tested it, documented it, or considered the 5693 threading-safeness of it. 5694 If this code breaks, please contact both Yoshioka and me. 5695 --*/ 5696 /*---------------------------------------------------*/ 5697 5698 /*---------------------------------------------------*/ 5699 /*-- 5700 return version like "0.9.0c". 5701 --*/ 5702 const char * BZ_API(BZ2_bzlibVersion)(void) 5703 { 5704 return BZ_VERSION; 5705 } 5706 5707 5708 #ifndef BZ_NO_STDIO 5709 /*---------------------------------------------------*/ 5710 5711 #if defined(_WIN32) || defined(OS2) || defined(MSDOS) 5712 # include <fcntl.h> 5713 # include <io.h> 5714 # define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) 5715 #else 5716 # define SET_BINARY_MODE(file) 5717 #endif 5718 static 5719 BZFILE * bzopen_or_bzdopen 5720 ( const char *path, /* no use when bzdopen */ 5721 int fd, /* no use when bzdopen */ 5722 const char *mode, 5723 int open_mode) /* bzopen: 0, bzdopen:1 */ 5724 { 5725 int bzerr; 5726 char unused[BZ_MAX_UNUSED]; 5727 int blockSize100k = 9; 5728 int writing = 0; 5729 char mode2[10] = ""; 5730 FILE *fp = NULL; 5731 BZFILE *bzfp = NULL; 5732 int verbosity = 0; 5733 int workFactor = 30; 5734 int smallMode = 0; 5735 int nUnused = 0; 5736 5737 if (mode == NULL) return NULL; 5738 while (*mode) { 5739 switch (*mode) { 5740 case 'r': 5741 writing = 0; break; 5742 case 'w': 5743 writing = 1; break; 5744 case 's': 5745 smallMode = 1; break; 5746 default: 5747 if (isdigit((int)(*mode))) { 5748 blockSize100k = *mode-BZ_HDR_0; 5749 } 5750 } 5751 mode++; 5752 } 5753 strcat(mode2, writing ? "w" : "r" ); 5754 strcat(mode2,"b"); /* binary mode */ 5755 5756 if (open_mode==0) { 5757 if (path==NULL || strcmp(path,"")==0) { 5758 fp = (writing ? stdout : stdin); 5759 SET_BINARY_MODE(fp); 5760 } else { 5761 fp = fopen(path,mode2); 5762 } 5763 } else { 5764 #ifdef BZ_STRICT_ANSI 5765 fp = NULL; 5766 #else 5767 fp = fdopen(fd,mode2); 5768 #endif 5769 } 5770 if (fp == NULL) return NULL; 5771 5772 if (writing) { 5773 /* Guard against total chaos and anarchy -- JRS */ 5774 if (blockSize100k < 1) blockSize100k = 1; 5775 if (blockSize100k > 9) blockSize100k = 9; 5776 bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, 5777 verbosity,workFactor); 5778 } else { 5779 bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, 5780 unused,nUnused); 5781 } 5782 if (bzfp == NULL) { 5783 if (fp != stdin && fp != stdout) fclose(fp); 5784 return NULL; 5785 } 5786 return bzfp; 5787 } 5788 5789 5790 /*---------------------------------------------------*/ 5791 /*-- 5792 open file for read or write. 5793 ex) bzopen("file","w9") 5794 case path="" or NULL => use stdin or stdout. 5795 --*/ 5796 BZFILE * BZ_API(BZ2_bzopen) 5797 ( const char *path, 5798 const char *mode ) 5799 { 5800 return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); 5801 } 5802 5803 5804 /*---------------------------------------------------*/ 5805 BZFILE * BZ_API(BZ2_bzdopen) 5806 ( int fd, 5807 const char *mode ) 5808 { 5809 return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); 5810 } 5811 5812 5813 /*---------------------------------------------------*/ 5814 int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) 5815 { 5816 int bzerr, nread; 5817 if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; 5818 nread = BZ2_bzRead(&bzerr,b,buf,len); 5819 if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { 5820 return nread; 5821 } else { 5822 return -1; 5823 } 5824 } 5825 5826 5827 /*---------------------------------------------------*/ 5828 int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) 5829 { 5830 int bzerr; 5831 5832 BZ2_bzWrite(&bzerr,b,buf,len); 5833 if(bzerr == BZ_OK){ 5834 return len; 5835 }else{ 5836 return -1; 5837 } 5838 } 5839 5840 5841 /*---------------------------------------------------*/ 5842 int BZ_API(BZ2_bzflush) (BZFILE *b) 5843 { 5844 /* do nothing now... */ 5845 return 0; 5846 } 5847 5848 5849 /*---------------------------------------------------*/ 5850 void BZ_API(BZ2_bzclose) (BZFILE* b) 5851 { 5852 int bzerr; 5853 FILE *fp = ((bzFile *)b)->handle; 5854 5855 if (b==NULL) {return;} 5856 if(((bzFile*)b)->writing){ 5857 BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); 5858 if(bzerr != BZ_OK){ 5859 BZ2_bzWriteClose(NULL,b,1,NULL,NULL); 5860 } 5861 }else{ 5862 BZ2_bzReadClose(&bzerr,b); 5863 } 5864 if(fp!=stdin && fp!=stdout){ 5865 fclose(fp); 5866 } 5867 } 5868 5869 5870 /*---------------------------------------------------*/ 5871 /*-- 5872 return last error code 5873 --*/ 5874 static char *bzerrorstrings[] = { 5875 "OK" 5876 ,"SEQUENCE_ERROR" 5877 ,"PARAM_ERROR" 5878 ,"MEM_ERROR" 5879 ,"DATA_ERROR" 5880 ,"DATA_ERROR_MAGIC" 5881 ,"IO_ERROR" 5882 ,"UNEXPECTED_EOF" 5883 ,"OUTBUFF_FULL" 5884 ,"CONFIG_ERROR" 5885 ,"???" /* for future */ 5886 ,"???" /* for future */ 5887 ,"???" /* for future */ 5888 ,"???" /* for future */ 5889 ,"???" /* for future */ 5890 ,"???" /* for future */ 5891 }; 5892 5893 5894 const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) 5895 { 5896 int err = ((bzFile *)b)->lastErr; 5897 5898 if(err>0) err = 0; 5899 *errnum = err; 5900 return bzerrorstrings[err*-1]; 5901 } 5902 #endif 5903 5904 5905 /*-------------------------------------------------------------*/ 5906 /*--- end bzlib.c ---*/ 5907 /*-------------------------------------------------------------*/ 5908 5909 5910 ///////////////////////////////////////////////////////////////////// 5911 ///////////////////////////////////////////////////////////////////// 5912 5913 5914 /* A test program written to test robustness to decompression of 5915 corrupted data. Usage is 5916 unzcrash filename 5917 and the program will read the specified file, compress it (in memory), 5918 and then repeatedly decompress it, each time with a different bit of 5919 the compressed data inverted, so as to test all possible one-bit errors. 5920 This should not cause any invalid memory accesses. If it does, 5921 I want to know about it! 5922 5923 p.s. As you can see from the above description, the process is 5924 incredibly slow. A file of size eg 5KB will cause it to run for 5925 many hours. 5926 */ 5927 5928 //#include <stdio.h> 5929 //#include <assert.h> 5930 //#include "bzlib.h" 5931 5932 #define M_BLOCK 1000000 5933 5934 typedef unsigned char uchar; 5935 5936 #define M_BLOCK_OUT (M_BLOCK + 1000000) 5937 uchar inbuf[M_BLOCK]; 5938 uchar outbuf[M_BLOCK_OUT]; 5939 uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)]; 5940 5941 int nIn, nOut, nZ; 5942 5943 static char *bzerrorstrings[] = { 5944 "OK" 5945 ,"SEQUENCE_ERROR" 5946 ,"PARAM_ERROR" 5947 ,"MEM_ERROR" 5948 ,"DATA_ERROR" 5949 ,"DATA_ERROR_MAGIC" 5950 ,"IO_ERROR" 5951 ,"UNEXPECTED_EOF" 5952 ,"OUTBUFF_FULL" 5953 ,"???" /* for future */ 5954 ,"???" /* for future */ 5955 ,"???" /* for future */ 5956 ,"???" /* for future */ 5957 ,"???" /* for future */ 5958 ,"???" /* for future */ 5959 }; 5960 5961 void flip_bit ( int bit ) 5962 { 5963 int byteno = bit / 8; 5964 int bitno = bit % 8; 5965 uchar mask = 1 << bitno; 5966 //fprintf ( stderr, "(byte %d bit %d mask %d)", 5967 // byteno, bitno, (int)mask ); 5968 zbuf[byteno] ^= mask; 5969 } 5970 5971 void set_inbuf ( void ) 5972 { 5973 inbuf[0] = 0; 5974 my_strcat(inbuf, "At her sixtieth birthday party, Margaret Thatcher "); 5975 my_strcat(inbuf, "blew on the cake to light the candles.\n"); 5976 my_strcat(inbuf, "This program, bzip2, the associated library libbzip2, and all\n"); 5977 my_strcat(inbuf, "documentation, are copyright (C) 1996-2004 Julian R Seward. All\n"); 5978 my_strcat(inbuf, "rights reserved.\n"); 5979 my_strcat(inbuf, "\n"); 5980 my_strcat(inbuf, "Redistribution and use in source and binary forms, with or without\n"); 5981 my_strcat(inbuf, "modification, are permitted provided that the following conditions\n"); 5982 my_strcat(inbuf, "are met:\n"); 5983 my_strcat(inbuf, "\n"); 5984 my_strcat(inbuf, "1. Redistributions of source code must retain the above copyright\n"); 5985 my_strcat(inbuf, " notice, this list of conditions and the following disclaimer.\n"); 5986 my_strcat(inbuf, "\n"); 5987 my_strcat(inbuf, "2. The origin of this software must not be misrepresented; you must\n"); 5988 my_strcat(inbuf, " not claim that you wrote the original software. If you use this\n"); 5989 my_strcat(inbuf, " software in a product, an acknowledgment in the product\n"); 5990 my_strcat(inbuf, " documentation would be appreciated but is not required.\n"); 5991 my_strcat(inbuf, "\n"); 5992 my_strcat(inbuf, "3. Altered source versions must be plainly marked as such, and must\n"); 5993 my_strcat(inbuf, " not be misrepresented as being the original software.\n"); 5994 my_strcat(inbuf, "\n"); 5995 my_strcat(inbuf, "4. The name of the author may not be used to endorse or promote\n"); 5996 my_strcat(inbuf, " products derived from this software without specific prior written\n"); 5997 my_strcat(inbuf, " permission.\n"); 5998 my_strcat(inbuf, "\n"); 5999 my_strcat(inbuf, "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS\n"); 6000 my_strcat(inbuf, "OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n"); 6001 my_strcat(inbuf, "WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n"); 6002 my_strcat(inbuf, "ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY\n"); 6003 my_strcat(inbuf, "DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n"); 6004 my_strcat(inbuf, "DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n"); 6005 my_strcat(inbuf, "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n"); 6006 my_strcat(inbuf, "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n"); 6007 my_strcat(inbuf, "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"); 6008 my_strcat(inbuf, "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n"); 6009 my_strcat(inbuf, "SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"); 6010 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6011 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6012 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6013 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6014 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6015 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6016 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6017 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6018 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6019 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6020 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6021 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6022 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6023 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6024 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6025 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6026 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6027 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6028 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6029 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6030 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6031 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6032 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6033 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6034 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6035 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6036 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6037 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6038 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6039 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6040 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6041 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6042 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6043 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6044 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6045 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6046 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6047 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6048 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6049 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6050 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6051 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6052 my_strcat(inbuf, "\n"); 6053 } 6054 6055 6056 void entry ( HWord(*service)(HWord,HWord) ) 6057 { 6058 int r; 6059 int bit; 6060 int i; 6061 6062 serviceFn = service; 6063 6064 set_inbuf(); 6065 nIn = vexxx_strlen(inbuf)+1; 6066 vexxx_printf( "%d bytes read\n", nIn ); 6067 6068 nZ = M_BLOCK; 6069 r = BZ2_bzBuffToBuffCompress ( 6070 zbuf, &nZ, inbuf, nIn, 9, 4/*verb*/, 30 ); 6071 6072 if (r != BZ_OK) { 6073 vexxx_printf("initial compress failed!\n"); 6074 (*serviceFn)(0,0); 6075 } 6076 vexxx_printf( "%d after compression\n", nZ ); 6077 6078 for (bit = 0; bit < nZ*8; bit += (bit < 35 ? 1 : 377)) { 6079 vexxx_printf( "bit %d ", bit ); 6080 flip_bit ( bit ); 6081 nOut = M_BLOCK_OUT; 6082 r = BZ2_bzBuffToBuffDecompress ( 6083 outbuf, &nOut, zbuf, nZ, 1/*small*/, 0 ); 6084 vexxx_printf( " %d %s ", r, bzerrorstrings[-r] ); 6085 6086 if (r != BZ_OK) { 6087 vexxx_printf( "\n" ); 6088 } else { 6089 if (nOut != nIn) { 6090 vexxx_printf( "nIn/nOut mismatch %d %d\n", nIn, nOut ); 6091 (*serviceFn)(0,0); 6092 } else { 6093 for (i = 0; i < nOut; i++) 6094 if (inbuf[i] != outbuf[i]) { 6095 vexxx_printf( "mismatch at %d\n", i ); 6096 (*serviceFn)(0,0); 6097 } 6098 if (i == nOut) vexxx_printf( "really ok!\n" ); 6099 } 6100 } 6101 6102 flip_bit ( bit ); 6103 } 6104 6105 #if 0 6106 assert (nOut == nIn); 6107 for (i = 0; i < nOut; i++) { 6108 if (inbuf[i] != outbuf[i]) { 6109 vexxx_printf( "difference at %d !\n", i ); 6110 return 1; 6111 } 6112 } 6113 #endif 6114 6115 vexxx_printf( "all ok\n" ); 6116 (*serviceFn)(0,0); 6117 } 6118