1 /* $Id: tif_lzw.c,v 1.55 2017-05-17 09:38:58 erouault Exp $ */ 2 3 /* 4 * Copyright (c) 1988-1997 Sam Leffler 5 * Copyright (c) 1991-1997 Silicon Graphics, Inc. 6 * 7 * Permission to use, copy, modify, distribute, and sell this software and 8 * its documentation for any purpose is hereby granted without fee, provided 9 * that (i) the above copyright notices and this permission notice appear in 10 * all copies of the software and related documentation, and (ii) the names of 11 * Sam Leffler and Silicon Graphics may not be used in any advertising or 12 * publicity relating to the software without the specific, prior written 13 * permission of Sam Leffler and Silicon Graphics. 14 * 15 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 17 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 18 * 19 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR 20 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, 21 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 22 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 23 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 24 * OF THIS SOFTWARE. 25 */ 26 27 #include "tiffiop.h" 28 #ifdef LZW_SUPPORT 29 /* 30 * TIFF Library. 31 * Rev 5.0 Lempel-Ziv & Welch Compression Support 32 * 33 * This code is derived from the compress program whose code is 34 * derived from software contributed to Berkeley by James A. Woods, 35 * derived from original work by Spencer Thomas and Joseph Orost. 36 * 37 * The original Berkeley copyright notice appears below in its entirety. 38 */ 39 #include "tif_predict.h" 40 41 #include <stdio.h> 42 43 /* 44 * NB: The 5.0 spec describes a different algorithm than Aldus 45 * implements. Specifically, Aldus does code length transitions 46 * one code earlier than should be done (for real LZW). 47 * Earlier versions of this library implemented the correct 48 * LZW algorithm, but emitted codes in a bit order opposite 49 * to the TIFF spec. Thus, to maintain compatibility w/ Aldus 50 * we interpret MSB-LSB ordered codes to be images written w/ 51 * old versions of this library, but otherwise adhere to the 52 * Aldus "off by one" algorithm. 53 * 54 * Future revisions to the TIFF spec are expected to "clarify this issue". 55 */ 56 #define LZW_COMPAT /* include backwards compatibility code */ 57 /* 58 * Each strip of data is supposed to be terminated by a CODE_EOI. 59 * If the following #define is included, the decoder will also 60 * check for end-of-strip w/o seeing this code. This makes the 61 * library more robust, but also slower. 62 */ 63 #define LZW_CHECKEOS /* include checks for strips w/o EOI code */ 64 65 #define MAXCODE(n) ((1L<<(n))-1) 66 /* 67 * The TIFF spec specifies that encoded bit 68 * strings range from 9 to 12 bits. 69 */ 70 #define BITS_MIN 9 /* start with 9 bits */ 71 #define BITS_MAX 12 /* max of 12 bit strings */ 72 /* predefined codes */ 73 #define CODE_CLEAR 256 /* code to clear string table */ 74 #define CODE_EOI 257 /* end-of-information code */ 75 #define CODE_FIRST 258 /* first free code entry */ 76 #define CODE_MAX MAXCODE(BITS_MAX) 77 #define HSIZE 9001L /* 91% occupancy */ 78 #define HSHIFT (13-8) 79 #ifdef LZW_COMPAT 80 /* NB: +1024 is for compatibility with old files */ 81 #define CSIZE (MAXCODE(BITS_MAX)+1024L) 82 #else 83 #define CSIZE (MAXCODE(BITS_MAX)+1L) 84 #endif 85 86 /* 87 * State block for each open TIFF file using LZW 88 * compression/decompression. Note that the predictor 89 * state block must be first in this data structure. 90 */ 91 typedef struct { 92 TIFFPredictorState predict; /* predictor super class */ 93 94 unsigned short nbits; /* # of bits/code */ 95 unsigned short maxcode; /* maximum code for lzw_nbits */ 96 unsigned short free_ent; /* next free entry in hash table */ 97 unsigned long nextdata; /* next bits of i/o */ 98 long nextbits; /* # of valid bits in lzw_nextdata */ 99 100 int rw_mode; /* preserve rw_mode from init */ 101 } LZWBaseState; 102 103 #define lzw_nbits base.nbits 104 #define lzw_maxcode base.maxcode 105 #define lzw_free_ent base.free_ent 106 #define lzw_nextdata base.nextdata 107 #define lzw_nextbits base.nextbits 108 109 /* 110 * Encoding-specific state. 111 */ 112 typedef uint16 hcode_t; /* codes fit in 16 bits */ 113 typedef struct { 114 long hash; 115 hcode_t code; 116 } hash_t; 117 118 /* 119 * Decoding-specific state. 120 */ 121 typedef struct code_ent { 122 struct code_ent *next; 123 unsigned short length; /* string len, including this token */ 124 unsigned char value; /* data value */ 125 unsigned char firstchar; /* first token of string */ 126 } code_t; 127 128 typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16); 129 130 typedef struct { 131 LZWBaseState base; 132 133 /* Decoding specific data */ 134 long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ 135 long dec_restart; /* restart count */ 136 #ifdef LZW_CHECKEOS 137 uint64 dec_bitsleft; /* available bits in raw data */ 138 #endif 139 decodeFunc dec_decode; /* regular or backwards compatible */ 140 code_t* dec_codep; /* current recognized code */ 141 code_t* dec_oldcodep; /* previously recognized code */ 142 code_t* dec_free_entp; /* next free entry */ 143 code_t* dec_maxcodep; /* max available entry */ 144 code_t* dec_codetab; /* kept separate for small machines */ 145 146 /* Encoding specific data */ 147 int enc_oldcode; /* last code encountered */ 148 long enc_checkpoint; /* point at which to clear table */ 149 #define CHECK_GAP 10000 /* enc_ratio check interval */ 150 long enc_ratio; /* current compression ratio */ 151 long enc_incount; /* (input) data bytes encoded */ 152 long enc_outcount; /* encoded (output) bytes */ 153 uint8* enc_rawlimit; /* bound on tif_rawdata buffer */ 154 hash_t* enc_hashtab; /* kept separate for small machines */ 155 } LZWCodecState; 156 157 #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) 158 #define DecoderState(tif) ((LZWCodecState*) LZWState(tif)) 159 #define EncoderState(tif) ((LZWCodecState*) LZWState(tif)) 160 161 static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 162 #ifdef LZW_COMPAT 163 static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 164 #endif 165 static void cl_hash(LZWCodecState*); 166 167 /* 168 * LZW Decoder. 169 */ 170 171 #ifdef LZW_CHECKEOS 172 /* 173 * This check shouldn't be necessary because each 174 * strip is suppose to be terminated with CODE_EOI. 175 */ 176 #define NextCode(_tif, _sp, _bp, _code, _get) { \ 177 if ((_sp)->dec_bitsleft < (uint64)nbits) { \ 178 TIFFWarningExt(_tif->tif_clientdata, module, \ 179 "LZWDecode: Strip %d not terminated with EOI code", \ 180 _tif->tif_curstrip); \ 181 _code = CODE_EOI; \ 182 } else { \ 183 _get(_sp,_bp,_code); \ 184 (_sp)->dec_bitsleft -= nbits; \ 185 } \ 186 } 187 #else 188 #define NextCode(tif, sp, bp, code, get) get(sp, bp, code) 189 #endif 190 191 static int 192 LZWFixupTags(TIFF* tif) 193 { 194 (void) tif; 195 return (1); 196 } 197 198 static int 199 LZWSetupDecode(TIFF* tif) 200 { 201 static const char module[] = "LZWSetupDecode"; 202 LZWCodecState* sp = DecoderState(tif); 203 int code; 204 205 if( sp == NULL ) 206 { 207 /* 208 * Allocate state block so tag methods have storage to record 209 * values. 210 */ 211 tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState)); 212 if (tif->tif_data == NULL) 213 { 214 TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block"); 215 return (0); 216 } 217 218 DecoderState(tif)->dec_codetab = NULL; 219 DecoderState(tif)->dec_decode = NULL; 220 221 /* 222 * Setup predictor setup. 223 */ 224 (void) TIFFPredictorInit(tif); 225 226 sp = DecoderState(tif); 227 } 228 229 assert(sp != NULL); 230 231 if (sp->dec_codetab == NULL) { 232 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); 233 if (sp->dec_codetab == NULL) { 234 TIFFErrorExt(tif->tif_clientdata, module, 235 "No space for LZW code table"); 236 return (0); 237 } 238 /* 239 * Pre-load the table. 240 */ 241 code = 255; 242 do { 243 sp->dec_codetab[code].value = (unsigned char)code; 244 sp->dec_codetab[code].firstchar = (unsigned char)code; 245 sp->dec_codetab[code].length = 1; 246 sp->dec_codetab[code].next = NULL; 247 } while (code--); 248 /* 249 * Zero-out the unused entries 250 */ 251 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0, 252 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t)); 253 } 254 return (1); 255 } 256 257 /* 258 * Setup state for decoding a strip. 259 */ 260 static int 261 LZWPreDecode(TIFF* tif, uint16 s) 262 { 263 static const char module[] = "LZWPreDecode"; 264 LZWCodecState *sp = DecoderState(tif); 265 266 (void) s; 267 assert(sp != NULL); 268 if( sp->dec_codetab == NULL ) 269 { 270 tif->tif_setupdecode( tif ); 271 if( sp->dec_codetab == NULL ) 272 return (0); 273 } 274 275 /* 276 * Check for old bit-reversed codes. 277 */ 278 if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { 279 #ifdef LZW_COMPAT 280 if (!sp->dec_decode) { 281 TIFFWarningExt(tif->tif_clientdata, module, 282 "Old-style LZW codes, convert file"); 283 /* 284 * Override default decoding methods with 285 * ones that deal with the old coding. 286 * Otherwise the predictor versions set 287 * above will call the compatibility routines 288 * through the dec_decode method. 289 */ 290 tif->tif_decoderow = LZWDecodeCompat; 291 tif->tif_decodestrip = LZWDecodeCompat; 292 tif->tif_decodetile = LZWDecodeCompat; 293 /* 294 * If doing horizontal differencing, must 295 * re-setup the predictor logic since we 296 * switched the basic decoder methods... 297 */ 298 (*tif->tif_setupdecode)(tif); 299 sp->dec_decode = LZWDecodeCompat; 300 } 301 sp->lzw_maxcode = MAXCODE(BITS_MIN); 302 #else /* !LZW_COMPAT */ 303 if (!sp->dec_decode) { 304 TIFFErrorExt(tif->tif_clientdata, module, 305 "Old-style LZW codes not supported"); 306 sp->dec_decode = LZWDecode; 307 } 308 return (0); 309 #endif/* !LZW_COMPAT */ 310 } else { 311 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; 312 sp->dec_decode = LZWDecode; 313 } 314 sp->lzw_nbits = BITS_MIN; 315 sp->lzw_nextbits = 0; 316 sp->lzw_nextdata = 0; 317 318 sp->dec_restart = 0; 319 sp->dec_nbitsmask = MAXCODE(BITS_MIN); 320 #ifdef LZW_CHECKEOS 321 sp->dec_bitsleft = 0; 322 #endif 323 sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; 324 /* 325 * Zero entries that are not yet filled in. We do 326 * this to guard against bogus input data that causes 327 * us to index into undefined entries. If you can 328 * come up with a way to safely bounds-check input codes 329 * while decoding then you can remove this operation. 330 */ 331 _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); 332 sp->dec_oldcodep = &sp->dec_codetab[-1]; 333 sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; 334 return (1); 335 } 336 337 /* 338 * Decode a "hunk of data". 339 */ 340 #define GetNextCode(sp, bp, code) { \ 341 nextdata = (nextdata<<8) | *(bp)++; \ 342 nextbits += 8; \ 343 if (nextbits < nbits) { \ 344 nextdata = (nextdata<<8) | *(bp)++; \ 345 nextbits += 8; \ 346 } \ 347 code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ 348 nextbits -= nbits; \ 349 } 350 351 static void 352 codeLoop(TIFF* tif, const char* module) 353 { 354 TIFFErrorExt(tif->tif_clientdata, module, 355 "Bogus encoding, loop in the code table; scanline %d", 356 tif->tif_row); 357 } 358 359 static int 360 LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 361 { 362 static const char module[] = "LZWDecode"; 363 LZWCodecState *sp = DecoderState(tif); 364 char *op = (char*) op0; 365 long occ = (long) occ0; 366 char *tp; 367 unsigned char *bp; 368 hcode_t code; 369 int len; 370 long nbits, nextbits, nbitsmask; 371 unsigned long nextdata; 372 code_t *codep, *free_entp, *maxcodep, *oldcodep; 373 374 (void) s; 375 assert(sp != NULL); 376 assert(sp->dec_codetab != NULL); 377 378 /* 379 Fail if value does not fit in long. 380 */ 381 if ((tmsize_t) occ != occ0) 382 return (0); 383 /* 384 * Restart interrupted output operation. 385 */ 386 if (sp->dec_restart) { 387 long residue; 388 389 codep = sp->dec_codep; 390 residue = codep->length - sp->dec_restart; 391 if (residue > occ) { 392 /* 393 * Residue from previous decode is sufficient 394 * to satisfy decode request. Skip to the 395 * start of the decoded string, place decoded 396 * values in the output buffer, and return. 397 */ 398 sp->dec_restart += occ; 399 do { 400 codep = codep->next; 401 } while (--residue > occ && codep); 402 if (codep) { 403 tp = op + occ; 404 do { 405 *--tp = codep->value; 406 codep = codep->next; 407 } while (--occ && codep); 408 } 409 return (1); 410 } 411 /* 412 * Residue satisfies only part of the decode request. 413 */ 414 op += residue; 415 occ -= residue; 416 tp = op; 417 do { 418 int t; 419 --tp; 420 t = codep->value; 421 codep = codep->next; 422 *tp = (char)t; 423 } while (--residue && codep); 424 sp->dec_restart = 0; 425 } 426 427 bp = (unsigned char *)tif->tif_rawcp; 428 #ifdef LZW_CHECKEOS 429 sp->dec_bitsleft = (((uint64)tif->tif_rawcc) << 3); 430 #endif 431 nbits = sp->lzw_nbits; 432 nextdata = sp->lzw_nextdata; 433 nextbits = sp->lzw_nextbits; 434 nbitsmask = sp->dec_nbitsmask; 435 oldcodep = sp->dec_oldcodep; 436 free_entp = sp->dec_free_entp; 437 maxcodep = sp->dec_maxcodep; 438 439 while (occ > 0) { 440 NextCode(tif, sp, bp, code, GetNextCode); 441 if (code == CODE_EOI) 442 break; 443 if (code == CODE_CLEAR) { 444 do { 445 free_entp = sp->dec_codetab + CODE_FIRST; 446 _TIFFmemset(free_entp, 0, 447 (CSIZE - CODE_FIRST) * sizeof (code_t)); 448 nbits = BITS_MIN; 449 nbitsmask = MAXCODE(BITS_MIN); 450 maxcodep = sp->dec_codetab + nbitsmask-1; 451 NextCode(tif, sp, bp, code, GetNextCode); 452 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 453 if (code == CODE_EOI) 454 break; 455 if (code > CODE_CLEAR) { 456 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 457 "LZWDecode: Corrupted LZW table at scanline %d", 458 tif->tif_row); 459 return (0); 460 } 461 *op++ = (char)code; 462 occ--; 463 oldcodep = sp->dec_codetab + code; 464 continue; 465 } 466 codep = sp->dec_codetab + code; 467 468 /* 469 * Add the new entry to the code table. 470 */ 471 if (free_entp < &sp->dec_codetab[0] || 472 free_entp >= &sp->dec_codetab[CSIZE]) { 473 TIFFErrorExt(tif->tif_clientdata, module, 474 "Corrupted LZW table at scanline %d", 475 tif->tif_row); 476 return (0); 477 } 478 479 free_entp->next = oldcodep; 480 if (free_entp->next < &sp->dec_codetab[0] || 481 free_entp->next >= &sp->dec_codetab[CSIZE]) { 482 TIFFErrorExt(tif->tif_clientdata, module, 483 "Corrupted LZW table at scanline %d", 484 tif->tif_row); 485 return (0); 486 } 487 free_entp->firstchar = free_entp->next->firstchar; 488 free_entp->length = free_entp->next->length+1; 489 free_entp->value = (codep < free_entp) ? 490 codep->firstchar : free_entp->firstchar; 491 if (++free_entp > maxcodep) { 492 if (++nbits > BITS_MAX) /* should not happen */ 493 nbits = BITS_MAX; 494 nbitsmask = MAXCODE(nbits); 495 maxcodep = sp->dec_codetab + nbitsmask-1; 496 } 497 oldcodep = codep; 498 if (code >= 256) { 499 /* 500 * Code maps to a string, copy string 501 * value to output (written in reverse). 502 */ 503 if(codep->length == 0) { 504 TIFFErrorExt(tif->tif_clientdata, module, 505 "Wrong length of decoded string: " 506 "data probably corrupted at scanline %d", 507 tif->tif_row); 508 return (0); 509 } 510 if (codep->length > occ) { 511 /* 512 * String is too long for decode buffer, 513 * locate portion that will fit, copy to 514 * the decode buffer, and setup restart 515 * logic for the next decoding call. 516 */ 517 sp->dec_codep = codep; 518 do { 519 codep = codep->next; 520 } while (codep && codep->length > occ); 521 if (codep) { 522 sp->dec_restart = (long)occ; 523 tp = op + occ; 524 do { 525 *--tp = codep->value; 526 codep = codep->next; 527 } while (--occ && codep); 528 if (codep) 529 codeLoop(tif, module); 530 } 531 break; 532 } 533 len = codep->length; 534 tp = op + len; 535 do { 536 int t; 537 --tp; 538 t = codep->value; 539 codep = codep->next; 540 *tp = (char)t; 541 } while (codep && tp > op); 542 if (codep) { 543 codeLoop(tif, module); 544 break; 545 } 546 assert(occ >= len); 547 op += len; 548 occ -= len; 549 } else { 550 *op++ = (char)code; 551 occ--; 552 } 553 } 554 555 tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp ); 556 tif->tif_rawcp = (uint8*) bp; 557 sp->lzw_nbits = (unsigned short) nbits; 558 sp->lzw_nextdata = nextdata; 559 sp->lzw_nextbits = nextbits; 560 sp->dec_nbitsmask = nbitsmask; 561 sp->dec_oldcodep = oldcodep; 562 sp->dec_free_entp = free_entp; 563 sp->dec_maxcodep = maxcodep; 564 565 if (occ > 0) { 566 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 567 TIFFErrorExt(tif->tif_clientdata, module, 568 "Not enough data at scanline %d (short %I64d bytes)", 569 tif->tif_row, (unsigned __int64) occ); 570 #else 571 TIFFErrorExt(tif->tif_clientdata, module, 572 "Not enough data at scanline %d (short %llu bytes)", 573 tif->tif_row, (unsigned long long) occ); 574 #endif 575 return (0); 576 } 577 return (1); 578 } 579 580 #ifdef LZW_COMPAT 581 /* 582 * Decode a "hunk of data" for old images. 583 */ 584 #define GetNextCodeCompat(sp, bp, code) { \ 585 nextdata |= (unsigned long) *(bp)++ << nextbits; \ 586 nextbits += 8; \ 587 if (nextbits < nbits) { \ 588 nextdata |= (unsigned long) *(bp)++ << nextbits;\ 589 nextbits += 8; \ 590 } \ 591 code = (hcode_t)(nextdata & nbitsmask); \ 592 nextdata >>= nbits; \ 593 nextbits -= nbits; \ 594 } 595 596 static int 597 LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 598 { 599 static const char module[] = "LZWDecodeCompat"; 600 LZWCodecState *sp = DecoderState(tif); 601 char *op = (char*) op0; 602 long occ = (long) occ0; 603 char *tp; 604 unsigned char *bp; 605 int code, nbits; 606 long nextbits, nextdata, nbitsmask; 607 code_t *codep, *free_entp, *maxcodep, *oldcodep; 608 609 (void) s; 610 assert(sp != NULL); 611 612 /* 613 Fail if value does not fit in long. 614 */ 615 if ((tmsize_t) occ != occ0) 616 return (0); 617 618 /* 619 * Restart interrupted output operation. 620 */ 621 if (sp->dec_restart) { 622 long residue; 623 624 codep = sp->dec_codep; 625 residue = codep->length - sp->dec_restart; 626 if (residue > occ) { 627 /* 628 * Residue from previous decode is sufficient 629 * to satisfy decode request. Skip to the 630 * start of the decoded string, place decoded 631 * values in the output buffer, and return. 632 */ 633 sp->dec_restart += occ; 634 do { 635 codep = codep->next; 636 } while (--residue > occ); 637 tp = op + occ; 638 do { 639 *--tp = codep->value; 640 codep = codep->next; 641 } while (--occ); 642 return (1); 643 } 644 /* 645 * Residue satisfies only part of the decode request. 646 */ 647 op += residue; 648 occ -= residue; 649 tp = op; 650 do { 651 *--tp = codep->value; 652 codep = codep->next; 653 } while (--residue); 654 sp->dec_restart = 0; 655 } 656 657 bp = (unsigned char *)tif->tif_rawcp; 658 nbits = sp->lzw_nbits; 659 nextdata = sp->lzw_nextdata; 660 nextbits = sp->lzw_nextbits; 661 nbitsmask = sp->dec_nbitsmask; 662 oldcodep = sp->dec_oldcodep; 663 free_entp = sp->dec_free_entp; 664 maxcodep = sp->dec_maxcodep; 665 666 while (occ > 0) { 667 NextCode(tif, sp, bp, code, GetNextCodeCompat); 668 if (code == CODE_EOI) 669 break; 670 if (code == CODE_CLEAR) { 671 do { 672 free_entp = sp->dec_codetab + CODE_FIRST; 673 _TIFFmemset(free_entp, 0, 674 (CSIZE - CODE_FIRST) * sizeof (code_t)); 675 nbits = BITS_MIN; 676 nbitsmask = MAXCODE(BITS_MIN); 677 maxcodep = sp->dec_codetab + nbitsmask; 678 NextCode(tif, sp, bp, code, GetNextCodeCompat); 679 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 680 if (code == CODE_EOI) 681 break; 682 if (code > CODE_CLEAR) { 683 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 684 "LZWDecode: Corrupted LZW table at scanline %d", 685 tif->tif_row); 686 return (0); 687 } 688 *op++ = (char)code; 689 occ--; 690 oldcodep = sp->dec_codetab + code; 691 continue; 692 } 693 codep = sp->dec_codetab + code; 694 695 /* 696 * Add the new entry to the code table. 697 */ 698 if (free_entp < &sp->dec_codetab[0] || 699 free_entp >= &sp->dec_codetab[CSIZE]) { 700 TIFFErrorExt(tif->tif_clientdata, module, 701 "Corrupted LZW table at scanline %d", tif->tif_row); 702 return (0); 703 } 704 705 free_entp->next = oldcodep; 706 if (free_entp->next < &sp->dec_codetab[0] || 707 free_entp->next >= &sp->dec_codetab[CSIZE]) { 708 TIFFErrorExt(tif->tif_clientdata, module, 709 "Corrupted LZW table at scanline %d", tif->tif_row); 710 return (0); 711 } 712 free_entp->firstchar = free_entp->next->firstchar; 713 free_entp->length = free_entp->next->length+1; 714 free_entp->value = (codep < free_entp) ? 715 codep->firstchar : free_entp->firstchar; 716 if (++free_entp > maxcodep) { 717 if (++nbits > BITS_MAX) /* should not happen */ 718 nbits = BITS_MAX; 719 nbitsmask = MAXCODE(nbits); 720 maxcodep = sp->dec_codetab + nbitsmask; 721 } 722 oldcodep = codep; 723 if (code >= 256) { 724 /* 725 * Code maps to a string, copy string 726 * value to output (written in reverse). 727 */ 728 if(codep->length == 0) { 729 TIFFErrorExt(tif->tif_clientdata, module, 730 "Wrong length of decoded " 731 "string: data probably corrupted at scanline %d", 732 tif->tif_row); 733 return (0); 734 } 735 if (codep->length > occ) { 736 /* 737 * String is too long for decode buffer, 738 * locate portion that will fit, copy to 739 * the decode buffer, and setup restart 740 * logic for the next decoding call. 741 */ 742 sp->dec_codep = codep; 743 do { 744 codep = codep->next; 745 } while (codep->length > occ); 746 sp->dec_restart = occ; 747 tp = op + occ; 748 do { 749 *--tp = codep->value; 750 codep = codep->next; 751 } while (--occ); 752 break; 753 } 754 assert(occ >= codep->length); 755 op += codep->length; 756 occ -= codep->length; 757 tp = op; 758 do { 759 *--tp = codep->value; 760 } while( (codep = codep->next) != NULL ); 761 } else { 762 *op++ = (char)code; 763 occ--; 764 } 765 } 766 767 tif->tif_rawcp = (uint8*) bp; 768 sp->lzw_nbits = (unsigned short)nbits; 769 sp->lzw_nextdata = nextdata; 770 sp->lzw_nextbits = nextbits; 771 sp->dec_nbitsmask = nbitsmask; 772 sp->dec_oldcodep = oldcodep; 773 sp->dec_free_entp = free_entp; 774 sp->dec_maxcodep = maxcodep; 775 776 if (occ > 0) { 777 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 778 TIFFErrorExt(tif->tif_clientdata, module, 779 "Not enough data at scanline %d (short %I64d bytes)", 780 tif->tif_row, (unsigned __int64) occ); 781 #else 782 TIFFErrorExt(tif->tif_clientdata, module, 783 "Not enough data at scanline %d (short %llu bytes)", 784 tif->tif_row, (unsigned long long) occ); 785 #endif 786 return (0); 787 } 788 return (1); 789 } 790 #endif /* LZW_COMPAT */ 791 792 /* 793 * LZW Encoding. 794 */ 795 796 static int 797 LZWSetupEncode(TIFF* tif) 798 { 799 static const char module[] = "LZWSetupEncode"; 800 LZWCodecState* sp = EncoderState(tif); 801 802 assert(sp != NULL); 803 sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); 804 if (sp->enc_hashtab == NULL) { 805 TIFFErrorExt(tif->tif_clientdata, module, 806 "No space for LZW hash table"); 807 return (0); 808 } 809 return (1); 810 } 811 812 /* 813 * Reset encoding state at the start of a strip. 814 */ 815 static int 816 LZWPreEncode(TIFF* tif, uint16 s) 817 { 818 LZWCodecState *sp = EncoderState(tif); 819 820 (void) s; 821 assert(sp != NULL); 822 823 if( sp->enc_hashtab == NULL ) 824 { 825 tif->tif_setupencode( tif ); 826 } 827 828 sp->lzw_nbits = BITS_MIN; 829 sp->lzw_maxcode = MAXCODE(BITS_MIN); 830 sp->lzw_free_ent = CODE_FIRST; 831 sp->lzw_nextbits = 0; 832 sp->lzw_nextdata = 0; 833 sp->enc_checkpoint = CHECK_GAP; 834 sp->enc_ratio = 0; 835 sp->enc_incount = 0; 836 sp->enc_outcount = 0; 837 /* 838 * The 4 here insures there is space for 2 max-sized 839 * codes in LZWEncode and LZWPostDecode. 840 */ 841 sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; 842 cl_hash(sp); /* clear hash table */ 843 sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ 844 return (1); 845 } 846 847 #define CALCRATIO(sp, rat) { \ 848 if (incount > 0x007fffff) { /* NB: shift will overflow */\ 849 rat = outcount >> 8; \ 850 rat = (rat == 0 ? 0x7fffffff : incount/rat); \ 851 } else \ 852 rat = (incount<<8) / outcount; \ 853 } 854 855 /* Explicit 0xff masking to make icc -check=conversions happy */ 856 #define PutNextCode(op, c) { \ 857 nextdata = (nextdata << nbits) | c; \ 858 nextbits += nbits; \ 859 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 860 nextbits -= 8; \ 861 if (nextbits >= 8) { \ 862 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 863 nextbits -= 8; \ 864 } \ 865 outcount += nbits; \ 866 } 867 868 /* 869 * Encode a chunk of pixels. 870 * 871 * Uses an open addressing double hashing (no chaining) on the 872 * prefix code/next character combination. We do a variant of 873 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's 874 * relatively-prime secondary probe. Here, the modular division 875 * first probe is gives way to a faster exclusive-or manipulation. 876 * Also do block compression with an adaptive reset, whereby the 877 * code table is cleared when the compression ratio decreases, 878 * but after the table fills. The variable-length output codes 879 * are re-sized at this point, and a CODE_CLEAR is generated 880 * for the decoder. 881 */ 882 static int 883 LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) 884 { 885 register LZWCodecState *sp = EncoderState(tif); 886 register long fcode; 887 register hash_t *hp; 888 register int h, c; 889 hcode_t ent; 890 long disp; 891 long incount, outcount, checkpoint; 892 unsigned long nextdata; 893 long nextbits; 894 int free_ent, maxcode, nbits; 895 uint8* op; 896 uint8* limit; 897 898 (void) s; 899 if (sp == NULL) 900 return (0); 901 902 assert(sp->enc_hashtab != NULL); 903 904 /* 905 * Load local state. 906 */ 907 incount = sp->enc_incount; 908 outcount = sp->enc_outcount; 909 checkpoint = sp->enc_checkpoint; 910 nextdata = sp->lzw_nextdata; 911 nextbits = sp->lzw_nextbits; 912 free_ent = sp->lzw_free_ent; 913 maxcode = sp->lzw_maxcode; 914 nbits = sp->lzw_nbits; 915 op = tif->tif_rawcp; 916 limit = sp->enc_rawlimit; 917 ent = (hcode_t)sp->enc_oldcode; 918 919 if (ent == (hcode_t) -1 && cc > 0) { 920 /* 921 * NB: This is safe because it can only happen 922 * at the start of a strip where we know there 923 * is space in the data buffer. 924 */ 925 PutNextCode(op, CODE_CLEAR); 926 ent = *bp++; cc--; incount++; 927 } 928 while (cc > 0) { 929 c = *bp++; cc--; incount++; 930 fcode = ((long)c << BITS_MAX) + ent; 931 h = (c << HSHIFT) ^ ent; /* xor hashing */ 932 #ifdef _WINDOWS 933 /* 934 * Check hash index for an overflow. 935 */ 936 if (h >= HSIZE) 937 h -= HSIZE; 938 #endif 939 hp = &sp->enc_hashtab[h]; 940 if (hp->hash == fcode) { 941 ent = hp->code; 942 continue; 943 } 944 if (hp->hash >= 0) { 945 /* 946 * Primary hash failed, check secondary hash. 947 */ 948 disp = HSIZE - h; 949 if (h == 0) 950 disp = 1; 951 do { 952 /* 953 * Avoid pointer arithmetic because of 954 * wraparound problems with segments. 955 */ 956 if ((h -= disp) < 0) 957 h += HSIZE; 958 hp = &sp->enc_hashtab[h]; 959 if (hp->hash == fcode) { 960 ent = hp->code; 961 goto hit; 962 } 963 } while (hp->hash >= 0); 964 } 965 /* 966 * New entry, emit code and add to table. 967 */ 968 /* 969 * Verify there is space in the buffer for the code 970 * and any potential Clear code that might be emitted 971 * below. The value of limit is setup so that there 972 * are at least 4 bytes free--room for 2 codes. 973 */ 974 if (op > limit) { 975 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 976 if( !TIFFFlushData1(tif) ) 977 return 0; 978 op = tif->tif_rawdata; 979 } 980 PutNextCode(op, ent); 981 ent = (hcode_t)c; 982 hp->code = (hcode_t)(free_ent++); 983 hp->hash = fcode; 984 if (free_ent == CODE_MAX-1) { 985 /* table is full, emit clear code and reset */ 986 cl_hash(sp); 987 sp->enc_ratio = 0; 988 incount = 0; 989 outcount = 0; 990 free_ent = CODE_FIRST; 991 PutNextCode(op, CODE_CLEAR); 992 nbits = BITS_MIN; 993 maxcode = MAXCODE(BITS_MIN); 994 } else { 995 /* 996 * If the next entry is going to be too big for 997 * the code size, then increase it, if possible. 998 */ 999 if (free_ent > maxcode) { 1000 nbits++; 1001 assert(nbits <= BITS_MAX); 1002 maxcode = (int) MAXCODE(nbits); 1003 } else if (incount >= checkpoint) { 1004 long rat; 1005 /* 1006 * Check compression ratio and, if things seem 1007 * to be slipping, clear the hash table and 1008 * reset state. The compression ratio is a 1009 * 24+8-bit fractional number. 1010 */ 1011 checkpoint = incount+CHECK_GAP; 1012 CALCRATIO(sp, rat); 1013 if (rat <= sp->enc_ratio) { 1014 cl_hash(sp); 1015 sp->enc_ratio = 0; 1016 incount = 0; 1017 outcount = 0; 1018 free_ent = CODE_FIRST; 1019 PutNextCode(op, CODE_CLEAR); 1020 nbits = BITS_MIN; 1021 maxcode = MAXCODE(BITS_MIN); 1022 } else 1023 sp->enc_ratio = rat; 1024 } 1025 } 1026 hit: 1027 ; 1028 } 1029 1030 /* 1031 * Restore global state. 1032 */ 1033 sp->enc_incount = incount; 1034 sp->enc_outcount = outcount; 1035 sp->enc_checkpoint = checkpoint; 1036 sp->enc_oldcode = ent; 1037 sp->lzw_nextdata = nextdata; 1038 sp->lzw_nextbits = nextbits; 1039 sp->lzw_free_ent = (unsigned short)free_ent; 1040 sp->lzw_maxcode = (unsigned short)maxcode; 1041 sp->lzw_nbits = (unsigned short)nbits; 1042 tif->tif_rawcp = op; 1043 return (1); 1044 } 1045 1046 /* 1047 * Finish off an encoded strip by flushing the last 1048 * string and tacking on an End Of Information code. 1049 */ 1050 static int 1051 LZWPostEncode(TIFF* tif) 1052 { 1053 register LZWCodecState *sp = EncoderState(tif); 1054 uint8* op = tif->tif_rawcp; 1055 long nextbits = sp->lzw_nextbits; 1056 unsigned long nextdata = sp->lzw_nextdata; 1057 long outcount = sp->enc_outcount; 1058 int nbits = sp->lzw_nbits; 1059 1060 if (op > sp->enc_rawlimit) { 1061 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1062 if( !TIFFFlushData1(tif) ) 1063 return 0; 1064 op = tif->tif_rawdata; 1065 } 1066 if (sp->enc_oldcode != (hcode_t) -1) { 1067 int free_ent = sp->lzw_free_ent; 1068 1069 PutNextCode(op, sp->enc_oldcode); 1070 sp->enc_oldcode = (hcode_t) -1; 1071 free_ent ++; 1072 1073 if (free_ent == CODE_MAX-1) { 1074 /* table is full, emit clear code and reset */ 1075 outcount = 0; 1076 PutNextCode(op, CODE_CLEAR); 1077 nbits = BITS_MIN; 1078 } else { 1079 /* 1080 * If the next entry is going to be too big for 1081 * the code size, then increase it, if possible. 1082 */ 1083 if (free_ent > sp->lzw_maxcode) { 1084 nbits++; 1085 assert(nbits <= BITS_MAX); 1086 } 1087 } 1088 } 1089 PutNextCode(op, CODE_EOI); 1090 /* Explicit 0xff masking to make icc -check=conversions happy */ 1091 if (nextbits > 0) 1092 *op++ = (unsigned char)((nextdata << (8-nextbits))&0xff); 1093 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1094 return (1); 1095 } 1096 1097 /* 1098 * Reset encoding hash table. 1099 */ 1100 static void 1101 cl_hash(LZWCodecState* sp) 1102 { 1103 register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; 1104 register long i = HSIZE-8; 1105 1106 do { 1107 i -= 8; 1108 hp[-7].hash = -1; 1109 hp[-6].hash = -1; 1110 hp[-5].hash = -1; 1111 hp[-4].hash = -1; 1112 hp[-3].hash = -1; 1113 hp[-2].hash = -1; 1114 hp[-1].hash = -1; 1115 hp[ 0].hash = -1; 1116 hp -= 8; 1117 } while (i >= 0); 1118 for (i += 8; i > 0; i--, hp--) 1119 hp->hash = -1; 1120 } 1121 1122 static void 1123 LZWCleanup(TIFF* tif) 1124 { 1125 (void)TIFFPredictorCleanup(tif); 1126 1127 assert(tif->tif_data != 0); 1128 1129 if (DecoderState(tif)->dec_codetab) 1130 _TIFFfree(DecoderState(tif)->dec_codetab); 1131 1132 if (EncoderState(tif)->enc_hashtab) 1133 _TIFFfree(EncoderState(tif)->enc_hashtab); 1134 1135 _TIFFfree(tif->tif_data); 1136 tif->tif_data = NULL; 1137 1138 _TIFFSetDefaultCompressionState(tif); 1139 } 1140 1141 int 1142 TIFFInitLZW(TIFF* tif, int scheme) 1143 { 1144 static const char module[] = "TIFFInitLZW"; 1145 assert(scheme == COMPRESSION_LZW); 1146 /* 1147 * Allocate state block so tag methods have storage to record values. 1148 */ 1149 tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState)); 1150 if (tif->tif_data == NULL) 1151 goto bad; 1152 DecoderState(tif)->dec_codetab = NULL; 1153 DecoderState(tif)->dec_decode = NULL; 1154 EncoderState(tif)->enc_hashtab = NULL; 1155 LZWState(tif)->rw_mode = tif->tif_mode; 1156 1157 /* 1158 * Install codec methods. 1159 */ 1160 tif->tif_fixuptags = LZWFixupTags; 1161 tif->tif_setupdecode = LZWSetupDecode; 1162 tif->tif_predecode = LZWPreDecode; 1163 tif->tif_decoderow = LZWDecode; 1164 tif->tif_decodestrip = LZWDecode; 1165 tif->tif_decodetile = LZWDecode; 1166 tif->tif_setupencode = LZWSetupEncode; 1167 tif->tif_preencode = LZWPreEncode; 1168 tif->tif_postencode = LZWPostEncode; 1169 tif->tif_encoderow = LZWEncode; 1170 tif->tif_encodestrip = LZWEncode; 1171 tif->tif_encodetile = LZWEncode; 1172 tif->tif_cleanup = LZWCleanup; 1173 /* 1174 * Setup predictor setup. 1175 */ 1176 (void) TIFFPredictorInit(tif); 1177 return (1); 1178 bad: 1179 TIFFErrorExt(tif->tif_clientdata, module, 1180 "No space for LZW state block"); 1181 return (0); 1182 } 1183 1184 /* 1185 * Copyright (c) 1985, 1986 The Regents of the University of California. 1186 * All rights reserved. 1187 * 1188 * This code is derived from software contributed to Berkeley by 1189 * James A. Woods, derived from original work by Spencer Thomas 1190 * and Joseph Orost. 1191 * 1192 * Redistribution and use in source and binary forms are permitted 1193 * provided that the above copyright notice and this paragraph are 1194 * duplicated in all such forms and that any documentation, 1195 * advertising materials, and other materials related to such 1196 * distribution and use acknowledge that the software was developed 1197 * by the University of California, Berkeley. The name of the 1198 * University may not be used to endorse or promote products derived 1199 * from this software without specific prior written permission. 1200 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1201 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1202 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1203 */ 1204 #endif /* LZW_SUPPORT */ 1205 1206 /* vim: set ts=8 sts=8 sw=8 noet: */ 1207 /* 1208 * Local Variables: 1209 * mode: c 1210 * c-basic-offset: 8 1211 * fill-column: 78 1212 * End: 1213 */ 1214