1 /* $Id: tif_lzw.c,v 1.49 2015-08-30 21:07:44 erouault Exp $ */ 2 3 /* 4 * Copyright (c) 1988-1997 Sam Leffler 5 * Copyright (c) 1991-1997 Silicon Graphics, Inc. 6 * 7 * Permission to use, copy, modify, distribute, and sell this software and 8 * its documentation for any purpose is hereby granted without fee, provided 9 * that (i) the above copyright notices and this permission notice appear in 10 * all copies of the software and related documentation, and (ii) the names of 11 * Sam Leffler and Silicon Graphics may not be used in any advertising or 12 * publicity relating to the software without the specific, prior written 13 * permission of Sam Leffler and Silicon Graphics. 14 * 15 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 17 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 18 * 19 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR 20 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, 21 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 22 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 23 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 24 * OF THIS SOFTWARE. 25 */ 26 27 #include "tiffiop.h" 28 #ifdef LZW_SUPPORT 29 /* 30 * TIFF Library. 31 * Rev 5.0 Lempel-Ziv & Welch Compression Support 32 * 33 * This code is derived from the compress program whose code is 34 * derived from software contributed to Berkeley by James A. Woods, 35 * derived from original work by Spencer Thomas and Joseph Orost. 36 * 37 * The original Berkeley copyright notice appears below in its entirety. 38 */ 39 #include "tif_predict.h" 40 41 #include <stdio.h> 42 43 /* 44 * NB: The 5.0 spec describes a different algorithm than Aldus 45 * implements. Specifically, Aldus does code length transitions 46 * one code earlier than should be done (for real LZW). 47 * Earlier versions of this library implemented the correct 48 * LZW algorithm, but emitted codes in a bit order opposite 49 * to the TIFF spec. Thus, to maintain compatibility w/ Aldus 50 * we interpret MSB-LSB ordered codes to be images written w/ 51 * old versions of this library, but otherwise adhere to the 52 * Aldus "off by one" algorithm. 53 * 54 * Future revisions to the TIFF spec are expected to "clarify this issue". 55 */ 56 #define LZW_COMPAT /* include backwards compatibility code */ 57 /* 58 * Each strip of data is supposed to be terminated by a CODE_EOI. 59 * If the following #define is included, the decoder will also 60 * check for end-of-strip w/o seeing this code. This makes the 61 * library more robust, but also slower. 62 */ 63 #define LZW_CHECKEOS /* include checks for strips w/o EOI code */ 64 65 #define MAXCODE(n) ((1L<<(n))-1) 66 /* 67 * The TIFF spec specifies that encoded bit 68 * strings range from 9 to 12 bits. 69 */ 70 #define BITS_MIN 9 /* start with 9 bits */ 71 #define BITS_MAX 12 /* max of 12 bit strings */ 72 /* predefined codes */ 73 #define CODE_CLEAR 256 /* code to clear string table */ 74 #define CODE_EOI 257 /* end-of-information code */ 75 #define CODE_FIRST 258 /* first free code entry */ 76 #define CODE_MAX MAXCODE(BITS_MAX) 77 #define HSIZE 9001L /* 91% occupancy */ 78 #define HSHIFT (13-8) 79 #ifdef LZW_COMPAT 80 /* NB: +1024 is for compatibility with old files */ 81 #define CSIZE (MAXCODE(BITS_MAX)+1024L) 82 #else 83 #define CSIZE (MAXCODE(BITS_MAX)+1L) 84 #endif 85 86 /* 87 * State block for each open TIFF file using LZW 88 * compression/decompression. Note that the predictor 89 * state block must be first in this data structure. 90 */ 91 typedef struct { 92 TIFFPredictorState predict; /* predictor super class */ 93 94 unsigned short nbits; /* # of bits/code */ 95 unsigned short maxcode; /* maximum code for lzw_nbits */ 96 unsigned short free_ent; /* next free entry in hash table */ 97 unsigned long nextdata; /* next bits of i/o */ 98 long nextbits; /* # of valid bits in lzw_nextdata */ 99 100 int rw_mode; /* preserve rw_mode from init */ 101 } LZWBaseState; 102 103 #define lzw_nbits base.nbits 104 #define lzw_maxcode base.maxcode 105 #define lzw_free_ent base.free_ent 106 #define lzw_nextdata base.nextdata 107 #define lzw_nextbits base.nextbits 108 109 /* 110 * Encoding-specific state. 111 */ 112 typedef uint16 hcode_t; /* codes fit in 16 bits */ 113 typedef struct { 114 long hash; 115 hcode_t code; 116 } hash_t; 117 118 /* 119 * Decoding-specific state. 120 */ 121 typedef struct code_ent { 122 struct code_ent *next; 123 unsigned short length; /* string len, including this token */ 124 unsigned char value; /* data value */ 125 unsigned char firstchar; /* first token of string */ 126 } code_t; 127 128 typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16); 129 130 typedef struct { 131 LZWBaseState base; 132 133 /* Decoding specific data */ 134 long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ 135 long dec_restart; /* restart count */ 136 #ifdef LZW_CHECKEOS 137 uint64 dec_bitsleft; /* available bits in raw data */ 138 #endif 139 decodeFunc dec_decode; /* regular or backwards compatible */ 140 code_t* dec_codep; /* current recognized code */ 141 code_t* dec_oldcodep; /* previously recognized code */ 142 code_t* dec_free_entp; /* next free entry */ 143 code_t* dec_maxcodep; /* max available entry */ 144 code_t* dec_codetab; /* kept separate for small machines */ 145 146 /* Encoding specific data */ 147 int enc_oldcode; /* last code encountered */ 148 long enc_checkpoint; /* point at which to clear table */ 149 #define CHECK_GAP 10000 /* enc_ratio check interval */ 150 long enc_ratio; /* current compression ratio */ 151 long enc_incount; /* (input) data bytes encoded */ 152 long enc_outcount; /* encoded (output) bytes */ 153 uint8* enc_rawlimit; /* bound on tif_rawdata buffer */ 154 hash_t* enc_hashtab; /* kept separate for small machines */ 155 } LZWCodecState; 156 157 #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) 158 #define DecoderState(tif) ((LZWCodecState*) LZWState(tif)) 159 #define EncoderState(tif) ((LZWCodecState*) LZWState(tif)) 160 161 static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 162 #ifdef LZW_COMPAT 163 static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 164 #endif 165 static void cl_hash(LZWCodecState*); 166 167 /* 168 * LZW Decoder. 169 */ 170 171 #ifdef LZW_CHECKEOS 172 /* 173 * This check shouldn't be necessary because each 174 * strip is suppose to be terminated with CODE_EOI. 175 */ 176 #define NextCode(_tif, _sp, _bp, _code, _get) { \ 177 if ((_sp)->dec_bitsleft < (uint64)nbits) { \ 178 TIFFWarningExt(_tif->tif_clientdata, module, \ 179 "LZWDecode: Strip %d not terminated with EOI code", \ 180 _tif->tif_curstrip); \ 181 _code = CODE_EOI; \ 182 } else { \ 183 _get(_sp,_bp,_code); \ 184 (_sp)->dec_bitsleft -= nbits; \ 185 } \ 186 } 187 #else 188 #define NextCode(tif, sp, bp, code, get) get(sp, bp, code) 189 #endif 190 191 static int 192 LZWFixupTags(TIFF* tif) 193 { 194 (void) tif; 195 return (1); 196 } 197 198 static int 199 LZWSetupDecode(TIFF* tif) 200 { 201 static const char module[] = "LZWSetupDecode"; 202 LZWCodecState* sp = DecoderState(tif); 203 int code; 204 205 if( sp == NULL ) 206 { 207 /* 208 * Allocate state block so tag methods have storage to record 209 * values. 210 */ 211 tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState)); 212 if (tif->tif_data == NULL) 213 { 214 TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block"); 215 return (0); 216 } 217 218 DecoderState(tif)->dec_codetab = NULL; 219 DecoderState(tif)->dec_decode = NULL; 220 221 /* 222 * Setup predictor setup. 223 */ 224 (void) TIFFPredictorInit(tif); 225 226 sp = DecoderState(tif); 227 } 228 229 assert(sp != NULL); 230 231 if (sp->dec_codetab == NULL) { 232 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); 233 if (sp->dec_codetab == NULL) { 234 TIFFErrorExt(tif->tif_clientdata, module, 235 "No space for LZW code table"); 236 return (0); 237 } 238 /* 239 * Pre-load the table. 240 */ 241 code = 255; 242 do { 243 sp->dec_codetab[code].value = code; 244 sp->dec_codetab[code].firstchar = code; 245 sp->dec_codetab[code].length = 1; 246 sp->dec_codetab[code].next = NULL; 247 } while (code--); 248 /* 249 * Zero-out the unused entries 250 */ 251 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0, 252 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t)); 253 } 254 return (1); 255 } 256 257 /* 258 * Setup state for decoding a strip. 259 */ 260 static int 261 LZWPreDecode(TIFF* tif, uint16 s) 262 { 263 static const char module[] = "LZWPreDecode"; 264 LZWCodecState *sp = DecoderState(tif); 265 266 (void) s; 267 assert(sp != NULL); 268 if( sp->dec_codetab == NULL ) 269 { 270 tif->tif_setupdecode( tif ); 271 if( sp->dec_codetab == NULL ) 272 return (0); 273 } 274 275 /* 276 * Check for old bit-reversed codes. 277 */ 278 if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { 279 #ifdef LZW_COMPAT 280 if (!sp->dec_decode) { 281 TIFFWarningExt(tif->tif_clientdata, module, 282 "Old-style LZW codes, convert file"); 283 /* 284 * Override default decoding methods with 285 * ones that deal with the old coding. 286 * Otherwise the predictor versions set 287 * above will call the compatibility routines 288 * through the dec_decode method. 289 */ 290 tif->tif_decoderow = LZWDecodeCompat; 291 tif->tif_decodestrip = LZWDecodeCompat; 292 tif->tif_decodetile = LZWDecodeCompat; 293 /* 294 * If doing horizontal differencing, must 295 * re-setup the predictor logic since we 296 * switched the basic decoder methods... 297 */ 298 (*tif->tif_setupdecode)(tif); 299 sp->dec_decode = LZWDecodeCompat; 300 } 301 sp->lzw_maxcode = MAXCODE(BITS_MIN); 302 #else /* !LZW_COMPAT */ 303 if (!sp->dec_decode) { 304 TIFFErrorExt(tif->tif_clientdata, module, 305 "Old-style LZW codes not supported"); 306 sp->dec_decode = LZWDecode; 307 } 308 return (0); 309 #endif/* !LZW_COMPAT */ 310 } else { 311 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; 312 sp->dec_decode = LZWDecode; 313 } 314 sp->lzw_nbits = BITS_MIN; 315 sp->lzw_nextbits = 0; 316 sp->lzw_nextdata = 0; 317 318 sp->dec_restart = 0; 319 sp->dec_nbitsmask = MAXCODE(BITS_MIN); 320 #ifdef LZW_CHECKEOS 321 sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3; 322 #endif 323 sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; 324 /* 325 * Zero entries that are not yet filled in. We do 326 * this to guard against bogus input data that causes 327 * us to index into undefined entries. If you can 328 * come up with a way to safely bounds-check input codes 329 * while decoding then you can remove this operation. 330 */ 331 _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); 332 sp->dec_oldcodep = &sp->dec_codetab[-1]; 333 sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; 334 return (1); 335 } 336 337 /* 338 * Decode a "hunk of data". 339 */ 340 #define GetNextCode(sp, bp, code) { \ 341 nextdata = (nextdata<<8) | *(bp)++; \ 342 nextbits += 8; \ 343 if (nextbits < nbits) { \ 344 nextdata = (nextdata<<8) | *(bp)++; \ 345 nextbits += 8; \ 346 } \ 347 code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ 348 nextbits -= nbits; \ 349 } 350 351 static void 352 codeLoop(TIFF* tif, const char* module) 353 { 354 TIFFErrorExt(tif->tif_clientdata, module, 355 "Bogus encoding, loop in the code table; scanline %d", 356 tif->tif_row); 357 } 358 359 static int 360 LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 361 { 362 static const char module[] = "LZWDecode"; 363 LZWCodecState *sp = DecoderState(tif); 364 char *op = (char*) op0; 365 long occ = (long) occ0; 366 char *tp; 367 unsigned char *bp; 368 hcode_t code; 369 int len; 370 long nbits, nextbits, nbitsmask; 371 unsigned long nextdata; 372 code_t *codep, *free_entp, *maxcodep, *oldcodep; 373 374 (void) s; 375 assert(sp != NULL); 376 assert(sp->dec_codetab != NULL); 377 378 /* 379 Fail if value does not fit in long. 380 */ 381 if ((tmsize_t) occ != occ0) 382 return (0); 383 /* 384 * Restart interrupted output operation. 385 */ 386 if (sp->dec_restart) { 387 long residue; 388 389 codep = sp->dec_codep; 390 residue = codep->length - sp->dec_restart; 391 if (residue > occ) { 392 /* 393 * Residue from previous decode is sufficient 394 * to satisfy decode request. Skip to the 395 * start of the decoded string, place decoded 396 * values in the output buffer, and return. 397 */ 398 sp->dec_restart += occ; 399 do { 400 codep = codep->next; 401 } while (--residue > occ && codep); 402 if (codep) { 403 tp = op + occ; 404 do { 405 *--tp = codep->value; 406 codep = codep->next; 407 } while (--occ && codep); 408 } 409 return (1); 410 } 411 /* 412 * Residue satisfies only part of the decode request. 413 */ 414 op += residue, occ -= residue; 415 tp = op; 416 do { 417 int t; 418 --tp; 419 t = codep->value; 420 codep = codep->next; 421 *tp = t; 422 } while (--residue && codep); 423 sp->dec_restart = 0; 424 } 425 426 bp = (unsigned char *)tif->tif_rawcp; 427 nbits = sp->lzw_nbits; 428 nextdata = sp->lzw_nextdata; 429 nextbits = sp->lzw_nextbits; 430 nbitsmask = sp->dec_nbitsmask; 431 oldcodep = sp->dec_oldcodep; 432 free_entp = sp->dec_free_entp; 433 maxcodep = sp->dec_maxcodep; 434 435 while (occ > 0) { 436 NextCode(tif, sp, bp, code, GetNextCode); 437 if (code == CODE_EOI) 438 break; 439 if (code == CODE_CLEAR) { 440 do { 441 free_entp = sp->dec_codetab + CODE_FIRST; 442 _TIFFmemset(free_entp, 0, 443 (CSIZE - CODE_FIRST) * sizeof (code_t)); 444 nbits = BITS_MIN; 445 nbitsmask = MAXCODE(BITS_MIN); 446 maxcodep = sp->dec_codetab + nbitsmask-1; 447 NextCode(tif, sp, bp, code, GetNextCode); 448 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 449 if (code == CODE_EOI) 450 break; 451 if (code > CODE_CLEAR) { 452 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 453 "LZWDecode: Corrupted LZW table at scanline %d", 454 tif->tif_row); 455 return (0); 456 } 457 *op++ = (char)code, occ--; 458 oldcodep = sp->dec_codetab + code; 459 continue; 460 } 461 codep = sp->dec_codetab + code; 462 463 /* 464 * Add the new entry to the code table. 465 */ 466 if (free_entp < &sp->dec_codetab[0] || 467 free_entp >= &sp->dec_codetab[CSIZE]) { 468 TIFFErrorExt(tif->tif_clientdata, module, 469 "Corrupted LZW table at scanline %d", 470 tif->tif_row); 471 return (0); 472 } 473 474 free_entp->next = oldcodep; 475 if (free_entp->next < &sp->dec_codetab[0] || 476 free_entp->next >= &sp->dec_codetab[CSIZE]) { 477 TIFFErrorExt(tif->tif_clientdata, module, 478 "Corrupted LZW table at scanline %d", 479 tif->tif_row); 480 return (0); 481 } 482 free_entp->firstchar = free_entp->next->firstchar; 483 free_entp->length = free_entp->next->length+1; 484 free_entp->value = (codep < free_entp) ? 485 codep->firstchar : free_entp->firstchar; 486 if (++free_entp > maxcodep) { 487 if (++nbits > BITS_MAX) /* should not happen */ 488 nbits = BITS_MAX; 489 nbitsmask = MAXCODE(nbits); 490 maxcodep = sp->dec_codetab + nbitsmask-1; 491 } 492 oldcodep = codep; 493 if (code >= 256) { 494 /* 495 * Code maps to a string, copy string 496 * value to output (written in reverse). 497 */ 498 if(codep->length == 0) { 499 TIFFErrorExt(tif->tif_clientdata, module, 500 "Wrong length of decoded string: " 501 "data probably corrupted at scanline %d", 502 tif->tif_row); 503 return (0); 504 } 505 if (codep->length > occ) { 506 /* 507 * String is too long for decode buffer, 508 * locate portion that will fit, copy to 509 * the decode buffer, and setup restart 510 * logic for the next decoding call. 511 */ 512 sp->dec_codep = codep; 513 do { 514 codep = codep->next; 515 } while (codep && codep->length > occ); 516 if (codep) { 517 sp->dec_restart = (long)occ; 518 tp = op + occ; 519 do { 520 *--tp = codep->value; 521 codep = codep->next; 522 } while (--occ && codep); 523 if (codep) 524 codeLoop(tif, module); 525 } 526 break; 527 } 528 len = codep->length; 529 tp = op + len; 530 do { 531 int t; 532 --tp; 533 t = codep->value; 534 codep = codep->next; 535 *tp = t; 536 } while (codep && tp > op); 537 if (codep) { 538 codeLoop(tif, module); 539 break; 540 } 541 assert(occ >= len); 542 op += len, occ -= len; 543 } else 544 *op++ = (char)code, occ--; 545 } 546 547 tif->tif_rawcp = (uint8*) bp; 548 sp->lzw_nbits = (unsigned short) nbits; 549 sp->lzw_nextdata = nextdata; 550 sp->lzw_nextbits = nextbits; 551 sp->dec_nbitsmask = nbitsmask; 552 sp->dec_oldcodep = oldcodep; 553 sp->dec_free_entp = free_entp; 554 sp->dec_maxcodep = maxcodep; 555 556 if (occ > 0) { 557 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 558 TIFFErrorExt(tif->tif_clientdata, module, 559 "Not enough data at scanline %d (short %I64d bytes)", 560 tif->tif_row, (unsigned __int64) occ); 561 #else 562 TIFFErrorExt(tif->tif_clientdata, module, 563 "Not enough data at scanline %d (short %llu bytes)", 564 tif->tif_row, (unsigned long long) occ); 565 #endif 566 return (0); 567 } 568 return (1); 569 } 570 571 #ifdef LZW_COMPAT 572 /* 573 * Decode a "hunk of data" for old images. 574 */ 575 #define GetNextCodeCompat(sp, bp, code) { \ 576 nextdata |= (unsigned long) *(bp)++ << nextbits; \ 577 nextbits += 8; \ 578 if (nextbits < nbits) { \ 579 nextdata |= (unsigned long) *(bp)++ << nextbits;\ 580 nextbits += 8; \ 581 } \ 582 code = (hcode_t)(nextdata & nbitsmask); \ 583 nextdata >>= nbits; \ 584 nextbits -= nbits; \ 585 } 586 587 static int 588 LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 589 { 590 static const char module[] = "LZWDecodeCompat"; 591 LZWCodecState *sp = DecoderState(tif); 592 char *op = (char*) op0; 593 long occ = (long) occ0; 594 char *tp; 595 unsigned char *bp; 596 int code, nbits; 597 long nextbits, nextdata, nbitsmask; 598 code_t *codep, *free_entp, *maxcodep, *oldcodep; 599 600 (void) s; 601 assert(sp != NULL); 602 603 /* 604 Fail if value does not fit in long. 605 */ 606 if ((tmsize_t) occ != occ0) 607 return (0); 608 609 /* 610 * Restart interrupted output operation. 611 */ 612 if (sp->dec_restart) { 613 long residue; 614 615 codep = sp->dec_codep; 616 residue = codep->length - sp->dec_restart; 617 if (residue > occ) { 618 /* 619 * Residue from previous decode is sufficient 620 * to satisfy decode request. Skip to the 621 * start of the decoded string, place decoded 622 * values in the output buffer, and return. 623 */ 624 sp->dec_restart += occ; 625 do { 626 codep = codep->next; 627 } while (--residue > occ); 628 tp = op + occ; 629 do { 630 *--tp = codep->value; 631 codep = codep->next; 632 } while (--occ); 633 return (1); 634 } 635 /* 636 * Residue satisfies only part of the decode request. 637 */ 638 op += residue, occ -= residue; 639 tp = op; 640 do { 641 *--tp = codep->value; 642 codep = codep->next; 643 } while (--residue); 644 sp->dec_restart = 0; 645 } 646 647 bp = (unsigned char *)tif->tif_rawcp; 648 nbits = sp->lzw_nbits; 649 nextdata = sp->lzw_nextdata; 650 nextbits = sp->lzw_nextbits; 651 nbitsmask = sp->dec_nbitsmask; 652 oldcodep = sp->dec_oldcodep; 653 free_entp = sp->dec_free_entp; 654 maxcodep = sp->dec_maxcodep; 655 656 while (occ > 0) { 657 NextCode(tif, sp, bp, code, GetNextCodeCompat); 658 if (code == CODE_EOI) 659 break; 660 if (code == CODE_CLEAR) { 661 do { 662 free_entp = sp->dec_codetab + CODE_FIRST; 663 _TIFFmemset(free_entp, 0, 664 (CSIZE - CODE_FIRST) * sizeof (code_t)); 665 nbits = BITS_MIN; 666 nbitsmask = MAXCODE(BITS_MIN); 667 maxcodep = sp->dec_codetab + nbitsmask; 668 NextCode(tif, sp, bp, code, GetNextCodeCompat); 669 } while (code == CODE_CLEAR); /* consecutive CODE_CLEAR codes */ 670 if (code == CODE_EOI) 671 break; 672 if (code > CODE_CLEAR) { 673 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 674 "LZWDecode: Corrupted LZW table at scanline %d", 675 tif->tif_row); 676 return (0); 677 } 678 *op++ = code, occ--; 679 oldcodep = sp->dec_codetab + code; 680 continue; 681 } 682 codep = sp->dec_codetab + code; 683 684 /* 685 * Add the new entry to the code table. 686 */ 687 if (free_entp < &sp->dec_codetab[0] || 688 free_entp >= &sp->dec_codetab[CSIZE]) { 689 TIFFErrorExt(tif->tif_clientdata, module, 690 "Corrupted LZW table at scanline %d", tif->tif_row); 691 return (0); 692 } 693 694 free_entp->next = oldcodep; 695 if (free_entp->next < &sp->dec_codetab[0] || 696 free_entp->next >= &sp->dec_codetab[CSIZE]) { 697 TIFFErrorExt(tif->tif_clientdata, module, 698 "Corrupted LZW table at scanline %d", tif->tif_row); 699 return (0); 700 } 701 free_entp->firstchar = free_entp->next->firstchar; 702 free_entp->length = free_entp->next->length+1; 703 free_entp->value = (codep < free_entp) ? 704 codep->firstchar : free_entp->firstchar; 705 if (++free_entp > maxcodep) { 706 if (++nbits > BITS_MAX) /* should not happen */ 707 nbits = BITS_MAX; 708 nbitsmask = MAXCODE(nbits); 709 maxcodep = sp->dec_codetab + nbitsmask; 710 } 711 oldcodep = codep; 712 if (code >= 256) { 713 /* 714 * Code maps to a string, copy string 715 * value to output (written in reverse). 716 */ 717 if(codep->length == 0) { 718 TIFFErrorExt(tif->tif_clientdata, module, 719 "Wrong length of decoded " 720 "string: data probably corrupted at scanline %d", 721 tif->tif_row); 722 return (0); 723 } 724 if (codep->length > occ) { 725 /* 726 * String is too long for decode buffer, 727 * locate portion that will fit, copy to 728 * the decode buffer, and setup restart 729 * logic for the next decoding call. 730 */ 731 sp->dec_codep = codep; 732 do { 733 codep = codep->next; 734 } while (codep->length > occ); 735 sp->dec_restart = occ; 736 tp = op + occ; 737 do { 738 *--tp = codep->value; 739 codep = codep->next; 740 } while (--occ); 741 break; 742 } 743 assert(occ >= codep->length); 744 op += codep->length, occ -= codep->length; 745 tp = op; 746 do { 747 *--tp = codep->value; 748 } while( (codep = codep->next) != NULL ); 749 } else 750 *op++ = code, occ--; 751 } 752 753 tif->tif_rawcp = (uint8*) bp; 754 sp->lzw_nbits = nbits; 755 sp->lzw_nextdata = nextdata; 756 sp->lzw_nextbits = nextbits; 757 sp->dec_nbitsmask = nbitsmask; 758 sp->dec_oldcodep = oldcodep; 759 sp->dec_free_entp = free_entp; 760 sp->dec_maxcodep = maxcodep; 761 762 if (occ > 0) { 763 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 764 TIFFErrorExt(tif->tif_clientdata, module, 765 "Not enough data at scanline %d (short %I64d bytes)", 766 tif->tif_row, (unsigned __int64) occ); 767 #else 768 TIFFErrorExt(tif->tif_clientdata, module, 769 "Not enough data at scanline %d (short %llu bytes)", 770 tif->tif_row, (unsigned long long) occ); 771 #endif 772 return (0); 773 } 774 return (1); 775 } 776 #endif /* LZW_COMPAT */ 777 778 /* 779 * LZW Encoding. 780 */ 781 782 static int 783 LZWSetupEncode(TIFF* tif) 784 { 785 static const char module[] = "LZWSetupEncode"; 786 LZWCodecState* sp = EncoderState(tif); 787 788 assert(sp != NULL); 789 sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); 790 if (sp->enc_hashtab == NULL) { 791 TIFFErrorExt(tif->tif_clientdata, module, 792 "No space for LZW hash table"); 793 return (0); 794 } 795 return (1); 796 } 797 798 /* 799 * Reset encoding state at the start of a strip. 800 */ 801 static int 802 LZWPreEncode(TIFF* tif, uint16 s) 803 { 804 LZWCodecState *sp = EncoderState(tif); 805 806 (void) s; 807 assert(sp != NULL); 808 809 if( sp->enc_hashtab == NULL ) 810 { 811 tif->tif_setupencode( tif ); 812 } 813 814 sp->lzw_nbits = BITS_MIN; 815 sp->lzw_maxcode = MAXCODE(BITS_MIN); 816 sp->lzw_free_ent = CODE_FIRST; 817 sp->lzw_nextbits = 0; 818 sp->lzw_nextdata = 0; 819 sp->enc_checkpoint = CHECK_GAP; 820 sp->enc_ratio = 0; 821 sp->enc_incount = 0; 822 sp->enc_outcount = 0; 823 /* 824 * The 4 here insures there is space for 2 max-sized 825 * codes in LZWEncode and LZWPostDecode. 826 */ 827 sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; 828 cl_hash(sp); /* clear hash table */ 829 sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ 830 return (1); 831 } 832 833 #define CALCRATIO(sp, rat) { \ 834 if (incount > 0x007fffff) { /* NB: shift will overflow */\ 835 rat = outcount >> 8; \ 836 rat = (rat == 0 ? 0x7fffffff : incount/rat); \ 837 } else \ 838 rat = (incount<<8) / outcount; \ 839 } 840 841 /* Explicit 0xff masking to make icc -check=conversions happy */ 842 #define PutNextCode(op, c) { \ 843 nextdata = (nextdata << nbits) | c; \ 844 nextbits += nbits; \ 845 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 846 nextbits -= 8; \ 847 if (nextbits >= 8) { \ 848 *op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff); \ 849 nextbits -= 8; \ 850 } \ 851 outcount += nbits; \ 852 } 853 854 /* 855 * Encode a chunk of pixels. 856 * 857 * Uses an open addressing double hashing (no chaining) on the 858 * prefix code/next character combination. We do a variant of 859 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's 860 * relatively-prime secondary probe. Here, the modular division 861 * first probe is gives way to a faster exclusive-or manipulation. 862 * Also do block compression with an adaptive reset, whereby the 863 * code table is cleared when the compression ratio decreases, 864 * but after the table fills. The variable-length output codes 865 * are re-sized at this point, and a CODE_CLEAR is generated 866 * for the decoder. 867 */ 868 static int 869 LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) 870 { 871 register LZWCodecState *sp = EncoderState(tif); 872 register long fcode; 873 register hash_t *hp; 874 register int h, c; 875 hcode_t ent; 876 long disp; 877 long incount, outcount, checkpoint; 878 unsigned long nextdata; 879 long nextbits; 880 int free_ent, maxcode, nbits; 881 uint8* op; 882 uint8* limit; 883 884 (void) s; 885 if (sp == NULL) 886 return (0); 887 888 assert(sp->enc_hashtab != NULL); 889 890 /* 891 * Load local state. 892 */ 893 incount = sp->enc_incount; 894 outcount = sp->enc_outcount; 895 checkpoint = sp->enc_checkpoint; 896 nextdata = sp->lzw_nextdata; 897 nextbits = sp->lzw_nextbits; 898 free_ent = sp->lzw_free_ent; 899 maxcode = sp->lzw_maxcode; 900 nbits = sp->lzw_nbits; 901 op = tif->tif_rawcp; 902 limit = sp->enc_rawlimit; 903 ent = sp->enc_oldcode; 904 905 if (ent == (hcode_t) -1 && cc > 0) { 906 /* 907 * NB: This is safe because it can only happen 908 * at the start of a strip where we know there 909 * is space in the data buffer. 910 */ 911 PutNextCode(op, CODE_CLEAR); 912 ent = *bp++; cc--; incount++; 913 } 914 while (cc > 0) { 915 c = *bp++; cc--; incount++; 916 fcode = ((long)c << BITS_MAX) + ent; 917 h = (c << HSHIFT) ^ ent; /* xor hashing */ 918 #ifdef _WINDOWS 919 /* 920 * Check hash index for an overflow. 921 */ 922 if (h >= HSIZE) 923 h -= HSIZE; 924 #endif 925 hp = &sp->enc_hashtab[h]; 926 if (hp->hash == fcode) { 927 ent = hp->code; 928 continue; 929 } 930 if (hp->hash >= 0) { 931 /* 932 * Primary hash failed, check secondary hash. 933 */ 934 disp = HSIZE - h; 935 if (h == 0) 936 disp = 1; 937 do { 938 /* 939 * Avoid pointer arithmetic 'cuz of 940 * wraparound problems with segments. 941 */ 942 if ((h -= disp) < 0) 943 h += HSIZE; 944 hp = &sp->enc_hashtab[h]; 945 if (hp->hash == fcode) { 946 ent = hp->code; 947 goto hit; 948 } 949 } while (hp->hash >= 0); 950 } 951 /* 952 * New entry, emit code and add to table. 953 */ 954 /* 955 * Verify there is space in the buffer for the code 956 * and any potential Clear code that might be emitted 957 * below. The value of limit is setup so that there 958 * are at least 4 bytes free--room for 2 codes. 959 */ 960 if (op > limit) { 961 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 962 TIFFFlushData1(tif); 963 op = tif->tif_rawdata; 964 } 965 PutNextCode(op, ent); 966 ent = c; 967 hp->code = free_ent++; 968 hp->hash = fcode; 969 if (free_ent == CODE_MAX-1) { 970 /* table is full, emit clear code and reset */ 971 cl_hash(sp); 972 sp->enc_ratio = 0; 973 incount = 0; 974 outcount = 0; 975 free_ent = CODE_FIRST; 976 PutNextCode(op, CODE_CLEAR); 977 nbits = BITS_MIN; 978 maxcode = MAXCODE(BITS_MIN); 979 } else { 980 /* 981 * If the next entry is going to be too big for 982 * the code size, then increase it, if possible. 983 */ 984 if (free_ent > maxcode) { 985 nbits++; 986 assert(nbits <= BITS_MAX); 987 maxcode = (int) MAXCODE(nbits); 988 } else if (incount >= checkpoint) { 989 long rat; 990 /* 991 * Check compression ratio and, if things seem 992 * to be slipping, clear the hash table and 993 * reset state. The compression ratio is a 994 * 24+8-bit fractional number. 995 */ 996 checkpoint = incount+CHECK_GAP; 997 CALCRATIO(sp, rat); 998 if (rat <= sp->enc_ratio) { 999 cl_hash(sp); 1000 sp->enc_ratio = 0; 1001 incount = 0; 1002 outcount = 0; 1003 free_ent = CODE_FIRST; 1004 PutNextCode(op, CODE_CLEAR); 1005 nbits = BITS_MIN; 1006 maxcode = MAXCODE(BITS_MIN); 1007 } else 1008 sp->enc_ratio = rat; 1009 } 1010 } 1011 hit: 1012 ; 1013 } 1014 1015 /* 1016 * Restore global state. 1017 */ 1018 sp->enc_incount = incount; 1019 sp->enc_outcount = outcount; 1020 sp->enc_checkpoint = checkpoint; 1021 sp->enc_oldcode = ent; 1022 sp->lzw_nextdata = nextdata; 1023 sp->lzw_nextbits = nextbits; 1024 sp->lzw_free_ent = free_ent; 1025 sp->lzw_maxcode = maxcode; 1026 sp->lzw_nbits = nbits; 1027 tif->tif_rawcp = op; 1028 return (1); 1029 } 1030 1031 /* 1032 * Finish off an encoded strip by flushing the last 1033 * string and tacking on an End Of Information code. 1034 */ 1035 static int 1036 LZWPostEncode(TIFF* tif) 1037 { 1038 register LZWCodecState *sp = EncoderState(tif); 1039 uint8* op = tif->tif_rawcp; 1040 long nextbits = sp->lzw_nextbits; 1041 unsigned long nextdata = sp->lzw_nextdata; 1042 long outcount = sp->enc_outcount; 1043 int nbits = sp->lzw_nbits; 1044 1045 if (op > sp->enc_rawlimit) { 1046 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1047 TIFFFlushData1(tif); 1048 op = tif->tif_rawdata; 1049 } 1050 if (sp->enc_oldcode != (hcode_t) -1) { 1051 PutNextCode(op, sp->enc_oldcode); 1052 sp->enc_oldcode = (hcode_t) -1; 1053 } 1054 PutNextCode(op, CODE_EOI); 1055 /* Explicit 0xff masking to make icc -check=conversions happy */ 1056 if (nextbits > 0) 1057 *op++ = (unsigned char)((nextdata << (8-nextbits))&0xff); 1058 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1059 return (1); 1060 } 1061 1062 /* 1063 * Reset encoding hash table. 1064 */ 1065 static void 1066 cl_hash(LZWCodecState* sp) 1067 { 1068 register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; 1069 register long i = HSIZE-8; 1070 1071 do { 1072 i -= 8; 1073 hp[-7].hash = -1; 1074 hp[-6].hash = -1; 1075 hp[-5].hash = -1; 1076 hp[-4].hash = -1; 1077 hp[-3].hash = -1; 1078 hp[-2].hash = -1; 1079 hp[-1].hash = -1; 1080 hp[ 0].hash = -1; 1081 hp -= 8; 1082 } while (i >= 0); 1083 for (i += 8; i > 0; i--, hp--) 1084 hp->hash = -1; 1085 } 1086 1087 static void 1088 LZWCleanup(TIFF* tif) 1089 { 1090 (void)TIFFPredictorCleanup(tif); 1091 1092 assert(tif->tif_data != 0); 1093 1094 if (DecoderState(tif)->dec_codetab) 1095 _TIFFfree(DecoderState(tif)->dec_codetab); 1096 1097 if (EncoderState(tif)->enc_hashtab) 1098 _TIFFfree(EncoderState(tif)->enc_hashtab); 1099 1100 _TIFFfree(tif->tif_data); 1101 tif->tif_data = NULL; 1102 1103 _TIFFSetDefaultCompressionState(tif); 1104 } 1105 1106 int 1107 TIFFInitLZW(TIFF* tif, int scheme) 1108 { 1109 static const char module[] = "TIFFInitLZW"; 1110 assert(scheme == COMPRESSION_LZW); 1111 /* 1112 * Allocate state block so tag methods have storage to record values. 1113 */ 1114 tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState)); 1115 if (tif->tif_data == NULL) 1116 goto bad; 1117 DecoderState(tif)->dec_codetab = NULL; 1118 DecoderState(tif)->dec_decode = NULL; 1119 EncoderState(tif)->enc_hashtab = NULL; 1120 LZWState(tif)->rw_mode = tif->tif_mode; 1121 1122 /* 1123 * Install codec methods. 1124 */ 1125 tif->tif_fixuptags = LZWFixupTags; 1126 tif->tif_setupdecode = LZWSetupDecode; 1127 tif->tif_predecode = LZWPreDecode; 1128 tif->tif_decoderow = LZWDecode; 1129 tif->tif_decodestrip = LZWDecode; 1130 tif->tif_decodetile = LZWDecode; 1131 tif->tif_setupencode = LZWSetupEncode; 1132 tif->tif_preencode = LZWPreEncode; 1133 tif->tif_postencode = LZWPostEncode; 1134 tif->tif_encoderow = LZWEncode; 1135 tif->tif_encodestrip = LZWEncode; 1136 tif->tif_encodetile = LZWEncode; 1137 tif->tif_cleanup = LZWCleanup; 1138 /* 1139 * Setup predictor setup. 1140 */ 1141 (void) TIFFPredictorInit(tif); 1142 return (1); 1143 bad: 1144 TIFFErrorExt(tif->tif_clientdata, module, 1145 "No space for LZW state block"); 1146 return (0); 1147 } 1148 1149 /* 1150 * Copyright (c) 1985, 1986 The Regents of the University of California. 1151 * All rights reserved. 1152 * 1153 * This code is derived from software contributed to Berkeley by 1154 * James A. Woods, derived from original work by Spencer Thomas 1155 * and Joseph Orost. 1156 * 1157 * Redistribution and use in source and binary forms are permitted 1158 * provided that the above copyright notice and this paragraph are 1159 * duplicated in all such forms and that any documentation, 1160 * advertising materials, and other materials related to such 1161 * distribution and use acknowledge that the software was developed 1162 * by the University of California, Berkeley. The name of the 1163 * University may not be used to endorse or promote products derived 1164 * from this software without specific prior written permission. 1165 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1166 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1167 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1168 */ 1169 #endif /* LZW_SUPPORT */ 1170 1171 /* vim: set ts=8 sts=8 sw=8 noet: */ 1172 /* 1173 * Local Variables: 1174 * mode: c 1175 * c-basic-offset: 8 1176 * fill-column: 78 1177 * End: 1178 */ 1179