Home | History | Annotate | Download | only in libtiff
      1 /* $Id: tif_lzw.c,v 1.55 2017-05-17 09:38:58 erouault Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1988-1997 Sam Leffler
      5  * Copyright (c) 1991-1997 Silicon Graphics, Inc.
      6  *
      7  * Permission to use, copy, modify, distribute, and sell this software and
      8  * its documentation for any purpose is hereby granted without fee, provided
      9  * that (i) the above copyright notices and this permission notice appear in
     10  * all copies of the software and related documentation, and (ii) the names of
     11  * Sam Leffler and Silicon Graphics may not be used in any advertising or
     12  * publicity relating to the software without the specific, prior written
     13  * permission of Sam Leffler and Silicon Graphics.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
     16  * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
     17  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
     18  *
     19  * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
     20  * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
     21  * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
     22  * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
     23  * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
     24  * OF THIS SOFTWARE.
     25  */
     26 
     27 #include "tiffiop.h"
     28 #ifdef LZW_SUPPORT
     29 /*
     30  * TIFF Library.
     31  * Rev 5.0 Lempel-Ziv & Welch Compression Support
     32  *
     33  * This code is derived from the compress program whose code is
     34  * derived from software contributed to Berkeley by James A. Woods,
     35  * derived from original work by Spencer Thomas and Joseph Orost.
     36  *
     37  * The original Berkeley copyright notice appears below in its entirety.
     38  */
     39 #include "tif_predict.h"
     40 
     41 #include <stdio.h>
     42 
     43 /*
     44  * NB: The 5.0 spec describes a different algorithm than Aldus
     45  *     implements.  Specifically, Aldus does code length transitions
     46  *     one code earlier than should be done (for real LZW).
     47  *     Earlier versions of this library implemented the correct
     48  *     LZW algorithm, but emitted codes in a bit order opposite
     49  *     to the TIFF spec.  Thus, to maintain compatibility w/ Aldus
     50  *     we interpret MSB-LSB ordered codes to be images written w/
     51  *     old versions of this library, but otherwise adhere to the
     52  *     Aldus "off by one" algorithm.
     53  *
     54  * Future revisions to the TIFF spec are expected to "clarify this issue".
     55  */
     56 #define LZW_COMPAT              /* include backwards compatibility code */
     57 /*
     58  * Each strip of data is supposed to be terminated by a CODE_EOI.
     59  * If the following #define is included, the decoder will also
     60  * check for end-of-strip w/o seeing this code.  This makes the
     61  * library more robust, but also slower.
     62  */
     63 #define LZW_CHECKEOS            /* include checks for strips w/o EOI code */
     64 
     65 #define MAXCODE(n)	((1L<<(n))-1)
     66 /*
     67  * The TIFF spec specifies that encoded bit
     68  * strings range from 9 to 12 bits.
     69  */
     70 #define BITS_MIN        9               /* start with 9 bits */
     71 #define BITS_MAX        12              /* max of 12 bit strings */
     72 /* predefined codes */
     73 #define CODE_CLEAR      256             /* code to clear string table */
     74 #define CODE_EOI        257             /* end-of-information code */
     75 #define CODE_FIRST      258             /* first free code entry */
     76 #define CODE_MAX        MAXCODE(BITS_MAX)
     77 #define HSIZE           9001L           /* 91% occupancy */
     78 #define HSHIFT          (13-8)
     79 #ifdef LZW_COMPAT
     80 /* NB: +1024 is for compatibility with old files */
     81 #define CSIZE           (MAXCODE(BITS_MAX)+1024L)
     82 #else
     83 #define CSIZE           (MAXCODE(BITS_MAX)+1L)
     84 #endif
     85 
     86 /*
     87  * State block for each open TIFF file using LZW
     88  * compression/decompression.  Note that the predictor
     89  * state block must be first in this data structure.
     90  */
     91 typedef struct {
     92 	TIFFPredictorState predict;     /* predictor super class */
     93 
     94 	unsigned short  nbits;          /* # of bits/code */
     95 	unsigned short  maxcode;        /* maximum code for lzw_nbits */
     96 	unsigned short  free_ent;       /* next free entry in hash table */
     97 	unsigned long   nextdata;       /* next bits of i/o */
     98 	long            nextbits;       /* # of valid bits in lzw_nextdata */
     99 
    100 	int             rw_mode;        /* preserve rw_mode from init */
    101 } LZWBaseState;
    102 
    103 #define lzw_nbits       base.nbits
    104 #define lzw_maxcode     base.maxcode
    105 #define lzw_free_ent    base.free_ent
    106 #define lzw_nextdata    base.nextdata
    107 #define lzw_nextbits    base.nextbits
    108 
    109 /*
    110  * Encoding-specific state.
    111  */
    112 typedef uint16 hcode_t;			/* codes fit in 16 bits */
    113 typedef struct {
    114 	long	hash;
    115 	hcode_t	code;
    116 } hash_t;
    117 
    118 /*
    119  * Decoding-specific state.
    120  */
    121 typedef struct code_ent {
    122 	struct code_ent *next;
    123 	unsigned short	length;		/* string len, including this token */
    124 	unsigned char	value;		/* data value */
    125 	unsigned char	firstchar;	/* first token of string */
    126 } code_t;
    127 
    128 typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16);
    129 
    130 typedef struct {
    131 	LZWBaseState base;
    132 
    133 	/* Decoding specific data */
    134 	long    dec_nbitsmask;		/* lzw_nbits 1 bits, right adjusted */
    135 	long    dec_restart;		/* restart count */
    136 #ifdef LZW_CHECKEOS
    137 	uint64  dec_bitsleft;		/* available bits in raw data */
    138 #endif
    139 	decodeFunc dec_decode;		/* regular or backwards compatible */
    140 	code_t* dec_codep;		/* current recognized code */
    141 	code_t* dec_oldcodep;		/* previously recognized code */
    142 	code_t* dec_free_entp;		/* next free entry */
    143 	code_t* dec_maxcodep;		/* max available entry */
    144 	code_t* dec_codetab;		/* kept separate for small machines */
    145 
    146 	/* Encoding specific data */
    147 	int     enc_oldcode;		/* last code encountered */
    148 	long    enc_checkpoint;		/* point at which to clear table */
    149 #define CHECK_GAP	10000		/* enc_ratio check interval */
    150 	long    enc_ratio;		/* current compression ratio */
    151 	long    enc_incount;		/* (input) data bytes encoded */
    152 	long    enc_outcount;		/* encoded (output) bytes */
    153 	uint8*  enc_rawlimit;		/* bound on tif_rawdata buffer */
    154 	hash_t* enc_hashtab;		/* kept separate for small machines */
    155 } LZWCodecState;
    156 
    157 #define LZWState(tif)		((LZWBaseState*) (tif)->tif_data)
    158 #define DecoderState(tif)	((LZWCodecState*) LZWState(tif))
    159 #define EncoderState(tif)	((LZWCodecState*) LZWState(tif))
    160 
    161 static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
    162 #ifdef LZW_COMPAT
    163 static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
    164 #endif
    165 static void cl_hash(LZWCodecState*);
    166 
    167 /*
    168  * LZW Decoder.
    169  */
    170 
    171 #ifdef LZW_CHECKEOS
    172 /*
    173  * This check shouldn't be necessary because each
    174  * strip is suppose to be terminated with CODE_EOI.
    175  */
    176 #define	NextCode(_tif, _sp, _bp, _code, _get) {				\
    177 	if ((_sp)->dec_bitsleft < (uint64)nbits) {			\
    178 		TIFFWarningExt(_tif->tif_clientdata, module,		\
    179 		    "LZWDecode: Strip %d not terminated with EOI code", \
    180 		    _tif->tif_curstrip);				\
    181 		_code = CODE_EOI;					\
    182 	} else {							\
    183 		_get(_sp,_bp,_code);					\
    184 		(_sp)->dec_bitsleft -= nbits;				\
    185 	}								\
    186 }
    187 #else
    188 #define	NextCode(tif, sp, bp, code, get) get(sp, bp, code)
    189 #endif
    190 
    191 static int
    192 LZWFixupTags(TIFF* tif)
    193 {
    194 	(void) tif;
    195 	return (1);
    196 }
    197 
    198 static int
    199 LZWSetupDecode(TIFF* tif)
    200 {
    201 	static const char module[] = "LZWSetupDecode";
    202 	LZWCodecState* sp = DecoderState(tif);
    203 	int code;
    204 
    205 	if( sp == NULL )
    206 	{
    207 		/*
    208 		 * Allocate state block so tag methods have storage to record
    209 		 * values.
    210 		*/
    211 		tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState));
    212 		if (tif->tif_data == NULL)
    213 		{
    214 			TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block");
    215 			return (0);
    216 		}
    217 
    218 		DecoderState(tif)->dec_codetab = NULL;
    219 		DecoderState(tif)->dec_decode = NULL;
    220 
    221 		/*
    222 		 * Setup predictor setup.
    223 		 */
    224 		(void) TIFFPredictorInit(tif);
    225 
    226 		sp = DecoderState(tif);
    227 	}
    228 
    229 	assert(sp != NULL);
    230 
    231 	if (sp->dec_codetab == NULL) {
    232 		sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
    233 		if (sp->dec_codetab == NULL) {
    234 			TIFFErrorExt(tif->tif_clientdata, module,
    235 				     "No space for LZW code table");
    236 			return (0);
    237 		}
    238 		/*
    239 		 * Pre-load the table.
    240 		 */
    241 		code = 255;
    242 		do {
    243 			sp->dec_codetab[code].value = (unsigned char)code;
    244 			sp->dec_codetab[code].firstchar = (unsigned char)code;
    245 			sp->dec_codetab[code].length = 1;
    246 			sp->dec_codetab[code].next = NULL;
    247 		} while (code--);
    248 		/*
    249 		 * Zero-out the unused entries
    250                  */
    251                  _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
    252 			     (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
    253 	}
    254 	return (1);
    255 }
    256 
    257 /*
    258  * Setup state for decoding a strip.
    259  */
    260 static int
    261 LZWPreDecode(TIFF* tif, uint16 s)
    262 {
    263 	static const char module[] = "LZWPreDecode";
    264 	LZWCodecState *sp = DecoderState(tif);
    265 
    266 	(void) s;
    267 	assert(sp != NULL);
    268 	if( sp->dec_codetab == NULL )
    269         {
    270             tif->tif_setupdecode( tif );
    271 	    if( sp->dec_codetab == NULL )
    272 		return (0);
    273         }
    274 
    275 	/*
    276 	 * Check for old bit-reversed codes.
    277 	 */
    278 	if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
    279 #ifdef LZW_COMPAT
    280 		if (!sp->dec_decode) {
    281 			TIFFWarningExt(tif->tif_clientdata, module,
    282 			    "Old-style LZW codes, convert file");
    283 			/*
    284 			 * Override default decoding methods with
    285 			 * ones that deal with the old coding.
    286 			 * Otherwise the predictor versions set
    287 			 * above will call the compatibility routines
    288 			 * through the dec_decode method.
    289 			 */
    290 			tif->tif_decoderow = LZWDecodeCompat;
    291 			tif->tif_decodestrip = LZWDecodeCompat;
    292 			tif->tif_decodetile = LZWDecodeCompat;
    293 			/*
    294 			 * If doing horizontal differencing, must
    295 			 * re-setup the predictor logic since we
    296 			 * switched the basic decoder methods...
    297 			 */
    298 			(*tif->tif_setupdecode)(tif);
    299 			sp->dec_decode = LZWDecodeCompat;
    300 		}
    301 		sp->lzw_maxcode = MAXCODE(BITS_MIN);
    302 #else /* !LZW_COMPAT */
    303 		if (!sp->dec_decode) {
    304 			TIFFErrorExt(tif->tif_clientdata, module,
    305 			    "Old-style LZW codes not supported");
    306 			sp->dec_decode = LZWDecode;
    307 		}
    308 		return (0);
    309 #endif/* !LZW_COMPAT */
    310 	} else {
    311 		sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
    312 		sp->dec_decode = LZWDecode;
    313 	}
    314 	sp->lzw_nbits = BITS_MIN;
    315 	sp->lzw_nextbits = 0;
    316 	sp->lzw_nextdata = 0;
    317 
    318 	sp->dec_restart = 0;
    319 	sp->dec_nbitsmask = MAXCODE(BITS_MIN);
    320 #ifdef LZW_CHECKEOS
    321 	sp->dec_bitsleft = 0;
    322 #endif
    323 	sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
    324 	/*
    325 	 * Zero entries that are not yet filled in.  We do
    326 	 * this to guard against bogus input data that causes
    327 	 * us to index into undefined entries.  If you can
    328 	 * come up with a way to safely bounds-check input codes
    329 	 * while decoding then you can remove this operation.
    330 	 */
    331 	_TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
    332 	sp->dec_oldcodep = &sp->dec_codetab[-1];
    333 	sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
    334 	return (1);
    335 }
    336 
    337 /*
    338  * Decode a "hunk of data".
    339  */
    340 #define	GetNextCode(sp, bp, code) {				\
    341 	nextdata = (nextdata<<8) | *(bp)++;			\
    342 	nextbits += 8;						\
    343 	if (nextbits < nbits) {					\
    344 		nextdata = (nextdata<<8) | *(bp)++;		\
    345 		nextbits += 8;					\
    346 	}							\
    347 	code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);	\
    348 	nextbits -= nbits;					\
    349 }
    350 
    351 static void
    352 codeLoop(TIFF* tif, const char* module)
    353 {
    354 	TIFFErrorExt(tif->tif_clientdata, module,
    355 	    "Bogus encoding, loop in the code table; scanline %d",
    356 	    tif->tif_row);
    357 }
    358 
    359 static int
    360 LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
    361 {
    362 	static const char module[] = "LZWDecode";
    363 	LZWCodecState *sp = DecoderState(tif);
    364 	char *op = (char*) op0;
    365 	long occ = (long) occ0;
    366 	char *tp;
    367 	unsigned char *bp;
    368 	hcode_t code;
    369 	int len;
    370 	long nbits, nextbits, nbitsmask;
    371         unsigned long nextdata;
    372 	code_t *codep, *free_entp, *maxcodep, *oldcodep;
    373 
    374 	(void) s;
    375 	assert(sp != NULL);
    376         assert(sp->dec_codetab != NULL);
    377 
    378 	/*
    379 	  Fail if value does not fit in long.
    380 	*/
    381 	if ((tmsize_t) occ != occ0)
    382 	        return (0);
    383 	/*
    384 	 * Restart interrupted output operation.
    385 	 */
    386 	if (sp->dec_restart) {
    387 		long residue;
    388 
    389 		codep = sp->dec_codep;
    390 		residue = codep->length - sp->dec_restart;
    391 		if (residue > occ) {
    392 			/*
    393 			 * Residue from previous decode is sufficient
    394 			 * to satisfy decode request.  Skip to the
    395 			 * start of the decoded string, place decoded
    396 			 * values in the output buffer, and return.
    397 			 */
    398 			sp->dec_restart += occ;
    399 			do {
    400 				codep = codep->next;
    401 			} while (--residue > occ && codep);
    402 			if (codep) {
    403 				tp = op + occ;
    404 				do {
    405 					*--tp = codep->value;
    406 					codep = codep->next;
    407 				} while (--occ && codep);
    408 			}
    409 			return (1);
    410 		}
    411 		/*
    412 		 * Residue satisfies only part of the decode request.
    413 		 */
    414 		op += residue;
    415 		occ -= residue;
    416 		tp = op;
    417 		do {
    418 			int t;
    419 			--tp;
    420 			t = codep->value;
    421 			codep = codep->next;
    422 			*tp = (char)t;
    423 		} while (--residue && codep);
    424 		sp->dec_restart = 0;
    425 	}
    426 
    427 	bp = (unsigned char *)tif->tif_rawcp;
    428 #ifdef LZW_CHECKEOS
    429 	sp->dec_bitsleft = (((uint64)tif->tif_rawcc) << 3);
    430 #endif
    431 	nbits = sp->lzw_nbits;
    432 	nextdata = sp->lzw_nextdata;
    433 	nextbits = sp->lzw_nextbits;
    434 	nbitsmask = sp->dec_nbitsmask;
    435 	oldcodep = sp->dec_oldcodep;
    436 	free_entp = sp->dec_free_entp;
    437 	maxcodep = sp->dec_maxcodep;
    438 
    439 	while (occ > 0) {
    440 		NextCode(tif, sp, bp, code, GetNextCode);
    441 		if (code == CODE_EOI)
    442 			break;
    443 		if (code == CODE_CLEAR) {
    444 			do {
    445 				free_entp = sp->dec_codetab + CODE_FIRST;
    446 				_TIFFmemset(free_entp, 0,
    447 					    (CSIZE - CODE_FIRST) * sizeof (code_t));
    448 				nbits = BITS_MIN;
    449 				nbitsmask = MAXCODE(BITS_MIN);
    450 				maxcodep = sp->dec_codetab + nbitsmask-1;
    451 				NextCode(tif, sp, bp, code, GetNextCode);
    452 			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
    453 			if (code == CODE_EOI)
    454 				break;
    455 			if (code > CODE_CLEAR) {
    456 				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
    457 				"LZWDecode: Corrupted LZW table at scanline %d",
    458 					     tif->tif_row);
    459 				return (0);
    460 			}
    461 			*op++ = (char)code;
    462 			occ--;
    463 			oldcodep = sp->dec_codetab + code;
    464 			continue;
    465 		}
    466 		codep = sp->dec_codetab + code;
    467 
    468 		/*
    469 		 * Add the new entry to the code table.
    470 		 */
    471 		if (free_entp < &sp->dec_codetab[0] ||
    472 		    free_entp >= &sp->dec_codetab[CSIZE]) {
    473 			TIFFErrorExt(tif->tif_clientdata, module,
    474 			    "Corrupted LZW table at scanline %d",
    475 			    tif->tif_row);
    476 			return (0);
    477 		}
    478 
    479 		free_entp->next = oldcodep;
    480 		if (free_entp->next < &sp->dec_codetab[0] ||
    481 		    free_entp->next >= &sp->dec_codetab[CSIZE]) {
    482 			TIFFErrorExt(tif->tif_clientdata, module,
    483 			    "Corrupted LZW table at scanline %d",
    484 			    tif->tif_row);
    485 			return (0);
    486 		}
    487 		free_entp->firstchar = free_entp->next->firstchar;
    488 		free_entp->length = free_entp->next->length+1;
    489 		free_entp->value = (codep < free_entp) ?
    490 		    codep->firstchar : free_entp->firstchar;
    491 		if (++free_entp > maxcodep) {
    492 			if (++nbits > BITS_MAX)		/* should not happen */
    493 				nbits = BITS_MAX;
    494 			nbitsmask = MAXCODE(nbits);
    495 			maxcodep = sp->dec_codetab + nbitsmask-1;
    496 		}
    497 		oldcodep = codep;
    498 		if (code >= 256) {
    499 			/*
    500 			 * Code maps to a string, copy string
    501 			 * value to output (written in reverse).
    502 			 */
    503 			if(codep->length == 0) {
    504 				TIFFErrorExt(tif->tif_clientdata, module,
    505 				    "Wrong length of decoded string: "
    506 				    "data probably corrupted at scanline %d",
    507 				    tif->tif_row);
    508 				return (0);
    509 			}
    510 			if (codep->length > occ) {
    511 				/*
    512 				 * String is too long for decode buffer,
    513 				 * locate portion that will fit, copy to
    514 				 * the decode buffer, and setup restart
    515 				 * logic for the next decoding call.
    516 				 */
    517 				sp->dec_codep = codep;
    518 				do {
    519 					codep = codep->next;
    520 				} while (codep && codep->length > occ);
    521 				if (codep) {
    522 					sp->dec_restart = (long)occ;
    523 					tp = op + occ;
    524 					do  {
    525 						*--tp = codep->value;
    526 						codep = codep->next;
    527 					}  while (--occ && codep);
    528 					if (codep)
    529 						codeLoop(tif, module);
    530 				}
    531 				break;
    532 			}
    533 			len = codep->length;
    534 			tp = op + len;
    535 			do {
    536 				int t;
    537 				--tp;
    538 				t = codep->value;
    539 				codep = codep->next;
    540 				*tp = (char)t;
    541 			} while (codep && tp > op);
    542 			if (codep) {
    543 			    codeLoop(tif, module);
    544 			    break;
    545 			}
    546 			assert(occ >= len);
    547 			op += len;
    548 			occ -= len;
    549 		} else {
    550 			*op++ = (char)code;
    551 			occ--;
    552 		}
    553 	}
    554 
    555 	tif->tif_rawcc -= (tmsize_t)( (uint8*) bp - tif->tif_rawcp );
    556 	tif->tif_rawcp = (uint8*) bp;
    557 	sp->lzw_nbits = (unsigned short) nbits;
    558 	sp->lzw_nextdata = nextdata;
    559 	sp->lzw_nextbits = nextbits;
    560 	sp->dec_nbitsmask = nbitsmask;
    561 	sp->dec_oldcodep = oldcodep;
    562 	sp->dec_free_entp = free_entp;
    563 	sp->dec_maxcodep = maxcodep;
    564 
    565 	if (occ > 0) {
    566 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
    567 		TIFFErrorExt(tif->tif_clientdata, module,
    568 			"Not enough data at scanline %d (short %I64d bytes)",
    569 			     tif->tif_row, (unsigned __int64) occ);
    570 #else
    571 		TIFFErrorExt(tif->tif_clientdata, module,
    572 			"Not enough data at scanline %d (short %llu bytes)",
    573 			     tif->tif_row, (unsigned long long) occ);
    574 #endif
    575 		return (0);
    576 	}
    577 	return (1);
    578 }
    579 
    580 #ifdef LZW_COMPAT
    581 /*
    582  * Decode a "hunk of data" for old images.
    583  */
    584 #define	GetNextCodeCompat(sp, bp, code) {			\
    585 	nextdata |= (unsigned long) *(bp)++ << nextbits;	\
    586 	nextbits += 8;						\
    587 	if (nextbits < nbits) {					\
    588 		nextdata |= (unsigned long) *(bp)++ << nextbits;\
    589 		nextbits += 8;					\
    590 	}							\
    591 	code = (hcode_t)(nextdata & nbitsmask);			\
    592 	nextdata >>= nbits;					\
    593 	nextbits -= nbits;					\
    594 }
    595 
    596 static int
    597 LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
    598 {
    599 	static const char module[] = "LZWDecodeCompat";
    600 	LZWCodecState *sp = DecoderState(tif);
    601 	char *op = (char*) op0;
    602 	long occ = (long) occ0;
    603 	char *tp;
    604 	unsigned char *bp;
    605 	int code, nbits;
    606 	long nextbits, nextdata, nbitsmask;
    607 	code_t *codep, *free_entp, *maxcodep, *oldcodep;
    608 
    609 	(void) s;
    610 	assert(sp != NULL);
    611 
    612 	/*
    613 	  Fail if value does not fit in long.
    614 	*/
    615 	if ((tmsize_t) occ != occ0)
    616 	        return (0);
    617 
    618 	/*
    619 	 * Restart interrupted output operation.
    620 	 */
    621 	if (sp->dec_restart) {
    622 		long residue;
    623 
    624 		codep = sp->dec_codep;
    625 		residue = codep->length - sp->dec_restart;
    626 		if (residue > occ) {
    627 			/*
    628 			 * Residue from previous decode is sufficient
    629 			 * to satisfy decode request.  Skip to the
    630 			 * start of the decoded string, place decoded
    631 			 * values in the output buffer, and return.
    632 			 */
    633 			sp->dec_restart += occ;
    634 			do {
    635 				codep = codep->next;
    636 			} while (--residue > occ);
    637 			tp = op + occ;
    638 			do {
    639 				*--tp = codep->value;
    640 				codep = codep->next;
    641 			} while (--occ);
    642 			return (1);
    643 		}
    644 		/*
    645 		 * Residue satisfies only part of the decode request.
    646 		 */
    647 		op += residue;
    648 		occ -= residue;
    649 		tp = op;
    650 		do {
    651 			*--tp = codep->value;
    652 			codep = codep->next;
    653 		} while (--residue);
    654 		sp->dec_restart = 0;
    655 	}
    656 
    657 	bp = (unsigned char *)tif->tif_rawcp;
    658 	nbits = sp->lzw_nbits;
    659 	nextdata = sp->lzw_nextdata;
    660 	nextbits = sp->lzw_nextbits;
    661 	nbitsmask = sp->dec_nbitsmask;
    662 	oldcodep = sp->dec_oldcodep;
    663 	free_entp = sp->dec_free_entp;
    664 	maxcodep = sp->dec_maxcodep;
    665 
    666 	while (occ > 0) {
    667 		NextCode(tif, sp, bp, code, GetNextCodeCompat);
    668 		if (code == CODE_EOI)
    669 			break;
    670 		if (code == CODE_CLEAR) {
    671 			do {
    672 				free_entp = sp->dec_codetab + CODE_FIRST;
    673 				_TIFFmemset(free_entp, 0,
    674 					    (CSIZE - CODE_FIRST) * sizeof (code_t));
    675 				nbits = BITS_MIN;
    676 				nbitsmask = MAXCODE(BITS_MIN);
    677 				maxcodep = sp->dec_codetab + nbitsmask;
    678 				NextCode(tif, sp, bp, code, GetNextCodeCompat);
    679 			} while (code == CODE_CLEAR);	/* consecutive CODE_CLEAR codes */
    680 			if (code == CODE_EOI)
    681 				break;
    682 			if (code > CODE_CLEAR) {
    683 				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
    684 				"LZWDecode: Corrupted LZW table at scanline %d",
    685 					     tif->tif_row);
    686 				return (0);
    687 			}
    688 			*op++ = (char)code;
    689 			occ--;
    690 			oldcodep = sp->dec_codetab + code;
    691 			continue;
    692 		}
    693 		codep = sp->dec_codetab + code;
    694 
    695 		/*
    696 		 * Add the new entry to the code table.
    697 		 */
    698 		if (free_entp < &sp->dec_codetab[0] ||
    699 		    free_entp >= &sp->dec_codetab[CSIZE]) {
    700 			TIFFErrorExt(tif->tif_clientdata, module,
    701 			    "Corrupted LZW table at scanline %d", tif->tif_row);
    702 			return (0);
    703 		}
    704 
    705 		free_entp->next = oldcodep;
    706 		if (free_entp->next < &sp->dec_codetab[0] ||
    707 		    free_entp->next >= &sp->dec_codetab[CSIZE]) {
    708 			TIFFErrorExt(tif->tif_clientdata, module,
    709 			    "Corrupted LZW table at scanline %d", tif->tif_row);
    710 			return (0);
    711 		}
    712 		free_entp->firstchar = free_entp->next->firstchar;
    713 		free_entp->length = free_entp->next->length+1;
    714 		free_entp->value = (codep < free_entp) ?
    715 		    codep->firstchar : free_entp->firstchar;
    716 		if (++free_entp > maxcodep) {
    717 			if (++nbits > BITS_MAX)		/* should not happen */
    718 				nbits = BITS_MAX;
    719 			nbitsmask = MAXCODE(nbits);
    720 			maxcodep = sp->dec_codetab + nbitsmask;
    721 		}
    722 		oldcodep = codep;
    723 		if (code >= 256) {
    724 			/*
    725 			 * Code maps to a string, copy string
    726 			 * value to output (written in reverse).
    727 			 */
    728 			if(codep->length == 0) {
    729 				TIFFErrorExt(tif->tif_clientdata, module,
    730 				    "Wrong length of decoded "
    731 				    "string: data probably corrupted at scanline %d",
    732 				    tif->tif_row);
    733 				return (0);
    734 			}
    735 			if (codep->length > occ) {
    736 				/*
    737 				 * String is too long for decode buffer,
    738 				 * locate portion that will fit, copy to
    739 				 * the decode buffer, and setup restart
    740 				 * logic for the next decoding call.
    741 				 */
    742 				sp->dec_codep = codep;
    743 				do {
    744 					codep = codep->next;
    745 				} while (codep->length > occ);
    746 				sp->dec_restart = occ;
    747 				tp = op + occ;
    748 				do  {
    749 					*--tp = codep->value;
    750 					codep = codep->next;
    751 				}  while (--occ);
    752 				break;
    753 			}
    754 			assert(occ >= codep->length);
    755 			op += codep->length;
    756 			occ -= codep->length;
    757 			tp = op;
    758 			do {
    759 				*--tp = codep->value;
    760 			} while( (codep = codep->next) != NULL );
    761 		} else {
    762 			*op++ = (char)code;
    763 			occ--;
    764 		}
    765 	}
    766 
    767 	tif->tif_rawcp = (uint8*) bp;
    768 	sp->lzw_nbits = (unsigned short)nbits;
    769 	sp->lzw_nextdata = nextdata;
    770 	sp->lzw_nextbits = nextbits;
    771 	sp->dec_nbitsmask = nbitsmask;
    772 	sp->dec_oldcodep = oldcodep;
    773 	sp->dec_free_entp = free_entp;
    774 	sp->dec_maxcodep = maxcodep;
    775 
    776 	if (occ > 0) {
    777 #if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
    778 		TIFFErrorExt(tif->tif_clientdata, module,
    779 			"Not enough data at scanline %d (short %I64d bytes)",
    780 			     tif->tif_row, (unsigned __int64) occ);
    781 #else
    782 		TIFFErrorExt(tif->tif_clientdata, module,
    783 			"Not enough data at scanline %d (short %llu bytes)",
    784 			     tif->tif_row, (unsigned long long) occ);
    785 #endif
    786 		return (0);
    787 	}
    788 	return (1);
    789 }
    790 #endif /* LZW_COMPAT */
    791 
    792 /*
    793  * LZW Encoding.
    794  */
    795 
    796 static int
    797 LZWSetupEncode(TIFF* tif)
    798 {
    799 	static const char module[] = "LZWSetupEncode";
    800 	LZWCodecState* sp = EncoderState(tif);
    801 
    802 	assert(sp != NULL);
    803 	sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
    804 	if (sp->enc_hashtab == NULL) {
    805 		TIFFErrorExt(tif->tif_clientdata, module,
    806 			     "No space for LZW hash table");
    807 		return (0);
    808 	}
    809 	return (1);
    810 }
    811 
    812 /*
    813  * Reset encoding state at the start of a strip.
    814  */
    815 static int
    816 LZWPreEncode(TIFF* tif, uint16 s)
    817 {
    818 	LZWCodecState *sp = EncoderState(tif);
    819 
    820 	(void) s;
    821 	assert(sp != NULL);
    822 
    823 	if( sp->enc_hashtab == NULL )
    824         {
    825             tif->tif_setupencode( tif );
    826         }
    827 
    828 	sp->lzw_nbits = BITS_MIN;
    829 	sp->lzw_maxcode = MAXCODE(BITS_MIN);
    830 	sp->lzw_free_ent = CODE_FIRST;
    831 	sp->lzw_nextbits = 0;
    832 	sp->lzw_nextdata = 0;
    833 	sp->enc_checkpoint = CHECK_GAP;
    834 	sp->enc_ratio = 0;
    835 	sp->enc_incount = 0;
    836 	sp->enc_outcount = 0;
    837 	/*
    838 	 * The 4 here insures there is space for 2 max-sized
    839 	 * codes in LZWEncode and LZWPostDecode.
    840 	 */
    841 	sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
    842 	cl_hash(sp);		/* clear hash table */
    843 	sp->enc_oldcode = (hcode_t) -1;	/* generates CODE_CLEAR in LZWEncode */
    844 	return (1);
    845 }
    846 
    847 #define	CALCRATIO(sp, rat) {					\
    848 	if (incount > 0x007fffff) { /* NB: shift will overflow */\
    849 		rat = outcount >> 8;				\
    850 		rat = (rat == 0 ? 0x7fffffff : incount/rat);	\
    851 	} else							\
    852 		rat = (incount<<8) / outcount;			\
    853 }
    854 
    855 /* Explicit 0xff masking to make icc -check=conversions happy */
    856 #define	PutNextCode(op, c) {					\
    857 	nextdata = (nextdata << nbits) | c;			\
    858 	nextbits += nbits;					\
    859 	*op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff);		\
    860 	nextbits -= 8;						\
    861 	if (nextbits >= 8) {					\
    862 		*op++ = (unsigned char)((nextdata >> (nextbits-8))&0xff);	\
    863 		nextbits -= 8;					\
    864 	}							\
    865 	outcount += nbits;					\
    866 }
    867 
    868 /*
    869  * Encode a chunk of pixels.
    870  *
    871  * Uses an open addressing double hashing (no chaining) on the
    872  * prefix code/next character combination.  We do a variant of
    873  * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
    874  * relatively-prime secondary probe.  Here, the modular division
    875  * first probe is gives way to a faster exclusive-or manipulation.
    876  * Also do block compression with an adaptive reset, whereby the
    877  * code table is cleared when the compression ratio decreases,
    878  * but after the table fills.  The variable-length output codes
    879  * are re-sized at this point, and a CODE_CLEAR is generated
    880  * for the decoder.
    881  */
    882 static int
    883 LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s)
    884 {
    885 	register LZWCodecState *sp = EncoderState(tif);
    886 	register long fcode;
    887 	register hash_t *hp;
    888 	register int h, c;
    889 	hcode_t ent;
    890 	long disp;
    891 	long incount, outcount, checkpoint;
    892 	unsigned long nextdata;
    893         long nextbits;
    894 	int free_ent, maxcode, nbits;
    895 	uint8* op;
    896 	uint8* limit;
    897 
    898 	(void) s;
    899 	if (sp == NULL)
    900 		return (0);
    901 
    902         assert(sp->enc_hashtab != NULL);
    903 
    904 	/*
    905 	 * Load local state.
    906 	 */
    907 	incount = sp->enc_incount;
    908 	outcount = sp->enc_outcount;
    909 	checkpoint = sp->enc_checkpoint;
    910 	nextdata = sp->lzw_nextdata;
    911 	nextbits = sp->lzw_nextbits;
    912 	free_ent = sp->lzw_free_ent;
    913 	maxcode = sp->lzw_maxcode;
    914 	nbits = sp->lzw_nbits;
    915 	op = tif->tif_rawcp;
    916 	limit = sp->enc_rawlimit;
    917 	ent = (hcode_t)sp->enc_oldcode;
    918 
    919 	if (ent == (hcode_t) -1 && cc > 0) {
    920 		/*
    921 		 * NB: This is safe because it can only happen
    922 		 *     at the start of a strip where we know there
    923 		 *     is space in the data buffer.
    924 		 */
    925 		PutNextCode(op, CODE_CLEAR);
    926 		ent = *bp++; cc--; incount++;
    927 	}
    928 	while (cc > 0) {
    929 		c = *bp++; cc--; incount++;
    930 		fcode = ((long)c << BITS_MAX) + ent;
    931 		h = (c << HSHIFT) ^ ent;	/* xor hashing */
    932 #ifdef _WINDOWS
    933 		/*
    934 		 * Check hash index for an overflow.
    935 		 */
    936 		if (h >= HSIZE)
    937 			h -= HSIZE;
    938 #endif
    939 		hp = &sp->enc_hashtab[h];
    940 		if (hp->hash == fcode) {
    941 			ent = hp->code;
    942 			continue;
    943 		}
    944 		if (hp->hash >= 0) {
    945 			/*
    946 			 * Primary hash failed, check secondary hash.
    947 			 */
    948 			disp = HSIZE - h;
    949 			if (h == 0)
    950 				disp = 1;
    951 			do {
    952 				/*
    953 				 * Avoid pointer arithmetic because of
    954 				 * wraparound problems with segments.
    955 				 */
    956 				if ((h -= disp) < 0)
    957 					h += HSIZE;
    958 				hp = &sp->enc_hashtab[h];
    959 				if (hp->hash == fcode) {
    960 					ent = hp->code;
    961 					goto hit;
    962 				}
    963 			} while (hp->hash >= 0);
    964 		}
    965 		/*
    966 		 * New entry, emit code and add to table.
    967 		 */
    968 		/*
    969 		 * Verify there is space in the buffer for the code
    970 		 * and any potential Clear code that might be emitted
    971 		 * below.  The value of limit is setup so that there
    972 		 * are at least 4 bytes free--room for 2 codes.
    973 		 */
    974 		if (op > limit) {
    975 			tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
    976 			if( !TIFFFlushData1(tif) )
    977                             return 0;
    978 			op = tif->tif_rawdata;
    979 		}
    980 		PutNextCode(op, ent);
    981 		ent = (hcode_t)c;
    982 		hp->code = (hcode_t)(free_ent++);
    983 		hp->hash = fcode;
    984 		if (free_ent == CODE_MAX-1) {
    985 			/* table is full, emit clear code and reset */
    986 			cl_hash(sp);
    987 			sp->enc_ratio = 0;
    988 			incount = 0;
    989 			outcount = 0;
    990 			free_ent = CODE_FIRST;
    991 			PutNextCode(op, CODE_CLEAR);
    992 			nbits = BITS_MIN;
    993 			maxcode = MAXCODE(BITS_MIN);
    994 		} else {
    995 			/*
    996 			 * If the next entry is going to be too big for
    997 			 * the code size, then increase it, if possible.
    998 			 */
    999 			if (free_ent > maxcode) {
   1000 				nbits++;
   1001 				assert(nbits <= BITS_MAX);
   1002 				maxcode = (int) MAXCODE(nbits);
   1003 			} else if (incount >= checkpoint) {
   1004 				long rat;
   1005 				/*
   1006 				 * Check compression ratio and, if things seem
   1007 				 * to be slipping, clear the hash table and
   1008 				 * reset state.  The compression ratio is a
   1009 				 * 24+8-bit fractional number.
   1010 				 */
   1011 				checkpoint = incount+CHECK_GAP;
   1012 				CALCRATIO(sp, rat);
   1013 				if (rat <= sp->enc_ratio) {
   1014 					cl_hash(sp);
   1015 					sp->enc_ratio = 0;
   1016 					incount = 0;
   1017 					outcount = 0;
   1018 					free_ent = CODE_FIRST;
   1019 					PutNextCode(op, CODE_CLEAR);
   1020 					nbits = BITS_MIN;
   1021 					maxcode = MAXCODE(BITS_MIN);
   1022 				} else
   1023 					sp->enc_ratio = rat;
   1024 			}
   1025 		}
   1026 	hit:
   1027 		;
   1028 	}
   1029 
   1030 	/*
   1031 	 * Restore global state.
   1032 	 */
   1033 	sp->enc_incount = incount;
   1034 	sp->enc_outcount = outcount;
   1035 	sp->enc_checkpoint = checkpoint;
   1036 	sp->enc_oldcode = ent;
   1037 	sp->lzw_nextdata = nextdata;
   1038 	sp->lzw_nextbits = nextbits;
   1039 	sp->lzw_free_ent = (unsigned short)free_ent;
   1040 	sp->lzw_maxcode = (unsigned short)maxcode;
   1041 	sp->lzw_nbits = (unsigned short)nbits;
   1042 	tif->tif_rawcp = op;
   1043 	return (1);
   1044 }
   1045 
   1046 /*
   1047  * Finish off an encoded strip by flushing the last
   1048  * string and tacking on an End Of Information code.
   1049  */
   1050 static int
   1051 LZWPostEncode(TIFF* tif)
   1052 {
   1053 	register LZWCodecState *sp = EncoderState(tif);
   1054 	uint8* op = tif->tif_rawcp;
   1055 	long nextbits = sp->lzw_nextbits;
   1056 	unsigned long nextdata = sp->lzw_nextdata;
   1057 	long outcount = sp->enc_outcount;
   1058 	int nbits = sp->lzw_nbits;
   1059 
   1060 	if (op > sp->enc_rawlimit) {
   1061 		tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
   1062 		if( !TIFFFlushData1(tif) )
   1063                     return 0;
   1064 		op = tif->tif_rawdata;
   1065 	}
   1066 	if (sp->enc_oldcode != (hcode_t) -1) {
   1067                 int free_ent = sp->lzw_free_ent;
   1068 
   1069 		PutNextCode(op, sp->enc_oldcode);
   1070 		sp->enc_oldcode = (hcode_t) -1;
   1071                 free_ent ++;
   1072 
   1073                 if (free_ent == CODE_MAX-1) {
   1074                         /* table is full, emit clear code and reset */
   1075                         outcount = 0;
   1076                         PutNextCode(op, CODE_CLEAR);
   1077                         nbits = BITS_MIN;
   1078                 } else {
   1079                         /*
   1080                         * If the next entry is going to be too big for
   1081                         * the code size, then increase it, if possible.
   1082                         */
   1083                         if (free_ent > sp->lzw_maxcode) {
   1084                                 nbits++;
   1085                                 assert(nbits <= BITS_MAX);
   1086                         }
   1087                 }
   1088 	}
   1089 	PutNextCode(op, CODE_EOI);
   1090         /* Explicit 0xff masking to make icc -check=conversions happy */
   1091 	if (nextbits > 0)
   1092 		*op++ = (unsigned char)((nextdata << (8-nextbits))&0xff);
   1093 	tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
   1094 	return (1);
   1095 }
   1096 
   1097 /*
   1098  * Reset encoding hash table.
   1099  */
   1100 static void
   1101 cl_hash(LZWCodecState* sp)
   1102 {
   1103 	register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
   1104 	register long i = HSIZE-8;
   1105 
   1106 	do {
   1107 		i -= 8;
   1108 		hp[-7].hash = -1;
   1109 		hp[-6].hash = -1;
   1110 		hp[-5].hash = -1;
   1111 		hp[-4].hash = -1;
   1112 		hp[-3].hash = -1;
   1113 		hp[-2].hash = -1;
   1114 		hp[-1].hash = -1;
   1115 		hp[ 0].hash = -1;
   1116 		hp -= 8;
   1117 	} while (i >= 0);
   1118 	for (i += 8; i > 0; i--, hp--)
   1119 		hp->hash = -1;
   1120 }
   1121 
   1122 static void
   1123 LZWCleanup(TIFF* tif)
   1124 {
   1125 	(void)TIFFPredictorCleanup(tif);
   1126 
   1127 	assert(tif->tif_data != 0);
   1128 
   1129 	if (DecoderState(tif)->dec_codetab)
   1130 		_TIFFfree(DecoderState(tif)->dec_codetab);
   1131 
   1132 	if (EncoderState(tif)->enc_hashtab)
   1133 		_TIFFfree(EncoderState(tif)->enc_hashtab);
   1134 
   1135 	_TIFFfree(tif->tif_data);
   1136 	tif->tif_data = NULL;
   1137 
   1138 	_TIFFSetDefaultCompressionState(tif);
   1139 }
   1140 
   1141 int
   1142 TIFFInitLZW(TIFF* tif, int scheme)
   1143 {
   1144 	static const char module[] = "TIFFInitLZW";
   1145 	assert(scheme == COMPRESSION_LZW);
   1146 	/*
   1147 	 * Allocate state block so tag methods have storage to record values.
   1148 	 */
   1149 	tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState));
   1150 	if (tif->tif_data == NULL)
   1151 		goto bad;
   1152 	DecoderState(tif)->dec_codetab = NULL;
   1153 	DecoderState(tif)->dec_decode = NULL;
   1154 	EncoderState(tif)->enc_hashtab = NULL;
   1155         LZWState(tif)->rw_mode = tif->tif_mode;
   1156 
   1157 	/*
   1158 	 * Install codec methods.
   1159 	 */
   1160 	tif->tif_fixuptags = LZWFixupTags;
   1161 	tif->tif_setupdecode = LZWSetupDecode;
   1162 	tif->tif_predecode = LZWPreDecode;
   1163 	tif->tif_decoderow = LZWDecode;
   1164 	tif->tif_decodestrip = LZWDecode;
   1165 	tif->tif_decodetile = LZWDecode;
   1166 	tif->tif_setupencode = LZWSetupEncode;
   1167 	tif->tif_preencode = LZWPreEncode;
   1168 	tif->tif_postencode = LZWPostEncode;
   1169 	tif->tif_encoderow = LZWEncode;
   1170 	tif->tif_encodestrip = LZWEncode;
   1171 	tif->tif_encodetile = LZWEncode;
   1172 	tif->tif_cleanup = LZWCleanup;
   1173 	/*
   1174 	 * Setup predictor setup.
   1175 	 */
   1176 	(void) TIFFPredictorInit(tif);
   1177 	return (1);
   1178 bad:
   1179 	TIFFErrorExt(tif->tif_clientdata, module,
   1180 		     "No space for LZW state block");
   1181 	return (0);
   1182 }
   1183 
   1184 /*
   1185  * Copyright (c) 1985, 1986 The Regents of the University of California.
   1186  * All rights reserved.
   1187  *
   1188  * This code is derived from software contributed to Berkeley by
   1189  * James A. Woods, derived from original work by Spencer Thomas
   1190  * and Joseph Orost.
   1191  *
   1192  * Redistribution and use in source and binary forms are permitted
   1193  * provided that the above copyright notice and this paragraph are
   1194  * duplicated in all such forms and that any documentation,
   1195  * advertising materials, and other materials related to such
   1196  * distribution and use acknowledge that the software was developed
   1197  * by the University of California, Berkeley.  The name of the
   1198  * University may not be used to endorse or promote products derived
   1199  * from this software without specific prior written permission.
   1200  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
   1201  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
   1202  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
   1203  */
   1204 #endif /* LZW_SUPPORT */
   1205 
   1206 /* vim: set ts=8 sts=8 sw=8 noet: */
   1207 /*
   1208  * Local Variables:
   1209  * mode: c
   1210  * c-basic-offset: 8
   1211  * fill-column: 78
   1212  * End:
   1213  */
   1214