// deflate.go — DEFLATE compressor implementation (package flate).
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package flate
      6 
      7 import (
      8 	"fmt"
      9 	"io"
     10 	"math"
     11 )
     12 
const (
	// Compression levels accepted by NewWriter.
	NoCompression      = 0
	BestSpeed          = 1
	fastCompression    = 3
	BestCompression    = 9
	DefaultCompression = -1 // mapped to level 6 in (*compressor).init

	logWindowSize = 15
	windowSize    = 1 << logWindowSize // size of the sliding window (32 KiB)
	windowMask    = windowSize - 1

	logMaxOffsetSize = 15  // Standard DEFLATE
	minMatchLength   = 3   // The smallest match that the compressor looks for
	maxMatchLength   = 258 // The longest match for the compressor
	minOffsetSize    = 1   // The shortest offset that makes any sense

	// The maximum number of tokens we put into a single flate block, just to
	// stop things from getting too large.
	maxFlateBlockTokens = 1 << 14
	maxStoreBlockSize   = 65535
	hashBits            = 17
	hashSize            = 1 << hashBits
	hashMask            = (1 << hashBits) - 1
	// Shift per input byte so that minMatchLength bytes cover all hashBits.
	hashShift = (hashBits + minMatchLength - 1) / minMatchLength
	// Once hashOffset exceeds this, fillDeflate rebases the hash tables.
	maxHashOffset = 1 << 24

	// Sentinel for compressionLevel.fastSkipHashing: never use the
	// greedy skip-hashing path (i.e. use lazy matching).
	skipNever = math.MaxInt32
)
     39 
// compressionLevel holds the per-level tuning knobs for the matcher:
//
//	good:  once a match at least this long is found, search only a
//	       quarter of the hash chain for a better one (see findMatch);
//	lazy:  in lazy-matching mode, only try to beat a previous match
//	       shorter than this;
//	nice:  stop searching as soon as a match this long is found;
//	chain: maximum number of hash-chain entries to examine;
//	fastSkipHashing: in greedy mode, matches no longer than this still
//	       get every covered position inserted into the hash table;
//	       skipNever selects the lazy-matching code path instead.
type compressionLevel struct {
	good, lazy, nice, chain, fastSkipHashing int
}
     43 
// levels maps a compression level (1-9) to its matcher parameters.
// Index 0 is an unused placeholder: init never indexes levels for
// NoCompression.
var levels = []compressionLevel{
	{}, // 0
	// For levels 1-3 we don't bother trying with lazy matches
	{3, 0, 8, 4, 4},
	{3, 0, 16, 8, 5},
	{3, 0, 32, 32, 6},
	// Levels 4-9 use increasingly more lazy matching
	// and increasingly stringent conditions for "good enough".
	{4, 4, 16, 16, skipNever},
	{8, 16, 32, 32, skipNever},
	{8, 16, 128, 128, skipNever},
	{8, 32, 128, 256, skipNever},
	{32, 128, 258, 1024, skipNever},
	{32, 258, 258, 4096, skipNever},
}
     59 
// compressor holds all state for one DEFLATE compression stream.
type compressor struct {
	compressionLevel // tuning parameters for the selected level

	w *huffmanBitWriter // destination for the encoded bit stream

	// compression algorithm
	fill func(*compressor, []byte) int // copy data to window
	step func(*compressor)             // process window
	sync bool                          // requesting flush

	// Input hash chains
	// hashHead[hashValue] contains the largest inputIndex with the specified hash value
	// If hashHead[hashValue] is within the current window, then
	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
	// with the same hash value.
	chainHead  int   // head (latest position) of the chain for the current hash
	hashHead   []int // latest position+hashOffset for each hash value (0 = unset)
	hashPrev   []int // previous position with the same hash, indexed by window position
	hashOffset int   // bias added to stored positions so that 0 can mean "unset"

	// input window: unprocessed data is window[index:windowEnd]
	index         int
	window        []byte
	windowEnd     int
	blockStart    int  // window index where current tokens start
	byteAvailable bool // if true, still need to process window[index-1].

	// queued output tokens
	tokens []token

	// deflate state
	length         int // length of the best match found so far
	offset         int // offset of the best match found so far
	hash           int // rolling hash of the bytes at index
	maxInsertIndex int // highest index at which a hash may still be inserted
	err            error // sticky error from writing output
}
     97 
// fillDeflate copies as much of b as fits into the sliding window and
// returns the number of bytes consumed. When the read cursor nears the end
// of the 2*windowSize buffer, the upper half is slid down first and all
// window-relative state is rebased.
func (d *compressor) fillDeflate(b []byte) int {
	if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) {
		// shift the window by windowSize
		copy(d.window, d.window[windowSize:2*windowSize])
		d.index -= windowSize
		d.windowEnd -= windowSize
		if d.blockStart >= windowSize {
			d.blockStart -= windowSize
		} else {
			// blockStart fell off the window; park it out of range so
			// writeBlock will not slice the window with it.
			d.blockStart = math.MaxInt32
		}
		// Instead of rewriting both hash tables on every slide, grow the
		// bias and only rebase once it gets large.
		d.hashOffset += windowSize
		if d.hashOffset > maxHashOffset {
			// Shrink hashOffset back to 1; shift every stored position by
			// the same delta, clearing entries that would go non-positive
			// (0 means "unset").
			delta := d.hashOffset - 1
			d.hashOffset -= delta
			d.chainHead -= delta
			for i, v := range d.hashPrev {
				if v > delta {
					d.hashPrev[i] -= delta
				} else {
					d.hashPrev[i] = 0
				}
			}
			for i, v := range d.hashHead {
				if v > delta {
					d.hashHead[i] -= delta
				} else {
					d.hashHead[i] = 0
				}
			}
		}
	}
	n := copy(d.window[d.windowEnd:], b)
	d.windowEnd += n
	return n
}
    134 
    135 func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
    136 	if index > 0 || eof {
    137 		var window []byte
    138 		if d.blockStart <= index {
    139 			window = d.window[d.blockStart:index]
    140 		}
    141 		d.blockStart = index
    142 		d.w.writeBlock(tokens, eof, window)
    143 		return d.w.err
    144 	}
    145 	return nil
    146 }
    147 
// findMatch tries to find a match starting at pos whose length is greater
// than prevLength, walking the hash chain from prevHead. We only look at
// d.chain (or d.chain/4) possibilities before giving up. lookahead bounds
// how far past pos the window may be read.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
	minMatchLook := maxMatchLength
	if lookahead < minMatchLook {
		minMatchLook = lookahead
	}

	win := d.window[0 : pos+minMatchLook]

	// We quit when we get a match that's at least nice long
	nice := len(win) - pos
	if d.nice < nice {
		nice = d.nice
	}

	// If we've got a match that's good enough, only look in 1/4 the chain.
	tries := d.chain
	length = prevLength
	if length >= d.good {
		tries >>= 2
	}

	// Candidate filter: first two bytes plus the byte that would extend the
	// current best match.
	w0 := win[pos]
	w1 := win[pos+1]
	wEnd := win[pos+length]
	minIndex := pos - windowSize

	for i := prevHead; tries > 0; tries-- {
		if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
			// The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches

			n := 3
			for pos+n < len(win) && win[i+n] == win[pos+n] {
				n++
			}
			// Prefer the new match only if it is longer; very short (3-byte)
			// matches are only worthwhile at small offsets.
			if n > length && (n > 3 || pos-i <= 4096) {
				length = n
				offset = pos - i
				ok = true
				if n >= nice {
					// The match is good enough that we don't try to find a better one.
					break
				}
				wEnd = win[pos+n]
			}
		}
		if i == minIndex {
			// hashPrev[i & windowMask] has already been overwritten, so stop now.
			break
		}
		if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
			break
		}
	}
	return
}
    205 
    206 func (d *compressor) writeStoredBlock(buf []byte) error {
    207 	if d.w.writeStoredHeader(len(buf), false); d.w.err != nil {
    208 		return d.w.err
    209 	}
    210 	d.w.writeBytes(buf)
    211 	return d.w.err
    212 }
    213 
    214 func (d *compressor) initDeflate() {
    215 	d.hashHead = make([]int, hashSize)
    216 	d.hashPrev = make([]int, windowSize)
    217 	d.window = make([]byte, 2*windowSize)
    218 	d.hashOffset = 1
    219 	d.tokens = make([]token, 0, maxFlateBlockTokens+1)
    220 	d.length = minMatchLength - 1
    221 	d.offset = 0
    222 	d.byteAvailable = false
    223 	d.index = 0
    224 	d.hash = 0
    225 	d.chainHead = -1
    226 }
    227 
// deflate is the main compression loop: it scans the window for matches,
// appends match/literal tokens, and writes out a block whenever the token
// buffer fills (or on flush/close, signalled by d.sync).
func (d *compressor) deflate() {
	// Without enough lookahead for a worst-case match, wait for more input
	// unless a flush has been requested.
	if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync {
		return
	}

	// Hashes may only be inserted where minMatchLength bytes remain.
	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
	if d.index < d.maxInsertIndex {
		// Prime the rolling hash with the first two bytes at index.
		d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
	}

Loop:
	for {
		if d.index > d.windowEnd {
			panic("index > windowEnd")
		}
		lookahead := d.windowEnd - d.index
		if lookahead < minMatchLength+maxMatchLength {
			if !d.sync {
				break Loop
			}
			if d.index > d.windowEnd {
				panic("index > windowEnd")
			}
			if lookahead == 0 {
				// Flush current output block if any.
				if d.byteAvailable {
					// There is still one pending token that needs to be flushed
					d.tokens = append(d.tokens, literalToken(uint32(d.window[d.index-1])))
					d.byteAvailable = false
				}
				if len(d.tokens) > 0 {
					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
				break Loop
			}
		}
		if d.index < d.maxInsertIndex {
			// Update the hash and link the current position into its chain.
			d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
			d.chainHead = d.hashHead[d.hash]
			d.hashPrev[d.index&windowMask] = d.chainHead
			d.hashHead[d.hash] = d.index + d.hashOffset
		}
		prevLength := d.length
		prevOffset := d.offset
		d.length = minMatchLength - 1
		d.offset = 0
		minIndex := d.index - windowSize
		if minIndex < 0 {
			minIndex = 0
		}

		// Search the chain when it reaches back into the window, and either
		// (greedy mode) there is room for a minimal match, or (lazy mode,
		// fastSkipHashing == skipNever) the previous match is short enough
		// to be worth trying to beat.
		if d.chainHead-d.hashOffset >= minIndex &&
			(d.fastSkipHashing != skipNever && lookahead > minMatchLength-1 ||
				d.fastSkipHashing == skipNever && lookahead > prevLength && prevLength < d.lazy) {
			if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead-d.hashOffset, minMatchLength-1, lookahead); ok {
				d.length = newLength
				d.offset = newOffset
			}
		}
		if d.fastSkipHashing != skipNever && d.length >= minMatchLength ||
			d.fastSkipHashing == skipNever && prevLength >= minMatchLength && d.length <= prevLength {
			// There was a match at the previous step, and the current match is
			// not better. Output the previous match.
			if d.fastSkipHashing != skipNever {
				d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)))
			} else {
				d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)))
			}
			// Insert in the hash table all strings up to the end of the match.
			// index and index-1 are already inserted. If there is not enough
			// lookahead, the last two strings are not inserted into the hash
			// table.
			if d.length <= d.fastSkipHashing {
				var newIndex int
				if d.fastSkipHashing != skipNever {
					newIndex = d.index + d.length
				} else {
					newIndex = d.index + prevLength - 1
				}
				for d.index++; d.index < newIndex; d.index++ {
					if d.index < d.maxInsertIndex {
						d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
						// Get previous value with the same hash.
						// Our chain should point to the previous value.
						d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
						// Set the head of the hash chain to us.
						d.hashHead[d.hash] = d.index + d.hashOffset
					}
				}
				if d.fastSkipHashing == skipNever {
					d.byteAvailable = false
					d.length = minMatchLength - 1
				}
			} else {
				// For matches this long, we don't bother inserting each individual
				// item into the table.
				d.index += d.length
				if d.index < d.maxInsertIndex {
					// Re-prime the rolling hash at the new position.
					d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
				}
			}
			if len(d.tokens) == maxFlateBlockTokens {
				// The block includes the current character
				if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
					return
				}
				d.tokens = d.tokens[:0]
			}
		} else {
			// No (better) match: emit a literal. In lazy mode the literal is
			// the held-back byte at index-1 (tracked by byteAvailable).
			if d.fastSkipHashing != skipNever || d.byteAvailable {
				i := d.index - 1
				if d.fastSkipHashing != skipNever {
					i = d.index
				}
				d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
				if len(d.tokens) == maxFlateBlockTokens {
					if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
						return
					}
					d.tokens = d.tokens[:0]
				}
			}
			d.index++
			if d.fastSkipHashing == skipNever {
				d.byteAvailable = true
			}
		}
	}
}
    361 
    362 func (d *compressor) fillStore(b []byte) int {
    363 	n := copy(d.window[d.windowEnd:], b)
    364 	d.windowEnd += n
    365 	return n
    366 }
    367 
    368 func (d *compressor) store() {
    369 	if d.windowEnd > 0 {
    370 		d.err = d.writeStoredBlock(d.window[:d.windowEnd])
    371 	}
    372 	d.windowEnd = 0
    373 }
    374 
    375 func (d *compressor) write(b []byte) (n int, err error) {
    376 	n = len(b)
    377 	b = b[d.fill(d, b):]
    378 	for len(b) > 0 {
    379 		d.step(d)
    380 		b = b[d.fill(d, b):]
    381 	}
    382 	return n, d.err
    383 }
    384 
    385 func (d *compressor) syncFlush() error {
    386 	d.sync = true
    387 	d.step(d)
    388 	if d.err == nil {
    389 		d.w.writeStoredHeader(0, false)
    390 		d.w.flush()
    391 		d.err = d.w.err
    392 	}
    393 	d.sync = false
    394 	return d.err
    395 }
    396 
    397 func (d *compressor) init(w io.Writer, level int) (err error) {
    398 	d.w = newHuffmanBitWriter(w)
    399 
    400 	switch {
    401 	case level == NoCompression:
    402 		d.window = make([]byte, maxStoreBlockSize)
    403 		d.fill = (*compressor).fillStore
    404 		d.step = (*compressor).store
    405 	case level == DefaultCompression:
    406 		level = 6
    407 		fallthrough
    408 	case 1 <= level && level <= 9:
    409 		d.compressionLevel = levels[level]
    410 		d.initDeflate()
    411 		d.fill = (*compressor).fillDeflate
    412 		d.step = (*compressor).deflate
    413 	default:
    414 		return fmt.Errorf("flate: invalid compression level %d: want value in range [-1, 9]", level)
    415 	}
    416 	return nil
    417 }
    418 
// zeroes and bzeroes are all-zero scratch buffers that reset uses to clear
// the hash tables and the window chunk by chunk via copy.
var zeroes [32]int
var bzeroes [256]byte
    421 
// reset discards the compressor's stream state, keeping its allocations,
// so it can compress a fresh stream to w at the same level.
func (d *compressor) reset(w io.Writer) {
	d.w.reset(w)
	d.sync = false
	d.err = nil
	switch d.compressionLevel.chain {
	case 0:
		// level was NoCompression.
		for i := range d.window {
			d.window[i] = 0
		}
		d.windowEnd = 0
	default:
		d.chainHead = -1
		// Clear both hash tables in chunks with the zeroed scratch arrays.
		for s := d.hashHead; len(s) > 0; {
			n := copy(s, zeroes[:])
			s = s[n:]
		}
		// hashPrev has length windowSize, an exact multiple of len(zeroes),
		// so the fixed stride in the post statement never overruns.
		for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] {
			copy(s, zeroes[:])
		}
		d.hashOffset = 1

		d.index, d.windowEnd = 0, 0
		for s := d.window; len(s) > 0; {
			n := copy(s, bzeroes[:])
			s = s[n:]
		}
		d.blockStart, d.byteAvailable = 0, false

		// Zero the token buffer over its full capacity, then truncate.
		d.tokens = d.tokens[:maxFlateBlockTokens+1]
		for i := 0; i <= maxFlateBlockTokens; i++ {
			d.tokens[i] = 0
		}
		d.tokens = d.tokens[:0]
		d.length = minMatchLength - 1
		d.offset = 0
		d.hash = 0
		d.maxInsertIndex = 0
	}
}
    462 
    463 func (d *compressor) close() error {
    464 	d.sync = true
    465 	d.step(d)
    466 	if d.err != nil {
    467 		return d.err
    468 	}
    469 	if d.w.writeStoredHeader(0, true); d.w.err != nil {
    470 		return d.w.err
    471 	}
    472 	d.w.flush()
    473 	return d.w.err
    474 }
    475 
    476 // NewWriter returns a new Writer compressing data at the given level.
    477 // Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression);
    478 // higher levels typically run slower but compress more. Level 0
    479 // (NoCompression) does not attempt any compression; it only adds the
    480 // necessary DEFLATE framing. Level -1 (DefaultCompression) uses the default
    481 // compression level.
    482 //
    483 // If level is in the range [-1, 9] then the error returned will be nil.
    484 // Otherwise the error returned will be non-nil.
    485 func NewWriter(w io.Writer, level int) (*Writer, error) {
    486 	var dw Writer
    487 	if err := dw.d.init(w, level); err != nil {
    488 		return nil, err
    489 	}
    490 	return &dw, nil
    491 }
    492 
    493 // NewWriterDict is like NewWriter but initializes the new
    494 // Writer with a preset dictionary.  The returned Writer behaves
    495 // as if the dictionary had been written to it without producing
    496 // any compressed output.  The compressed data written to w
    497 // can only be decompressed by a Reader initialized with the
    498 // same dictionary.
    499 func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
    500 	dw := &dictWriter{w, false}
    501 	zw, err := NewWriter(dw, level)
    502 	if err != nil {
    503 		return nil, err
    504 	}
    505 	zw.Write(dict)
    506 	zw.Flush()
    507 	dw.enabled = true
    508 	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
    509 	return zw, err
    510 }
    511 
// dictWriter forwards writes to w only once enabled is set; while disabled
// it silently discards them, which lets NewWriterDict feed the preset
// dictionary through the compressor without emitting output.
type dictWriter struct {
	w       io.Writer
	enabled bool
}
    516 
    517 func (w *dictWriter) Write(b []byte) (n int, err error) {
    518 	if w.enabled {
    519 		return w.w.Write(b)
    520 	}
    521 	return len(b), nil
    522 }
    523 
// A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
	d    compressor
	dict []byte // copy of the preset dictionary, kept for Reset (NewWriterDict only)
}
    530 
    531 // Write writes data to w, which will eventually write the
    532 // compressed form of data to its underlying writer.
    533 func (w *Writer) Write(data []byte) (n int, err error) {
    534 	return w.d.write(data)
    535 }
    536 
// Flush flushes any pending compressed data to the underlying writer.
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (w *Writer) Flush() error {
	// For more about flushing:
	// http://www.bolet.org/~pornin/deflate-flush.html
	return w.d.syncFlush()
}
    549 
// Close flushes and closes the writer. It does not close the underlying
// io.Writer.
func (w *Writer) Close() error {
	return w.d.close()
}
    554 
    555 // Reset discards the writer's state and makes it equivalent to
    556 // the result of NewWriter or NewWriterDict called with dst
    557 // and w's level and dictionary.
    558 func (w *Writer) Reset(dst io.Writer) {
    559 	if dw, ok := w.d.w.w.(*dictWriter); ok {
    560 		// w was created with NewWriterDict
    561 		dw.w = dst
    562 		w.d.reset(dw)
    563 		dw.enabled = false
    564 		w.Write(w.dict)
    565 		w.Flush()
    566 		dw.enabled = true
    567 	} else {
    568 		// w was created with NewWriter
    569 		w.d.reset(dst)
    570 	}
    571 }
    572