Home | History | Annotate | Download | only in flate
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package flate
      6 
      7 // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
      8 // LZ77 decompresses data through sequences of two forms of commands:
      9 //
     10 //	* Literal insertions: Runs of one or more symbols are inserted into the data
     11 //	stream as is. This is accomplished through the writeByte method for a
     12 //	single symbol, or combinations of writeSlice/writeMark for multiple symbols.
     13 //	Any valid stream must start with a literal insertion if no preset dictionary
     14 //	is used.
     15 //
     16 //	* Backward copies: Runs of one or more symbols are copied from previously
     17 //	emitted data. Backward copies come as the tuple (dist, length) where dist
     18 //	determines how far back in the stream to copy from and length determines how
     19 //	many bytes to copy. Note that it is valid for the length to be greater than
     20 //	the distance. Since LZ77 uses forward copies, that situation is used to
     21 //	perform a form of run-length encoding on repeated runs of symbols.
     22 //	The writeCopy and tryWriteCopy are used to implement this command.
     23 //
     24 // For performance reasons, this implementation performs little to no sanity
     25 // checks about the arguments. As such, the invariants documented for each
     26 // method call must be respected.
     27 type dictDecoder struct {
     28 	hist []byte // Sliding window history
     29 
     30 	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
     31 	wrPos int  // Current output position in buffer
     32 	rdPos int  // Have emitted hist[:rdPos] already
     33 	full  bool // Has a full window length been written yet?
     34 }
     35 
     36 // init initializes dictDecoder to have a sliding window dictionary of the given
     37 // size. If a preset dict is provided, it will initialize the dictionary with
     38 // the contents of dict.
     39 func (dd *dictDecoder) init(size int, dict []byte) {
     40 	*dd = dictDecoder{hist: dd.hist}
     41 
     42 	if cap(dd.hist) < size {
     43 		dd.hist = make([]byte, size)
     44 	}
     45 	dd.hist = dd.hist[:size]
     46 
     47 	if len(dict) > len(dd.hist) {
     48 		dict = dict[len(dict)-len(dd.hist):]
     49 	}
     50 	dd.wrPos = copy(dd.hist, dict)
     51 	if dd.wrPos == len(dd.hist) {
     52 		dd.wrPos = 0
     53 		dd.full = true
     54 	}
     55 	dd.rdPos = dd.wrPos
     56 }
     57 
     58 // histSize reports the total amount of historical data in the dictionary.
     59 func (dd *dictDecoder) histSize() int {
     60 	if dd.full {
     61 		return len(dd.hist)
     62 	}
     63 	return dd.wrPos
     64 }
     65 
     66 // availRead reports the number of bytes that can be flushed by readFlush.
     67 func (dd *dictDecoder) availRead() int {
     68 	return dd.wrPos - dd.rdPos
     69 }
     70 
     71 // availWrite reports the available amount of output buffer space.
     72 func (dd *dictDecoder) availWrite() int {
     73 	return len(dd.hist) - dd.wrPos
     74 }
     75 
     76 // writeSlice returns a slice of the available buffer to write data to.
     77 //
     78 // This invariant will be kept: len(s) <= availWrite()
     79 func (dd *dictDecoder) writeSlice() []byte {
     80 	return dd.hist[dd.wrPos:]
     81 }
     82 
     83 // writeMark advances the writer pointer by cnt.
     84 //
     85 // This invariant must be kept: 0 <= cnt <= availWrite()
     86 func (dd *dictDecoder) writeMark(cnt int) {
     87 	dd.wrPos += cnt
     88 }
     89 
     90 // writeByte writes a single byte to the dictionary.
     91 //
     92 // This invariant must be kept: 0 < availWrite()
     93 func (dd *dictDecoder) writeByte(c byte) {
     94 	dd.hist[dd.wrPos] = c
     95 	dd.wrPos++
     96 }
     97 
     98 // writeCopy copies a string at a given (dist, length) to the output.
     99 // This returns the number of bytes copied and may be less than the requested
    100 // length if the available space in the output buffer is too small.
    101 //
    102 // This invariant must be kept: 0 < dist <= histSize()
    103 func (dd *dictDecoder) writeCopy(dist, length int) int {
    104 	dstBase := dd.wrPos
    105 	dstPos := dstBase
    106 	srcPos := dstPos - dist
    107 	endPos := dstPos + length
    108 	if endPos > len(dd.hist) {
    109 		endPos = len(dd.hist)
    110 	}
    111 
    112 	// Copy non-overlapping section after destination position.
    113 	//
    114 	// This section is non-overlapping in that the copy length for this section
    115 	// is always less than or equal to the backwards distance. This can occur
    116 	// if a distance refers to data that wraps-around in the buffer.
    117 	// Thus, a backwards copy is performed here; that is, the exact bytes in
    118 	// the source prior to the copy is placed in the destination.
    119 	if srcPos < 0 {
    120 		srcPos += len(dd.hist)
    121 		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
    122 		srcPos = 0
    123 	}
    124 
    125 	// Copy possibly overlapping section before destination position.
    126 	//
    127 	// This section can overlap if the copy length for this section is larger
    128 	// than the backwards distance. This is allowed by LZ77 so that repeated
    129 	// strings can be succinctly represented using (dist, length) pairs.
    130 	// Thus, a forwards copy is performed here; that is, the bytes copied is
    131 	// possibly dependent on the resulting bytes in the destination as the copy
    132 	// progresses along. This is functionally equivalent to the following:
    133 	//
    134 	//	for i := 0; i < endPos-dstPos; i++ {
    135 	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
    136 	//	}
    137 	//	dstPos = endPos
    138 	//
    139 	for dstPos < endPos {
    140 		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
    141 	}
    142 
    143 	dd.wrPos = dstPos
    144 	return dstPos - dstBase
    145 }
    146 
    147 // tryWriteCopy tries to copy a string at a given (distance, length) to the
    148 // output. This specialized version is optimized for short distances.
    149 //
    150 // This method is designed to be inlined for performance reasons.
    151 //
    152 // This invariant must be kept: 0 < dist <= histSize()
    153 func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
    154 	dstPos := dd.wrPos
    155 	endPos := dstPos + length
    156 	if dstPos < dist || endPos > len(dd.hist) {
    157 		return 0
    158 	}
    159 	dstBase := dstPos
    160 	srcPos := dstPos - dist
    161 
    162 	// Copy possibly overlapping section before destination position.
    163 loop:
    164 	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
    165 	if dstPos < endPos {
    166 		goto loop // Avoid for-loop so that this function can be inlined
    167 	}
    168 
    169 	dd.wrPos = dstPos
    170 	return dstPos - dstBase
    171 }
    172 
    173 // readFlush returns a slice of the historical buffer that is ready to be
    174 // emitted to the user. The data returned by readFlush must be fully consumed
    175 // before calling any other dictDecoder methods.
    176 func (dd *dictDecoder) readFlush() []byte {
    177 	toRead := dd.hist[dd.rdPos:dd.wrPos]
    178 	dd.rdPos = dd.wrPos
    179 	if dd.wrPos == len(dd.hist) {
    180 		dd.wrPos, dd.rdPos = 0, 0
    181 		dd.full = true
    182 	}
    183 	return toRead
    184 }
    185