Home | History | Annotate | Download | only in lzw
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package lzw
      6 
      7 import (
      8 	"bytes"
      9 	"fmt"
     10 	"io"
     11 	"io/ioutil"
     12 	"math"
     13 	"runtime"
     14 	"strconv"
     15 	"strings"
     16 	"testing"
     17 )
     18 
     19 type lzwTest struct {
     20 	desc       string
     21 	raw        string
     22 	compressed string
     23 	err        error
     24 }
     25 
     26 var lzwTests = []lzwTest{
     27 	{
     28 		"empty;LSB;8",
     29 		"",
     30 		"\x01\x01",
     31 		nil,
     32 	},
     33 	{
     34 		"empty;MSB;8",
     35 		"",
     36 		"\x80\x80",
     37 		nil,
     38 	},
     39 	{
     40 		"tobe;LSB;7",
     41 		"TOBEORNOTTOBEORTOBEORNOT",
     42 		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
     43 		nil,
     44 	},
     45 	{
     46 		"tobe;LSB;8",
     47 		"TOBEORNOTTOBEORTOBEORNOT",
     48 		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04\x12\x34\xb8\xb0\xe0\xc1\x84\x01\x01",
     49 		nil,
     50 	},
     51 	{
     52 		"tobe;MSB;7",
     53 		"TOBEORNOTTOBEORTOBEORNOT",
     54 		"\x54\x4f\x42\x45\x4f\x52\x4e\x4f\x54\x82\x84\x86\x8b\x85\x87\x89\x81",
     55 		nil,
     56 	},
     57 	{
     58 		"tobe;MSB;8",
     59 		"TOBEORNOTTOBEORTOBEORNOT",
     60 		"\x2a\x13\xc8\x44\x52\x79\x48\x9c\x4f\x2a\x40\xa0\x90\x68\x5c\x16\x0f\x09\x80\x80",
     61 		nil,
     62 	},
     63 	{
     64 		"tobe-truncated;LSB;8",
     65 		"TOBEORNOTTOBEORTOBEORNOT",
     66 		"\x54\x9e\x08\x29\xf2\x44\x8a\x93\x27\x54\x04",
     67 		io.ErrUnexpectedEOF,
     68 	},
     69 	// This example comes from http://en.wikipedia.org/wiki/Graphics_Interchange_Format.
     70 	{
     71 		"gif;LSB;8",
     72 		"\x28\xff\xff\xff\x28\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
     73 		"\x00\x51\xfc\x1b\x28\x70\xa0\xc1\x83\x01\x01",
     74 		nil,
     75 	},
     76 	// This example comes from http://compgroups.net/comp.lang.ruby/Decompressing-LZW-compression-from-PDF-file
     77 	{
     78 		"pdf;MSB;8",
     79 		"-----A---B",
     80 		"\x80\x0b\x60\x50\x22\x0c\x0c\x85\x01",
     81 		nil,
     82 	},
     83 }
     84 
     85 func TestReader(t *testing.T) {
     86 	var b bytes.Buffer
     87 	for _, tt := range lzwTests {
     88 		d := strings.Split(tt.desc, ";")
     89 		var order Order
     90 		switch d[1] {
     91 		case "LSB":
     92 			order = LSB
     93 		case "MSB":
     94 			order = MSB
     95 		default:
     96 			t.Errorf("%s: bad order %q", tt.desc, d[1])
     97 		}
     98 		litWidth, _ := strconv.Atoi(d[2])
     99 		rc := NewReader(strings.NewReader(tt.compressed), order, litWidth)
    100 		defer rc.Close()
    101 		b.Reset()
    102 		n, err := io.Copy(&b, rc)
    103 		s := b.String()
    104 		if err != nil {
    105 			if err != tt.err {
    106 				t.Errorf("%s: io.Copy: %v want %v", tt.desc, err, tt.err)
    107 			}
    108 			if err == io.ErrUnexpectedEOF {
    109 				// Even if the input is truncated, we should still return the
    110 				// partial decoded result.
    111 				if n == 0 || !strings.HasPrefix(tt.raw, s) {
    112 					t.Errorf("got %d bytes (%q), want a non-empty prefix of %q", n, s, tt.raw)
    113 				}
    114 			}
    115 			continue
    116 		}
    117 		if s != tt.raw {
    118 			t.Errorf("%s: got %d-byte %q want %d-byte %q", tt.desc, n, s, len(tt.raw), tt.raw)
    119 		}
    120 	}
    121 }
    122 
    123 type devZero struct{}
    124 
    125 func (devZero) Read(p []byte) (int, error) {
    126 	for i := range p {
    127 		p[i] = 0
    128 	}
    129 	return len(p), nil
    130 }
    131 
    132 func TestHiCodeDoesNotOverflow(t *testing.T) {
    133 	r := NewReader(devZero{}, LSB, 8)
    134 	d := r.(*decoder)
    135 	buf := make([]byte, 1024)
    136 	oldHi := uint16(0)
    137 	for i := 0; i < 100; i++ {
    138 		if _, err := io.ReadFull(r, buf); err != nil {
    139 			t.Fatalf("i=%d: %v", i, err)
    140 		}
    141 		// The hi code should never decrease.
    142 		if d.hi < oldHi {
    143 			t.Fatalf("i=%d: hi=%d decreased from previous value %d", i, d.hi, oldHi)
    144 		}
    145 		oldHi = d.hi
    146 	}
    147 }
    148 
    149 // TestNoLongerSavingPriorExpansions tests the decoder state when codes other
    150 // than clear codes continue to be seen after decoder.hi and decoder.width
    151 // reach their maximum values (4095 and 12), i.e. after we no longer save prior
    152 // expansions. In particular, it tests seeing the highest possible code, 4095.
    153 func TestNoLongerSavingPriorExpansions(t *testing.T) {
    154 	// Iterations is used to calculate how many input bits are needed to get
    155 	// the decoder.hi and decoder.width values up to their maximum.
    156 	iterations := []struct {
    157 		width, n int
    158 	}{
    159 		// The final term is 257, not 256, as NewReader initializes d.hi to
    160 		// d.clear+1 and the clear code is 256.
    161 		{9, 512 - 257},
    162 		{10, 1024 - 512},
    163 		{11, 2048 - 1024},
    164 		{12, 4096 - 2048},
    165 	}
    166 	nCodes, nBits := 0, 0
    167 	for _, e := range iterations {
    168 		nCodes += e.n
    169 		nBits += e.n * e.width
    170 	}
    171 	if nCodes != 3839 {
    172 		t.Fatalf("nCodes: got %v, want %v", nCodes, 3839)
    173 	}
    174 	if nBits != 43255 {
    175 		t.Fatalf("nBits: got %v, want %v", nBits, 43255)
    176 	}
    177 
    178 	// Construct our input of 43255 zero bits (which gets d.hi and d.width up
    179 	// to 4095 and 12), followed by 0xfff (4095) as 12 bits, followed by 0x101
    180 	// (EOF) as 12 bits.
    181 	//
    182 	// 43255 = 5406*8 + 7, and codes are read in LSB order. The final bytes are
    183 	// therefore:
    184 	//
    185 	// xwwwwwww xxxxxxxx yyyyyxxx zyyyyyyy
    186 	// 10000000 11111111 00001111 00001000
    187 	//
    188 	// or split out:
    189 	//
    190 	// .0000000 ........ ........ ........   w = 0x000
    191 	// 1....... 11111111 .....111 ........   x = 0xfff
    192 	// ........ ........ 00001... .0001000   y = 0x101
    193 	//
    194 	// The 12 'w' bits (not all are shown) form the 3839'th code, with value
    195 	// 0x000. Just after decoder.read returns that code, d.hi == 4095 and
    196 	// d.last == 0.
    197 	//
    198 	// The 12 'x' bits form the 3840'th code, with value 0xfff or 4095. Just
    199 	// after decoder.read returns that code, d.hi == 4095 and d.last ==
    200 	// decoderInvalidCode.
    201 	//
    202 	// The 12 'y' bits form the 3841'st code, with value 0x101, the EOF code.
    203 	//
    204 	// The 'z' bit is unused.
    205 	in := make([]byte, 5406)
    206 	in = append(in, 0x80, 0xff, 0x0f, 0x08)
    207 
    208 	r := NewReader(bytes.NewReader(in), LSB, 8)
    209 	nDecoded, err := io.Copy(ioutil.Discard, r)
    210 	if err != nil {
    211 		t.Fatalf("Copy: %v", err)
    212 	}
    213 	// nDecoded should be 3841: 3839 literal codes and then 2 decoded bytes
    214 	// from 1 non-literal code. The EOF code contributes 0 decoded bytes.
    215 	if nDecoded != int64(nCodes+2) {
    216 		t.Fatalf("nDecoded: got %v, want %v", nDecoded, nCodes+2)
    217 	}
    218 }
    219 
    220 func BenchmarkDecoder(b *testing.B) {
    221 	buf, err := ioutil.ReadFile("../testdata/e.txt")
    222 	if err != nil {
    223 		b.Fatal(err)
    224 	}
    225 	if len(buf) == 0 {
    226 		b.Fatalf("test file has no data")
    227 	}
    228 
    229 	for e := 4; e <= 6; e++ {
    230 		n := int(math.Pow10(e))
    231 		b.Run(fmt.Sprint("1e", e), func(b *testing.B) {
    232 			b.StopTimer()
    233 			b.SetBytes(int64(n))
    234 			buf0 := buf
    235 			compressed := new(bytes.Buffer)
    236 			w := NewWriter(compressed, LSB, 8)
    237 			for i := 0; i < n; i += len(buf0) {
    238 				if len(buf0) > n-i {
    239 					buf0 = buf0[:n-i]
    240 				}
    241 				w.Write(buf0)
    242 			}
    243 			w.Close()
    244 			buf1 := compressed.Bytes()
    245 			buf0, compressed, w = nil, nil, nil
    246 			runtime.GC()
    247 			b.StartTimer()
    248 			for i := 0; i < b.N; i++ {
    249 				io.Copy(ioutil.Discard, NewReader(bytes.NewReader(buf1), LSB, 8))
    250 			}
    251 		})
    252 	}
    253 }
    254