Home | History | Annotate | Download | only in bzip2
      1 // Copyright 2011 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package bzip2
      6 
      7 import (
      8 	"bytes"
      9 	"encoding/hex"
     10 	"fmt"
     11 	"io"
     12 	"io/ioutil"
     13 	"testing"
     14 )
     15 
     16 func mustDecodeHex(s string) []byte {
     17 	b, err := hex.DecodeString(s)
     18 	if err != nil {
     19 		panic(err)
     20 	}
     21 	return b
     22 }
     23 
     24 func mustLoadFile(f string) []byte {
     25 	b, err := ioutil.ReadFile(f)
     26 	if err != nil {
     27 		panic(err)
     28 	}
     29 	return b
     30 }
     31 
     32 func trim(b []byte) string {
     33 	const limit = 1024
     34 	if len(b) < limit {
     35 		return fmt.Sprintf("%q", b)
     36 	}
     37 	return fmt.Sprintf("%q...", b[:limit])
     38 }
     39 
     40 func TestReader(t *testing.T) {
     41 	var vectors = []struct {
     42 		desc   string
     43 		input  []byte
     44 		output []byte
     45 		fail   bool
     46 	}{{
     47 		desc: "hello world",
     48 		input: mustDecodeHex("" +
     49 			"425a68393141592653594eece83600000251800010400006449080200031064c" +
     50 			"4101a7a9a580bb9431f8bb9229c28482776741b0",
     51 		),
     52 		output: []byte("hello world\n"),
     53 	}, {
     54 		desc: "concatenated files",
     55 		input: mustDecodeHex("" +
     56 			"425a68393141592653594eece83600000251800010400006449080200031064c" +
     57 			"4101a7a9a580bb9431f8bb9229c28482776741b0425a68393141592653594eec" +
     58 			"e83600000251800010400006449080200031064c4101a7a9a580bb9431f8bb92" +
     59 			"29c28482776741b0",
     60 		),
     61 		output: []byte("hello world\nhello world\n"),
     62 	}, {
     63 		desc: "32B zeros",
     64 		input: mustDecodeHex("" +
     65 			"425a6839314159265359b5aa5098000000600040000004200021008283177245" +
     66 			"385090b5aa5098",
     67 		),
     68 		output: make([]byte, 32),
     69 	}, {
     70 		desc: "1MiB zeros",
     71 		input: mustDecodeHex("" +
     72 			"425a683931415926535938571ce50008084000c0040008200030cc0529a60806" +
     73 			"c4201e2ee48a70a12070ae39ca",
     74 		),
     75 		output: make([]byte, 1<<20),
     76 	}, {
     77 		desc:   "random data",
     78 		input:  mustLoadFile("testdata/pass-random1.bz2"),
     79 		output: mustLoadFile("testdata/pass-random1.bin"),
     80 	}, {
     81 		desc:   "random data - full symbol range",
     82 		input:  mustLoadFile("testdata/pass-random2.bz2"),
     83 		output: mustLoadFile("testdata/pass-random2.bin"),
     84 	}, {
     85 		desc: "random data - uses RLE1 stage",
     86 		input: mustDecodeHex("" +
     87 			"425a6839314159265359d992d0f60000137dfe84020310091c1e280e100e0428" +
     88 			"01099210094806c0110002e70806402000546034000034000000f28300000320" +
     89 			"00d3403264049270eb7a9280d308ca06ad28f6981bee1bf8160727c7364510d7" +
     90 			"3a1e123083421b63f031f63993a0f40051fbf177245385090d992d0f60",
     91 		),
     92 		output: mustDecodeHex("" +
     93 			"92d5652616ac444a4a04af1a8a3964aca0450d43d6cf233bd03233f4ba92f871" +
     94 			"9e6c2a2bd4f5f88db07ecd0da3a33b263483db9b2c158786ad6363be35d17335" +
     95 			"ba",
     96 		),
     97 	}, {
     98 		desc:  "1MiB sawtooth",
     99 		input: mustLoadFile("testdata/pass-sawtooth.bz2"),
    100 		output: func() []byte {
    101 			b := make([]byte, 1<<20)
    102 			for i := range b {
    103 				b[i] = byte(i)
    104 			}
    105 			return b
    106 		}(),
    107 	}, {
    108 		desc:  "RLE2 buffer overrun - issue 5747",
    109 		input: mustLoadFile("testdata/fail-issue5747.bz2"),
    110 		fail:  true,
    111 	}, {
    112 		desc: "out-of-range selector - issue 8363",
    113 		input: mustDecodeHex("" +
    114 			"425a68393141592653594eece83600000251800010400006449080200031064c" +
    115 			"4101a7a9a580bb943117724538509000000000",
    116 		),
    117 		fail: true,
    118 	}, {
    119 		desc: "bad block size - issue 13941",
    120 		input: mustDecodeHex("" +
    121 			"425a683131415926535936dc55330063ffc0006000200020a40830008b0008b8" +
    122 			"bb9229c28481b6e2a998",
    123 		),
    124 		fail: true,
    125 	}, {
    126 		desc: "bad huffman delta",
    127 		input: mustDecodeHex("" +
    128 			"425a6836314159265359b1f7404b000000400040002000217d184682ee48a70a" +
    129 			"12163ee80960",
    130 		),
    131 		fail: true,
    132 	}}
    133 
    134 	for i, v := range vectors {
    135 		rd := NewReader(bytes.NewReader(v.input))
    136 		buf, err := ioutil.ReadAll(rd)
    137 
    138 		if fail := bool(err != nil); fail != v.fail {
    139 			if fail {
    140 				t.Errorf("test %d (%s), unexpected failure: %v", i, v.desc, err)
    141 			} else {
    142 				t.Errorf("test %d (%s), unexpected success", i, v.desc)
    143 			}
    144 		}
    145 		if !v.fail && !bytes.Equal(buf, v.output) {
    146 			t.Errorf("test %d (%s), output mismatch:\ngot  %s\nwant %s", i, v.desc, trim(buf), trim(v.output))
    147 		}
    148 	}
    149 }
    150 
    151 func TestBitReader(t *testing.T) {
    152 	var vectors = []struct {
    153 		nbits uint // Number of bits to read
    154 		value int  // Expected output value (0 for error)
    155 		fail  bool // Expected operation failure?
    156 	}{
    157 		{nbits: 1, value: 1},
    158 		{nbits: 1, value: 0},
    159 		{nbits: 1, value: 1},
    160 		{nbits: 5, value: 11},
    161 		{nbits: 32, value: 0x12345678},
    162 		{nbits: 15, value: 14495},
    163 		{nbits: 3, value: 6},
    164 		{nbits: 6, value: 13},
    165 		{nbits: 1, fail: true},
    166 	}
    167 
    168 	rd := bytes.NewReader([]byte{0xab, 0x12, 0x34, 0x56, 0x78, 0x71, 0x3f, 0x8d})
    169 	br := newBitReader(rd)
    170 	for i, v := range vectors {
    171 		val := br.ReadBits(v.nbits)
    172 		if fail := bool(br.err != nil); fail != v.fail {
    173 			if fail {
    174 				t.Errorf("test %d, unexpected failure: ReadBits(%d) = %v", i, v.nbits, br.err)
    175 			} else {
    176 				t.Errorf("test %d, unexpected success: ReadBits(%d) = nil", i, v.nbits)
    177 			}
    178 		}
    179 		if !v.fail && val != v.value {
    180 			t.Errorf("test %d, mismatching value: ReadBits(%d) = %d, want %d", i, v.nbits, val, v.value)
    181 		}
    182 	}
    183 }
    184 
    185 func TestMTF(t *testing.T) {
    186 	var vectors = []struct {
    187 		idx int   // Input index
    188 		sym uint8 // Expected output symbol
    189 	}{
    190 		{idx: 1, sym: 1}, // [1 0 2 3 4]
    191 		{idx: 0, sym: 1}, // [1 0 2 3 4]
    192 		{idx: 1, sym: 0}, // [0 1 2 3 4]
    193 		{idx: 4, sym: 4}, // [4 0 1 2 3]
    194 		{idx: 1, sym: 0}, // [0 4 1 2 3]
    195 	}
    196 
    197 	mtf := newMTFDecoderWithRange(5)
    198 	for i, v := range vectors {
    199 		sym := mtf.Decode(v.idx)
    200 		t.Log(mtf)
    201 		if sym != v.sym {
    202 			t.Errorf("test %d, symbol mismatch: Decode(%d) = %d, want %d", i, v.idx, sym, v.sym)
    203 		}
    204 	}
    205 }
    206 
    207 func TestZeroRead(t *testing.T) {
    208 	b := mustDecodeHex("425a6839314159265359b5aa5098000000600040000004200021008283177245385090b5aa5098")
    209 	r := NewReader(bytes.NewReader(b))
    210 	if n, err := r.Read(nil); n != 0 || err != nil {
    211 		t.Errorf("Read(nil) = (%d, %v), want (0, nil)", n, err)
    212 	}
    213 }
    214 
// Compressed inputs for the decode benchmarks. They are read from testdata
// when the package's variables are initialized; mustLoadFile panics if any
// of the files cannot be read.
var (
	digits = mustLoadFile("testdata/e.txt.bz2")
	twain  = mustLoadFile("testdata/Mark.Twain-Tom.Sawyer.txt.bz2")
	random = mustLoadFile("testdata/random.data.bz2")
)
    220 
    221 func benchmarkDecode(b *testing.B, compressed []byte) {
    222 	// Determine the uncompressed size of testfile.
    223 	uncompressedSize, err := io.Copy(ioutil.Discard, NewReader(bytes.NewReader(compressed)))
    224 	if err != nil {
    225 		b.Fatal(err)
    226 	}
    227 
    228 	b.SetBytes(uncompressedSize)
    229 	b.ReportAllocs()
    230 	b.ResetTimer()
    231 
    232 	for i := 0; i < b.N; i++ {
    233 		r := bytes.NewReader(compressed)
    234 		io.Copy(ioutil.Discard, NewReader(r))
    235 	}
    236 }
    237 
    238 func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
    239 func BenchmarkDecodeTwain(b *testing.B)  { benchmarkDecode(b, twain) }
    240 func BenchmarkDecodeRand(b *testing.B)   { benchmarkDecode(b, random) }
    241