Home | History | Annotate | Download | only in runtime
      1 // Copyright 2013 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package runtime_test
      6 
      7 import (
      8 	"fmt"
      9 	"math"
     10 	"math/rand"
     11 	. "runtime"
     12 	"strings"
     13 	"testing"
     14 )
     15 
     16 // Smhasher is a torture test for hash functions.
     17 // https://code.google.com/p/smhasher/
     18 // This code is a port of some of the Smhasher tests to Go.
     19 //
     20 // The current AES hash function passes Smhasher.  Our fallback
     21 // hash functions don't, so we only enable the difficult tests when
     22 // we know the AES implementation is available.
     23 
     24 // Sanity checks.
     25 // hash should not depend on values outside key.
     26 // hash should not depend on alignment.
     27 func TestSmhasherSanity(t *testing.T) {
     28 	r := rand.New(rand.NewSource(1234))
     29 	const REP = 10
     30 	const KEYMAX = 128
     31 	const PAD = 16
     32 	const OFFMAX = 16
     33 	for k := 0; k < REP; k++ {
     34 		for n := 0; n < KEYMAX; n++ {
     35 			for i := 0; i < OFFMAX; i++ {
     36 				var b [KEYMAX + OFFMAX + 2*PAD]byte
     37 				var c [KEYMAX + OFFMAX + 2*PAD]byte
     38 				randBytes(r, b[:])
     39 				randBytes(r, c[:])
     40 				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
     41 				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
     42 					t.Errorf("hash depends on bytes outside key")
     43 				}
     44 			}
     45 		}
     46 	}
     47 }
     48 
     49 type HashSet struct {
     50 	m map[uintptr]struct{} // set of hashes added
     51 	n int                  // number of hashes added
     52 }
     53 
     54 func newHashSet() *HashSet {
     55 	return &HashSet{make(map[uintptr]struct{}), 0}
     56 }
     57 func (s *HashSet) add(h uintptr) {
     58 	s.m[h] = struct{}{}
     59 	s.n++
     60 }
     61 func (s *HashSet) addS(x string) {
     62 	s.add(StringHash(x, 0))
     63 }
     64 func (s *HashSet) addB(x []byte) {
     65 	s.add(BytesHash(x, 0))
     66 }
     67 func (s *HashSet) addS_seed(x string, seed uintptr) {
     68 	s.add(StringHash(x, seed))
     69 }
     70 func (s *HashSet) check(t *testing.T) {
     71 	const SLOP = 10.0
     72 	collisions := s.n - len(s.m)
     73 	//fmt.Printf("%d/%d\n", len(s.m), s.n)
     74 	pairs := int64(s.n) * int64(s.n-1) / 2
     75 	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
     76 	stddev := math.Sqrt(expected)
     77 	if float64(collisions) > expected+SLOP*3*stddev {
     78 		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
     79 	}
     80 }
     81 
     82 // a string plus adding zeros must make distinct hashes
     83 func TestSmhasherAppendedZeros(t *testing.T) {
     84 	s := "hello" + strings.Repeat("\x00", 256)
     85 	h := newHashSet()
     86 	for i := 0; i <= len(s); i++ {
     87 		h.addS(s[:i])
     88 	}
     89 	h.check(t)
     90 }
     91 
     92 // All 0-3 byte strings have distinct hashes.
     93 func TestSmhasherSmallKeys(t *testing.T) {
     94 	h := newHashSet()
     95 	var b [3]byte
     96 	for i := 0; i < 256; i++ {
     97 		b[0] = byte(i)
     98 		h.addB(b[:1])
     99 		for j := 0; j < 256; j++ {
    100 			b[1] = byte(j)
    101 			h.addB(b[:2])
    102 			if !testing.Short() {
    103 				for k := 0; k < 256; k++ {
    104 					b[2] = byte(k)
    105 					h.addB(b[:3])
    106 				}
    107 			}
    108 		}
    109 	}
    110 	h.check(t)
    111 }
    112 
    113 // Different length strings of all zeros have distinct hashes.
    114 func TestSmhasherZeros(t *testing.T) {
    115 	N := 256 * 1024
    116 	if testing.Short() {
    117 		N = 1024
    118 	}
    119 	h := newHashSet()
    120 	b := make([]byte, N)
    121 	for i := 0; i <= N; i++ {
    122 		h.addB(b[:i])
    123 	}
    124 	h.check(t)
    125 }
    126 
    127 // Strings with up to two nonzero bytes all have distinct hashes.
    128 func TestSmhasherTwoNonzero(t *testing.T) {
    129 	if testing.Short() {
    130 		t.Skip("Skipping in short mode")
    131 	}
    132 	h := newHashSet()
    133 	for n := 2; n <= 16; n++ {
    134 		twoNonZero(h, n)
    135 	}
    136 	h.check(t)
    137 }
    138 func twoNonZero(h *HashSet, n int) {
    139 	b := make([]byte, n)
    140 
    141 	// all zero
    142 	h.addB(b[:])
    143 
    144 	// one non-zero byte
    145 	for i := 0; i < n; i++ {
    146 		for x := 1; x < 256; x++ {
    147 			b[i] = byte(x)
    148 			h.addB(b[:])
    149 			b[i] = 0
    150 		}
    151 	}
    152 
    153 	// two non-zero bytes
    154 	for i := 0; i < n; i++ {
    155 		for x := 1; x < 256; x++ {
    156 			b[i] = byte(x)
    157 			for j := i + 1; j < n; j++ {
    158 				for y := 1; y < 256; y++ {
    159 					b[j] = byte(y)
    160 					h.addB(b[:])
    161 					b[j] = 0
    162 				}
    163 			}
    164 			b[i] = 0
    165 		}
    166 	}
    167 }
    168 
    169 // Test strings with repeats, like "abcdabcdabcdabcd..."
    170 func TestSmhasherCyclic(t *testing.T) {
    171 	if testing.Short() {
    172 		t.Skip("Skipping in short mode")
    173 	}
    174 	r := rand.New(rand.NewSource(1234))
    175 	const REPEAT = 8
    176 	const N = 1000000
    177 	for n := 4; n <= 12; n++ {
    178 		h := newHashSet()
    179 		b := make([]byte, REPEAT*n)
    180 		for i := 0; i < N; i++ {
    181 			b[0] = byte(i * 79 % 97)
    182 			b[1] = byte(i * 43 % 137)
    183 			b[2] = byte(i * 151 % 197)
    184 			b[3] = byte(i * 199 % 251)
    185 			randBytes(r, b[4:n])
    186 			for j := n; j < n*REPEAT; j++ {
    187 				b[j] = b[j-n]
    188 			}
    189 			h.addB(b)
    190 		}
    191 		h.check(t)
    192 	}
    193 }
    194 
    195 // Test strings with only a few bits set
    196 func TestSmhasherSparse(t *testing.T) {
    197 	if testing.Short() {
    198 		t.Skip("Skipping in short mode")
    199 	}
    200 	sparse(t, 32, 6)
    201 	sparse(t, 40, 6)
    202 	sparse(t, 48, 5)
    203 	sparse(t, 56, 5)
    204 	sparse(t, 64, 5)
    205 	sparse(t, 96, 4)
    206 	sparse(t, 256, 3)
    207 	sparse(t, 2048, 2)
    208 }
    209 func sparse(t *testing.T, n int, k int) {
    210 	b := make([]byte, n/8)
    211 	h := newHashSet()
    212 	setbits(h, b, 0, k)
    213 	h.check(t)
    214 }
    215 
    216 // set up to k bits at index i and greater
    217 func setbits(h *HashSet, b []byte, i int, k int) {
    218 	h.addB(b)
    219 	if k == 0 {
    220 		return
    221 	}
    222 	for j := i; j < len(b)*8; j++ {
    223 		b[j/8] |= byte(1 << uint(j&7))
    224 		setbits(h, b, j+1, k-1)
    225 		b[j/8] &= byte(^(1 << uint(j&7)))
    226 	}
    227 }
    228 
    229 // Test all possible combinations of n blocks from the set s.
    230 // "permutation" is a bad name here, but it is what Smhasher uses.
    231 func TestSmhasherPermutation(t *testing.T) {
    232 	if testing.Short() {
    233 		t.Skip("Skipping in short mode")
    234 	}
    235 	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
    236 	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
    237 	permutation(t, []uint32{0, 1}, 20)
    238 	permutation(t, []uint32{0, 1 << 31}, 20)
    239 	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
    240 }
    241 func permutation(t *testing.T, s []uint32, n int) {
    242 	b := make([]byte, n*4)
    243 	h := newHashSet()
    244 	genPerm(h, b, s, 0)
    245 	h.check(t)
    246 }
    247 func genPerm(h *HashSet, b []byte, s []uint32, n int) {
    248 	h.addB(b[:n])
    249 	if n == len(b) {
    250 		return
    251 	}
    252 	for _, v := range s {
    253 		b[n] = byte(v)
    254 		b[n+1] = byte(v >> 8)
    255 		b[n+2] = byte(v >> 16)
    256 		b[n+3] = byte(v >> 24)
    257 		genPerm(h, b, s, n+4)
    258 	}
    259 }
    260 
    261 type Key interface {
    262 	clear()              // set bits all to 0
    263 	random(r *rand.Rand) // set key to something random
    264 	bits() int           // how many bits key has
    265 	flipBit(i int)       // flip bit i of the key
    266 	hash() uintptr       // hash the key
    267 	name() string        // for error reporting
    268 }
    269 
    270 type BytesKey struct {
    271 	b []byte
    272 }
    273 
    274 func (k *BytesKey) clear() {
    275 	for i := range k.b {
    276 		k.b[i] = 0
    277 	}
    278 }
    279 func (k *BytesKey) random(r *rand.Rand) {
    280 	randBytes(r, k.b)
    281 }
    282 func (k *BytesKey) bits() int {
    283 	return len(k.b) * 8
    284 }
    285 func (k *BytesKey) flipBit(i int) {
    286 	k.b[i>>3] ^= byte(1 << uint(i&7))
    287 }
    288 func (k *BytesKey) hash() uintptr {
    289 	return BytesHash(k.b, 0)
    290 }
    291 func (k *BytesKey) name() string {
    292 	return fmt.Sprintf("bytes%d", len(k.b))
    293 }
    294 
    295 type Int32Key struct {
    296 	i uint32
    297 }
    298 
    299 func (k *Int32Key) clear() {
    300 	k.i = 0
    301 }
    302 func (k *Int32Key) random(r *rand.Rand) {
    303 	k.i = r.Uint32()
    304 }
    305 func (k *Int32Key) bits() int {
    306 	return 32
    307 }
    308 func (k *Int32Key) flipBit(i int) {
    309 	k.i ^= 1 << uint(i)
    310 }
    311 func (k *Int32Key) hash() uintptr {
    312 	return Int32Hash(k.i, 0)
    313 }
    314 func (k *Int32Key) name() string {
    315 	return "int32"
    316 }
    317 
    318 type Int64Key struct {
    319 	i uint64
    320 }
    321 
    322 func (k *Int64Key) clear() {
    323 	k.i = 0
    324 }
    325 func (k *Int64Key) random(r *rand.Rand) {
    326 	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
    327 }
    328 func (k *Int64Key) bits() int {
    329 	return 64
    330 }
    331 func (k *Int64Key) flipBit(i int) {
    332 	k.i ^= 1 << uint(i)
    333 }
    334 func (k *Int64Key) hash() uintptr {
    335 	return Int64Hash(k.i, 0)
    336 }
    337 func (k *Int64Key) name() string {
    338 	return "int64"
    339 }
    340 
    341 type EfaceKey struct {
    342 	i interface{}
    343 }
    344 
    345 func (k *EfaceKey) clear() {
    346 	k.i = nil
    347 }
    348 func (k *EfaceKey) random(r *rand.Rand) {
    349 	k.i = uint64(r.Int63())
    350 }
    351 func (k *EfaceKey) bits() int {
    352 	// use 64 bits.  This tests inlined interfaces
    353 	// on 64-bit targets and indirect interfaces on
    354 	// 32-bit targets.
    355 	return 64
    356 }
    357 func (k *EfaceKey) flipBit(i int) {
    358 	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
    359 }
    360 func (k *EfaceKey) hash() uintptr {
    361 	return EfaceHash(k.i, 0)
    362 }
    363 func (k *EfaceKey) name() string {
    364 	return "Eface"
    365 }
    366 
    367 type IfaceKey struct {
    368 	i interface {
    369 		F()
    370 	}
    371 }
    372 type fInter uint64
    373 
    374 func (x fInter) F() {
    375 }
    376 
    377 func (k *IfaceKey) clear() {
    378 	k.i = nil
    379 }
    380 func (k *IfaceKey) random(r *rand.Rand) {
    381 	k.i = fInter(r.Int63())
    382 }
    383 func (k *IfaceKey) bits() int {
    384 	// use 64 bits.  This tests inlined interfaces
    385 	// on 64-bit targets and indirect interfaces on
    386 	// 32-bit targets.
    387 	return 64
    388 }
    389 func (k *IfaceKey) flipBit(i int) {
    390 	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
    391 }
    392 func (k *IfaceKey) hash() uintptr {
    393 	return IfaceHash(k.i, 0)
    394 }
    395 func (k *IfaceKey) name() string {
    396 	return "Iface"
    397 }
    398 
    399 // Flipping a single bit of a key should flip each output bit with 50% probability.
    400 func TestSmhasherAvalanche(t *testing.T) {
    401 	if testing.Short() {
    402 		t.Skip("Skipping in short mode")
    403 	}
    404 	avalancheTest1(t, &BytesKey{make([]byte, 2)})
    405 	avalancheTest1(t, &BytesKey{make([]byte, 4)})
    406 	avalancheTest1(t, &BytesKey{make([]byte, 8)})
    407 	avalancheTest1(t, &BytesKey{make([]byte, 16)})
    408 	avalancheTest1(t, &BytesKey{make([]byte, 32)})
    409 	avalancheTest1(t, &BytesKey{make([]byte, 200)})
    410 	avalancheTest1(t, &Int32Key{})
    411 	avalancheTest1(t, &Int64Key{})
    412 	avalancheTest1(t, &EfaceKey{})
    413 	avalancheTest1(t, &IfaceKey{})
    414 }
    415 func avalancheTest1(t *testing.T, k Key) {
    416 	const REP = 100000
    417 	r := rand.New(rand.NewSource(1234))
    418 	n := k.bits()
    419 
    420 	// grid[i][j] is a count of whether flipping
    421 	// input bit i affects output bit j.
    422 	grid := make([][hashSize]int, n)
    423 
    424 	for z := 0; z < REP; z++ {
    425 		// pick a random key, hash it
    426 		k.random(r)
    427 		h := k.hash()
    428 
    429 		// flip each bit, hash & compare the results
    430 		for i := 0; i < n; i++ {
    431 			k.flipBit(i)
    432 			d := h ^ k.hash()
    433 			k.flipBit(i)
    434 
    435 			// record the effects of that bit flip
    436 			g := &grid[i]
    437 			for j := 0; j < hashSize; j++ {
    438 				g[j] += int(d & 1)
    439 				d >>= 1
    440 			}
    441 		}
    442 	}
    443 
    444 	// Each entry in the grid should be about REP/2.
    445 	// More precisely, we did N = k.bits() * hashSize experiments where
    446 	// each is the sum of REP coin flips.  We want to find bounds on the
    447 	// sum of coin flips such that a truly random experiment would have
    448 	// all sums inside those bounds with 99% probability.
    449 	N := n * hashSize
    450 	var c float64
    451 	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
    452 	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
    453 	}
    454 	c *= 4.0 // allowed slack - we don't need to be perfectly random
    455 	mean := .5 * REP
    456 	stddev := .5 * math.Sqrt(REP)
    457 	low := int(mean - c*stddev)
    458 	high := int(mean + c*stddev)
    459 	for i := 0; i < n; i++ {
    460 		for j := 0; j < hashSize; j++ {
    461 			x := grid[i][j]
    462 			if x < low || x > high {
    463 				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
    464 			}
    465 		}
    466 	}
    467 }
    468 
    469 // All bit rotations of a set of distinct keys
    470 func TestSmhasherWindowed(t *testing.T) {
    471 	windowed(t, &Int32Key{})
    472 	windowed(t, &Int64Key{})
    473 	windowed(t, &BytesKey{make([]byte, 128)})
    474 }
    475 func windowed(t *testing.T, k Key) {
    476 	if testing.Short() {
    477 		t.Skip("Skipping in short mode")
    478 	}
    479 	const BITS = 16
    480 
    481 	for r := 0; r < k.bits(); r++ {
    482 		h := newHashSet()
    483 		for i := 0; i < 1<<BITS; i++ {
    484 			k.clear()
    485 			for j := 0; j < BITS; j++ {
    486 				if i>>uint(j)&1 != 0 {
    487 					k.flipBit((j + r) % k.bits())
    488 				}
    489 			}
    490 			h.add(k.hash())
    491 		}
    492 		h.check(t)
    493 	}
    494 }
    495 
    496 // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
    497 func TestSmhasherText(t *testing.T) {
    498 	if testing.Short() {
    499 		t.Skip("Skipping in short mode")
    500 	}
    501 	text(t, "Foo", "Bar")
    502 	text(t, "FooBar", "")
    503 	text(t, "", "FooBar")
    504 }
    505 func text(t *testing.T, prefix, suffix string) {
    506 	const N = 4
    507 	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
    508 	const L = len(S)
    509 	b := make([]byte, len(prefix)+N+len(suffix))
    510 	copy(b, prefix)
    511 	copy(b[len(prefix)+N:], suffix)
    512 	h := newHashSet()
    513 	c := b[len(prefix):]
    514 	for i := 0; i < L; i++ {
    515 		c[0] = S[i]
    516 		for j := 0; j < L; j++ {
    517 			c[1] = S[j]
    518 			for k := 0; k < L; k++ {
    519 				c[2] = S[k]
    520 				for x := 0; x < L; x++ {
    521 					c[3] = S[x]
    522 					h.addB(b)
    523 				}
    524 			}
    525 		}
    526 	}
    527 	h.check(t)
    528 }
    529 
    530 // Make sure different seed values generate different hashes.
    531 func TestSmhasherSeed(t *testing.T) {
    532 	h := newHashSet()
    533 	const N = 100000
    534 	s := "hello"
    535 	for i := 0; i < N; i++ {
    536 		h.addS_seed(s, uintptr(i))
    537 	}
    538 	h.check(t)
    539 }
    540 
    541 // size of the hash output (32 or 64 bits)
    542 const hashSize = 32 + int(^uintptr(0)>>63<<5)
    543 
    544 func randBytes(r *rand.Rand, b []byte) {
    545 	for i := range b {
    546 		b[i] = byte(r.Uint32())
    547 	}
    548 }
    549 
    550 func benchmarkHash(b *testing.B, n int) {
    551 	s := strings.Repeat("A", n)
    552 
    553 	for i := 0; i < b.N; i++ {
    554 		StringHash(s, 0)
    555 	}
    556 	b.SetBytes(int64(n))
    557 }
    558 
    559 func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
    560 func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
    561 func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
    562 func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
    563 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
    564