1 // Copyright 2013 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime_test 6 7 import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 ) 15 16 // Smhasher is a torture test for hash functions. 17 // https://code.google.com/p/smhasher/ 18 // This code is a port of some of the Smhasher tests to Go. 19 // 20 // The current AES hash function passes Smhasher. Our fallback 21 // hash functions don't, so we only enable the difficult tests when 22 // we know the AES implementation is available. 23 24 // Sanity checks. 25 // hash should not depend on values outside key. 26 // hash should not depend on alignment. 27 func TestSmhasherSanity(t *testing.T) { 28 r := rand.New(rand.NewSource(1234)) 29 const REP = 10 30 const KEYMAX = 128 31 const PAD = 16 32 const OFFMAX = 16 33 for k := 0; k < REP; k++ { 34 for n := 0; n < KEYMAX; n++ { 35 for i := 0; i < OFFMAX; i++ { 36 var b [KEYMAX + OFFMAX + 2*PAD]byte 37 var c [KEYMAX + OFFMAX + 2*PAD]byte 38 randBytes(r, b[:]) 39 randBytes(r, c[:]) 40 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 41 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 42 t.Errorf("hash depends on bytes outside key") 43 } 44 } 45 } 46 } 47 } 48 49 type HashSet struct { 50 m map[uintptr]struct{} // set of hashes added 51 n int // number of hashes added 52 } 53 54 func newHashSet() *HashSet { 55 return &HashSet{make(map[uintptr]struct{}), 0} 56 } 57 func (s *HashSet) add(h uintptr) { 58 s.m[h] = struct{}{} 59 s.n++ 60 } 61 func (s *HashSet) addS(x string) { 62 s.add(StringHash(x, 0)) 63 } 64 func (s *HashSet) addB(x []byte) { 65 s.add(BytesHash(x, 0)) 66 } 67 func (s *HashSet) addS_seed(x string, seed uintptr) { 68 s.add(StringHash(x, seed)) 69 } 70 func (s *HashSet) check(t *testing.T) { 71 const SLOP = 10.0 72 collisions := s.n - len(s.m) 73 //fmt.Printf("%d/%d\n", len(s.m), s.n) 74 pairs := int64(s.n) * int64(s.n-1) / 2 75 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 76 stddev := math.Sqrt(expected) 77 if float64(collisions) > expected+SLOP*3*stddev { 78 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 79 } 80 } 81 82 // a string plus adding zeros must make distinct hashes 83 func TestSmhasherAppendedZeros(t *testing.T) { 84 s := "hello" + strings.Repeat("\x00", 256) 85 h := newHashSet() 86 for i := 0; i <= len(s); i++ { 87 h.addS(s[:i]) 88 } 89 h.check(t) 90 } 91 92 // All 0-3 byte strings have distinct hashes. 93 func TestSmhasherSmallKeys(t *testing.T) { 94 h := newHashSet() 95 var b [3]byte 96 for i := 0; i < 256; i++ { 97 b[0] = byte(i) 98 h.addB(b[:1]) 99 for j := 0; j < 256; j++ { 100 b[1] = byte(j) 101 h.addB(b[:2]) 102 if !testing.Short() { 103 for k := 0; k < 256; k++ { 104 b[2] = byte(k) 105 h.addB(b[:3]) 106 } 107 } 108 } 109 } 110 h.check(t) 111 } 112 113 // Different length strings of all zeros have distinct hashes. 114 func TestSmhasherZeros(t *testing.T) { 115 N := 256 * 1024 116 if testing.Short() { 117 N = 1024 118 } 119 h := newHashSet() 120 b := make([]byte, N) 121 for i := 0; i <= N; i++ { 122 h.addB(b[:i]) 123 } 124 h.check(t) 125 } 126 127 // Strings with up to two nonzero bytes all have distinct hashes. 128 func TestSmhasherTwoNonzero(t *testing.T) { 129 if testing.Short() { 130 t.Skip("Skipping in short mode") 131 } 132 h := newHashSet() 133 for n := 2; n <= 16; n++ { 134 twoNonZero(h, n) 135 } 136 h.check(t) 137 } 138 func twoNonZero(h *HashSet, n int) { 139 b := make([]byte, n) 140 141 // all zero 142 h.addB(b[:]) 143 144 // one non-zero byte 145 for i := 0; i < n; i++ { 146 for x := 1; x < 256; x++ { 147 b[i] = byte(x) 148 h.addB(b[:]) 149 b[i] = 0 150 } 151 } 152 153 // two non-zero bytes 154 for i := 0; i < n; i++ { 155 for x := 1; x < 256; x++ { 156 b[i] = byte(x) 157 for j := i + 1; j < n; j++ { 158 for y := 1; y < 256; y++ { 159 b[j] = byte(y) 160 h.addB(b[:]) 161 b[j] = 0 162 } 163 } 164 b[i] = 0 165 } 166 } 167 } 168 169 // Test strings with repeats, like "abcdabcdabcdabcd..." 170 func TestSmhasherCyclic(t *testing.T) { 171 if testing.Short() { 172 t.Skip("Skipping in short mode") 173 } 174 r := rand.New(rand.NewSource(1234)) 175 const REPEAT = 8 176 const N = 1000000 177 for n := 4; n <= 12; n++ { 178 h := newHashSet() 179 b := make([]byte, REPEAT*n) 180 for i := 0; i < N; i++ { 181 b[0] = byte(i * 79 % 97) 182 b[1] = byte(i * 43 % 137) 183 b[2] = byte(i * 151 % 197) 184 b[3] = byte(i * 199 % 251) 185 randBytes(r, b[4:n]) 186 for j := n; j < n*REPEAT; j++ { 187 b[j] = b[j-n] 188 } 189 h.addB(b) 190 } 191 h.check(t) 192 } 193 } 194 195 // Test strings with only a few bits set 196 func TestSmhasherSparse(t *testing.T) { 197 if testing.Short() { 198 t.Skip("Skipping in short mode") 199 } 200 sparse(t, 32, 6) 201 sparse(t, 40, 6) 202 sparse(t, 48, 5) 203 sparse(t, 56, 5) 204 sparse(t, 64, 5) 205 sparse(t, 96, 4) 206 sparse(t, 256, 3) 207 sparse(t, 2048, 2) 208 } 209 func sparse(t *testing.T, n int, k int) { 210 b := make([]byte, n/8) 211 h := newHashSet() 212 setbits(h, b, 0, k) 213 h.check(t) 214 } 215 216 // set up to k bits at index i and greater 217 func setbits(h *HashSet, b []byte, i int, k int) { 218 h.addB(b) 219 if k == 0 { 220 return 221 } 222 for j := i; j < len(b)*8; j++ { 223 b[j/8] |= byte(1 << uint(j&7)) 224 setbits(h, b, j+1, k-1) 225 b[j/8] &= byte(^(1 << uint(j&7))) 226 } 227 } 228 229 // Test all possible combinations of n blocks from the set s. 230 // "permutation" is a bad name here, but it is what Smhasher uses. 231 func TestSmhasherPermutation(t *testing.T) { 232 if testing.Short() { 233 t.Skip("Skipping in short mode") 234 } 235 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 236 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 237 permutation(t, []uint32{0, 1}, 20) 238 permutation(t, []uint32{0, 1 << 31}, 20) 239 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 240 } 241 func permutation(t *testing.T, s []uint32, n int) { 242 b := make([]byte, n*4) 243 h := newHashSet() 244 genPerm(h, b, s, 0) 245 h.check(t) 246 } 247 func genPerm(h *HashSet, b []byte, s []uint32, n int) { 248 h.addB(b[:n]) 249 if n == len(b) { 250 return 251 } 252 for _, v := range s { 253 b[n] = byte(v) 254 b[n+1] = byte(v >> 8) 255 b[n+2] = byte(v >> 16) 256 b[n+3] = byte(v >> 24) 257 genPerm(h, b, s, n+4) 258 } 259 } 260 261 type Key interface { 262 clear() // set bits all to 0 263 random(r *rand.Rand) // set key to something random 264 bits() int // how many bits key has 265 flipBit(i int) // flip bit i of the key 266 hash() uintptr // hash the key 267 name() string // for error reporting 268 } 269 270 type BytesKey struct { 271 b []byte 272 } 273 274 func (k *BytesKey) clear() { 275 for i := range k.b { 276 k.b[i] = 0 277 } 278 } 279 func (k *BytesKey) random(r *rand.Rand) { 280 randBytes(r, k.b) 281 } 282 func (k *BytesKey) bits() int { 283 return len(k.b) * 8 284 } 285 func (k *BytesKey) flipBit(i int) { 286 k.b[i>>3] ^= byte(1 << uint(i&7)) 287 } 288 func (k *BytesKey) hash() uintptr { 289 return BytesHash(k.b, 0) 290 } 291 func (k *BytesKey) name() string { 292 return fmt.Sprintf("bytes%d", len(k.b)) 293 } 294 295 type Int32Key struct { 296 i uint32 297 } 298 299 func (k *Int32Key) clear() { 300 k.i = 0 301 } 302 func (k *Int32Key) random(r *rand.Rand) { 303 k.i = r.Uint32() 304 } 305 func (k *Int32Key) bits() int { 306 return 32 307 } 308 func (k *Int32Key) flipBit(i int) { 309 k.i ^= 1 << uint(i) 310 } 311 func (k *Int32Key) hash() uintptr { 312 return Int32Hash(k.i, 0) 313 } 314 func (k *Int32Key) name() string { 315 return "int32" 316 } 317 318 type Int64Key struct { 319 i uint64 320 } 321 322 func (k *Int64Key) clear() { 323 k.i = 0 324 } 325 func (k *Int64Key) random(r *rand.Rand) { 326 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 327 } 328 func (k *Int64Key) bits() int { 329 return 64 330 } 331 func (k *Int64Key) flipBit(i int) { 332 k.i ^= 1 << uint(i) 333 } 334 func (k *Int64Key) hash() uintptr { 335 return Int64Hash(k.i, 0) 336 } 337 func (k *Int64Key) name() string { 338 return "int64" 339 } 340 341 type EfaceKey struct { 342 i interface{} 343 } 344 345 func (k *EfaceKey) clear() { 346 k.i = nil 347 } 348 func (k *EfaceKey) random(r *rand.Rand) { 349 k.i = uint64(r.Int63()) 350 } 351 func (k *EfaceKey) bits() int { 352 // use 64 bits. This tests inlined interfaces 353 // on 64-bit targets and indirect interfaces on 354 // 32-bit targets. 355 return 64 356 } 357 func (k *EfaceKey) flipBit(i int) { 358 k.i = k.i.(uint64) ^ uint64(1)<<uint(i) 359 } 360 func (k *EfaceKey) hash() uintptr { 361 return EfaceHash(k.i, 0) 362 } 363 func (k *EfaceKey) name() string { 364 return "Eface" 365 } 366 367 type IfaceKey struct { 368 i interface { 369 F() 370 } 371 } 372 type fInter uint64 373 374 func (x fInter) F() { 375 } 376 377 func (k *IfaceKey) clear() { 378 k.i = nil 379 } 380 func (k *IfaceKey) random(r *rand.Rand) { 381 k.i = fInter(r.Int63()) 382 } 383 func (k *IfaceKey) bits() int { 384 // use 64 bits. This tests inlined interfaces 385 // on 64-bit targets and indirect interfaces on 386 // 32-bit targets. 387 return 64 388 } 389 func (k *IfaceKey) flipBit(i int) { 390 k.i = k.i.(fInter) ^ fInter(1)<<uint(i) 391 } 392 func (k *IfaceKey) hash() uintptr { 393 return IfaceHash(k.i, 0) 394 } 395 func (k *IfaceKey) name() string { 396 return "Iface" 397 } 398 399 // Flipping a single bit of a key should flip each output bit with 50% probability. 400 func TestSmhasherAvalanche(t *testing.T) { 401 if testing.Short() { 402 t.Skip("Skipping in short mode") 403 } 404 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 405 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 406 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 407 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 408 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 409 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 410 avalancheTest1(t, &Int32Key{}) 411 avalancheTest1(t, &Int64Key{}) 412 avalancheTest1(t, &EfaceKey{}) 413 avalancheTest1(t, &IfaceKey{}) 414 } 415 func avalancheTest1(t *testing.T, k Key) { 416 const REP = 100000 417 r := rand.New(rand.NewSource(1234)) 418 n := k.bits() 419 420 // grid[i][j] is a count of whether flipping 421 // input bit i affects output bit j. 422 grid := make([][hashSize]int, n) 423 424 for z := 0; z < REP; z++ { 425 // pick a random key, hash it 426 k.random(r) 427 h := k.hash() 428 429 // flip each bit, hash & compare the results 430 for i := 0; i < n; i++ { 431 k.flipBit(i) 432 d := h ^ k.hash() 433 k.flipBit(i) 434 435 // record the effects of that bit flip 436 g := &grid[i] 437 for j := 0; j < hashSize; j++ { 438 g[j] += int(d & 1) 439 d >>= 1 440 } 441 } 442 } 443 444 // Each entry in the grid should be about REP/2. 445 // More precisely, we did N = k.bits() * hashSize experiments where 446 // each is the sum of REP coin flips. We want to find bounds on the 447 // sum of coin flips such that a truly random experiment would have 448 // all sums inside those bounds with 99% probability. 449 N := n * hashSize 450 var c float64 451 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 452 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 453 } 454 c *= 4.0 // allowed slack - we don't need to be perfectly random 455 mean := .5 * REP 456 stddev := .5 * math.Sqrt(REP) 457 low := int(mean - c*stddev) 458 high := int(mean + c*stddev) 459 for i := 0; i < n; i++ { 460 for j := 0; j < hashSize; j++ { 461 x := grid[i][j] 462 if x < low || x > high { 463 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 464 } 465 } 466 } 467 } 468 469 // All bit rotations of a set of distinct keys 470 func TestSmhasherWindowed(t *testing.T) { 471 windowed(t, &Int32Key{}) 472 windowed(t, &Int64Key{}) 473 windowed(t, &BytesKey{make([]byte, 128)}) 474 } 475 func windowed(t *testing.T, k Key) { 476 if testing.Short() { 477 t.Skip("Skipping in short mode") 478 } 479 const BITS = 16 480 481 for r := 0; r < k.bits(); r++ { 482 h := newHashSet() 483 for i := 0; i < 1<<BITS; i++ { 484 k.clear() 485 for j := 0; j < BITS; j++ { 486 if i>>uint(j)&1 != 0 { 487 k.flipBit((j + r) % k.bits()) 488 } 489 } 490 h.add(k.hash()) 491 } 492 h.check(t) 493 } 494 } 495 496 // All keys of the form prefix + [A-Za-z0-9]*N + suffix. 497 func TestSmhasherText(t *testing.T) { 498 if testing.Short() { 499 t.Skip("Skipping in short mode") 500 } 501 text(t, "Foo", "Bar") 502 text(t, "FooBar", "") 503 text(t, "", "FooBar") 504 } 505 func text(t *testing.T, prefix, suffix string) { 506 const N = 4 507 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 508 const L = len(S) 509 b := make([]byte, len(prefix)+N+len(suffix)) 510 copy(b, prefix) 511 copy(b[len(prefix)+N:], suffix) 512 h := newHashSet() 513 c := b[len(prefix):] 514 for i := 0; i < L; i++ { 515 c[0] = S[i] 516 for j := 0; j < L; j++ { 517 c[1] = S[j] 518 for k := 0; k < L; k++ { 519 c[2] = S[k] 520 for x := 0; x < L; x++ { 521 c[3] = S[x] 522 h.addB(b) 523 } 524 } 525 } 526 } 527 h.check(t) 528 } 529 530 // Make sure different seed values generate different hashes. 531 func TestSmhasherSeed(t *testing.T) { 532 h := newHashSet() 533 const N = 100000 534 s := "hello" 535 for i := 0; i < N; i++ { 536 h.addS_seed(s, uintptr(i)) 537 } 538 h.check(t) 539 } 540 541 // size of the hash output (32 or 64 bits) 542 const hashSize = 32 + int(^uintptr(0)>>63<<5) 543 544 func randBytes(r *rand.Rand, b []byte) { 545 for i := range b { 546 b[i] = byte(r.Uint32()) 547 } 548 } 549 550 func benchmarkHash(b *testing.B, n int) { 551 s := strings.Repeat("A", n) 552 553 for i := 0; i < b.N; i++ { 554 StringHash(s, 0) 555 } 556 b.SetBytes(int64(n)) 557 } 558 559 func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 560 func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 561 func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 562 func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 563 func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } 564