// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package runtime_test import ( "fmt" "math" "math/rand" . "runtime" "strings" "testing" ) // Smhasher is a torture test for hash functions. // https://code.google.com/p/smhasher/ // This code is a port of some of the Smhasher tests to Go. // // The current AES hash function passes Smhasher. Our fallback // hash functions don't, so we only enable the difficult tests when // we know the AES implementation is available. // Sanity checks. // hash should not depend on values outside key. // hash should not depend on alignment. func TestSmhasherSanity(t *testing.T) { r := rand.New(rand.NewSource(1234)) const REP = 10 const KEYMAX = 128 const PAD = 16 const OFFMAX = 16 for k := 0; k < REP; k++ { for n := 0; n < KEYMAX; n++ { for i := 0; i < OFFMAX; i++ { var b [KEYMAX + OFFMAX + 2*PAD]byte var c [KEYMAX + OFFMAX + 2*PAD]byte randBytes(r, b[:]) randBytes(r, c[:]) copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { t.Errorf("hash depends on bytes outside key") } } } } } type HashSet struct { m map[uintptr]struct{} // set of hashes added n int // number of hashes added } func newHashSet() *HashSet { return &HashSet{make(map[uintptr]struct{}), 0} } func (s *HashSet) add(h uintptr) { s.m[h] = struct{}{} s.n++ } func (s *HashSet) addS(x string) { s.add(StringHash(x, 0)) } func (s *HashSet) addB(x []byte) { s.add(BytesHash(x, 0)) } func (s *HashSet) addS_seed(x string, seed uintptr) { s.add(StringHash(x, seed)) } func (s *HashSet) check(t *testing.T) { const SLOP = 10.0 collisions := s.n - len(s.m) //fmt.Printf("%d/%d\n", len(s.m), s.n) pairs := int64(s.n) * int64(s.n-1) / 2 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) stddev := math.Sqrt(expected) if float64(collisions) > expected+SLOP*(3*stddev+1) { t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) } } // a string plus adding zeros must make distinct hashes func TestSmhasherAppendedZeros(t *testing.T) { s := "hello" + strings.Repeat("\x00", 256) h := newHashSet() for i := 0; i <= len(s); i++ { h.addS(s[:i]) } h.check(t) } // All 0-3 byte strings have distinct hashes. func TestSmhasherSmallKeys(t *testing.T) { h := newHashSet() var b [3]byte for i := 0; i < 256; i++ { b[0] = byte(i) h.addB(b[:1]) for j := 0; j < 256; j++ { b[1] = byte(j) h.addB(b[:2]) if !testing.Short() { for k := 0; k < 256; k++ { b[2] = byte(k) h.addB(b[:3]) } } } } h.check(t) } // Different length strings of all zeros have distinct hashes. func TestSmhasherZeros(t *testing.T) { N := 256 * 1024 if testing.Short() { N = 1024 } h := newHashSet() b := make([]byte, N) for i := 0; i <= N; i++ { h.addB(b[:i]) } h.check(t) } // Strings with up to two nonzero bytes all have distinct hashes. func TestSmhasherTwoNonzero(t *testing.T) { if testing.Short() { t.Skip("Skipping in short mode") } h := newHashSet() for n := 2; n <= 16; n++ { twoNonZero(h, n) } h.check(t) } func twoNonZero(h *HashSet, n int) { b := make([]byte, n) // all zero h.addB(b[:]) // one non-zero byte for i := 0; i < n; i++ { for x := 1; x < 256; x++ { b[i] = byte(x) h.addB(b[:]) b[i] = 0 } } // two non-zero bytes for i := 0; i < n; i++ { for x := 1; x < 256; x++ { b[i] = byte(x) for j := i + 1; j < n; j++ { for y := 1; y < 256; y++ { b[j] = byte(y) h.addB(b[:]) b[j] = 0 } } b[i] = 0 } } } // Test strings with repeats, like "abcdabcdabcdabcd..." func TestSmhasherCyclic(t *testing.T) { if testing.Short() { t.Skip("Skipping in short mode") } r := rand.New(rand.NewSource(1234)) const REPEAT = 8 const N = 1000000 for n := 4; n <= 12; n++ { h := newHashSet() b := make([]byte, REPEAT*n) for i := 0; i < N; i++ { b[0] = byte(i * 79 % 97) b[1] = byte(i * 43 % 137) b[2] = byte(i * 151 % 197) b[3] = byte(i * 199 % 251) randBytes(r, b[4:n]) for j := n; j < n*REPEAT; j++ { b[j] = b[j-n] } h.addB(b) } h.check(t) } } // Test strings with only a few bits set func TestSmhasherSparse(t *testing.T) { if testing.Short() { t.Skip("Skipping in short mode") } sparse(t, 32, 6) sparse(t, 40, 6) sparse(t, 48, 5) sparse(t, 56, 5) sparse(t, 64, 5) sparse(t, 96, 4) sparse(t, 256, 3) sparse(t, 2048, 2) } func sparse(t *testing.T, n int, k int) { b := make([]byte, n/8) h := newHashSet() setbits(h, b, 0, k) h.check(t) } // set up to k bits at index i and greater func setbits(h *HashSet, b []byte, i int, k int) { h.addB(b) if k == 0 { return } for j := i; j < len(b)*8; j++ { b[j/8] |= byte(1 << uint(j&7)) setbits(h, b, j+1, k-1) b[j/8] &= byte(^(1 << uint(j&7))) } } // Test all possible combinations of n blocks from the set s. // "permutation" is a bad name here, but it is what Smhasher uses. func TestSmhasherPermutation(t *testing.T) { if testing.Short() { t.Skip("Skipping in short mode") } permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) permutation(t, []uint32{0, 1}, 20) permutation(t, []uint32{0, 1 << 31}, 20) permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) } func permutation(t *testing.T, s []uint32, n int) { b := make([]byte, n*4) h := newHashSet() genPerm(h, b, s, 0) h.check(t) } func genPerm(h *HashSet, b []byte, s []uint32, n int) { h.addB(b[:n]) if n == len(b) { return } for _, v := range s { b[n] = byte(v) b[n+1] = byte(v >> 8) b[n+2] = byte(v >> 16) b[n+3] = byte(v >> 24) genPerm(h, b, s, n+4) } } type Key interface { clear() // set bits all to 0 random(r *rand.Rand) // set key to something random bits() int // how many bits key has flipBit(i int) // flip bit i of the key hash() uintptr // hash the key name() string // for error reporting } type BytesKey struct { b []byte } func (k *BytesKey) clear() { for i := range k.b { k.b[i] = 0 } } func (k *BytesKey) random(r *rand.Rand) { randBytes(r, k.b) } func (k *BytesKey) bits() int { return len(k.b) * 8 } func (k *BytesKey) flipBit(i int) { k.b[i>>3] ^= byte(1 << uint(i&7)) } func (k *BytesKey) hash() uintptr { return BytesHash(k.b, 0) } func (k *BytesKey) name() string { return fmt.Sprintf("bytes%d", len(k.b)) } type Int32Key struct { i uint32 } func (k *Int32Key) clear() { k.i = 0 } func (k *Int32Key) random(r *rand.Rand) { k.i = r.Uint32() } func (k *Int32Key) bits() int { return 32 } func (k *Int32Key) flipBit(i int) { k.i ^= 1 << uint(i) } func (k *Int32Key) hash() uintptr { return Int32Hash(k.i, 0) } func (k *Int32Key) name() string { return "int32" } type Int64Key struct { i uint64 } func (k *Int64Key) clear() { k.i = 0 } func (k *Int64Key) random(r *rand.Rand) { k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 } func (k *Int64Key) bits() int { return 64 } func (k *Int64Key) flipBit(i int) { k.i ^= 1 << uint(i) } func (k *Int64Key) hash() uintptr { return Int64Hash(k.i, 0) } func (k *Int64Key) name() string { return "int64" } type EfaceKey struct { i interface{} } func (k *EfaceKey) clear() { k.i = nil } func (k *EfaceKey) random(r *rand.Rand) { k.i = uint64(r.Int63()) } func (k *EfaceKey) bits() int { // use 64 bits. This tests inlined interfaces // on 64-bit targets and indirect interfaces on // 32-bit targets. return 64 } func (k *EfaceKey) flipBit(i int) { k.i = k.i.(uint64) ^ uint64(1)<>= 1 } } } // Each entry in the grid should be about REP/2. // More precisely, we did N = k.bits() * hashSize experiments where // each is the sum of REP coin flips. We want to find bounds on the // sum of coin flips such that a truly random experiment would have // all sums inside those bounds with 99% probability. N := n * hashSize var c float64 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { } c *= 4.0 // allowed slack - we don't need to be perfectly random mean := .5 * REP stddev := .5 * math.Sqrt(REP) low := int(mean - c*stddev) high := int(mean + c*stddev) for i := 0; i < n; i++ { for j := 0; j < hashSize; j++ { x := grid[i][j] if x < low || x > high { t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) } } } } // All bit rotations of a set of distinct keys func TestSmhasherWindowed(t *testing.T) { windowed(t, &Int32Key{}) windowed(t, &Int64Key{}) windowed(t, &BytesKey{make([]byte, 128)}) } func windowed(t *testing.T, k Key) { if testing.Short() { t.Skip("Skipping in short mode") } const BITS = 16 for r := 0; r < k.bits(); r++ { h := newHashSet() for i := 0; i < 1<>uint(j)&1 != 0 { k.flipBit((j + r) % k.bits()) } } h.add(k.hash()) } h.check(t) } } // All keys of the form prefix + [A-Za-z0-9]*N + suffix. func TestSmhasherText(t *testing.T) { if testing.Short() { t.Skip("Skipping in short mode") } text(t, "Foo", "Bar") text(t, "FooBar", "") text(t, "", "FooBar") } func text(t *testing.T, prefix, suffix string) { const N = 4 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" const L = len(S) b := make([]byte, len(prefix)+N+len(suffix)) copy(b, prefix) copy(b[len(prefix)+N:], suffix) h := newHashSet() c := b[len(prefix):] for i := 0; i < L; i++ { c[0] = S[i] for j := 0; j < L; j++ { c[1] = S[j] for k := 0; k < L; k++ { c[2] = S[k] for x := 0; x < L; x++ { c[3] = S[x] h.addB(b) } } } } h.check(t) } // Make sure different seed values generate different hashes. func TestSmhasherSeed(t *testing.T) { h := newHashSet() const N = 100000 s := "hello" for i := 0; i < N; i++ { h.addS_seed(s, uintptr(i)) } h.check(t) } // size of the hash output (32 or 64 bits) const hashSize = 32 + int(^uintptr(0)>>63<<5) func randBytes(r *rand.Rand, b []byte) { for i := range b { b[i] = byte(r.Uint32()) } } func benchmarkHash(b *testing.B, n int) { s := strings.Repeat("A", n) for i := 0; i < b.N; i++ { StringHash(s, 0) } b.SetBytes(int64(n)) } func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } func TestArrayHash(t *testing.T) { // Make sure that "" in arrays hash correctly. The hash // should at least scramble the input seed so that, e.g., // {"","foo"} and {"foo",""} have different hashes. // If the hash is bad, then all (8 choose 4) = 70 keys // have the same hash. If so, we allocate 70/8 = 8 // overflow buckets. If the hash is good we don't // normally allocate any overflow buckets, and the // probability of even one or two overflows goes down rapidly. // (There is always 1 allocation of the bucket array. The map // header is allocated on the stack.) f := func() { // Make the key type at most 128 bytes. Otherwise, // we get an allocation per key. type key [8]string m := make(map[key]bool, 70) // fill m with keys that have 4 "foo"s and 4 ""s. for i := 0; i < 256; i++ { var k key cnt := 0 for j := uint(0); j < 8; j++ { if i>>j&1 != 0 { k[j] = "foo" cnt++ } } if cnt == 4 { m[k] = true } } if len(m) != 70 { t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) } } if n := testing.AllocsPerRun(10, f); n > 6 { t.Errorf("too many allocs %f - hash not balanced", n) } } func TestStructHash(t *testing.T) { // See the comment in TestArrayHash. f := func() { type key struct { a, b, c, d, e, f, g, h string } m := make(map[key]bool, 70) // fill m with keys that have 4 "foo"s and 4 ""s. for i := 0; i < 256; i++ { var k key cnt := 0 if i&1 != 0 { k.a = "foo" cnt++ } if i&2 != 0 { k.b = "foo" cnt++ } if i&4 != 0 { k.c = "foo" cnt++ } if i&8 != 0 { k.d = "foo" cnt++ } if i&16 != 0 { k.e = "foo" cnt++ } if i&32 != 0 { k.f = "foo" cnt++ } if i&64 != 0 { k.g = "foo" cnt++ } if i&128 != 0 { k.h = "foo" cnt++ } if cnt == 4 { m[k] = true } } if len(m) != 70 { t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) } } if n := testing.AllocsPerRun(10, f); n > 6 { t.Errorf("too many allocs %f - hash not balanced", n) } }