go/src/runtime/hash_test.go
Keith Randall 8dae5390cb runtime: raise alert threshold on window smhasher test
This alert is triggering occasionally. I've investigated the
collisions that happen, and they all seem to be pairwise, so they are
not a big deal.  "pairwise" = when there are 32 collisions, it is two
keys mapping to the same hash, 32 times, not 33 keys all mapping to
the same hash.

Add some t.Logf calls in case this comes back, which will help isolate
the problem.

Fixes #39352

Change-Id: I1749d7c8efd0afcf9024d8964d15bc0f58a86e4f
Reviewed-on: https://go-review.googlesource.com/c/go/+/237718
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
2020-06-12 20:46:27 +00:00

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime_test
import (
"fmt"
"math"
"math/rand"
"reflect"
. "runtime"
"strings"
"testing"
"unsafe"
)
func TestMemHash32Equality(t *testing.T) {
if *UseAeshash {
t.Skip("skipping since AES hash implementation is used")
}
var b [4]byte
r := rand.New(rand.NewSource(1234))
seed := uintptr(r.Uint64())
for i := 0; i < 100; i++ {
randBytes(r, b[:])
got := MemHash32(unsafe.Pointer(&b), seed)
want := MemHash(unsafe.Pointer(&b), seed, 4)
if got != want {
t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
}
}
}
func TestMemHash64Equality(t *testing.T) {
if *UseAeshash {
t.Skip("skipping since AES hash implementation is used")
}
var b [8]byte
r := rand.New(rand.NewSource(1234))
seed := uintptr(r.Uint64())
for i := 0; i < 100; i++ {
randBytes(r, b[:])
got := MemHash64(unsafe.Pointer(&b), seed)
want := MemHash(unsafe.Pointer(&b), seed, 8)
if got != want {
t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
}
}
}
func TestCompilerVsRuntimeHash(t *testing.T) {
// Test to make sure the compiler's hash function and the runtime's hash function agree.
// See issue 37716.
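// The two values returned by MapHashCheck (a test-only hook exported by the
// runtime package) are the key's hash as computed by the map type's key hasher
// and by the runtime's generic type hash; any disagreement between the two
// code paths shows up as a mismatch below.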
for _, m := range []interface{}{
map[bool]int{},
map[int8]int{},
map[uint8]int{},
map[int16]int{},
map[uint16]int{},
map[int32]int{},
map[uint32]int{},
map[int64]int{},
map[uint64]int{},
map[int]int{},
map[uint]int{},
map[uintptr]int{},
map[*byte]int{},
map[chan int]int{},
map[unsafe.Pointer]int{},
map[float32]int{},
map[float64]int{},
map[complex64]int{},
map[complex128]int{},
map[string]int{},
//map[interface{}]int{},
//map[interface{F()}]int{},
map[[8]uint64]int{},
map[[8]string]int{},
map[struct{ a, b, c, d int32 }]int{}, // Note: tests AMEM128
map[struct{ a, b, _, d int32 }]int{},
map[struct {
a, b int32
c float32
d, e [8]byte
}]int{},
map[struct {
a int16
b int64
}]int{},
} {
k := reflect.New(reflect.TypeOf(m).Key()).Elem().Interface() // the zero key
x, y := MapHashCheck(m, k)
if x != y {
t.Errorf("hashes did not match (%x vs %x) for map %T", x, y, m)
}
}
}
// Smhasher is a torture test for hash functions.
// https://code.google.com/p/smhasher/
// This code is a port of some of the Smhasher tests to Go.
//
// The current AES hash function passes Smhasher. Our fallback
// hash functions don't, so we only enable the difficult tests when
// we know the AES implementation is available.
// Sanity checks.
// hash should not depend on values outside key.
// hash should not depend on alignment.
func TestSmhasherSanity(t *testing.T) {
r := rand.New(rand.NewSource(1234))
const REP = 10
const KEYMAX = 128
const PAD = 16
const OFFMAX = 16
for k := 0; k < REP; k++ {
for n := 0; n < KEYMAX; n++ {
for i := 0; i < OFFMAX; i++ {
var b [KEYMAX + OFFMAX + 2*PAD]byte
var c [KEYMAX + OFFMAX + 2*PAD]byte
randBytes(r, b[:])
randBytes(r, c[:])
copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
t.Errorf("hash depends on bytes outside key")
}
}
}
}
}
type HashSet struct {
m map[uintptr]struct{} // set of hashes added
n int // number of hashes added
}
func newHashSet() *HashSet {
return &HashSet{make(map[uintptr]struct{}), 0}
}
func (s *HashSet) add(h uintptr) {
s.m[h] = struct{}{}
s.n++
}
func (s *HashSet) addS(x string) {
s.add(StringHash(x, 0))
}
func (s *HashSet) addB(x []byte) {
s.add(BytesHash(x, 0))
}
func (s *HashSet) addS_seed(x string, seed uintptr) {
s.add(StringHash(x, seed))
}
func (s *HashSet) check(t *testing.T) {
const SLOP = 50.0
collisions := s.n - len(s.m)
pairs := int64(s.n) * int64(s.n-1) / 2
expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
stddev := math.Sqrt(expected)
if float64(collisions) > expected+SLOP*(3*stddev+1) {
t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
}
}
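// Rough arithmetic for the threshold above: TestSmhasherZeros adds
// n = 256*1024+1 hashes, so pairs ≈ 3.4e10. With a 64-bit hash, expected
// ≈ 3.4e10/2^64 ≈ 1.9e-9 and stddev ≈ 4.3e-5, so the check effectively allows
// about SLOP = 50 collisions. With a 32-bit hash, expected ≈ 8 and
// stddev ≈ 2.8, for a threshold of roughly 480.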
// a string plus adding zeros must make distinct hashes
func TestSmhasherAppendedZeros(t *testing.T) {
s := "hello" + strings.Repeat("\x00", 256)
h := newHashSet()
for i := 0; i <= len(s); i++ {
h.addS(s[:i])
}
h.check(t)
}
// All 0-3 byte strings have distinct hashes.
func TestSmhasherSmallKeys(t *testing.T) {
h := newHashSet()
var b [3]byte
for i := 0; i < 256; i++ {
b[0] = byte(i)
h.addB(b[:1])
for j := 0; j < 256; j++ {
b[1] = byte(j)
h.addB(b[:2])
if !testing.Short() {
for k := 0; k < 256; k++ {
b[2] = byte(k)
h.addB(b[:3])
}
}
}
}
h.check(t)
}
// Different length strings of all zeros have distinct hashes.
func TestSmhasherZeros(t *testing.T) {
N := 256 * 1024
if testing.Short() {
N = 1024
}
h := newHashSet()
b := make([]byte, N)
for i := 0; i <= N; i++ {
h.addB(b[:i])
}
h.check(t)
}
// Strings with up to two nonzero bytes all have distinct hashes.
func TestSmhasherTwoNonzero(t *testing.T) {
if GOARCH == "wasm" {
t.Skip("Too slow on wasm")
}
if testing.Short() {
t.Skip("Skipping in short mode")
}
h := newHashSet()
for n := 2; n <= 16; n++ {
twoNonZero(h, n)
}
h.check(t)
}
func twoNonZero(h *HashSet, n int) {
b := make([]byte, n)
// all zero
h.addB(b)
// one non-zero byte
for i := 0; i < n; i++ {
for x := 1; x < 256; x++ {
b[i] = byte(x)
h.addB(b)
b[i] = 0
}
}
// two non-zero bytes
for i := 0; i < n; i++ {
for x := 1; x < 256; x++ {
b[i] = byte(x)
for j := i + 1; j < n; j++ {
for y := 1; y < 256; y++ {
b[j] = byte(y)
h.addB(b)
b[j] = 0
}
}
b[i] = 0
}
}
}
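// For a given n, twoNonZero adds 1 + 255*n + 255*255*n*(n-1)/2 keys
// (all zero, one nonzero byte, two nonzero bytes); for n = 16 that is
// 7,807,081 keys.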
// Test strings with repeats, like "abcdabcdabcdabcd..."
func TestSmhasherCyclic(t *testing.T) {
if testing.Short() {
t.Skip("Skipping in short mode")
}
r := rand.New(rand.NewSource(1234))
const REPEAT = 8
const N = 1000000
for n := 4; n <= 12; n++ {
h := newHashSet()
b := make([]byte, REPEAT*n)
for i := 0; i < N; i++ {
b[0] = byte(i * 79 % 97)
b[1] = byte(i * 43 % 137)
b[2] = byte(i * 151 % 197)
b[3] = byte(i * 199 % 251)
randBytes(r, b[4:n])
for j := n; j < n*REPEAT; j++ {
b[j] = b[j-n]
}
h.addB(b)
}
h.check(t)
}
}
// Test strings with only a few bits set
func TestSmhasherSparse(t *testing.T) {
if GOARCH == "wasm" {
t.Skip("Too slow on wasm")
}
if testing.Short() {
t.Skip("Skipping in short mode")
}
sparse(t, 32, 6)
sparse(t, 40, 6)
sparse(t, 48, 5)
sparse(t, 56, 5)
sparse(t, 64, 5)
sparse(t, 96, 4)
sparse(t, 256, 3)
sparse(t, 2048, 2)
}
func sparse(t *testing.T, n int, k int) {
b := make([]byte, n/8)
h := newHashSet()
setbits(h, b, 0, k)
h.check(t)
}
// set up to k bits at index i and greater
func setbits(h *HashSet, b []byte, i int, k int) {
h.addB(b)
if k == 0 {
return
}
for j := i; j < len(b)*8; j++ {
b[j/8] |= byte(1 << uint(j&7))
setbits(h, b, j+1, k-1)
b[j/8] &= byte(^(1 << uint(j&7)))
}
}
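// setbits enumerates every key with at most k bits set, so sparse(t, n, k)
// hashes the sum of C(n, j) for j = 0..k keys; for example, sparse(t, 32, 6)
// hashes 1+32+496+4960+35960+201376+906192 = 1,149,017 keys.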
// Test all possible combinations of n blocks from the set s.
// "permutation" is a bad name here, but it is what Smhasher uses.
func TestSmhasherPermutation(t *testing.T) {
if GOARCH == "wasm" {
t.Skip("Too slow on wasm")
}
if testing.Short() {
t.Skip("Skipping in short mode")
}
permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
permutation(t, []uint32{0, 1}, 20)
permutation(t, []uint32{0, 1 << 31}, 20)
permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
}
func permutation(t *testing.T, s []uint32, n int) {
b := make([]byte, n*4)
h := newHashSet()
genPerm(h, b, s, 0)
h.check(t)
}
func genPerm(h *HashSet, b []byte, s []uint32, n int) {
h.addB(b[:n])
if n == len(b) {
return
}
for _, v := range s {
b[n] = byte(v)
b[n+1] = byte(v >> 8)
b[n+2] = byte(v >> 16)
b[n+3] = byte(v >> 24)
genPerm(h, b, s, n+4)
}
}
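// genPerm hashes every prefix as it recurses, so permutation(t, s, n) adds
// the sum of len(s)^i for i = 0..n keys: about 19.2 million for the 8-element
// sets with n=8 and about 2.1 million for the 2-element sets with n=20.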
type Key interface {
clear() // set bits all to 0
random(r *rand.Rand) // set key to something random
bits() int // how many bits key has
flipBit(i int) // flip bit i of the key
hash() uintptr // hash the key
name() string // for error reporting
}
type BytesKey struct {
b []byte
}
func (k *BytesKey) clear() {
for i := range k.b {
k.b[i] = 0
}
}
func (k *BytesKey) random(r *rand.Rand) {
randBytes(r, k.b)
}
func (k *BytesKey) bits() int {
return len(k.b) * 8
}
func (k *BytesKey) flipBit(i int) {
k.b[i>>3] ^= byte(1 << uint(i&7))
}
func (k *BytesKey) hash() uintptr {
return BytesHash(k.b, 0)
}
func (k *BytesKey) name() string {
return fmt.Sprintf("bytes%d", len(k.b))
}
type Int32Key struct {
i uint32
}
func (k *Int32Key) clear() {
k.i = 0
}
func (k *Int32Key) random(r *rand.Rand) {
k.i = r.Uint32()
}
func (k *Int32Key) bits() int {
return 32
}
func (k *Int32Key) flipBit(i int) {
k.i ^= 1 << uint(i)
}
func (k *Int32Key) hash() uintptr {
return Int32Hash(k.i, 0)
}
func (k *Int32Key) name() string {
return "int32"
}
type Int64Key struct {
i uint64
}
func (k *Int64Key) clear() {
k.i = 0
}
func (k *Int64Key) random(r *rand.Rand) {
k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
}
func (k *Int64Key) bits() int {
return 64
}
func (k *Int64Key) flipBit(i int) {
k.i ^= 1 << uint(i)
}
func (k *Int64Key) hash() uintptr {
return Int64Hash(k.i, 0)
}
func (k *Int64Key) name() string {
return "int64"
}
type EfaceKey struct {
i interface{}
}
func (k *EfaceKey) clear() {
k.i = nil
}
func (k *EfaceKey) random(r *rand.Rand) {
k.i = uint64(r.Int63())
}
func (k *EfaceKey) bits() int {
// use 64 bits. This tests inlined interfaces
// on 64-bit targets and indirect interfaces on
// 32-bit targets.
return 64
}
func (k *EfaceKey) flipBit(i int) {
k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
}
func (k *EfaceKey) hash() uintptr {
return EfaceHash(k.i, 0)
}
func (k *EfaceKey) name() string {
return "Eface"
}
type IfaceKey struct {
i interface {
F()
}
}
type fInter uint64
func (x fInter) F() {
}
func (k *IfaceKey) clear() {
k.i = nil
}
func (k *IfaceKey) random(r *rand.Rand) {
k.i = fInter(r.Int63())
}
func (k *IfaceKey) bits() int {
// use 64 bits. This tests inlined interfaces
// on 64-bit targets and indirect interfaces on
// 32-bit targets.
return 64
}
func (k *IfaceKey) flipBit(i int) {
k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
}
func (k *IfaceKey) hash() uintptr {
return IfaceHash(k.i, 0)
}
func (k *IfaceKey) name() string {
return "Iface"
}
// Flipping a single bit of a key should flip each output bit with 50% probability.
func TestSmhasherAvalanche(t *testing.T) {
if GOARCH == "wasm" {
t.Skip("Too slow on wasm")
}
if testing.Short() {
t.Skip("Skipping in short mode")
}
avalancheTest1(t, &BytesKey{make([]byte, 2)})
avalancheTest1(t, &BytesKey{make([]byte, 4)})
avalancheTest1(t, &BytesKey{make([]byte, 8)})
avalancheTest1(t, &BytesKey{make([]byte, 16)})
avalancheTest1(t, &BytesKey{make([]byte, 32)})
avalancheTest1(t, &BytesKey{make([]byte, 200)})
avalancheTest1(t, &Int32Key{})
avalancheTest1(t, &Int64Key{})
avalancheTest1(t, &EfaceKey{})
avalancheTest1(t, &IfaceKey{})
}
func avalancheTest1(t *testing.T, k Key) {
const REP = 100000
r := rand.New(rand.NewSource(1234))
n := k.bits()
// grid[i][j] is a count of whether flipping
// input bit i affects output bit j.
grid := make([][hashSize]int, n)
for z := 0; z < REP; z++ {
// pick a random key, hash it
k.random(r)
h := k.hash()
// flip each bit, hash & compare the results
for i := 0; i < n; i++ {
k.flipBit(i)
d := h ^ k.hash()
k.flipBit(i)
// record the effects of that bit flip
g := &grid[i]
for j := 0; j < hashSize; j++ {
g[j] += int(d & 1)
d >>= 1
}
}
}
// Each entry in the grid should be about REP/2.
// More precisely, we did N = k.bits() * hashSize experiments where
// each is the sum of REP coin flips. We want to find bounds on the
// sum of coin flips such that a truly random experiment would have
// all sums inside those bounds with 99% probability.
N := n * hashSize
var c float64
// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
}
c *= 4.0 // allowed slack - we don't need to be perfectly random
mean := .5 * REP
stddev := .5 * math.Sqrt(REP)
low := int(mean - c*stddev)
high := int(mean + c*stddev)
for i := 0; i < n; i++ {
for j := 0; j < hashSize; j++ {
x := grid[i][j]
if x < low || x > high {
t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
}
}
}
}
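// Rough numbers for the bound above, taking Int64Key with a 64-bit hash:
// N = 64*64 = 4096, so the loop stops near c ≈ 5.6; after the 4x slack factor
// the accepted band is mean ± 22.4*stddev ≈ 50000 ± 3500 out of REP = 100000
// flips per grid cell.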
// All bit rotations of a set of distinct keys
func TestSmhasherWindowed(t *testing.T) {
t.Logf("32 bit keys")
windowed(t, &Int32Key{})
t.Logf("64 bit keys")
windowed(t, &Int64Key{})
t.Logf("string keys")
windowed(t, &BytesKey{make([]byte, 128)})
}
func windowed(t *testing.T, k Key) {
if GOARCH == "wasm" {
t.Skip("Too slow on wasm")
}
if testing.Short() {
t.Skip("Skipping in short mode")
}
const BITS = 16
for r := 0; r < k.bits(); r++ {
h := newHashSet()
for i := 0; i < 1<<BITS; i++ {
k.clear()
for j := 0; j < BITS; j++ {
if i>>uint(j)&1 != 0 {
k.flipBit((j + r) % k.bits())
}
}
h.add(k.hash())
}
h.check(t)
}
}
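// Illustrative sketch, not part of the original test: if the windowed alert in
// check fires again, counting how many keys share each hash value (the values
// otherwise passed to h.add) distinguishes the benign pairwise collisions
// described in CL 237718 from many keys collapsing onto one hash.
func logCollisionMultiplicity(t *testing.T, hashes []uintptr) {
	counts := make(map[uintptr]int)
	for _, hv := range hashes {
		counts[hv]++
	}
	for hv, c := range counts {
		if c > 2 {
			t.Logf("hash %#x shared by %d keys", hv, c)
		}
	}
}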
// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
func TestSmhasherText(t *testing.T) {
if testing.Short() {
t.Skip("Skipping in short mode")
}
text(t, "Foo", "Bar")
text(t, "FooBar", "")
text(t, "", "FooBar")
}
func text(t *testing.T, prefix, suffix string) {
const N = 4
const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
const L = len(S)
b := make([]byte, len(prefix)+N+len(suffix))
copy(b, prefix)
copy(b[len(prefix)+N:], suffix)
h := newHashSet()
c := b[len(prefix):]
for i := 0; i < L; i++ {
c[0] = S[i]
for j := 0; j < L; j++ {
c[1] = S[j]
for k := 0; k < L; k++ {
c[2] = S[k]
for x := 0; x < L; x++ {
c[3] = S[x]
h.addB(b)
}
}
}
}
h.check(t)
}
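// Each text call hashes len(S)^4 = 50^4 = 6,250,000 keys, so the three calls
// above cover a bit under 19 million keys in total.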
// Make sure different seed values generate different hashes.
func TestSmhasherSeed(t *testing.T) {
h := newHashSet()
const N = 100000
s := "hello"
for i := 0; i < N; i++ {
h.addS_seed(s, uintptr(i))
}
h.check(t)
}
// size of the hash output (32 or 64 bits)
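// On a 64-bit platform ^uintptr(0)>>63 is 1 and 1<<5 is 32, giving 64;
// on a 32-bit platform the shift yields 0, leaving 32.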
const hashSize = 32 + int(^uintptr(0)>>63<<5)
func randBytes(r *rand.Rand, b []byte) {
for i := range b {
b[i] = byte(r.Uint32())
}
}
func benchmarkHash(b *testing.B, n int) {
s := strings.Repeat("A", n)
for i := 0; i < b.N; i++ {
StringHash(s, 0)
}
b.SetBytes(int64(n))
}
func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) }
func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) }
func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) }
func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) }
func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
func TestArrayHash(t *testing.T) {
// Make sure that "" in arrays hash correctly. The hash
// should at least scramble the input seed so that, e.g.,
// {"","foo"} and {"foo",""} have different hashes.
// If the hash is bad, then all (8 choose 4) = 70 keys
// have the same hash. If so, we allocate 70/8 = 8
// overflow buckets. If the hash is good we don't
// normally allocate any overflow buckets, and the
// probability of even one or two overflows goes down rapidly.
// (There is always 1 allocation of the bucket array. The map
// header is allocated on the stack.)
f := func() {
// Make the key type at most 128 bytes. Otherwise,
// we get an allocation per key.
type key [8]string
m := make(map[key]bool, 70)
// fill m with keys that have 4 "foo"s and 4 ""s.
for i := 0; i < 256; i++ {
var k key
cnt := 0
for j := uint(0); j < 8; j++ {
if i>>j&1 != 0 {
k[j] = "foo"
cnt++
}
}
if cnt == 4 {
m[k] = true
}
}
if len(m) != 70 {
t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
}
}
if n := testing.AllocsPerRun(10, f); n > 6 {
t.Errorf("too many allocs %f - hash not balanced", n)
}
}
func TestStructHash(t *testing.T) {
// See the comment in TestArrayHash.
f := func() {
type key struct {
a, b, c, d, e, f, g, h string
}
m := make(map[key]bool, 70)
// fill m with keys that have 4 "foo"s and 4 ""s.
for i := 0; i < 256; i++ {
var k key
cnt := 0
if i&1 != 0 {
k.a = "foo"
cnt++
}
if i&2 != 0 {
k.b = "foo"
cnt++
}
if i&4 != 0 {
k.c = "foo"
cnt++
}
if i&8 != 0 {
k.d = "foo"
cnt++
}
if i&16 != 0 {
k.e = "foo"
cnt++
}
if i&32 != 0 {
k.f = "foo"
cnt++
}
if i&64 != 0 {
k.g = "foo"
cnt++
}
if i&128 != 0 {
k.h = "foo"
cnt++
}
if cnt == 4 {
m[k] = true
}
}
if len(m) != 70 {
t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
}
}
if n := testing.AllocsPerRun(10, f); n > 6 {
t.Errorf("too many allocs %f - hash not balanced", n)
}
}
var sink uint64
func BenchmarkAlignedLoad(b *testing.B) {
var buf [16]byte
p := unsafe.Pointer(&buf[0])
var s uint64
for i := 0; i < b.N; i++ {
s += ReadUnaligned64(p)
}
sink = s
}
func BenchmarkUnalignedLoad(b *testing.B) {
var buf [16]byte
p := unsafe.Pointer(&buf[1])
var s uint64
for i := 0; i < b.N; i++ {
s += ReadUnaligned64(p)
}
sink = s
}
func TestCollisions(t *testing.T) {
if testing.Short() {
t.Skip("Skipping in short mode")
}
for i := 0; i < 16; i++ {
for j := 0; j < 16; j++ {
if j == i {
continue
}
var a [16]byte
m := make(map[uint16]struct{}, 1<<16)
for n := 0; n < 1<<16; n++ {
a[i] = byte(n)
a[j] = byte(n >> 8)
m[uint16(BytesHash(a[:], 0))] = struct{}{}
}
if len(m) <= 1<<15 {
t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
}
}
}
}
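// For each (i, j) pair, TestCollisions hashes 65536 distinct keys and keeps
// only the low 16 bits. A truly random 16-bit function would yield about
// (1 - 1/e) * 65536 ≈ 41400 distinct values, so requiring more than 32768
// leaves a comfortable margin while still catching hashes that mix some byte
// positions poorly.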