1
0
mirror of https://github.com/golang/go synced 2024-11-13 12:20:26 -07:00

hash/maphash: add WriteComparable and Comparable

By default, use the hash function in the runtime package.
If the build tag is purego or the raw memory cannot be hashed directly,
use reflect to get each field and hash it separately.

Fixes #54670

Change-Id: Ic968864c9c3c51883967d4f6dc24432385c7dc79
GitHub-Last-Rev: 5ae8a28834
GitHub-Pull-Request: golang/go#69166
Reviewed-on: https://go-review.googlesource.com/c/go/+/609761
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
qiulaidongfeng 2024-09-25 10:32:49 +00:00 committed by Keith Randall
parent 658a6a6e1f
commit 03103a54d8
6 changed files with 346 additions and 1 deletions

2
api/next/54670.txt Normal file
View File

@ -0,0 +1,2 @@
pkg hash/maphash, func Comparable[$0 comparable](Seed, $0) uint64 #54670
pkg hash/maphash, func WriteComparable[$0 comparable](*Hash, $0) #54670

View File

@ -0,0 +1,2 @@
New function [Comparable] returns the hash of comparable value v.
New function [WriteComparable] adds x to the data hashed by [Hash].

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package maphash provides hash functions on byte sequences.
// Package maphash provides hash functions on byte sequences and comparable values.
// These hash functions are intended to be used to implement hash tables or
// other data structures that need to map arbitrary strings or byte
// sequences to a uniform distribution on unsigned 64-bit integers.
@ -12,6 +12,13 @@
// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
package maphash
import (
"internal/abi"
"internal/byteorder"
"math"
"reflect"
)
// A Seed is a random value that selects the specific hash function
// computed by a [Hash]. If two Hashes use the same Seeds, they
// will compute the same hash values for any given input.
@ -275,3 +282,117 @@ func (h *Hash) Size() int { return 8 }
// BlockSize returns h's block size,
// which is the length of h's internal buffer.
func (h *Hash) BlockSize() int { return len(h.buf) }
// Comparable hashes the comparable value v under the given seed and
// returns the 64-bit result. Equal values hash equally for the same seed:
// Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
// If v != v, then the resulting hash is randomly distributed.
func Comparable[T comparable](seed Seed, v T) uint64 {
	// v must be prepared before any of its bytes or pointers are hashed.
	comparableReady(v)

	var hasher Hash
	hasher.SetSeed(seed)
	comparableF(&hasher, v)
	return hasher.Sum64()
}
// comparableReady prepares v for hashing.
// Comparable and WriteComparable call it before handing v to comparableF.
func comparableReady[T comparable](v T) {
// Force v to be on the heap.
// We cannot hash pointers to local variables,
// as the address of the local variable
// might change on stack growth.
abi.Escape(v)
}
// WriteComparable adds x to the data hashed by h.
// x is first passed to comparableReady and is then written to h
// by comparableF.
func WriteComparable[T comparable](h *Hash, x T) {
comparableReady(x)
comparableF(h, x)
}
// appendT hashes the value v into h using reflection.
// It is used when the raw memory of a value cannot be hashed directly,
// or when the purego build tag is used.
//
// The type string of v is written first, followed by an encoding of the
// value that depends on its kind. Array elements and struct fields are each
// preceded by their index. A nil interface, which reflect represents as the
// zero Value, is hashed as a single zero byte.
//
// appendT panics with "maphash: <type> not comparable" if v, or a value
// nested inside it, has a kind that is not handled below (for example a
// slice stored in an interface).
func appendT(h *Hash, v reflect.Value) {
	if !v.IsValid() {
		// v is the zero Value. This is what reflect.ValueOf returns for a
		// nil interface and what Elem returns for a nil interface field.
		// It has no type, so calling v.Type would panic. Nil interfaces
		// compare equal to each other, so hash a fixed marker instead.
		h.WriteByte(0)
		return
	}
	h.WriteString(v.Type().String())

	var buf [8]byte
	switch v.Kind() {
	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
		byteorder.LePutUint64(buf[:], uint64(v.Int()))
		h.Write(buf[:])
	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
		byteorder.LePutUint64(buf[:], v.Uint())
		h.Write(buf[:])
	case reflect.Array:
		for i := range uint64(v.Len()) {
			// Write the element index first: we do not want
			// [2]string{"foo", ""} and [2]string{"", "foo"}
			// to hash to the same value.
			byteorder.LePutUint64(buf[:], i)
			h.Write(buf[:])
			appendT(h, v.Index(int(i)))
		}
	case reflect.String:
		h.WriteString(v.String())
	case reflect.Struct:
		for i := range v.NumField() {
			// Write the field index first: we do not want
			// struct{a,b string}{"foo",""} and
			// struct{a,b string}{"","foo"}
			// to hash to the same value.
			byteorder.LePutUint64(buf[:], uint64(i))
			h.Write(buf[:])
			appendT(h, v.Field(i))
		}
	case reflect.Complex64, reflect.Complex128:
		c := v.Complex()
		h.float64(real(c))
		h.float64(imag(c))
	case reflect.Float32, reflect.Float64:
		h.float64(v.Float())
	case reflect.Bool:
		h.WriteByte(btoi(v.Bool()))
	case reflect.UnsafePointer, reflect.Pointer, reflect.Chan:
		// Hashing the address is fine here: the abi.Escape call in
		// comparableReady means the target will not be moved.
		// Channels are comparable by identity, so they are hashed by
		// address as well rather than being rejected as not comparable.
		byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
		h.Write(buf[:])
	case reflect.Interface:
		// Hash the dynamic value; a nil interface yields the zero Value,
		// which the IsValid check at the top handles.
		appendT(h, v.Elem())
	default:
		panic("maphash: " + v.Type().String() + " not comparable")
	}
}
// float64 adds the floating-point value f to the data hashed by h.
//
// Zero is written as a single zero byte, so +0 and -0, which compare equal,
// hash identically. A NaN (f != f) is replaced by a value from randUint64.
// Any other value is written as the 8 little-endian bytes of its
// IEEE 754 bit pattern.
func (h *Hash) float64(f float64) {
	if f == 0 {
		h.WriteByte(0)
		return
	}

	bits := math.Float64bits(f)
	if f != f {
		// NaN never equals itself, so it gets a fresh random encoding.
		bits = randUint64()
	}

	var enc [8]byte
	byteorder.LePutUint64(enc[:], bits)
	h.Write(enc[:])
}
// btoi converts b to a byte: 1 for true, 0 for false.
func btoi(b bool) byte {
	var n byte
	if b {
		n = 1
	}
	return n
}

View File

@ -10,6 +10,7 @@ import (
"crypto/rand"
"internal/byteorder"
"math/bits"
"reflect"
)
func rthash(buf []byte, seed uint64) uint64 {
@ -92,3 +93,8 @@ func mix(a, b uint64) uint64 {
hi, lo := bits.Mul64(a, b)
return hi ^ lo
}
// comparableF adds v to the data hashed by h.
// This variant always goes through reflection, delegating to appendT.
func comparableF[T comparable](h *Hash, v T) {
	appendT(h, reflect.ValueOf(v))
}

View File

@ -7,6 +7,8 @@
package maphash
import (
"internal/abi"
"reflect"
"unsafe"
)
@ -41,3 +43,19 @@ func rthashString(s string, state uint64) uint64 {
// randUint64 returns the uint64 produced by runtime_rand.
func randUint64() uint64 {
return runtime_rand()
}
// comparableF adds v to the data hashed by h.
//
// The raw memory contents of v can only be used for the hash if the raw
// memory contents are also what equality is computed from. That holds for
// some types (int) but not for others (float, string, structs with
// padding, etc.). Types without the TFlagRegularMemory flag are therefore
// hashed through reflection by appendT.
func comparableF[T comparable](h *Hash, v T) {
	typ := abi.TypeFor[T]()
	if typ.TFlag&abi.TFlagRegularMemory == 0 {
		appendT(h, reflect.ValueOf(v))
		return
	}

	// Regular memory: hash the bytes of v directly.
	raw := unsafe.Slice((*byte)(unsafe.Pointer(&v)), typ.Size())
	h.Write(raw)
}

View File

@ -8,7 +8,10 @@ import (
"bytes"
"fmt"
"hash"
"math"
"reflect"
"testing"
"unsafe"
)
func TestUnseededHash(t *testing.T) {
@ -210,6 +213,199 @@ func TestSeedFromReset(t *testing.T) {
}
}
// negativeZero returns -0 of floating-point type T.
// The negation is applied to a variable rather than a constant, because
// the constant expression -0.0 evaluates to positive zero.
func negativeZero[T float32 | float64]() T {
	var zero T
	return -zero
}
// TestComparable checks that Comparable returns equal hashes for equal
// values of a range of comparable types, and different hashes for selected
// pairs of unequal values.
func TestComparable(t *testing.T) {
// Each value below is hashed twice with the same seed and must
// produce the same hash both times.
testComparable(t, int64(2))
testComparable(t, uint64(8))
testComparable(t, uintptr(12))
testComparable(t, any("s"))
testComparable(t, "s")
testComparable(t, true)
testComparable(t, new(float64))
testComparable(t, float64(9))
testComparable(t, complex128(9i+1))
testComparable(t, struct{}{})
testComparable(t, struct {
i int
u uint
b bool
f float64
p *int
a any
}{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
type S struct {
s string
}
// s1 and s2 hold strings that are equal in value but are expected to be
// backed by different memory; both expectations are verified first.
s1 := S{s: heapStr(t)}
s2 := S{s: heapStr(t)}
if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
t.Fatalf("unexpected two heapStr ptr equal")
}
if s1.s != s2.s {
t.Fatalf("unexpected two heapStr value not equal")
}
testComparable(t, s1, s2)
testComparable(t, s1.s, s2.s)
// Positive and negative zero compare equal, so they must hash equally.
testComparable(t, float32(0), negativeZero[float32]())
testComparable(t, float64(0), negativeZero[float64]())
// The remaining pairs are expected to produce different hashes.
testComparableNoEqual(t, math.NaN(), math.NaN())
testComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
testComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
testComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
}
// testComparableNoEqual fails the test if v1 and v2 produce the same
// Comparable hash under a freshly made seed.
func testComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
	seed := MakeSeed()
	hash1 := Comparable(seed, v1)
	hash2 := Comparable(seed, v2)
	if hash1 == hash2 {
		t.Fatalf("Comparable(seed, %v) == Comparable(seed, %v)", v1, v2)
	}
}
// heapStrValue holds the bytes that heapStr converts into a string.
var heapStrValue = []byte("aTestString")
// heapStr returns a string built from heapStrValue on every call.
// Callers in this file verify that two results are equal in value but do
// not share the same data pointer. The t parameter is not used.
func heapStr(t *testing.T) string {
return string(heapStrValue)
}
// testComparable runs a subtest, named after T, that checks two things:
//
//   - Comparable returns the same hash for v and for v2[0] when given
//     (or for v twice when v2 is empty), using one seed;
//   - the hash of a pointer to a local copy of v is unchanged after
//     stackGrow has run.
func testComparable[T comparable](t *testing.T, v T, v2 ...T) {
	name := reflect.TypeFor[T]().String()
	t.Run(name, func(t *testing.T) {
		a, b := v, v
		if len(v2) > 0 {
			b = v2[0]
		}
		pa := &a
		seed := MakeSeed()

		if Comparable(seed, a) != Comparable(seed, b) {
			t.Fatalf("Comparable(seed, %v) != Comparable(seed, %v)", a, b)
		}

		before := Comparable(seed, pa)
		stackGrow(8192)
		after := Comparable(seed, pa)
		if before != after {
			t.Fatal("Comparable(seed, ptr) != Comparable(seed, ptr)")
		}
	})
}
// use receives a byte read back from stackGrow's local array, so that the
// array's contents are actually observed.
var use byte
// stackGrow calls itself dep times, declaring a 1024-byte local array in
// every frame. The tests call it between two hashes of the same pointer;
// presumably the deep recursion is meant to force the goroutine stack to grow.
//
//go:noinline
func stackGrow(dep int) {
if dep == 0 {
return
}
var local [1024]byte
// make sure local is allocated on the stack.
// A random index is written and another random index is read back into
// the package-level variable use.
local[randUint64()%1024] = byte(randUint64())
use = local[randUint64()%1024]
stackGrow(dep - 1)
}
// TestWriteComparable checks that WriteComparable feeds equal data to a
// Hash for equal values of a range of comparable types, and different data
// for selected pairs of unequal values.
func TestWriteComparable(t *testing.T) {
	// Each value below is written to two hashes that share a seed and must
	// produce the same sum in both.
	testWriteComparable(t, int64(2))
	testWriteComparable(t, uint64(8))
	testWriteComparable(t, uintptr(12))
	testWriteComparable(t, any("s"))
	testWriteComparable(t, "s")
	// The bool case must exercise WriteComparable like every other case
	// here; it previously called testComparable by mistake.
	testWriteComparable(t, true)
	testWriteComparable(t, new(float64))
	testWriteComparable(t, float64(9))
	testWriteComparable(t, complex128(9i+1))
	testWriteComparable(t, struct{}{})
	testWriteComparable(t, struct {
		i int
		u uint
		b bool
		f float64
		p *int
		a any
	}{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})

	type S struct {
		s string
	}
	// s1 and s2 hold strings that are equal in value but are expected to be
	// backed by different memory; both expectations are verified first.
	s1 := S{s: heapStr(t)}
	s2 := S{s: heapStr(t)}
	if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
		t.Fatalf("unexpected two heapStr ptr equal")
	}
	if s1.s != s2.s {
		t.Fatalf("unexpected two heapStr value not equal")
	}
	testWriteComparable(t, s1, s2)
	testWriteComparable(t, s1.s, s2.s)

	// Positive and negative zero compare equal, so they must hash equally.
	testWriteComparable(t, float32(0), negativeZero[float32]())
	testWriteComparable(t, float64(0), negativeZero[float64]())

	// The remaining pairs are expected to produce different hashes.
	testWriteComparableNoEqual(t, math.NaN(), math.NaN())
	testWriteComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
	testWriteComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
	testWriteComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
}
// testWriteComparableNoEqual writes v1 and v2 to two hashes that were given
// the same seed and fails the test if both hashes report the same sum.
func testWriteComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
	seed := MakeSeed()

	var first, second Hash
	first.seed = seed
	second.seed = seed

	WriteComparable(&first, v1)
	WriteComparable(&second, v2)

	if first.Sum64() == second.Sum64() {
		t.Fatalf("WriteComparable(seed, %v) == WriteComparable(seed, %v)", v1, v2)
	}
}
// testWriteComparable runs a subtest, named after T, that checks two things
// using a pair of hashes sharing one seed:
//
//   - writing v to one hash and v2[0] to the other (or v to both when v2
//     is empty) yields the same sum;
//   - additionally writing a pointer to a local copy of v yields the same
//     sum before and after stackGrow has run.
func testWriteComparable[T comparable](t *testing.T, v T, v2 ...T) {
	name := reflect.TypeFor[T]().String()
	t.Run(name, func(t *testing.T) {
		a, b := v, v
		if len(v2) > 0 {
			b = v2[0]
		}
		pa := &a

		var h1, h2 Hash
		h1.seed = MakeSeed()
		h2.seed = h1.seed

		WriteComparable(&h1, a)
		WriteComparable(&h2, b)
		if h1.Sum64() != h2.Sum64() {
			t.Fatalf("WriteComparable(h, %v) != WriteComparable(h, %v)", a, b)
		}

		// h1 receives the pointer before the stack grows, h2 afterwards.
		WriteComparable(&h1, pa)
		before := h1.Sum64()
		stackGrow(8192)
		WriteComparable(&h2, pa)
		after := h2.Sum64()
		if before != after {
			t.Fatal("WriteComparable(seed, ptr) != WriteComparable(seed, ptr)")
		}
	})
}
// TestComparableShouldPanic checks that hashing an interface value holding
// a []byte panics with the string "maphash: []uint8 not comparable".
func TestComparableShouldPanic(t *testing.T) {
	unhashable := any([]byte("s"))

	defer func() {
		r := recover()
		if r == nil {
			t.Fatalf("hash any([]byte) should panic in maphash.appendT")
		}
		// The panic value must be a plain string.
		msg, isString := r.(string)
		if !isString {
			t.Fatalf("hash any([]byte) should panic in maphash.appendT")
		}
		want := "maphash: []uint8 not comparable"
		if msg != want {
			t.Fatalf("want %s, got %s", want, msg)
		}
	}()

	Comparable(MakeSeed(), unhashable)
}
// Compile-time checks that *Hash satisfies both hash.Hash and hash.Hash64.
var (
	_ hash.Hash   = (*Hash)(nil)
	_ hash.Hash64 = (*Hash)(nil)
)