hash/maphash: add WriteComparable and Comparable

Default, use hash function in the runtime package. If the build tag is purego or raw memory cannot be hash directly, use reflect get each field to hash separately. Fixes #54670 Change-Id: Ic968864c9c3c51883967d4f6dc24432385c7dc79 GitHub-Last-Rev: 5ae8a28834 GitHub-Pull-Request: golang/go#69166 Reviewed-on: https://go-review.googlesource.com/c/go/+/609761 Reviewed-by: Keith Randall <khr@google.com> Reviewed-by: Keith Randall <khr@golang.org> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
2024-11-13 12:20:26 -07:00 · 2024-09-25 10:32:49 +00:00 · 2024-09-25 10:32:49 +00:00 · 03103a54d8
commit 03103a54d8
parent 658a6a6e1f
6 changed files with 346 additions and 1 deletions
--- a/api/next/54670.txt
+++ b/api/next/54670.txt
@ -0,0 +1,2 @@
+pkg hash/maphash, func Comparable[$0 comparable](Seed, $0) uint64 #54670
+pkg hash/maphash, func WriteComparable[$0 comparable](*Hash, $0) #54670
--- a/doc/next/6-stdlib/99-minor/hash/maphash/54670.md
+++ b/doc/next/6-stdlib/99-minor/hash/maphash/54670.md
@ -0,0 +1,2 @@
+New function [Comparable] returns the hash of comparable value v.
+New function [WriteComparable] adds x to the data hashed by [Hash].
--- a/src/hash/maphash/maphash.go
+++ b/src/hash/maphash/maphash.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// Package maphash provides hash functions on byte sequences.
+// Package maphash provides hash functions on byte sequences and comparable values.
 // These hash functions are intended to be used to implement hash tables or
 // other data structures that need to map arbitrary strings or byte
 // sequences to a uniform distribution on unsigned 64-bit integers.
@ -12,6 +12,13 @@
 // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
 package maphash

+import (
+	"internal/abi"
+	"internal/byteorder"
+	"math"
+	"reflect"
+)
+
 // A Seed is a random value that selects the specific hash function
 // computed by a [Hash]. If two Hashes use the same Seeds, they
 // will compute the same hash values for any given input.
@ -275,3 +282,117 @@ func (h *Hash) Size() int { return 8 }

 // BlockSize returns h's block size.
 func (h *Hash) BlockSize() int { return len(h.buf) }
+
+// Comparable returns the hash of comparable value v with the given seed
+// such that Comparable(s, v1) == Comparable(s, v2) if v1 == v2.
+// If v != v, then the resulting hash is randomly distributed.
+func Comparable[T comparable](seed Seed, v T) uint64 {
+	comparableReady(v)
+	var h Hash
+	h.SetSeed(seed)
+	comparableF(&h, v)
+	return h.Sum64()
+}
+
+func comparableReady[T comparable](v T) {
+	// Force v to be on the heap.
+	// We cannot hash pointers to local variables,
+	// as the address of the local variable
+	// might change on stack growth.
+	abi.Escape(v)
+}
+
+// WriteComparable adds x to the data hashed by h.
+func WriteComparable[T comparable](h *Hash, x T) {
+	comparableReady(x)
+	comparableF(h, x)
+}
+
+// appendT hash a value,
+// when the value cannot be directly hash raw memory,
+// or when purego is used.
+func appendT(h *Hash, v reflect.Value) {
+	h.WriteString(v.Type().String())
+	switch v.Kind() {
+	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
+		var buf [8]byte
+		byteorder.LePutUint64(buf[:], uint64(v.Int()))
+		h.Write(buf[:])
+		return
+	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
+		var buf [8]byte
+		byteorder.LePutUint64(buf[:], v.Uint())
+		h.Write(buf[:])
+		return
+	case reflect.Array:
+		var buf [8]byte
+		for i := range uint64(v.Len()) {
+			byteorder.LePutUint64(buf[:], i)
+			// do not want to hash to the same value,
+			// [2]string{"foo", ""} and [2]string{"", "foo"}.
+			h.Write(buf[:])
+			appendT(h, v.Index(int(i)))
+		}
+		return
+	case reflect.String:
+		h.WriteString(v.String())
+		return
+	case reflect.Struct:
+		var buf [8]byte
+		for i := range v.NumField() {
+			f := v.Field(i)
+			byteorder.LePutUint64(buf[:], uint64(i))
+			// do not want to hash to the same value,
+			// struct{a,b string}{"foo",""} and
+			// struct{a,b string}{"","foo"}.
+			h.Write(buf[:])
+			appendT(h, f)
+		}
+		return
+	case reflect.Complex64, reflect.Complex128:
+		c := v.Complex()
+		h.float64(real(c))
+		h.float64(imag(c))
+		return
+	case reflect.Float32, reflect.Float64:
+		h.float64(v.Float())
+		return
+	case reflect.Bool:
+		h.WriteByte(btoi(v.Bool()))
+		return
+	case reflect.UnsafePointer, reflect.Pointer:
+		var buf [8]byte
+		// because pointing to the abi.Escape call in comparableReady,
+		// So this is ok to hash pointer,
+		// this way because we know their target won't be moved.
+		byteorder.LePutUint64(buf[:], uint64(v.Pointer()))
+		h.Write(buf[:])
+		return
+	case reflect.Interface:
+		appendT(h, v.Elem())
+		return
+	}
+	panic("maphash: " + v.Type().String() + " not comparable")
+}
+
+func (h *Hash) float64(f float64) {
+	if f == 0 {
+		h.WriteByte(0)
+		return
+	}
+	var buf [8]byte
+	if f != f {
+		byteorder.LePutUint64(buf[:], randUint64())
+		h.Write(buf[:])
+		return
+	}
+	byteorder.LePutUint64(buf[:], math.Float64bits(f))
+	h.Write(buf[:])
+}
+
+func btoi(b bool) byte {
+	if b {
+		return 1
+	}
+	return 0
+}
--- a/src/hash/maphash/maphash_purego.go
+++ b/src/hash/maphash/maphash_purego.go
@ -10,6 +10,7 @@ import (
 	"crypto/rand"
 	"internal/byteorder"
 	"math/bits"
+	"reflect"
 )

 func rthash(buf []byte, seed uint64) uint64 {
@ -92,3 +93,8 @@ func mix(a, b uint64) uint64 {
 	hi, lo := bits.Mul64(a, b)
 	return hi ^ lo
 }
+
+func comparableF[T comparable](h *Hash, v T) {
+	vv := reflect.ValueOf(v)
+	appendT(h, vv)
+}
--- a/src/hash/maphash/maphash_runtime.go
+++ b/src/hash/maphash/maphash_runtime.go
@ -7,6 +7,8 @@
 package maphash

 import (
+	"internal/abi"
+	"reflect"
 	"unsafe"
 )

@ -41,3 +43,19 @@ func rthashString(s string, state uint64) uint64 {
 func randUint64() uint64 {
 	return runtime_rand()
 }
+
+func comparableF[T comparable](h *Hash, v T) {
+	t := abi.TypeFor[T]()
+	// We can only use the raw memory contents for the hash,
+	// if the raw memory contents are used for computing equality.
+	// That works for some types (int),
+	// but not others (float, string, structs with padding, etc.)
+	if t.TFlag&abi.TFlagRegularMemory != 0 {
+		ptr := unsafe.Pointer(&v)
+		l := t.Size()
+		h.Write(unsafe.Slice((*byte)(ptr), l))
+		return
+	}
+	vv := reflect.ValueOf(v)
+	appendT(h, vv)
+}
--- a/src/hash/maphash/maphash_test.go
+++ b/src/hash/maphash/maphash_test.go
@ -8,7 +8,10 @@ import (
 	"bytes"
 	"fmt"
 	"hash"
+	"math"
+	"reflect"
 	"testing"
+	"unsafe"
 )

 func TestUnseededHash(t *testing.T) {
@ -210,6 +213,199 @@ func TestSeedFromReset(t *testing.T) {
 	}
 }

+func negativeZero[T float32 | float64]() T {
+	var f T
+	f = -f
+	return f
+}
+
+func TestComparable(t *testing.T) {
+	testComparable(t, int64(2))
+	testComparable(t, uint64(8))
+	testComparable(t, uintptr(12))
+	testComparable(t, any("s"))
+	testComparable(t, "s")
+	testComparable(t, true)
+	testComparable(t, new(float64))
+	testComparable(t, float64(9))
+	testComparable(t, complex128(9i+1))
+	testComparable(t, struct{}{})
+	testComparable(t, struct {
+		i int
+		u uint
+		b bool
+		f float64
+		p *int
+		a any
+	}{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+	type S struct {
+		s string
+	}
+	s1 := S{s: heapStr(t)}
+	s2 := S{s: heapStr(t)}
+	if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+		t.Fatalf("unexpected two heapStr ptr equal")
+	}
+	if s1.s != s2.s {
+		t.Fatalf("unexpected two heapStr value not equal")
+	}
+	testComparable(t, s1, s2)
+	testComparable(t, s1.s, s2.s)
+	testComparable(t, float32(0), negativeZero[float32]())
+	testComparable(t, float64(0), negativeZero[float64]())
+	testComparableNoEqual(t, math.NaN(), math.NaN())
+	testComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+	testComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+	testComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+	seed := MakeSeed()
+	if Comparable(seed, v1) == Comparable(seed, v2) {
+		t.Fatalf("Comparable(seed, %v) == Comparable(seed, %v)", v1, v2)
+	}
+}
+
+var heapStrValue = []byte("aTestString")
+
+func heapStr(t *testing.T) string {
+	return string(heapStrValue)
+}
+
+func testComparable[T comparable](t *testing.T, v T, v2 ...T) {
+	t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+		var a, b T = v, v
+		if len(v2) != 0 {
+			b = v2[0]
+		}
+		var pa *T = &a
+		seed := MakeSeed()
+		if Comparable(seed, a) != Comparable(seed, b) {
+			t.Fatalf("Comparable(seed, %v) != Comparable(seed, %v)", a, b)
+		}
+		old := Comparable(seed, pa)
+		stackGrow(8192)
+		new := Comparable(seed, pa)
+		if old != new {
+			t.Fatal("Comparable(seed, ptr) != Comparable(seed, ptr)")
+		}
+	})
+}
+
+var use byte
+
+//go:noinline
+func stackGrow(dep int) {
+	if dep == 0 {
+		return
+	}
+	var local [1024]byte
+	// make sure local is allocated on the stack.
+	local[randUint64()%1024] = byte(randUint64())
+	use = local[randUint64()%1024]
+	stackGrow(dep - 1)
+}
+
+func TestWriteComparable(t *testing.T) {
+	testWriteComparable(t, int64(2))
+	testWriteComparable(t, uint64(8))
+	testWriteComparable(t, uintptr(12))
+	testWriteComparable(t, any("s"))
+	testWriteComparable(t, "s")
+	testComparable(t, true)
+	testWriteComparable(t, new(float64))
+	testWriteComparable(t, float64(9))
+	testWriteComparable(t, complex128(9i+1))
+	testWriteComparable(t, struct{}{})
+	testWriteComparable(t, struct {
+		i int
+		u uint
+		b bool
+		f float64
+		p *int
+		a any
+	}{i: 9, u: 1, b: true, f: 9.9, p: new(int), a: 1})
+	type S struct {
+		s string
+	}
+	s1 := S{s: heapStr(t)}
+	s2 := S{s: heapStr(t)}
+	if unsafe.StringData(s1.s) == unsafe.StringData(s2.s) {
+		t.Fatalf("unexpected two heapStr ptr equal")
+	}
+	if s1.s != s2.s {
+		t.Fatalf("unexpected two heapStr value not equal")
+	}
+	testWriteComparable(t, s1, s2)
+	testWriteComparable(t, s1.s, s2.s)
+	testWriteComparable(t, float32(0), negativeZero[float32]())
+	testWriteComparable(t, float64(0), negativeZero[float64]())
+	testWriteComparableNoEqual(t, math.NaN(), math.NaN())
+	testWriteComparableNoEqual(t, [2]string{"a", ""}, [2]string{"", "a"})
+	testWriteComparableNoEqual(t, struct{ a, b string }{"foo", ""}, struct{ a, b string }{"", "foo"})
+	testWriteComparableNoEqual(t, struct{ a, b any }{int(0), struct{}{}}, struct{ a, b any }{struct{}{}, int(0)})
+}
+
+func testWriteComparableNoEqual[T comparable](t *testing.T, v1, v2 T) {
+	seed := MakeSeed()
+	h1 := Hash{}
+	h2 := Hash{}
+	h1.seed, h2.seed = seed, seed
+	WriteComparable(&h1, v1)
+	WriteComparable(&h2, v2)
+	if h1.Sum64() == h2.Sum64() {
+		t.Fatalf("WriteComparable(seed, %v) == WriteComparable(seed, %v)", v1, v2)
+	}
+
+}
+
+func testWriteComparable[T comparable](t *testing.T, v T, v2 ...T) {
+	t.Run(reflect.TypeFor[T]().String(), func(t *testing.T) {
+		var a, b T = v, v
+		if len(v2) != 0 {
+			b = v2[0]
+		}
+		var pa *T = &a
+		h1 := Hash{}
+		h2 := Hash{}
+		h1.seed = MakeSeed()
+		h2.seed = h1.seed
+		WriteComparable(&h1, a)
+		WriteComparable(&h2, b)
+		if h1.Sum64() != h2.Sum64() {
+			t.Fatalf("WriteComparable(h, %v) != WriteComparable(h, %v)", a, b)
+		}
+		WriteComparable(&h1, pa)
+		old := h1.Sum64()
+		stackGrow(8192)
+		WriteComparable(&h2, pa)
+		new := h2.Sum64()
+		if old != new {
+			t.Fatal("WriteComparable(seed, ptr) != WriteComparable(seed, ptr)")
+		}
+	})
+}
+
+func TestComparableShouldPanic(t *testing.T) {
+	s := []byte("s")
+	a := any(s)
+	defer func() {
+		err := recover()
+		if err == nil {
+			t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+		}
+		s, ok := err.(string)
+		if !ok {
+			t.Fatalf("hash any([]byte) should panic in maphash.appendT")
+		}
+		want := "maphash: []uint8 not comparable"
+		if s != want {
+			t.Fatalf("want %s, got %s", want, s)
+		}
+	}()
+	Comparable(MakeSeed(), a)
+}
+
 // Make sure a Hash implements the hash.Hash and hash.Hash64 interfaces.
 var _ hash.Hash = &Hash{}
 var _ hash.Hash64 = &Hash{}