From 485a572243568530721009829393ba2c0deb7bd0 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Thu, 5 May 2022 09:41:53 -0400 Subject: [PATCH] cmd: use 128-bit SHA256 & encode in base64 for content hashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We used to use SHA1 for content hashes, but CL 402595 changed all the “don't care” hashes to cmd/internal/notsha256 (negated SHA256). This made object files a little bit bigger: fmt.a on my Mac laptop grows from 910678 to 937612 bytes (+3%). To remove that growth, truncate the hash we use for these purposes to 128 bits (half a SHA256), and also use base64 instead of hex for encoding it when a string form is needed. This brings fmt.a down to 901706 bytes (-1% from original, -4% from current). Change-Id: Id81da1cf3ee85ed130b3cda73aa697d8c0053a62 Reviewed-on: https://go-review.googlesource.com/c/go/+/404294 Auto-Submit: Russ Cox Run-TryBot: Russ Cox TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui --- src/cmd/compile/internal/staticdata/data.go | 13 ++++++++++--- src/cmd/internal/goobj/objfile.go | 3 +-- src/cmd/internal/obj/sym.go | 5 ++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/cmd/compile/internal/staticdata/data.go b/src/cmd/compile/internal/staticdata/data.go index b8b645f246f..2ea1a81c7a7 100644 --- a/src/cmd/compile/internal/staticdata/data.go +++ b/src/cmd/compile/internal/staticdata/data.go @@ -5,6 +5,7 @@ package staticdata import ( + "encoding/base64" "fmt" "go/constant" "io" @@ -61,9 +62,15 @@ func InitSliceBytes(nam *ir.Name, off int64, s string) { const ( stringSymPrefix = "go.string." - stringSymPattern = ".gostring.%d.%x" + stringSymPattern = ".gostring.%d.%s" ) +// shortHashString converts the hash to a string for use with stringSymPattern. +// We cut it to 16 bytes and then base64-encode to make it even smaller. +func shortHashString(hash []byte) string { + return base64.StdEncoding.EncodeToString(hash[:16]) +} + // StringSym returns a symbol containing the string s. // The symbol contains the string data, not a string header. func StringSym(pos src.XPos, s string) (data *obj.LSym) { @@ -75,7 +82,7 @@ func StringSym(pos src.XPos, s string) (data *obj.LSym) { // Same pattern is known to fileStringSym below. h := notsha256.New() io.WriteString(h, s) - symname = fmt.Sprintf(stringSymPattern, len(s), h.Sum(nil)) + symname = fmt.Sprintf(stringSymPattern, len(s), shortHashString(h.Sum(nil))) } else { // Small strings get named directly by their contents. symname = strconv.Quote(s) @@ -162,7 +169,7 @@ func fileStringSym(pos src.XPos, file string, readonly bool, hash []byte) (*obj. var symdata *obj.LSym if readonly { - symname := fmt.Sprintf(stringSymPattern, size, sum) + symname := fmt.Sprintf(stringSymPattern, size, shortHashString(sum)) symdata = base.Ctxt.Lookup(stringSymPrefix + symname) if !symdata.OnList() { info := symdata.NewFileInfo() diff --git a/src/cmd/internal/goobj/objfile.go b/src/cmd/internal/goobj/objfile.go index e58be66e59b..1b676b3677b 100644 --- a/src/cmd/internal/goobj/objfile.go +++ b/src/cmd/internal/goobj/objfile.go @@ -20,7 +20,6 @@ package goobj import ( "cmd/internal/bio" - "cmd/internal/notsha256" "encoding/binary" "errors" "fmt" @@ -367,7 +366,7 @@ const Hash64Size = 8 // Hash type HashType [HashSize]byte -const HashSize = notsha256.Size +const HashSize = 16 // truncated SHA256 // Relocation. // diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index 95dd07d0fab..40e5377a3d7 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -35,6 +35,7 @@ import ( "cmd/internal/goobj" "cmd/internal/notsha256" "cmd/internal/objabi" + "encoding/base64" "fmt" "internal/buildcfg" "log" @@ -175,7 +176,9 @@ func (ctxt *Link) Int64Sym(i int64) *LSym { // GCLocalsSym generates a content-addressable sym containing data. func (ctxt *Link) GCLocalsSym(data []byte) *LSym { - return ctxt.LookupInit(fmt.Sprintf("gclocals·%x", notsha256.Sum256(data)), func(lsym *LSym) { + sum := notsha256.Sum256(data) + str := base64.StdEncoding.EncodeToString(sum[:16]) + return ctxt.LookupInit(fmt.Sprintf("gclocals·%s", str), func(lsym *LSym) { lsym.P = data lsym.Set(AttrContentAddressable, true) })