diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go index 31d8549d2d..a6eb44de73 100644 --- a/src/cmd/asm/main.go +++ b/src/cmd/asm/main.go @@ -41,6 +41,7 @@ func main() { ctxt.Flag_dynlink = *flags.Dynlink ctxt.Flag_shared = *flags.Shared || *flags.Dynlink ctxt.IsAsm = true + ctxt.Pkgpath = *flags.Importpath switch *flags.Spectre { default: log.Printf("unknown setting -spectre=%s", *flags.Spectre) @@ -97,7 +98,7 @@ func main() { } if ok && !*flags.SymABIs { ctxt.NumberSyms() - obj.WriteObjFile(ctxt, buf, *flags.Importpath) + obj.WriteObjFile(ctxt, buf) } if !ok || diag { if failedFile != "" { diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 6e204f49bc..bb28ef01a1 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -789,6 +789,7 @@ func Main(archInit func(*Arch)) { // Write object data to disk. timings.Start("be", "dumpobj") dumpdata() + Ctxt.Pkgpath = myimportpath Ctxt.NumberSyms() dumpobj() if asmhdr != "" { diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go index 26ea775368..0826b04e33 100644 --- a/src/cmd/compile/internal/gc/obj.go +++ b/src/cmd/compile/internal/gc/obj.go @@ -166,7 +166,7 @@ func dumpLinkerObj(bout *bio.Writer) { fmt.Fprintf(bout, "\n!\n") - obj.WriteObjFile(Ctxt, bout, myimportpath) + obj.WriteObjFile(Ctxt, bout) } func addptabs() { diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index ffc3e99a20..195af8494c 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -671,6 +671,7 @@ type Link struct { Retpoline bool // emit use of retpoline stubs for indirect jmp/call Bso *bufio.Writer Pathname string + Pkgpath string // the current package's import path, "" if unknown hashmu sync.Mutex // protects hash, funchash hash map[string]*LSym // name -> sym mapping funchash map[string]*LSym // name -> sym mapping for ABIInternal syms diff --git a/src/cmd/internal/obj/objfile2.go b/src/cmd/internal/obj/objfile2.go index 6ac23bc418..6a5f3726f8 100644 --- a/src/cmd/internal/obj/objfile2.go +++ b/src/cmd/internal/obj/objfile2.go @@ -12,13 +12,15 @@ import ( "cmd/internal/goobj2" "cmd/internal/objabi" "crypto/sha1" + "encoding/binary" "fmt" + "io" "path/filepath" "strings" ) // Entry point of writing new object file. -func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { +func WriteObjFile(ctxt *Link, b *bio.Writer) { debugAsmEmit(ctxt) @@ -27,7 +29,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { w := writer{ Writer: goobj2.NewWriter(b), ctxt: ctxt, - pkgpath: objabi.PathToPrefix(pkgpath), + pkgpath: objabi.PathToPrefix(ctxt.Pkgpath), } start := b.Offset() @@ -39,7 +41,7 @@ func WriteObjFile(ctxt *Link, b *bio.Writer, pkgpath string) { if ctxt.Flag_shared { flags |= goobj2.ObjFlagShared } - if pkgpath == "" { + if w.pkgpath == "" { flags |= goobj2.ObjFlagNeedNameExpansion } if ctxt.IsAsm { @@ -336,19 +338,80 @@ func (w *writer) Hash64(s *LSym) { if !s.ContentAddressable() || len(s.R) != 0 { panic("Hash of non-content-addresable symbol") } - var b goobj2.Hash64Type - copy(b[:], s.P) + b := contentHash64(s) w.Bytes(b[:]) } func (w *writer) Hash(s *LSym) { - if !s.ContentAddressable() || len(s.R) != 0 { // TODO: currently we don't support content-addressable symbols with relocations + if !s.ContentAddressable() { panic("Hash of non-content-addresable symbol") } - b := goobj2.HashType(sha1.Sum(s.P)) + b := w.contentHash(s) w.Bytes(b[:]) } +func contentHash64(s *LSym) goobj2.Hash64Type { + var b goobj2.Hash64Type + copy(b[:], s.P) + return b +} + +// Compute the content hash for a content-addressable symbol. +// We build a content hash based on its content and relocations. +// Depending on the category of the referenced symbol, we choose +// different hash algorithms such that the hash is globally +// consistent. +// - For referenced content-addressable symbol, its content hash +// is globally consistent. +// - For package symbol, its local index is globally consistent. +// - For non-package symbol, its fully-expanded name is globally +// consistent. For now, we require we know the current package +// path so we can always expand symbol names. (Otherwise, +// symbols with relocations are not considered hashable.) +// +// For now, we assume there is no circular dependencies among +// hashed symbols. +func (w *writer) contentHash(s *LSym) goobj2.HashType { + h := sha1.New() + h.Write(s.P) + var tmp [14]byte + for i := range s.R { + r := &s.R[i] + binary.LittleEndian.PutUint32(tmp[:4], uint32(r.Off)) + tmp[4] = r.Siz + tmp[5] = uint8(r.Type) + binary.LittleEndian.PutUint64(tmp[6:14], uint64(r.Add)) + h.Write(tmp[:]) + rs := r.Sym + switch rs.PkgIdx { + case goobj2.PkgIdxHashed64: + h.Write([]byte{0}) + t := contentHash64(rs) + h.Write(t[:]) + case goobj2.PkgIdxHashed: + h.Write([]byte{1}) + t := w.contentHash(rs) + h.Write(t[:]) + case goobj2.PkgIdxBuiltin: + panic("unsupported") + case goobj2.PkgIdxNone: + h.Write([]byte{2}) + io.WriteString(h, rs.Name) // name is already expanded at this point + case goobj2.PkgIdxSelf: + io.WriteString(h, w.pkgpath) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + default: + io.WriteString(h, rs.Pkg) + binary.LittleEndian.PutUint32(tmp[:4], uint32(rs.SymIdx)) + h.Write(tmp[:4]) + } + } + var b goobj2.HashType + copy(b[:], h.Sum(nil)) + return b +} + func makeSymRef(s *LSym) goobj2.SymRef { if s == nil { return goobj2.SymRef{} diff --git a/src/cmd/internal/obj/objfile_test.go b/src/cmd/internal/obj/objfile_test.go new file mode 100644 index 0000000000..ed3be20760 --- /dev/null +++ b/src/cmd/internal/obj/objfile_test.go @@ -0,0 +1,87 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package obj + +import ( + "cmd/internal/goobj2" + "cmd/internal/sys" + "testing" +) + +var dummyArch = LinkArch{Arch: sys.ArchAMD64} + +func TestContentHash64(t *testing.T) { + s1 := &LSym{P: []byte("A")} + s2 := &LSym{P: []byte("A\x00\x00\x00")} + s1.Set(AttrContentAddressable, true) + s2.Set(AttrContentAddressable, true) + h1 := contentHash64(s1) + h2 := contentHash64(s2) + if h1 != h2 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s2)=%x, expect equal", h1, h2) + } + + ctxt := Linknew(&dummyArch) // little endian + s3 := ctxt.Int64Sym(int64('A')) + h3 := contentHash64(s3) + if h1 != h3 { + t.Errorf("contentHash64(s1)=%x, contentHash64(s3)=%x, expect equal", h1, h3) + } +} + +func TestContentHash(t *testing.T) { + syms := []*LSym{ + &LSym{P: []byte("TestSymbol")}, // 0 + &LSym{P: []byte("TestSymbol")}, // 1 + &LSym{P: []byte("TestSymbol2")}, // 2 + &LSym{P: []byte("")}, // 3 + &LSym{P: []byte("")}, // 4 + &LSym{P: []byte("")}, // 5 + &LSym{P: []byte("")}, // 6 + } + for _, s := range syms { + s.Set(AttrContentAddressable, true) + s.PkgIdx = goobj2.PkgIdxHashed + } + // s3 references s0 + r := Addrel(syms[3]) + r.Sym = syms[0] + // s4 references s0 + r = Addrel(syms[4]) + r.Sym = syms[0] + // s5 references s1 + r = Addrel(syms[5]) + r.Sym = syms[1] + // s6 references s2 + r = Addrel(syms[6]) + r.Sym = syms[2] + + // compute hashes + h := make([]goobj2.HashType, len(syms)) + w := &writer{} + for i := range h { + h[i] = w.contentHash(syms[i]) + } + + tests := []struct { + a, b int + equal bool + }{ + {0, 1, true}, // same contents, no relocs + {0, 2, false}, // different contents + {3, 4, true}, // same contents, same relocs + {3, 5, true}, // recursively same contents + {3, 6, false}, // same contents, different relocs + } + for _, test := range tests { + if (h[test.a] == h[test.b]) != test.equal { + eq := "equal" + if !test.equal { + eq = "not equal" + } + t.Errorf("h%d=%x, h%d=%x, expect %s", test.a, h[test.a], test.b, h[test.b], eq) + } + } +} diff --git a/src/cmd/internal/obj/sym.go b/src/cmd/internal/obj/sym.go index 6285486c66..67e4081f74 100644 --- a/src/cmd/internal/obj/sym.go +++ b/src/cmd/internal/obj/sym.go @@ -202,8 +202,10 @@ func (ctxt *Link) NumberSyms() { var idx, hashedidx, hashed64idx, nonpkgidx int32 ctxt.traverseSyms(traverseDefs, func(s *LSym) { - if s.ContentAddressable() && len(s.R) == 0 { // TODO: currently we don't support content-addressable symbols with relocations - if len(s.P) <= 8 { + // if Pkgpath is unknown, cannot hash symbols with relocations, as it + // may reference named symbols whose names are not fully expanded. + if s.ContentAddressable() && (ctxt.Pkgpath != "" || len(s.R) == 0) { + if len(s.P) <= 8 && len(s.R) == 0 { // we can use short hash only for symbols without relocations s.PkgIdx = goobj2.PkgIdxHashed64 s.SymIdx = hashed64idx if hashed64idx != int32(len(ctxt.hashed64defs)) {