From 6051fd0ad9f3e9e2ee405da709ee9f087747e4c1 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Tue, 17 Sep 2019 16:14:37 -0400 Subject: [PATCH] [dev.link] cmd/link: support new object file format Parse new object file format in the linker. At least we can link a hello-world program. Add a basic "loader", which handles symbol references in the object file. - mapping between local and global indices - resolve by-name references (TODO: the overwrite logic isn't implemented yet) Currently we still create sym.Symbol rather early, and, after all the object files are loaded and indexed references are resolved, add all symbols to sym.Symbols. The code here is probably not going in the final version. This is basically only for debugging purposes -- to make sure the writer and the reader work as expected. Change-Id: I895aeea68326fabdb7e5aa1371b8cac7211a09dd Reviewed-on: https://go-review.googlesource.com/c/go/+/196032 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Than McIntosh --- src/cmd/link/internal/ld/go.go | 7 +- src/cmd/link/internal/ld/lib.go | 75 +++- src/cmd/link/internal/ld/link.go | 4 + src/cmd/link/internal/ld/main.go | 2 + src/cmd/link/internal/objfile/objfile2.go | 419 ++++++++++++++++++++++ src/cmd/link/internal/sym/library.go | 7 + src/cmd/link/internal/sym/symbols.go | 11 + 7 files changed, 513 insertions(+), 12 deletions(-) create mode 100644 src/cmd/link/internal/objfile/objfile2.go diff --git a/src/cmd/link/internal/ld/go.go b/src/cmd/link/internal/ld/go.go index 80d7ac32f5..d1d68b0704 100644 --- a/src/cmd/link/internal/ld/go.go +++ b/src/cmd/link/internal/ld/go.go @@ -111,7 +111,12 @@ func ldpkg(ctxt *Link, f *bio.Reader, lib *sym.Library, length int64, filename s } p1 += p0 - loadcgo(ctxt, filename, objabi.PathToPrefix(lib.Pkg), data[p0:p1]) + if *flagNewobj { + // loadcgo creates sym.Symbol. Delay this until all the symbols are added. + ctxt.cgodata = append(ctxt.cgodata, [3]string{filename, objabi.PathToPrefix(lib.Pkg), data[p0:p1]}) + } else { + loadcgo(ctxt, filename, objabi.PathToPrefix(lib.Pkg), data[p0:p1]) + } } } diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index 182e5b0769..e2efb95ff4 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -375,15 +375,8 @@ func (ctxt *Link) findLibPath(libname string) string { } func (ctxt *Link) loadlib() { - switch ctxt.BuildMode { - case BuildModeCShared, BuildModePlugin: - s := ctxt.Syms.Lookup("runtime.islibrary", 0) - s.Attr |= sym.AttrDuplicateOK - s.AddUint8(1) - case BuildModeCArchive: - s := ctxt.Syms.Lookup("runtime.isarchive", 0) - s.Attr |= sym.AttrDuplicateOK - s.AddUint8(1) + if *flagNewobj { + ctxt.loader = objfile.NewLoader() } loadinternal(ctxt, "runtime") @@ -408,6 +401,11 @@ func (ctxt *Link) loadlib() { } } + // XXX do it here for now + if *flagNewobj { + ctxt.loadlibfull() + } + for _, lib := range ctxt.Library { if lib.Shlib != "" { if ctxt.Debugvlog > 1 { @@ -417,6 +415,19 @@ func (ctxt *Link) loadlib() { } } + switch ctxt.BuildMode { + case BuildModeCShared, BuildModePlugin: + s := ctxt.Syms.Lookup("runtime.islibrary", 0) + s.Type = sym.SNOPTRDATA + s.Attr |= sym.AttrDuplicateOK + s.AddUint8(1) + case BuildModeCArchive: + s := ctxt.Syms.Lookup("runtime.isarchive", 0) + s.Type = sym.SNOPTRDATA + s.Attr |= sym.AttrDuplicateOK + s.AddUint8(1) + } + iscgo = ctxt.Syms.ROLookup("x_cgo_init", 0) != nil // We now have enough information to determine the link mode. @@ -843,7 +854,7 @@ func loadobjfile(ctxt *Link, lib *sym.Library) { if err != nil { Exitf("cannot open file %s: %v", lib.File, err) } - defer f.Close() + //defer f.Close() defer func() { if pkg == "main" && !lib.Main { Exitf("%s: not package main", lib.File) @@ -1773,7 +1784,12 @@ func ldobj(ctxt *Link, f *bio.Reader, lib *sym.Library, length int64, pn string, default: log.Fatalf("invalid -strictdups flag value %d", *FlagStrictDups) } - c := objfile.Load(ctxt.Arch, ctxt.Syms, f, lib, unit, eof-f.Offset(), pn, flags) + var c int + if *flagNewobj { + objfile.LoadNew(ctxt.loader, ctxt.Arch, ctxt.Syms, f, lib, unit, eof-f.Offset(), pn, flags) + } else { + c = objfile.Load(ctxt.Arch, ctxt.Syms, f, lib, unit, eof-f.Offset(), pn, flags) + } strictDupMsgCount += c addImports(ctxt, lib, pn) return nil @@ -2545,3 +2561,40 @@ func dfs(lib *sym.Library, mark map[*sym.Library]markKind, order *[]*sym.Library mark[lib] = visited *order = append(*order, lib) } + +func (ctxt *Link) loadlibfull() { + // Add references of externally defined symbols. + for _, lib := range ctxt.Library { + for _, r := range lib.Readers { + objfile.LoadRefs(ctxt.loader, r.Reader, lib, ctxt.Arch, ctxt.Syms, r.Version) + } + } + + // Load full symbol contents, resolve indexed references. + for _, lib := range ctxt.Library { + for _, r := range lib.Readers { + objfile.LoadFull(ctxt.loader, r.Reader, lib, ctxt.Syms, r.Version, ctxt.LibraryByPkg) + } + } + + // For now, add all symbols to ctxt.Syms. + for _, s := range ctxt.loader.Syms { + if s != nil && s.Name != "" { + ctxt.Syms.Add(s) + } + } + + // Now load cgo directives. + for _, p := range ctxt.cgodata { + loadcgo(ctxt, p[0], p[1], p[2]) + } +} + +func (ctxt *Link) dumpsyms() { + for _, s := range ctxt.Syms.Allsym { + fmt.Printf("%s %s %p\n", s, s.Type, s) + for i := range s.R { + fmt.Println("\t", s.R[i].Type, s.R[i].Sym) + } + } +} diff --git a/src/cmd/link/internal/ld/link.go b/src/cmd/link/internal/ld/link.go index 53092d2e8f..bbdb0e50ed 100644 --- a/src/cmd/link/internal/ld/link.go +++ b/src/cmd/link/internal/ld/link.go @@ -35,6 +35,7 @@ import ( "cmd/internal/obj" "cmd/internal/objabi" "cmd/internal/sys" + "cmd/link/internal/objfile" "cmd/link/internal/sym" "debug/elf" "fmt" @@ -96,6 +97,9 @@ type Link struct { runtimeCU *sym.CompilationUnit // One of the runtime CUs, the last one seen. relocbuf []byte // temporary buffer for applying relocations + + loader *objfile.Loader + cgodata [][3]string // cgo directives to load, three strings are args for loadcgo } type unresolvedSymKey struct { diff --git a/src/cmd/link/internal/ld/main.go b/src/cmd/link/internal/ld/main.go index 67e5ef9392..54a5fa199c 100644 --- a/src/cmd/link/internal/ld/main.go +++ b/src/cmd/link/internal/ld/main.go @@ -86,6 +86,7 @@ var ( flagInterpreter = flag.String("I", "", "use `linker` as ELF dynamic linker") FlagDebugTramp = flag.Int("debugtramp", 0, "debug trampolines") FlagStrictDups = flag.Int("strictdups", 0, "sanity check duplicate symbol contents during object file reading (1=warn 2=err).") + flagNewobj = flag.Bool("newobj", false, "use new object file format") FlagRound = flag.Int("R", -1, "set address rounding `quantum`") FlagTextAddr = flag.Int64("T", -1, "set text segment `address`") @@ -209,6 +210,7 @@ func Main(arch *sys.Arch, theArch Arch) { ctxt.dostrdata() deadcode(ctxt) + //ctxt.dumpsyms() // XXX dwarfGenerateDebugInfo(ctxt) if objabi.Fieldtrack_enabled != 0 { fieldtrack(ctxt) diff --git a/src/cmd/link/internal/objfile/objfile2.go b/src/cmd/link/internal/objfile/objfile2.go new file mode 100644 index 0000000000..5a92fef4ec --- /dev/null +++ b/src/cmd/link/internal/objfile/objfile2.go @@ -0,0 +1,419 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package objfile + +import ( + "cmd/internal/bio" + "cmd/internal/goobj2" + "cmd/internal/obj" + "cmd/internal/objabi" + "cmd/internal/sys" + "cmd/link/internal/sym" + "fmt" + "log" + "os" + "sort" + "strconv" + "strings" +) + +var _ = fmt.Print + +type objIdx struct { + r *goobj2.Reader + i int // start index +} + +type nameVer struct { + name string + v int +} + +// A Loader loads new object files and resolves indexed symbol references. +// +// TODO: describe local-global index mapping. +type Loader struct { + start map[*goobj2.Reader]int // map from object file to its start index + objs []objIdx // sorted by start index (i.e. objIdx.i) + max int // current max index + + symsByName map[nameVer]int // map symbol name to index + + Syms []*sym.Symbol // indexed symbols. XXX we still make sym.Symbol for now. +} + +func NewLoader() *Loader { + return &Loader{ + start: make(map[*goobj2.Reader]int), + objs: []objIdx{{nil, 0}}, + symsByName: make(map[nameVer]int), + Syms: []*sym.Symbol{nil}, + } +} + +// Return the start index in the global index space for a given object file. +func (l *Loader) StartIndex(r *goobj2.Reader) int { + return l.start[r] +} + +// Add object file r, return the start index. +func (l *Loader) AddObj(r *goobj2.Reader) int { + if _, ok := l.start[r]; ok { + panic("already added") + } + n := r.NSym() + r.NNonpkgdef() + i := l.max + 1 + l.start[r] = i + l.objs = append(l.objs, objIdx{r, i}) + l.max += n + return i +} + +// Add a symbol with a given index, return if it is added. +func (l *Loader) AddSym(name string, ver int, i int, dupok bool) bool { + nv := nameVer{name, ver} + if _, ok := l.symsByName[nv]; ok { + if dupok || true { // TODO: "true" isn't quite right. need to implement "overwrite" logic. + return false + } + panic("duplicated definition of symbol " + name) + } + l.symsByName[nv] = i + return true +} + +// Add an external symbol (without index). Return the index of newly added +// symbol, or 0 if not added. +func (l *Loader) AddExtSym(name string, ver int) int { + nv := nameVer{name, ver} + if _, ok := l.symsByName[nv]; ok { + return 0 + } + i := l.max + 1 + l.symsByName[nv] = i + l.max++ + return i +} + +// Convert a local index to a global index. +func (l *Loader) ToGlobal(r *goobj2.Reader, i int) int { + return l.StartIndex(r) + i +} + +// Convert a global index to a global index. Is it useful? +func (l *Loader) ToLocal(i int) (*goobj2.Reader, int) { + k := sort.Search(i, func(k int) bool { + return l.objs[k].i >= i + }) + if k == len(l.objs) { + return nil, 0 + } + return l.objs[k].r, i - l.objs[k].i +} + +// Look up a symbol by name, return global index, or 0 if not found. +// This is more like Syms.ROLookup than Lookup -- it doesn't create +// new symbol. +func (l *Loader) Lookup(name string, ver int) int { + nv := nameVer{name, ver} + return l.symsByName[nv] +} + +// Preload a package: add autolibs, add symbols to the symbol table. +// Does not read symbol data yet. +func LoadNew(l *Loader, arch *sys.Arch, syms *sym.Symbols, f *bio.Reader, lib *sym.Library, unit *sym.CompilationUnit, length int64, pn string, flags int) { + start := f.Offset() + r := goobj2.NewReader(f.File(), uint32(start)) + if r == nil { + panic("cannot read object file") + } + localSymVersion := syms.IncVersion() + lib.Readers = append(lib.Readers, struct { + Reader *goobj2.Reader + Version int + }{r, localSymVersion}) + + pkgprefix := objabi.PathToPrefix(lib.Pkg) + "." + + // Autolib + lib.ImportStrings = append(lib.ImportStrings, r.Pkglist()[1:]...) + + istart := l.AddObj(r) + + ndef := r.NSym() + nnonpkgdef := r.NNonpkgdef() + + // XXX add all symbols for now + l.Syms = append(l.Syms, make([]*sym.Symbol, ndef+nnonpkgdef)...) + for i, n := 0, ndef+nnonpkgdef; i < n; i++ { + osym := goobj2.Sym{} + osym.Read(r, r.SymOff(i)) + name := strings.Replace(osym.Name, "\"\".", pkgprefix, -1) + if name == "" { + continue // don't add unnamed aux symbol + } + v := abiToVer(osym.ABI, localSymVersion) + dupok := osym.Flag&goobj2.SymFlagDupok != 0 + if l.AddSym(name, v, istart+i, dupok) { + s := syms.Newsym(name, v) + preprocess(arch, s) // TODO: put this at a better place + l.Syms[istart+i] = s + } + } + + // The caller expects us consuming all the data + f.MustSeek(length, os.SEEK_CUR) +} + +// Make sure referenced symbols are added. Most of them should already be added. +// This should only be needed for referenced external symbols. +func LoadRefs(l *Loader, r *goobj2.Reader, lib *sym.Library, arch *sys.Arch, syms *sym.Symbols, localSymVersion int) { + pkgprefix := objabi.PathToPrefix(lib.Pkg) + "." + ndef := r.NSym() + r.NNonpkgdef() + for i, n := 0, r.NNonpkgref(); i < n; i++ { + osym := goobj2.Sym{} + osym.Read(r, r.SymOff(ndef+i)) + name := strings.Replace(osym.Name, "\"\".", pkgprefix, -1) + v := abiToVer(osym.ABI, localSymVersion) + if ii := l.AddExtSym(name, v); ii != 0 { + s := syms.Newsym(name, v) + preprocess(arch, s) // TODO: put this at a better place + if ii != len(l.Syms) { + panic("AddExtSym returned bad index") + } + l.Syms = append(l.Syms, s) + } + } +} + +func abiToVer(abi uint16, localSymVersion int) int { + var v int + if abi == goobj2.SymABIstatic { + // Static + v = localSymVersion + } else if abiver := sym.ABIToVersion(obj.ABI(abi)); abiver != -1 { + // Note that data symbols are "ABI0", which maps to version 0. + v = abiver + } else { + log.Fatalf("invalid symbol ABI: %d", abi) + } + return v +} + +func preprocess(arch *sys.Arch, s *sym.Symbol) { + if s.Name != "" && s.Name[0] == '$' && len(s.Name) > 5 && s.Type == 0 && len(s.P) == 0 { + x, err := strconv.ParseUint(s.Name[5:], 16, 64) + if err != nil { + log.Panicf("failed to parse $-symbol %s: %v", s.Name, err) + } + s.Type = sym.SRODATA + s.Attr |= sym.AttrLocal + switch s.Name[:5] { + case "$f32.": + if uint64(uint32(x)) != x { + log.Panicf("$-symbol %s too large: %d", s.Name, x) + } + s.AddUint32(arch, uint32(x)) + case "$f64.", "$i64.": + s.AddUint64(arch, x) + default: + log.Panicf("unrecognized $-symbol: %s", s.Name) + } + s.Attr.Set(sym.AttrReachable, false) + } + if strings.HasPrefix(s.Name, "runtime.gcbits.") { + s.Attr |= sym.AttrLocal + } +} + +func LoadFull(l *Loader, r *goobj2.Reader, lib *sym.Library, syms *sym.Symbols, localSymVersion int, libByPkg map[string]*sym.Library) { + // PkgIdx + pkglist := r.Pkglist() + + pkgprefix := objabi.PathToPrefix(lib.Pkg) + "." + istart := l.StartIndex(r) + + resolveSymRef := func(s goobj2.SymRef) *sym.Symbol { + var rr *goobj2.Reader + switch p := s.PkgIdx; p { + case goobj2.PkgIdxInvalid: + if s.SymIdx != 0 { + panic("bad sym ref") + } + return nil + case goobj2.PkgIdxNone: + // Resolve by name + i := int(s.SymIdx) + r.NSym() + osym := goobj2.Sym{} + osym.Read(r, r.SymOff(i)) + name := strings.Replace(osym.Name, "\"\".", pkgprefix, -1) + v := abiToVer(osym.ABI, localSymVersion) + nv := nameVer{name, v} + i = l.symsByName[nv] + return l.Syms[i] + case goobj2.PkgIdxBuiltin: + panic("PkgIdxBuiltin is not used") + case goobj2.PkgIdxSelf: + rr = r + default: + pkg := pkglist[p] + rr = libByPkg[pkg].Readers[0].Reader // typically Readers[0] is go object (others are asm) + } + i := l.ToGlobal(rr, int(s.SymIdx)) + return l.Syms[i] + } + + pcdataBase := r.PcdataBase() + for i, n := 0, r.NSym()+r.NNonpkgdef(); i < n; i++ { + s := l.Syms[istart+i] + if s == nil || s.Name == "" { + continue + } + + osym := goobj2.Sym{} + osym.Read(r, r.SymOff(i)) + name := strings.Replace(osym.Name, "\"\".", pkgprefix, -1) + if s.Name != name { + fmt.Println("name mismatch:", lib, i, s.Name, name) + panic("name mismatch") + } + + dupok := osym.Flag&goobj2.SymFlagDupok != 0 + local := osym.Flag&goobj2.SymFlagLocal != 0 + makeTypelink := osym.Flag&goobj2.SymFlagTypelink != 0 + nreloc := r.NReloc(i) + datasize := r.DataSize(i) + size := osym.Siz + + t := sym.AbiSymKindToSymKind[objabi.SymKind(osym.Type)] + if s.Type != 0 && s.Type != sym.SXREF { + fmt.Println("symbol already processed:", lib, i, s) + panic("symbol already processed") + } + + // Symbol data + s.P = r.BytesAt(r.DataOff(i), datasize) + + // Reloc + s.R = make([]sym.Reloc, nreloc) + for j := range s.R { + rel := goobj2.Reloc{} + rel.Read(r, r.RelocOff(i, j)) + s.R[j] = sym.Reloc{ + Off: rel.Off, + Siz: rel.Siz, + Type: objabi.RelocType(rel.Type), + Add: rel.Add, + Sym: resolveSymRef(rel.Sym), + } + } + + // Aux symbol info + isym := -1 + funcdata := make([]goobj2.SymRef, 0, 4) + naux := r.NAux(i) + for j := 0; j < naux; j++ { + a := goobj2.Aux{} + a.Read(r, r.AuxOff(i, j)) + switch a.Type { + case goobj2.AuxGotype: + typ := resolveSymRef(a.Sym) + if typ != nil { + s.Gotype = typ + } + case goobj2.AuxFuncInfo: + if a.Sym.PkgIdx != goobj2.PkgIdxSelf { + panic("funcinfo symbol not defined in current package") + } + isym = int(a.Sym.SymIdx) + case goobj2.AuxFuncdata: + funcdata = append(funcdata, a.Sym) + default: + panic("unknown aux type") + } + } + + s.File = pkgprefix[:len(pkgprefix)-1] + if dupok { + s.Attr |= sym.AttrDuplicateOK + } + if t == sym.SXREF { + log.Fatalf("bad sxref") + } + if t == 0 { + log.Fatalf("missing type for %s in %s", s.Name, lib) + } + if t == sym.SBSS && (s.Type == sym.SRODATA || s.Type == sym.SNOPTRBSS) { + t = s.Type + } + s.Type = t + if s.Size < int64(size) { + s.Size = int64(size) + } + s.Attr.Set(sym.AttrLocal, local) + s.Attr.Set(sym.AttrMakeTypelink, makeTypelink) + + if s.Type != sym.STEXT { + continue + } + if !dupok { + if s.Attr.OnList() { + log.Fatalf("symbol %s listed multiple times", s.Name) + } + s.Attr |= sym.AttrOnList + lib.Textp = append(lib.Textp, s) + } else { + // there may ba a dup in another package + // put into a temp list and add to text later + lib.DupTextSyms = append(lib.DupTextSyms, s) + } + + // FuncInfo + if isym == -1 { + continue + } + b := r.BytesAt(r.DataOff(isym), r.DataSize(isym)) + info := goobj2.FuncInfo{} + info.Read(b) + + if info.NoSplit != 0 { + s.Attr |= sym.AttrNoSplit + } + if info.Flags&goobj2.FuncFlagReflectMethod != 0 { + s.Attr |= sym.AttrReflectMethod + } + if info.Flags&goobj2.FuncFlagShared != 0 { + s.Attr |= sym.AttrShared + } + if info.Flags&goobj2.FuncFlagTopFrame != 0 { + s.Attr |= sym.AttrTopFrame + } + + info.Pcdata = append(info.Pcdata, info.PcdataEnd) // for the ease of knowing where it ends + pc := &sym.FuncInfo{ + Args: int32(info.Args), + Locals: int32(info.Locals), + Pcdata: make([]sym.Pcdata, len(info.Pcdata)-1), // -1 as we appended one above + Funcdata: make([]*sym.Symbol, len(info.Funcdataoff)), + Funcdataoff: make([]int64, len(info.Funcdataoff)), + File: make([]*sym.Symbol, len(info.File)), + } + s.FuncInfo = pc + pc.Pcsp.P = r.BytesAt(pcdataBase+info.Pcsp, int(info.Pcfile-info.Pcsp)) + pc.Pcfile.P = r.BytesAt(pcdataBase+info.Pcfile, int(info.Pcline-info.Pcfile)) + pc.Pcline.P = r.BytesAt(pcdataBase+info.Pcline, int(info.Pcinline-info.Pcline)) + pc.Pcinline.P = r.BytesAt(pcdataBase+info.Pcinline, int(info.Pcdata[0]-info.Pcinline)) + for k := range pc.Pcdata { + pc.Pcdata[k].P = r.BytesAt(pcdataBase+info.Pcdata[k], int(info.Pcdata[k+1]-info.Pcdata[k])) + } + for k := range pc.Funcdata { + pc.Funcdata[k] = resolveSymRef(funcdata[k]) + pc.Funcdataoff[k] = int64(info.Funcdataoff[k]) + } + for k := range pc.File { + pc.File[k] = resolveSymRef(info.File[k]) + } + } +} diff --git a/src/cmd/link/internal/sym/library.go b/src/cmd/link/internal/sym/library.go index 4f2023b8f7..b319c6b54e 100644 --- a/src/cmd/link/internal/sym/library.go +++ b/src/cmd/link/internal/sym/library.go @@ -4,6 +4,8 @@ package sym +import "cmd/internal/goobj2" + type Library struct { Objref string Srcref string @@ -18,6 +20,11 @@ type Library struct { Main bool Safe bool Units []*CompilationUnit + + Readers []struct { // TODO: probably move this to Loader + Reader *goobj2.Reader + Version int + } } func (l Library) String() string { diff --git a/src/cmd/link/internal/sym/symbols.go b/src/cmd/link/internal/sym/symbols.go index f0fcf2361b..e772496534 100644 --- a/src/cmd/link/internal/sym/symbols.go +++ b/src/cmd/link/internal/sym/symbols.go @@ -86,6 +86,17 @@ func (syms *Symbols) ROLookup(name string, v int) *Symbol { return syms.hash[v][name] } +// Add an existing symbol to the symbol table. +func (syms *Symbols) Add(s *Symbol) { + name := s.Name + v := int(s.Version) + m := syms.hash[v] + if _, ok := m[name]; ok { + panic(name + " already added") + } + m[name] = s +} + // Allocate a new version (i.e. symbol namespace). func (syms *Symbols) IncVersion() int { syms.hash = append(syms.hash, make(map[string]*Symbol))