From a99f812cba4c5a5207fed9be5488312a44a5df34 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Sat, 2 Jul 2016 17:19:25 -0700 Subject: [PATCH] cmd/objdump: implement objdump of .o files Update goobj reader so it can provide all the information necessary to disassemble .o (and .a) files. Grab architecture of .o files from header. .o files have relocations in them. This CL also contains a simple mechanism to disassemble relocations and add relocation info as an extra column in the output. Fixes #13862 Change-Id: I608fd253ff1522ea47f18be650b38d528dae9054 Reviewed-on: https://go-review.googlesource.com/24818 Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot --- src/cmd/internal/goobj/read.go | 35 +++++++++--- src/cmd/internal/obj/link.go | 8 ++- src/cmd/internal/obj/reloctype_string.go | 17 ++++++ src/cmd/internal/objfile/disasm.go | 24 +++++++- src/cmd/internal/objfile/goobj.go | 73 ++++++++++++++++++++---- src/cmd/internal/objfile/objfile.go | 31 ++++++++-- src/cmd/internal/sys/arch.go | 13 +++++ src/cmd/link/internal/arm/asm.go | 4 +- src/cmd/link/internal/ld/ldelf.go | 2 +- src/cmd/link/internal/ld/ldmacho.go | 2 +- src/cmd/link/internal/ld/link.go | 3 +- src/cmd/link/internal/ld/objfile.go | 2 +- src/cmd/pprof/pprof.go | 5 +- src/debug/gosym/pclntab.go | 10 +++- 14 files changed, 187 insertions(+), 42 deletions(-) create mode 100644 src/cmd/internal/obj/reloctype_string.go diff --git a/src/cmd/internal/goobj/read.go b/src/cmd/internal/goobj/read.go index 214f65cbc4c..329f80146d4 100644 --- a/src/cmd/internal/goobj/read.go +++ b/src/cmd/internal/goobj/read.go @@ -163,7 +163,7 @@ type Data struct { // A Reloc describes a relocation applied to a memory image to refer // to an address within a particular symbol. 
type Reloc struct { - // The bytes at [Offset, Offset+Size) within the memory image + // The bytes at [Offset, Offset+Size) within the containing Sym // should be updated to refer to the address Add bytes after the start // of the symbol Sym. Offset int @@ -174,7 +174,7 @@ type Reloc struct { // The Type records the form of address expected in the bytes // described by the previous fields: absolute, PC-relative, and so on. // TODO(rsc): The interpretation of Type is not exposed by this package. - Type int + Type obj.RelocType } // A Var describes a variable in a function stack frame: a declared @@ -220,6 +220,7 @@ type Package struct { SymRefs []SymID // list of symbol names and versions referred to by this pack Syms []*Sym // symbols defined by this package MaxVersion int // maximum Version in any SymID in Syms + Arch string // architecture } var ( @@ -561,14 +562,13 @@ func (r *objReader) parseArchive() error { // The format of that part is defined in a comment at the top // of src/liblink/objfile.c. func (r *objReader) parseObject(prefix []byte) error { - // TODO(rsc): Maybe use prefix and the initial input to - // record the header line from the file, which would - // give the architecture and other version information. - r.p.MaxVersion++ + h := make([]byte, 0, 256) + h = append(h, prefix...) var c1, c2, c3 byte for { c1, c2, c3 = c2, c3, r.readByte() + h = append(h, c3) // The new export format can contain 0 bytes. // Don't consider them errors, only look for r.err != nil. 
if r.err != nil { @@ -579,6 +579,12 @@ func (r *objReader) parseObject(prefix []byte) error { } } + hs := strings.Fields(string(h)) + if len(hs) >= 4 { + r.p.Arch = hs[3] + } + // TODO: extract OS + build ID if/when we need it + r.readFull(r.tmp[:8]) if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go17ld")) { return r.error(errCorruptObject) @@ -643,7 +649,7 @@ func (r *objReader) parseObject(prefix []byte) error { rel := &s.Reloc[i] rel.Offset = r.readInt() rel.Size = r.readInt() - rel.Type = r.readInt() + rel.Type = obj.RelocType(r.readInt()) rel.Add = r.readInt() rel.Sym = r.readSymID() } @@ -693,3 +699,18 @@ func (r *objReader) parseObject(prefix []byte) error { return nil } + +func (r *Reloc) String(insnOffset uint64) string { + delta := r.Offset - int(insnOffset) + s := fmt.Sprintf("[%d:%d]%s", delta, delta+r.Size, r.Type) + if r.Sym.Name != "" { + if r.Add != 0 { + return fmt.Sprintf("%s:%s+%d", s, r.Sym.Name, r.Add) + } + return fmt.Sprintf("%s:%s", s, r.Sym.Name) + } + if r.Add != 0 { + return fmt.Sprintf("%s:%d", s, r.Add) + } + return s +} diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index e5ed859eb1c..f72a191d02f 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -442,14 +442,16 @@ const ( type Reloc struct { Off int32 Siz uint8 - Type int32 + Type RelocType Add int64 Sym *LSym } -// Reloc.type +type RelocType int32 + +//go:generate stringer -type=RelocType const ( - R_ADDR = 1 + iota + R_ADDR RelocType = 1 + iota // R_ADDRPOWER relocates a pair of "D-form" instructions (instructions with 16-bit // immediates in the low half of the instruction word), usually addis followed by // another add or a load, inserting the "high adjusted" 16 bits of the address of diff --git a/src/cmd/internal/obj/reloctype_string.go b/src/cmd/internal/obj/reloctype_string.go new file mode 100644 index 00000000000..6de617cd788 --- /dev/null +++ b/src/cmd/internal/obj/reloctype_string.go @@ -0,0 +1,17 @@ +// Code generated by 
"stringer -type=RelocType"; DO NOT EDIT + +package obj + +import "fmt" + +const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_METHODOFFR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_PCRELDBLR_ADDRMIPSUR_ADDRMIPSTLS" + +var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 195, 206, 216, 225, 235, 249, 263, 279, 293, 307, 318, 332, 347, 364, 382, 403, 413, 424, 437} + +func (i RelocType) String() string { + i -= 1 + if i < 0 || i >= RelocType(len(_RelocType_index)-1) { + return fmt.Sprintf("RelocType(%d)", i+1) + } + return _RelocType_name[_RelocType_index[i]:_RelocType_index[i+1]] +} diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go index 25c3301ab88..771187bfe4a 100644 --- a/src/cmd/internal/objfile/disasm.go +++ b/src/cmd/internal/objfile/disasm.go @@ -22,7 +22,7 @@ import ( // Disasm is a disassembler for a given File. 
type Disasm struct { syms []Sym //symbols in file, sorted by address - pcln *gosym.Table // pcln table + pcln Liner // pcln table text []byte // bytes of text segment (actual instructions) textStart uint64 // start PC of text textEnd uint64 // end PC of text @@ -116,6 +116,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { for _, sym := range d.syms { symStart := sym.Addr symEnd := sym.Addr + uint64(sym.Size) + relocs := sym.Relocs if sym.Code != 'T' && sym.Code != 't' || symStart < d.textStart || symEnd <= start || end <= symStart || @@ -135,7 +136,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { symEnd = end } code := d.text[:end-d.textStart] - d.Decode(symStart, symEnd, func(pc, size uint64, file string, line int, text string) { + d.Decode(symStart, symEnd, relocs, func(pc, size uint64, file string, line int, text string) { i := pc - d.textStart fmt.Fprintf(tw, "\t%s:%d\t%#x\t", base(file), line, pc) if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" { @@ -158,7 +159,7 @@ func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64) { } // Decode disassembles the text segment range [start, end), calling f for each instruction. 
-func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, line int, text string)) { +func (d *Disasm) Decode(start, end uint64, relocs []Reloc, f func(pc, size uint64, file string, line int, text string)) { if start < d.textStart { start = d.textStart } @@ -171,6 +172,17 @@ func (d *Disasm) Decode(start, end uint64, f func(pc, size uint64, file string, i := pc - d.textStart text, size := d.disasm(code[i:], pc, lookup) file, line, _ := d.pcln.PCToLine(pc) + text += "\t" + first := true + for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) { + if first { + first = false + } else { + text += " " + } + text += relocs[0].Stringer.String(pc - start) + relocs = relocs[1:] + } f(pc, uint64(size), file, line, text) pc += uint64(size) } @@ -247,3 +259,9 @@ var byteOrders = map[string]binary.ByteOrder{ "ppc64le": binary.LittleEndian, "s390x": binary.BigEndian, } + +type Liner interface { + // Given a pc, returns the corresponding file, line, and function data. + // If unknown, returns "",0,nil. 
+ PCToLine(uint64) (string, int, *gosym.Func) +} diff --git a/src/cmd/internal/objfile/goobj.go b/src/cmd/internal/objfile/goobj.go index 43435efc68b..230137e0f5c 100644 --- a/src/cmd/internal/objfile/goobj.go +++ b/src/cmd/internal/objfile/goobj.go @@ -8,7 +8,9 @@ package objfile import ( "cmd/internal/goobj" + "cmd/internal/sys" "debug/dwarf" + "debug/gosym" "errors" "fmt" "os" @@ -16,6 +18,7 @@ import ( type goobjFile struct { goobj *goobj.Package + f *os.File // the underlying .o or .a file } func openGoobj(r *os.File) (rawFile, error) { @@ -23,7 +26,7 @@ func openGoobj(r *os.File) (rawFile, error) { if err != nil { return nil, err } - return &goobjFile{f}, nil + return &goobjFile{goobj: f, f: r}, nil } func goobjName(id goobj.SymID) string { @@ -55,6 +58,9 @@ func (f *goobjFile) symbols() ([]Sym, error) { if s.Version != 0 { sym.Code += 'a' - 'A' } + for i, r := range s.Reloc { + sym.Relocs = append(sym.Relocs, Reloc{Addr: uint64(s.Data.Offset) + uint64(r.Offset), Size: uint64(r.Size), Stringer: &s.Reloc[i]}) + } syms = append(syms, sym) } @@ -75,23 +81,68 @@ func (f *goobjFile) symbols() ([]Sym, error) { return syms, nil } -// pcln does not make sense for Go object files, because each -// symbol has its own individual pcln table, so there is no global -// space of addresses to map. func (f *goobjFile) pcln() (textStart uint64, symtab, pclntab []byte, err error) { + // Should never be called. We implement Liner below, callers + // should use that instead. return 0, nil, nil, fmt.Errorf("pcln not available in go object file") } -// text does not make sense for Go object files, because -// each function has a separate section. -func (f *goobjFile) text() (textStart uint64, text []byte, err error) { - return 0, nil, fmt.Errorf("text not available in go object file") +// PCToLine returns the file name, line, and function data for the given pc. +// Returns "",0,nil if unknown. +// This function implements the Liner interface in preference to pcln() above. 
+func (f *goobjFile) PCToLine(pc uint64) (string, int, *gosym.Func) { + // TODO: this is really inefficient. Binary search? Memoize last result? + var arch *sys.Arch + for _, a := range sys.Archs { + if a.Name == f.goobj.Arch { + arch = a + break + } + } + if arch == nil { + return "", 0, nil + } + for _, s := range f.goobj.Syms { + if pc < uint64(s.Data.Offset) || pc >= uint64(s.Data.Offset+s.Data.Size) { + continue + } + if s.Func == nil { + return "", 0, nil + } + pcfile := make([]byte, s.Func.PCFile.Size) + _, err := f.f.ReadAt(pcfile, s.Func.PCFile.Offset) + if err != nil { + return "", 0, nil + } + fileID := gosym.PCValue(pcfile, pc-uint64(s.Data.Offset), arch.MinLC) + fileName := s.Func.File[fileID] + pcline := make([]byte, s.Func.PCLine.Size) + _, err = f.f.ReadAt(pcline, s.Func.PCLine.Offset) + if err != nil { + return "", 0, nil + } + line := gosym.PCValue(pcline, pc-uint64(s.Data.Offset), arch.MinLC) + // Note: we provide only the name in the Func structure. + // We could provide more if needed. + return fileName, line, &gosym.Func{Sym: &gosym.Sym{Name: s.Name}} + } + return "", 0, nil +} + +// We treat the whole object file as the text section. +func (f *goobjFile) text() (textStart uint64, text []byte, err error) { + var info os.FileInfo + info, err = f.f.Stat() + if err != nil { + return + } + text = make([]byte, info.Size()) + _, err = f.f.ReadAt(text, 0) + return } -// goarch makes sense but is not exposed in debug/goobj's API, -// and we don't need it yet for any users of internal/objfile. func (f *goobjFile) goarch() string { - return "GOARCH unimplemented for debug/goobj files" + return f.goobj.Arch } func (f *goobjFile) loadAddress() (uint64, error) { diff --git a/src/cmd/internal/objfile/objfile.go b/src/cmd/internal/objfile/objfile.go index e5d99f086b5..2bf6363f292 100644 --- a/src/cmd/internal/objfile/objfile.go +++ b/src/cmd/internal/objfile/objfile.go @@ -30,11 +30,24 @@ type File struct { // A Sym is a symbol defined in an executable file. 
type Sym struct { - Name string // symbol name - Addr uint64 // virtual address of symbol - Size int64 // size in bytes - Code rune // nm code (T for text, D for data, and so on) - Type string // XXX? + Name string // symbol name + Addr uint64 // virtual address of symbol + Size int64 // size in bytes + Code rune // nm code (T for text, D for data, and so on) + Type string // XXX? + Relocs []Reloc // in increasing Addr order +} + +type Reloc struct { + Addr uint64 // Address of first byte that reloc applies to. + Size uint64 // Number of bytes + Stringer RelocStringer +} + +type RelocStringer interface { + // insnOffset is the offset of the instruction containing the relocation + // from the start of the symbol containing the relocation. + String(insnOffset uint64) string } var openers = []func(*os.File) (rawFile, error){ @@ -80,7 +93,13 @@ func (x byAddr) Less(i, j int) bool { return x[i].Addr < x[j].Addr } func (x byAddr) Len() int { return len(x) } func (x byAddr) Swap(i, j int) { x[i], x[j] = x[j], x[i] } -func (f *File) PCLineTable() (*gosym.Table, error) { +func (f *File) PCLineTable() (Liner, error) { + // If the raw file implements Liner directly, use that. + // Currently, only Go intermediate objects and archives (goobj) use this path. + if pcln, ok := f.raw.(Liner); ok { + return pcln, nil + } + // Otherwise, read the pcln tables and build a Liner out of that. 
textStart, symtab, pclntab, err := f.raw.pcln() if err != nil { return nil, err diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go index 18accdeb0c2..7033f3fb780 100644 --- a/src/cmd/internal/sys/arch.go +++ b/src/cmd/internal/sys/arch.go @@ -146,3 +146,16 @@ var ArchS390X = &Arch{ RegSize: 8, MinLC: 2, } + +var Archs = [...]*Arch{ + Arch386, + ArchAMD64, + ArchAMD64P32, + ArchARM, + ArchARM64, + ArchMIPS64, + ArchMIPS64LE, + ArchPPC64, + ArchPPC64LE, + ArchS390X, +} diff --git a/src/cmd/link/internal/arm/asm.go b/src/cmd/link/internal/arm/asm.go index 84060c43616..b1a05453903 100644 --- a/src/cmd/link/internal/arm/asm.go +++ b/src/cmd/link/internal/arm/asm.go @@ -492,12 +492,12 @@ func archrelocvariant(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, t int64) int64 { return t } -func addpltreloc(ctxt *ld.Link, plt *ld.Symbol, got *ld.Symbol, sym *ld.Symbol, typ int) *ld.Reloc { +func addpltreloc(ctxt *ld.Link, plt *ld.Symbol, got *ld.Symbol, sym *ld.Symbol, typ obj.RelocType) *ld.Reloc { r := ld.Addrel(plt) r.Sym = got r.Off = int32(plt.Size) r.Siz = 4 - r.Type = int32(typ) + r.Type = typ r.Add = int64(sym.Got) - 8 plt.Attr |= ld.AttrReachable diff --git a/src/cmd/link/internal/ld/ldelf.go b/src/cmd/link/internal/ld/ldelf.go index 2b60343bc60..7848369272b 100644 --- a/src/cmd/link/internal/ld/ldelf.go +++ b/src/cmd/link/internal/ld/ldelf.go @@ -912,7 +912,7 @@ func ldelf(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { rp.Sym = sym.sym } - rp.Type = 256 + int32(info) + rp.Type = 256 + obj.RelocType(info) rp.Siz = relSize(ctxt, pn, uint32(info)) if rela != 0 { rp.Add = int64(add) diff --git a/src/cmd/link/internal/ld/ldmacho.go b/src/cmd/link/internal/ld/ldmacho.go index 0688d2386e6..b846f0cbaed 100644 --- a/src/cmd/link/internal/ld/ldmacho.go +++ b/src/cmd/link/internal/ld/ldmacho.go @@ -828,7 +828,7 @@ func ldmacho(ctxt *Link, f *bio.Reader, pkg string, length int64, pn string) { } rp.Siz = rel.length - rp.Type = 512 + 
(int32(rel.type_) << 1) + int32(rel.pcrel) + rp.Type = 512 + (obj.RelocType(rel.type_) << 1) + obj.RelocType(rel.pcrel) rp.Off = int32(rel.addr) // Handle X86_64_RELOC_SIGNED referencing a section (rel->extrn == 0). diff --git a/src/cmd/link/internal/ld/link.go b/src/cmd/link/internal/ld/link.go index 50abeb5773c..56b98a69740 100644 --- a/src/cmd/link/internal/ld/link.go +++ b/src/cmd/link/internal/ld/link.go @@ -32,6 +32,7 @@ package ld import ( "bufio" + "cmd/internal/obj" "cmd/internal/sys" "debug/elf" "fmt" @@ -135,7 +136,7 @@ type Reloc struct { Off int32 Siz uint8 Done uint8 - Type int32 + Type obj.RelocType Variant int32 Add int64 Xadd int64 diff --git a/src/cmd/link/internal/ld/objfile.go b/src/cmd/link/internal/ld/objfile.go index cb77fb5536d..dace73161a9 100644 --- a/src/cmd/link/internal/ld/objfile.go +++ b/src/cmd/link/internal/ld/objfile.go @@ -326,7 +326,7 @@ overwrite: s.R[i] = Reloc{ Off: r.readInt32(), Siz: r.readUint8(), - Type: r.readInt32(), + Type: obj.RelocType(r.readInt32()), Add: r.readInt64(), Sym: r.readSymIndex(), } diff --git a/src/cmd/pprof/pprof.go b/src/cmd/pprof/pprof.go index 5ee8a112e0d..0c979b18316 100644 --- a/src/cmd/pprof/pprof.go +++ b/src/cmd/pprof/pprof.go @@ -6,7 +6,6 @@ package main import ( "debug/dwarf" - "debug/gosym" "flag" "fmt" "net/url" @@ -161,7 +160,7 @@ func (t *objTool) Disasm(file string, start, end uint64) ([]plugin.Inst, error) return nil, err } var asm []plugin.Inst - d.Decode(start, end, func(pc, size uint64, file string, line int, text string) { + d.Decode(start, end, nil, func(pc, size uint64, file string, line int, text string) { asm = append(asm, plugin.Inst{Addr: pc, File: file, Line: line, Text: text}) }) return asm, nil @@ -203,7 +202,7 @@ type file struct { offset uint64 sym []objfile.Sym file *objfile.File - pcln *gosym.Table + pcln objfile.Liner triedDwarf bool dwarf *dwarf.Data diff --git a/src/debug/gosym/pclntab.go b/src/debug/gosym/pclntab.go index e859d5aed50..e94ed19d7dd 100644 --- 
a/src/debug/gosym/pclntab.go +++ b/src/debug/gosym/pclntab.go @@ -291,13 +291,17 @@ func (t *LineTable) step(p *[]byte, pc *uint64, val *int32, first bool) bool { return true } +// PCValue looks up the given PC in a pc value table. target is the +// offset of the pc from the entry point. +func PCValue(tab []byte, target uint64, quantum int) int { + t := LineTable{Data: tab, quantum: uint32(quantum)} + return int(t.pcvalue(0, 0, target)) +} + // pcvalue reports the value associated with the target pc. // off is the offset to the beginning of the pc-value table, // and entry is the start PC for the corresponding function. func (t *LineTable) pcvalue(off uint32, entry, targetpc uint64) int32 { - if off == 0 { - return -1 - } p := t.Data[off:] val := int32(-1)