mirror of
https://github.com/golang/go
synced 2024-11-05 14:56:10 -07:00
d30e00c240
splitdwarf osxMachoFile [ osxDsymFile ] splitdwarf takes an executable produced by go build as input, and uncompresses and copies the DWARF segment into a separate file in the way that is expected by OSX-hosted tools (lldb and ports of gdb). If osxDsymFile is not named explicitly, the default of "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>" is used instead, with directories created as needed. If the input file contains no UUID, then one is created by hashing non-DWARF segment contents, and added to the executable. This is necessary because gdb and lldb both expect matching UUIDs to be present in the executable and its debugging symbols. Includes a modified version of debug/macho, with additional definitions and the ability to write segments, sections, and some MachO load commands added. Change-Id: Ia5b0e289260f72bbca392cdf2c7c0a75e3ca40e5 Reviewed-on: https://go-review.googlesource.com/c/143357 Reviewed-by: Austin Clements <austin@google.com>
395 lines
13 KiB
Go
395 lines
13 KiB
Go
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// +build !js,!nacl,!plan9,!solaris,!windows
|
|
|
|
package main
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"syscall"
|
|
|
|
"golang.org/x/tools/cmd/splitdwarf/internal/macho"
|
|
)
|
|
|
|
const (
|
|
pageAlign = 12 // 4096 = 1 << 12
|
|
)
|
|
|
|
// note writes a printf-style diagnostic to standard error. A newline is
// appended to format before formatting, so callers pass messages without a
// trailing newline. Write errors are ignored.
func note(format string, why ...interface{}) {
	line := fmt.Sprintf(format+"\n", why...)
	os.Stderr.WriteString(line)
}
|
|
|
|
func fail(format string, why ...interface{}) {
|
|
note(format, why...)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// splitdwarf inputexe [ outputdwarf ]
|
|
func main() {
|
|
if len(os.Args) < 2 || len(os.Args) > 3 {
|
|
fmt.Printf(`
|
|
Usage: %s input_exe [ output_dsym ]
|
|
Reads the executable input_exe, uncompresses and copies debugging
|
|
information into output_dsym. If output_dsym is not specified,
|
|
the path
|
|
input_exe.dSYM/Contents/Resources/DWARF/input_exe
|
|
is used instead. That is the path that gdb and lldb expect
|
|
on OSX. Input_exe needs a UUID segment; if that is missing,
|
|
then one is created and added. In that case, the permissions
|
|
for input_exe need to allow writing.
|
|
`, os.Args[0])
|
|
return
|
|
}
|
|
|
|
// Read input, find DWARF, be sure it looks right
|
|
inputExe := os.Args[1]
|
|
exeFile, err := os.Open(inputExe)
|
|
if err != nil {
|
|
fail("%v", err)
|
|
}
|
|
exeMacho, err := macho.NewFile(exeFile)
|
|
if err != nil {
|
|
fail("(internal) Couldn't create macho, %v", err)
|
|
}
|
|
// Postpone dealing with output till input is known-good
|
|
|
|
// describe(&exeMacho.FileTOC)
|
|
|
|
// Offsets into __LINKEDIT:
|
|
//
|
|
// Command LC_SYMTAB =
|
|
// (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
|
|
// struct {
|
|
// StringTableIndex uint32
|
|
// Type, SectionIndex uint8
|
|
// Description uint16
|
|
// Value uint64
|
|
// }
|
|
//
|
|
// (2) string table offset and size. Strings are zero-byte terminated. First must be " ".
|
|
//
|
|
// Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
|
|
// IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
|
|
//
|
|
// Section __TEXT.__symbol_stub1.
|
|
// Offset and size (Reserved2) locate and describe a table for thios section.
|
|
// Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
|
|
// (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
|
|
//
|
|
// Section __DATA.__nl_symbol_ptr.
|
|
// Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
|
|
// Some of these symbols appear to be duplicates of other indirect symbols appearing early
|
|
//
|
|
// Section __DATA.__la_symbol_ptr.
|
|
// Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
|
|
// Some of these symbols appear to be duplicates of other indirect symbols appearing early
|
|
//
|
|
|
|
// Create a File for the output dwarf.
|
|
// Copy header, file type is MH_DSYM
|
|
// Copy the relevant load commands
|
|
|
|
// LoadCmdUuid
|
|
// Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
|
|
// Segment __PAGEZERO
|
|
// Segment __TEXT (zero the size, zero the offset of each section)
|
|
// Segment __DATA (zero the size, zero the offset of each section)
|
|
// Segment __LINKEDIT (contains the symbols and strings from Symtab)
|
|
// Segment __DWARF (uncompressed)
|
|
|
|
var uuid *macho.Uuid
|
|
for _, l := range exeMacho.Loads {
|
|
switch l.Command() {
|
|
case macho.LcUuid:
|
|
uuid = l.(*macho.Uuid)
|
|
}
|
|
}
|
|
|
|
// Ensure a given load is not nil
|
|
nonnilC := func(l macho.Load, s string) {
|
|
if l == nil {
|
|
fail("input file %s lacks load command %s", inputExe, s)
|
|
}
|
|
}
|
|
|
|
// Find a segment by name and ensure it is not nil
|
|
nonnilS := func(s string) *macho.Segment {
|
|
l := exeMacho.Segment(s)
|
|
if l == nil {
|
|
fail("input file %s lacks segment %s", inputExe, s)
|
|
}
|
|
return l
|
|
}
|
|
|
|
newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
|
|
|
|
symtab := exeMacho.Symtab
|
|
dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
|
|
nonnilC(symtab, "symtab")
|
|
nonnilC(dysymtab, "dysymtab")
|
|
text := nonnilS("__TEXT")
|
|
data := nonnilS("__DATA")
|
|
linkedit := nonnilS("__LINKEDIT")
|
|
pagezero := nonnilS("__PAGEZERO")
|
|
|
|
newtext := text.CopyZeroed()
|
|
newdata := data.CopyZeroed()
|
|
newsymtab := symtab.Copy()
|
|
|
|
// Linkedit segment contain symbols and strings;
|
|
// Symtab refers to offsets into linkedit.
|
|
// This next bit initializes newsymtab and sets up data structures for the linkedit segment
|
|
linkeditsyms := []macho.Nlist64{}
|
|
linkeditstrings := []string{}
|
|
|
|
// Linkedit will begin at the second page, i.e., offset is one page from beginning
|
|
// Symbols come first
|
|
linkeditsymbase := uint32(1) << pageAlign
|
|
|
|
// Strings come second, offset by the number of symbols times their size.
|
|
// Only those symbols from dysymtab.defsym are written into the debugging information.
|
|
linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
|
|
|
|
// The first two bytes of the strings are reserved for space, null (' ', \000)
|
|
linkeditstringcur := uint32(2)
|
|
|
|
newsymtab.Syms = newsymtab.Syms[:0]
|
|
newsymtab.Symoff = linkeditsymbase
|
|
newsymtab.Stroff = linkeditstringbase
|
|
newsymtab.Nsyms = dysymtab.Nextdefsym
|
|
for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
|
|
ii := i + dysymtab.Iextdefsym
|
|
oldsym := symtab.Syms[ii]
|
|
newsymtab.Syms = append(newsymtab.Syms, oldsym)
|
|
|
|
linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur),
|
|
Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
|
|
linkeditstringcur += uint32(len(oldsym.Name)) + 1
|
|
linkeditstrings = append(linkeditstrings, oldsym.Name)
|
|
}
|
|
newsymtab.Strsize = linkeditstringcur
|
|
|
|
exeNeedsUuid := uuid == nil
|
|
if exeNeedsUuid {
|
|
uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
|
|
uuid.Len = uuid.LoadSize(newtoc)
|
|
copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
|
|
uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
|
|
uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
|
|
}
|
|
newtoc.AddLoad(uuid)
|
|
|
|
// For the specified segment (assumed to be in exeMacho) make a copy of its
|
|
// sections with appropriate fields zeroed out, and append them to the
|
|
// currently-last segment in newtoc.
|
|
copyZOdSections := func(g *macho.Segment) {
|
|
for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
|
|
s := exeMacho.Sections[i].Copy()
|
|
s.Offset = 0
|
|
s.Reloff = 0
|
|
s.Nreloc = 0
|
|
newtoc.AddSection(s)
|
|
}
|
|
}
|
|
|
|
newtoc.AddLoad(newsymtab)
|
|
newtoc.AddSegment(pagezero)
|
|
newtoc.AddSegment(newtext)
|
|
copyZOdSections(text)
|
|
newtoc.AddSegment(newdata)
|
|
copyZOdSections(data)
|
|
|
|
newlinkedit := linkedit.Copy()
|
|
newlinkedit.Offset = uint64(linkeditsymbase)
|
|
newlinkedit.Filesz = uint64(linkeditstringcur)
|
|
newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
|
|
newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
|
|
// The rest should copy over fine.
|
|
newtoc.AddSegment(newlinkedit)
|
|
|
|
dwarf := nonnilS("__DWARF")
|
|
newdwarf := dwarf.CopyZeroed()
|
|
newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
|
|
newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
|
|
newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
|
|
newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
|
|
newtoc.AddSegment(newdwarf)
|
|
|
|
// Map out Dwarf sections (that is, this is section descriptors, not their contents).
|
|
offset := uint32(newdwarf.Offset)
|
|
for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
|
|
o := exeMacho.Sections[i]
|
|
s := o.Copy()
|
|
s.Offset = offset
|
|
us := o.UncompressedSize()
|
|
if s.Size < us {
|
|
s.Size = uint64(us)
|
|
s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
|
|
}
|
|
offset += uint32(us)
|
|
if strings.HasPrefix(s.Name, "__z") {
|
|
s.Name = "__" + s.Name[3:] // remove "z"
|
|
}
|
|
s.Reloff = 0
|
|
s.Nreloc = 0
|
|
newtoc.AddSection(s)
|
|
}
|
|
|
|
// Write segments/sections.
|
|
// Only dwarf and linkedit contain anything interesting.
|
|
|
|
// Memory map the output file to get the buffer directly.
|
|
outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
|
|
if len(os.Args) > 2 {
|
|
outDwarf = os.Args[2]
|
|
} else {
|
|
err := os.MkdirAll(outDwarf, 0755)
|
|
if err != nil {
|
|
fail("%v", err)
|
|
}
|
|
outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
|
|
}
|
|
dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
|
|
|
|
// (1) Linkedit segment
|
|
// Symbol table
|
|
offset = uint32(newlinkedit.Offset)
|
|
for i := range linkeditsyms {
|
|
if exeMacho.Magic == macho.Magic64 {
|
|
offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
|
|
} else {
|
|
offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
|
|
}
|
|
}
|
|
|
|
// Initial two bytes of string table, followed by actual zero-terminated strings.
|
|
buffer[linkeditstringbase] = ' '
|
|
buffer[linkeditstringbase+1] = 0
|
|
offset = linkeditstringbase + 2
|
|
for _, str := range linkeditstrings {
|
|
for i := 0; i < len(str); i++ {
|
|
buffer[offset] = str[i]
|
|
offset++
|
|
}
|
|
buffer[offset] = 0
|
|
offset++
|
|
}
|
|
|
|
// (2) DWARF segment
|
|
ioff := newdwarf.Firstsect - dwarf.Firstsect
|
|
for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
|
|
s := exeMacho.Sections[i]
|
|
j := i + ioff
|
|
s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
|
|
}
|
|
|
|
// Because "text" overlaps the header and the loads, write them afterwards, just in case.
|
|
// Write header.
|
|
newtoc.Put(buffer)
|
|
|
|
err = syscall.Munmap(buffer)
|
|
if err != nil {
|
|
fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
|
|
}
|
|
err = dwarfFile.Close()
|
|
if err != nil {
|
|
fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
|
|
}
|
|
|
|
if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
|
|
hdr := exeMacho.FileTOC.FileHeader
|
|
oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
|
|
hdr.NCommands += 1
|
|
hdr.SizeCommands += uuid.LoadSize(newtoc)
|
|
|
|
mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
|
|
if err != nil {
|
|
fail("Updating UUID in binary failed, %v", err)
|
|
}
|
|
exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
|
|
syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
|
|
if err != nil {
|
|
fail("Mmap of %s for UUID update failed, %v", inputExe, err)
|
|
}
|
|
_ = hdr.Put(exebuf, newtoc.ByteOrder)
|
|
_ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
|
|
err = syscall.Munmap(exebuf)
|
|
if err != nil {
|
|
fail("Munmap of %s for UUID update failed, %v", inputExe, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
|
|
// and returns the file descriptor and mapped buffer.
|
|
func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
|
|
dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
|
|
if err != nil {
|
|
fail("Open for mmap failed, %v", err)
|
|
}
|
|
err = os.Truncate(outDwarf, size)
|
|
if err != nil {
|
|
fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
|
|
}
|
|
buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
|
|
if err != nil {
|
|
fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
|
|
}
|
|
return dwarfFile, buffer
|
|
}
|
|
|
|
func describe(exem *macho.FileTOC) {
|
|
note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
|
|
for i, l := range exem.Loads {
|
|
if s, ok := l.(*macho.Segment); ok {
|
|
fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
|
|
s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
|
|
for j := uint32(0); j < s.Nsect; j++ {
|
|
c := exem.Sections[j+s.Firstsect]
|
|
fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
|
|
}
|
|
} else {
|
|
fmt.Printf("Load %d is %v\n", i, l)
|
|
}
|
|
}
|
|
if exem.SizeCommands != exem.LoadSize() {
|
|
fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
|
|
} else {
|
|
note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
|
|
}
|
|
note("File size is %d", exem.FileSize())
|
|
}
|
|
|
|
// contentuuid returns a UUID derived from (some of) the content of an executable.
|
|
// specifically included are the non-DWARF sections, specifically excluded are things
|
|
// that surely depend on the presence or absence of DWARF sections (e.g., section
|
|
// numbers, positions with file, number of load commands).
|
|
// (It was considered desirable if this was insensitive to the presence of the
|
|
// __DWARF segment, however because it is not last, it moves other segments,
|
|
// whose contents appear to contain file offset references.)
|
|
func contentuuid(exem *macho.FileTOC) []byte {
|
|
h := sha256.New()
|
|
for _, l := range exem.Loads {
|
|
if l.Command() == macho.LcUuid {
|
|
continue
|
|
}
|
|
if s, ok := l.(*macho.Segment); ok {
|
|
if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
|
|
continue
|
|
}
|
|
for j := uint32(0); j < s.Nsect; j++ {
|
|
c := exem.Sections[j+s.Firstsect]
|
|
io.Copy(h, c.Open())
|
|
}
|
|
} // Getting dependence on other load commands right is fiddly.
|
|
}
|
|
return h.Sum(nil)
|
|
}
|