mirror of
https://github.com/golang/go
synced 2024-11-23 17:00:07 -07:00
[dev.link] cmd/link: performance changes for relocsym
Revise the signature for "relocsym" to reflect the fact that many of its arguments are invariant: push the invariant args into a struct and pass the struct by reference. Add a facility for doing batch allocation of external relocations in relocsym, so that we don't wind up with wasted space due to the default "append" behavior. This produces a small speedup in linking kubelet: $ benchstat out.devlink.txt out.dodata.txt name old time/op new time/op delta RelinkKubelet 14.2s ± 2% 13.8s ± 2% -3.11% (p=0.000 n=19+19) RelinkKubelet-WithoutDebug 8.02s ± 3% 7.73s ± 3% -3.67% (p=0.000 n=20+20) Change-Id: I8bc94c366ae792a5b0f23697b8e0108443a7a748 Reviewed-on: https://go-review.googlesource.com/c/go/+/231138 Run-TryBot: Than McIntosh <thanm@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
1419445926
commit
ca290169ab
@ -150,12 +150,20 @@ func foldSubSymbolOffset(ldr *loader.Loader, s loader.Sym) (loader.Sym, int64) {
|
||||
//
|
||||
// This is a performance-critical function for the linker; be careful
|
||||
// to avoid introducing unnecessary allocations in the main loop.
|
||||
func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *ArchSyms, s loader.Sym, P []byte) {
|
||||
func (st *relocSymState) relocsym(s loader.Sym, P []byte) {
|
||||
ldr := st.ldr
|
||||
relocs := ldr.Relocs(s)
|
||||
if relocs.Count() == 0 {
|
||||
return
|
||||
}
|
||||
target := st.target
|
||||
syms := st.syms
|
||||
var extRelocs []loader.ExtReloc
|
||||
if target.IsExternal() {
|
||||
// preallocate a slice conservatively assuming that all
|
||||
// relocs will require an external reloc
|
||||
extRelocs = st.preallocExtRelocSlice(relocs.Count())
|
||||
}
|
||||
for ri := 0; ri < relocs.Count(); ri++ {
|
||||
r := relocs.At2(ri)
|
||||
off := r.Off()
|
||||
@ -168,7 +176,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
if rs != 0 {
|
||||
rname = ldr.SymName(rs)
|
||||
}
|
||||
err.Errorf(s, "invalid relocation %s: %d+%d not in [%d,%d)", rname, off, siz, 0, len(P))
|
||||
st.err.Errorf(s, "invalid relocation %s: %d+%d not in [%d,%d)", rname, off, siz, 0, len(P))
|
||||
continue
|
||||
}
|
||||
|
||||
@ -190,7 +198,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
err.errorUnresolved(ldr, s, rs)
|
||||
st.err.errorUnresolved(ldr, s, rs)
|
||||
continue
|
||||
}
|
||||
}
|
||||
@ -206,11 +214,11 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
// shared libraries, and Solaris, Darwin and AIX need it always
|
||||
if !target.IsSolaris() && !target.IsDarwin() && !target.IsAIX() && rs != 0 && rst == sym.SDYNIMPORT && !target.IsDynlinkingGo() && !ldr.AttrSubSymbol(rs) {
|
||||
if !(target.IsPPC64() && target.IsExternal() && ldr.SymName(rs) == ".TOC.") {
|
||||
err.Errorf(s, "unhandled relocation for %s (type %d (%s) rtype %d (%s))", ldr.SymName(rs), rst, rst, rt, sym.RelocName(target.Arch, rt))
|
||||
st.err.Errorf(s, "unhandled relocation for %s (type %d (%s) rtype %d (%s))", ldr.SymName(rs), rst, rst, rt, sym.RelocName(target.Arch, rt))
|
||||
}
|
||||
}
|
||||
if rs != 0 && rst != sym.STLSBSS && rt != objabi.R_WEAKADDROFF && rt != objabi.R_METHODOFF && !ldr.AttrReachable(rs) {
|
||||
err.Errorf(s, "unreachable sym in relocation: %s", ldr.SymName(rs))
|
||||
st.err.Errorf(s, "unreachable sym in relocation: %s", ldr.SymName(rs))
|
||||
}
|
||||
|
||||
var rr loader.ExtReloc
|
||||
@ -241,7 +249,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
default:
|
||||
switch siz {
|
||||
default:
|
||||
err.Errorf(s, "bad reloc size %#x for %s", uint32(siz), ldr.SymName(rs))
|
||||
st.err.Errorf(s, "bad reloc size %#x for %s", uint32(siz), ldr.SymName(rs))
|
||||
case 1:
|
||||
o = int64(P[off])
|
||||
case 2:
|
||||
@ -269,7 +277,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
if ok {
|
||||
o = out
|
||||
} else {
|
||||
err.Errorf(s, "unknown reloc to %v: %d (%s)", ldr.SymName(rs), rt, sym.RelocName(target.Arch, rt))
|
||||
st.err.Errorf(s, "unknown reloc to %v: %d (%s)", ldr.SymName(rs), rt, sym.RelocName(target.Arch, rt))
|
||||
}
|
||||
case objabi.R_TLS_LE:
|
||||
if target.IsExternal() && target.IsElf() {
|
||||
@ -337,7 +345,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
rr.Xadd = rr.Add + off
|
||||
rst := ldr.SymType(rs)
|
||||
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && rst != sym.SUNDEFEXT && ldr.SymSect(rs) == nil {
|
||||
err.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||
st.err.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||
}
|
||||
rr.Xsym = rs
|
||||
|
||||
@ -355,7 +363,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
} else if target.IsAIX() {
|
||||
o = ldr.SymValue(rr.Sym) + rr.Add
|
||||
} else {
|
||||
err.Errorf(s, "unhandled pcrel relocation to %s on %v", ldr.SymName(rs), target.HeadType)
|
||||
st.err.Errorf(s, "unhandled pcrel relocation to %s on %v", ldr.SymName(rs), target.HeadType)
|
||||
}
|
||||
|
||||
break
|
||||
@ -385,12 +393,12 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
// Instead of special casing only amd64, we treat this as an error on all
|
||||
// 64-bit architectures so as to be future-proof.
|
||||
if int32(o) < 0 && target.Arch.PtrSize > 4 && siz == 4 {
|
||||
err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x (%#x + %#x)", ldr.SymName(rs), uint64(o), ldr.SymValue(rs), r.Add())
|
||||
st.err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x (%#x + %#x)", ldr.SymName(rs), uint64(o), ldr.SymValue(rs), r.Add())
|
||||
errorexit()
|
||||
}
|
||||
case objabi.R_DWARFSECREF:
|
||||
if ldr.SymSect(rs) == nil {
|
||||
err.Errorf(s, "missing DWARF section for relocation target %s", ldr.SymName(rs))
|
||||
st.err.Errorf(s, "missing DWARF section for relocation target %s", ldr.SymName(rs))
|
||||
}
|
||||
|
||||
if target.IsExternal() {
|
||||
@ -478,7 +486,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
rr.Xadd -= int64(rr.Siz) // relative to address after the relocated chunk
|
||||
rst := ldr.SymType(rs)
|
||||
if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil {
|
||||
err.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||
st.err.Errorf(s, "missing section for relocation target %s", ldr.SymName(rs))
|
||||
}
|
||||
rr.Xsym = rs
|
||||
|
||||
@ -508,7 +516,7 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
// bytes as the base. Compensate by skewing the addend.
|
||||
o += int64(rr.Siz)
|
||||
} else {
|
||||
err.Errorf(s, "unhandled pcrel relocation to %s on %v", ldr.SymName(rs), target.HeadType)
|
||||
st.err.Errorf(s, "unhandled pcrel relocation to %s on %v", ldr.SymName(rs), target.HeadType)
|
||||
}
|
||||
|
||||
break
|
||||
@ -525,10 +533,10 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
|
||||
case objabi.R_XCOFFREF:
|
||||
if !target.IsAIX() {
|
||||
err.Errorf(s, "find XCOFF R_REF on non-XCOFF files")
|
||||
st.err.Errorf(s, "find XCOFF R_REF on non-XCOFF files")
|
||||
}
|
||||
if !target.IsExternal() {
|
||||
err.Errorf(s, "find XCOFF R_REF with internal linking")
|
||||
st.err.Errorf(s, "find XCOFF R_REF with internal linking")
|
||||
}
|
||||
needExtReloc = true
|
||||
rr.Xsym = rr.Sym
|
||||
@ -558,22 +566,22 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
|
||||
switch siz {
|
||||
default:
|
||||
err.Errorf(s, "bad reloc size %#x for %s", uint32(siz), ldr.SymName(rs))
|
||||
st.err.Errorf(s, "bad reloc size %#x for %s", uint32(siz), ldr.SymName(rs))
|
||||
case 1:
|
||||
P[off] = byte(int8(o))
|
||||
case 2:
|
||||
if o != int64(int16(o)) {
|
||||
err.Errorf(s, "relocation address for %s is too big: %#x", ldr.SymName(rs), o)
|
||||
st.err.Errorf(s, "relocation address for %s is too big: %#x", ldr.SymName(rs), o)
|
||||
}
|
||||
target.Arch.ByteOrder.PutUint16(P[off:], uint16(o))
|
||||
case 4:
|
||||
if rt == objabi.R_PCREL || rt == objabi.R_CALL {
|
||||
if o != int64(int32(o)) {
|
||||
err.Errorf(s, "pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), o)
|
||||
st.err.Errorf(s, "pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), o)
|
||||
}
|
||||
} else {
|
||||
if o != int64(int32(o)) && o != int64(uint32(o)) {
|
||||
err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), uint64(o))
|
||||
st.err.Errorf(s, "non-pc-relative relocation address for %s is too big: %#x", ldr.SymName(rs), uint64(o))
|
||||
}
|
||||
}
|
||||
target.Arch.ByteOrder.PutUint32(P[off:], uint32(o))
|
||||
@ -586,38 +594,95 @@ func relocsym(target *Target, ldr *loader.Loader, err *ErrorReporter, syms *Arch
|
||||
}
|
||||
}
|
||||
if len(extRelocs) != 0 {
|
||||
st.finalizeExtRelocSlice(extRelocs)
|
||||
ldr.SetExtRelocs(s, extRelocs)
|
||||
}
|
||||
}
|
||||
|
||||
const extRelocSlabSize = 2048
|
||||
|
||||
// relocSymState hold state information needed when making a series of
|
||||
// successive calls to relocsym(). The items here are invariant
|
||||
// (meaning that they are set up once initially and then don't change
|
||||
// during the execution of relocsym), with the exception of a slice
|
||||
// used to facilitate batch allocation of external relocations. Calls
|
||||
// to relocsym happen in parallel; the assumption is that each
|
||||
// parallel thread will have its own state object.
|
||||
type relocSymState struct {
|
||||
target *Target
|
||||
ldr *loader.Loader
|
||||
err *ErrorReporter
|
||||
syms *ArchSyms
|
||||
batch []loader.ExtReloc
|
||||
}
|
||||
|
||||
// preallocExtRelocs returns a subslice from an internally allocated
|
||||
// slab owned by the state object. Client requests a slice of size
|
||||
// 'sz', however it may be that fewer relocs are needed; the
|
||||
// assumption is that the final size is set in a [required] subsequent
|
||||
// call to 'finalizeExtRelocSlice'.
|
||||
func (st *relocSymState) preallocExtRelocSlice(sz int) []loader.ExtReloc {
|
||||
if len(st.batch) < sz {
|
||||
slabSize := extRelocSlabSize
|
||||
if sz > extRelocSlabSize {
|
||||
slabSize = sz
|
||||
}
|
||||
st.batch = make([]loader.ExtReloc, slabSize)
|
||||
}
|
||||
rval := st.batch[:sz:sz]
|
||||
return rval[:0]
|
||||
}
|
||||
|
||||
// finalizeExtRelocSlice takes a slice returned from preallocExtRelocSlice,
|
||||
// from which it determines how many of the pre-allocated relocs were
|
||||
// actually needed; it then carves that number off the batch slice.
|
||||
func (st *relocSymState) finalizeExtRelocSlice(finalsl []loader.ExtReloc) {
|
||||
if &st.batch[0] != &finalsl[0] {
|
||||
panic("preallocExtRelocSlice size invariant violation")
|
||||
}
|
||||
st.batch = st.batch[len(finalsl):]
|
||||
}
|
||||
|
||||
// makeRelocSymState creates a relocSymState container object to
|
||||
// pass to relocsym(). If relocsym() calls happen in parallel,
|
||||
// each parallel thread should have its own state object.
|
||||
func (ctxt *Link) makeRelocSymState() *relocSymState {
|
||||
return &relocSymState{
|
||||
target: &ctxt.Target,
|
||||
ldr: ctxt.loader,
|
||||
err: &ctxt.ErrorReporter,
|
||||
syms: &ctxt.ArchSyms,
|
||||
}
|
||||
}
|
||||
|
||||
func (ctxt *Link) reloc() {
|
||||
var wg sync.WaitGroup
|
||||
target := &ctxt.Target
|
||||
ldr := ctxt.loader
|
||||
reporter := &ctxt.ErrorReporter
|
||||
syms := &ctxt.ArchSyms
|
||||
if ctxt.IsExternal() {
|
||||
ldr.InitExtRelocs()
|
||||
}
|
||||
wg.Add(3)
|
||||
go func() {
|
||||
if !ctxt.IsWasm() { // On Wasm, text relocations are applied in Asmb2.
|
||||
st := ctxt.makeRelocSymState()
|
||||
for _, s := range ctxt.Textp2 {
|
||||
relocsym(target, ldr, reporter, syms, s, ldr.OutData(s))
|
||||
st.relocsym(s, ldr.OutData(s))
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
}()
|
||||
go func() {
|
||||
st := ctxt.makeRelocSymState()
|
||||
for _, s := range ctxt.datap2 {
|
||||
relocsym(target, ldr, reporter, syms, s, ldr.OutData(s))
|
||||
st.relocsym(s, ldr.OutData(s))
|
||||
}
|
||||
wg.Done()
|
||||
}()
|
||||
go func() {
|
||||
st := ctxt.makeRelocSymState()
|
||||
for _, si := range dwarfp2 {
|
||||
for _, s := range si.syms {
|
||||
relocsym(target, ldr, reporter, syms, s, ldr.OutData(s))
|
||||
st.relocsym(s, ldr.OutData(s))
|
||||
}
|
||||
}
|
||||
wg.Done()
|
||||
@ -2538,9 +2603,7 @@ func compressSyms(ctxt *Link, syms []loader.Sym) []byte {
|
||||
if err != nil {
|
||||
log.Fatalf("NewWriterLevel failed: %s", err)
|
||||
}
|
||||
target := &ctxt.Target
|
||||
reporter := &ctxt.ErrorReporter
|
||||
archSyms := &ctxt.ArchSyms
|
||||
st := ctxt.makeRelocSymState()
|
||||
for _, s := range syms {
|
||||
// Symbol data may be read-only. Apply relocations in a
|
||||
// temporary buffer, and immediately write it out.
|
||||
@ -2550,7 +2613,7 @@ func compressSyms(ctxt *Link, syms []loader.Sym) []byte {
|
||||
relocbuf = append(relocbuf[:0], P...)
|
||||
P = relocbuf
|
||||
}
|
||||
relocsym(target, ldr, reporter, archSyms, s, P)
|
||||
st.relocsym(s, P)
|
||||
if _, err := z.Write(P); err != nil {
|
||||
log.Fatalf("compression failed: %s", err)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user