mirror of
https://github.com/golang/go
synced 2024-11-23 05:50:05 -07:00
all: generate NOTOC shared code on power10/PPC64/linux
An explicit TOC pointer is not needed when building with GOPPC64=power10 on linux/PPC64 as all addressing is PC relative. Apply changes to the compiler, assembler, and linker to remove R2/TOC maintenance in these build configurations. This results in noticeably smaller PIC binaries. For example the size (in bytes) of the k8s binaries before and with this patch: GOFLAGS="-buildmode=pie" \ FORCE_HOST_GO=y \ GOPPC64=power10 \ CGO_CFLAGS="-mcpu=power10 -O2 -g" \ make all apiextensions-apiserver 66060288 64487424 -1572864 -2.38% e2e_node.test 163520856 159850760 -3670096 -2.24% e2e.test 178167304 174890432 -3276872 -1.83% ginkgo 11010048 10747904 -262144 -2.38% go-runner 2162688 2162688 0 0% k8s-tests 170182216 166970880 -3211336 -1.88% kubeadm 52625408 51314688 -1310720 -2.49% kube-aggregator 62849024 61341696 -1507328 -2.39% kube-apiserver 147783680 144375808 -3407872 -2.30% kube-controller-manager 131137536 127991808 -3145728 -2.39% kubectl 53608448 52363264 -1245184 -2.32% kubectl-convert 52625408 51314688 -1310720 -2.49% kubelet 120913920 118095872 -2818048 -2.33% kube-log-runner 1900544 1835008 -65536 -3.44% kubemark 119078912 116326400 -2752512 -2.31% kube-proxy 58392576 56885248 -1507328 -2.58% kube-scheduler 60751872 59244544 -1507328 -2.48% mounter 1835008 1769472 -65536 -3.57% watch-termination 38076416 37158912 -917504 -2.40% And text size changes: apiextensions-apiserver 30243288 28654116 -1589172 -5.25% e2e_node.test 71132064 67520288 -3611776 -5.07% e2e.test 61843984 58635088 -3208896 -5.18% ginkgo 4975916 4769304 -206612 -4.15% go-runner 896532 858400 -38132 -4.25% k8s-tests 60925792 57752032 -3173760 -5.20% kubeadm 24643240 23404100 -1239140 -5.02% kube-aggregator 28688060 27160976 -1527084 -5.32% kube-apiserver 65627332 62259460 -3367872 -5.13% kube-controller-manager 56773844 53706532 -3067312 -5.40% kubectl 24344276 23080640 -1263636 -5.19% kubectl-convert 23733764 22521768 -1211996 -5.10% kubelet 52494580 49720340 -2774240 -5.28% kube-log-runner 787128 753232 -33896 -4.30% kubemark 51576580 48837380 -2739200 -5.31% kube-proxy 26541092 25124080 -1417012 -5.33% kube-scheduler 27448512 25976172 -1472340 -5.36% mounter 744100 712628 -31472 -4.22% watch-termination 18047276 17139912 -907364 -5.02% Change-Id: Ib4872823b06f93861e46a00679b5d4e5e30b538a Reviewed-on: https://go-review.googlesource.com/c/go/+/495416 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Dmitri Shuralyov <dmitshur@google.com> Run-TryBot: Paul Murphy <murp@ibm.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Heschi Kreinick <heschi@google.com>
This commit is contained in:
parent
1524bd1c78
commit
29d5272da8
@ -1869,7 +1869,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
pp.To.Reg = ppc64.REG_LR
|
||||
pp.SetFrom3Const(1)
|
||||
|
||||
if base.Ctxt.Flag_shared {
|
||||
if ppc64.NeedTOCpointer(base.Ctxt) {
|
||||
// When compiling Go into PIC, the function we just
|
||||
// called via pointer might have been implemented in
|
||||
// a separate module and so overwritten the TOC
|
||||
|
@ -1071,7 +1071,7 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
|
||||
}
|
||||
|
||||
case obj.TYPE_BRANCH:
|
||||
if a.Sym != nil && c.ctxt.Flag_dynlink {
|
||||
if a.Sym != nil && c.ctxt.Flag_dynlink && !pfxEnabled {
|
||||
return C_LBRAPIC
|
||||
}
|
||||
return C_SBRA
|
||||
@ -1275,6 +1275,12 @@ func opset(a, b0 obj.As) {
|
||||
oprange[a&obj.AMask] = oprange[b0]
|
||||
}
|
||||
|
||||
// Determine if the build configuration requires a TOC pointer.
|
||||
// It is assumed this always called after buildop.
|
||||
func NeedTOCpointer(ctxt *obj.Link) bool {
|
||||
return !pfxEnabled && ctxt.Flag_shared
|
||||
}
|
||||
|
||||
// Build the opcode table
|
||||
func buildop(ctxt *obj.Link) {
|
||||
// Limit PC-relative prefix instruction usage to supported and tested targets.
|
||||
|
@ -643,8 +643,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
|
||||
q = p
|
||||
|
||||
if c.ctxt.Flag_shared && c.cursym.Name != "runtime.duffzero" && c.cursym.Name != "runtime.duffcopy" {
|
||||
// When compiling Go into PIC, all functions must start
|
||||
if NeedTOCpointer(c.ctxt) && c.cursym.Name != "runtime.duffzero" && c.cursym.Name != "runtime.duffcopy" {
|
||||
// When compiling Go into PIC, without PCrel support, all functions must start
|
||||
// with instructions to load the TOC pointer into r2:
|
||||
//
|
||||
// addis r2, r12, .TOC.-func@ha
|
||||
@ -763,7 +763,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
|
||||
break
|
||||
}
|
||||
|
||||
if c.ctxt.Flag_shared {
|
||||
if NeedTOCpointer(c.ctxt) {
|
||||
q = obj.Appendp(q, c.newprog)
|
||||
q.As = AMOVD
|
||||
q.Pos = p.Pos
|
||||
@ -1289,7 +1289,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
|
||||
morestacksym = c.ctxt.Lookup("runtime.morestack")
|
||||
}
|
||||
|
||||
if c.ctxt.Flag_shared {
|
||||
if NeedTOCpointer(c.ctxt) {
|
||||
// In PPC64 PIC code, R2 is used as TOC pointer derived from R12
|
||||
// which is the address of function entry point when entering
|
||||
// the function. We need to preserve R2 across call to morestack.
|
||||
@ -1352,7 +1352,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
|
||||
p.To.Sym = morestacksym
|
||||
}
|
||||
|
||||
if c.ctxt.Flag_shared {
|
||||
if NeedTOCpointer(c.ctxt) {
|
||||
// MOVD 8(SP), R2
|
||||
p = obj.Appendp(p, c.newprog)
|
||||
p.As = AMOVD
|
||||
|
@ -137,14 +137,17 @@ func putelfsym(ctxt *Link, x loader.Sym, typ elf.SymType, curbind elf.SymBind) {
|
||||
// externally linking, I don't think this makes a lot of sense.
|
||||
other = int(elf.STV_HIDDEN)
|
||||
}
|
||||
if ctxt.IsPPC64() && typ == elf.STT_FUNC && ldr.AttrShared(x) && ldr.SymName(x) != "runtime.duffzero" && ldr.SymName(x) != "runtime.duffcopy" {
|
||||
// On ppc64 the top three bits of the st_other field indicate how
|
||||
// many instructions separate the global and local entry points. In
|
||||
// our case it is two instructions, indicated by the value 3.
|
||||
// The conditions here match those in preprocess in
|
||||
// cmd/internal/obj/ppc64/obj9.go, which is where the
|
||||
// instructions are inserted.
|
||||
other |= 3 << 5
|
||||
if ctxt.IsPPC64() && typ == elf.STT_FUNC && ldr.AttrShared(x) {
|
||||
// On ppc64 the top three bits of the st_other field indicate how many
|
||||
// bytes separate the global and local entry points. For non-PCrel shared
|
||||
// symbols this is always 8 bytes except for some special functions.
|
||||
hasPCrel := buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
|
||||
|
||||
// This should match the preprocessing behavior in cmd/internal/obj/ppc64/obj9.go
|
||||
// where the distinct global entry is inserted.
|
||||
if !hasPCrel && ldr.SymName(x) != "runtime.duffzero" && ldr.SymName(x) != "runtime.duffcopy" {
|
||||
other |= 3 << 5
|
||||
}
|
||||
}
|
||||
|
||||
// When dynamically linking, we create Symbols by reading the names from
|
||||
|
@ -221,10 +221,10 @@ func genpltstub(ctxt *ld.Link, ldr *loader.Loader, r loader.Reloc, ri int, s loa
|
||||
// An R_PPC64_REL24_NOTOC relocation does not use or maintain
|
||||
// a TOC pointer, and almost always implies a Power10 target.
|
||||
//
|
||||
// For dynamic calls made from a Go object, the shared attribute
|
||||
// indicates a PIC symbol, which requires a TOC pointer be
|
||||
// maintained. Otherwise, a simpler non-PIC stub suffices.
|
||||
if (r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24)) || (!ldr.AttrExternal(s) && ldr.AttrShared(s)) {
|
||||
// For dynamic calls made from a Go caller, a TOC relative stub is
|
||||
// always needed when a TOC pointer is maintained (specifically, if
|
||||
// the Go caller is PIC, and cannot use PCrel instructions).
|
||||
if (r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24)) || (!ldr.AttrExternal(s) && ldr.AttrShared(s) && !hasPCrel) {
|
||||
stubTypeStr = "_tocrel"
|
||||
stubType = 1
|
||||
} else {
|
||||
@ -318,7 +318,7 @@ func genstubs(ctxt *ld.Link, ldr *loader.Loader) {
|
||||
|
||||
case sym.STEXT:
|
||||
targ := r.Sym()
|
||||
if (ldr.AttrExternal(targ) && ldr.SymLocalentry(targ) <= 1) || (!ldr.AttrExternal(targ) && !ldr.AttrShared(targ)) {
|
||||
if (ldr.AttrExternal(targ) && ldr.SymLocalentry(targ) <= 1) || (!ldr.AttrExternal(targ) && (!ldr.AttrShared(targ) || hasPCrel)) {
|
||||
// This is NOTOC to NOTOC call (st_other is 0 or 1). No call stub is needed.
|
||||
} else {
|
||||
// This is a NOTOC to TOC function. Generate a calling stub.
|
||||
@ -387,10 +387,12 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
|
||||
// runtime.addmoduledata(local.moduledata)
|
||||
// }
|
||||
|
||||
// Regenerate TOC from R12 (the address of this function).
|
||||
sz := initfunc.AddSymRef(ctxt.Arch, ctxt.DotTOC[0], 0, objabi.R_ADDRPOWER_PCREL, 8)
|
||||
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c4c0000) // addis r2, r12, .TOC.-func@ha
|
||||
initfunc.SetUint32(ctxt.Arch, sz-4, 0x38420000) // addi r2, r2, .TOC.-func@l
|
||||
if !hasPCrel {
|
||||
// Regenerate TOC from R12 (the address of this function).
|
||||
sz := initfunc.AddSymRef(ctxt.Arch, ctxt.DotTOC[0], 0, objabi.R_ADDRPOWER_PCREL, 8)
|
||||
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c4c0000) // addis r2, r12, .TOC.-func@ha
|
||||
initfunc.SetUint32(ctxt.Arch, sz-4, 0x38420000) // addi r2, r2, .TOC.-func@l
|
||||
}
|
||||
|
||||
// This is Go ABI. Stack a frame and save LR.
|
||||
o(OP_MFLR_R0) // mflr r0
|
||||
@ -407,11 +409,11 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
|
||||
}
|
||||
|
||||
if !hasPCrel {
|
||||
sz = initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT, 8)
|
||||
sz := initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT, 8)
|
||||
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c620000) // addis r3, r2, local.moduledata@got@ha
|
||||
initfunc.SetUint32(ctxt.Arch, sz-4, 0xe8630000) // ld r3, local.moduledata@got@l(r3)
|
||||
} else {
|
||||
sz = initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT_PCREL34, 8)
|
||||
sz := initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT_PCREL34, 8)
|
||||
// Note, this is prefixed instruction. It must not cross a 64B boundary.
|
||||
// It is doubleworld aligned here, so it will never cross (this function is 16B aligned, minimum).
|
||||
initfunc.SetUint32(ctxt.Arch, sz-8, OP_PLD_PFX_PCREL)
|
||||
@ -419,7 +421,7 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
|
||||
}
|
||||
|
||||
// Call runtime.addmoduledata
|
||||
sz = initfunc.AddSymRef(ctxt.Arch, addmoduledata, 0, objabi.R_CALLPOWER, 4)
|
||||
sz := initfunc.AddSymRef(ctxt.Arch, addmoduledata, 0, objabi.R_CALLPOWER, 4)
|
||||
initfunc.SetUint32(ctxt.Arch, sz-4, OP_BL) // bl runtime.addmoduledata
|
||||
o(OP_NOP) // nop (for TOC restore)
|
||||
|
||||
@ -995,7 +997,12 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
|
||||
if r.Size != 4 {
|
||||
return false
|
||||
}
|
||||
out.Write64(uint64(elf.R_PPC64_REL24) | uint64(elfsym)<<32)
|
||||
if !hasPCrel {
|
||||
out.Write64(uint64(elf.R_PPC64_REL24) | uint64(elfsym)<<32)
|
||||
} else {
|
||||
// TOC is not used in PCrel compiled Go code.
|
||||
out.Write64(uint64(elf.R_PPC64_REL24_NOTOC) | uint64(elfsym)<<32)
|
||||
}
|
||||
|
||||
}
|
||||
out.Write64(uint64(r.Xadd))
|
||||
@ -1441,10 +1448,10 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
|
||||
|
||||
tgtName := ldr.SymName(rs)
|
||||
|
||||
// If we are linking PIE or shared code, all golang generated object files have an extra 2 instruction prologue
|
||||
// If we are linking PIE or shared code, non-PCrel golang generated object files have an extra 2 instruction prologue
|
||||
// to regenerate the TOC pointer from R12. The exception are two special case functions tested below. Note,
|
||||
// local call offsets for externally generated objects are accounted for when converting into golang relocs.
|
||||
if !ldr.AttrExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
|
||||
if !hasPCrel && !ldr.AttrExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
|
||||
// Furthermore, only apply the offset if the target looks like the start of a function call.
|
||||
if r.Add() == 0 && ldr.SymType(rs) == sym.STEXT {
|
||||
t += 8
|
||||
|
Loading…
Reference in New Issue
Block a user