1
0
mirror of https://github.com/golang/go synced 2024-11-23 05:50:05 -07:00

all: generate NOTOC shared code on power10/PPC64/linux

An explicit TOC pointer is not needed when building with GOPPC64=power10
on linux/PPC64 as all addressing is PC relative.

Apply changes to the compiler, assembler, and linker to remove R2/TOC
maintenance in these build configurations.

This results in noticeably smaller PIC binaries. For example, the sizes
(in bytes) of the k8s binaries before and after this patch:

	GOFLAGS="-buildmode=pie" \
	FORCE_HOST_GO=y \
	GOPPC64=power10 \
	CGO_CFLAGS="-mcpu=power10 -O2 -g" \
	make all

         apiextensions-apiserver   66060288   64487424   -1572864  -2.38%
                   e2e_node.test  163520856  159850760   -3670096  -2.24%
                        e2e.test  178167304  174890432   -3276872  -1.83%
                          ginkgo   11010048   10747904    -262144  -2.38%
                       go-runner    2162688    2162688          0      0%
                       k8s-tests  170182216  166970880   -3211336  -1.88%
                         kubeadm   52625408   51314688   -1310720  -2.49%
                 kube-aggregator   62849024   61341696   -1507328  -2.39%
                  kube-apiserver  147783680  144375808   -3407872  -2.30%
         kube-controller-manager  131137536  127991808   -3145728  -2.39%
                         kubectl   53608448   52363264   -1245184  -2.32%
                 kubectl-convert   52625408   51314688   -1310720  -2.49%
                         kubelet  120913920  118095872   -2818048  -2.33%
                 kube-log-runner    1900544    1835008     -65536  -3.44%
                        kubemark  119078912  116326400   -2752512  -2.31%
                      kube-proxy   58392576   56885248   -1507328  -2.58%
                  kube-scheduler   60751872   59244544   -1507328  -2.48%
                         mounter    1835008    1769472     -65536  -3.57%
               watch-termination   38076416   37158912    -917504  -2.40%

And text size changes:

         apiextensions-apiserver   30243288   28654116   -1589172  -5.25%
                   e2e_node.test   71132064   67520288   -3611776  -5.07%
                        e2e.test   61843984   58635088   -3208896  -5.18%
                          ginkgo    4975916    4769304    -206612  -4.15%
                       go-runner     896532     858400     -38132  -4.25%
                       k8s-tests   60925792   57752032   -3173760  -5.20%
                         kubeadm   24643240   23404100   -1239140  -5.02%
                 kube-aggregator   28688060   27160976   -1527084  -5.32%
                  kube-apiserver   65627332   62259460   -3367872  -5.13%
         kube-controller-manager   56773844   53706532   -3067312  -5.40%
                         kubectl   24344276   23080640   -1263636  -5.19%
                 kubectl-convert   23733764   22521768   -1211996  -5.10%
                         kubelet   52494580   49720340   -2774240  -5.28%
                 kube-log-runner     787128     753232     -33896  -4.30%
                        kubemark   51576580   48837380   -2739200  -5.31%
                      kube-proxy   26541092   25124080   -1417012  -5.33%
                  kube-scheduler   27448512   25976172   -1472340  -5.36%
                         mounter     744100     712628     -31472  -4.22%
               watch-termination   18047276   17139912    -907364  -5.02%

Change-Id: Ib4872823b06f93861e46a00679b5d4e5e30b538a
Reviewed-on: https://go-review.googlesource.com/c/go/+/495416
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
This commit is contained in:
Paul E. Murphy 2023-01-10 15:18:04 -06:00 committed by Paul Murphy
parent 1524bd1c78
commit 29d5272da8
5 changed files with 46 additions and 30 deletions

View File

@ -1869,7 +1869,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
pp.To.Reg = ppc64.REG_LR
pp.SetFrom3Const(1)
if base.Ctxt.Flag_shared {
if ppc64.NeedTOCpointer(base.Ctxt) {
// When compiling Go into PIC, the function we just
// called via pointer might have been implemented in
// a separate module and so overwritten the TOC

View File

@ -1071,7 +1071,7 @@ func (c *ctxt9) aclass(a *obj.Addr) int {
}
case obj.TYPE_BRANCH:
if a.Sym != nil && c.ctxt.Flag_dynlink {
if a.Sym != nil && c.ctxt.Flag_dynlink && !pfxEnabled {
return C_LBRAPIC
}
return C_SBRA
@ -1275,6 +1275,12 @@ func opset(a, b0 obj.As) {
oprange[a&obj.AMask] = oprange[b0]
}
// NeedTOCpointer reports whether the build configuration requires a TOC pointer.
// It returns true only when ctxt.Flag_shared is set and pfxEnabled is false.
// It is assumed this is always called after buildop.
func NeedTOCpointer(ctxt *obj.Link) bool {
return !pfxEnabled && ctxt.Flag_shared
}
// Build the opcode table
func buildop(ctxt *obj.Link) {
// Limit PC-relative prefix instruction usage to supported and tested targets.

View File

@ -643,8 +643,8 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
q = p
if c.ctxt.Flag_shared && c.cursym.Name != "runtime.duffzero" && c.cursym.Name != "runtime.duffcopy" {
// When compiling Go into PIC, all functions must start
if NeedTOCpointer(c.ctxt) && c.cursym.Name != "runtime.duffzero" && c.cursym.Name != "runtime.duffcopy" {
// When compiling Go into PIC, without PCrel support, all functions must start
// with instructions to load the TOC pointer into r2:
//
// addis r2, r12, .TOC.-func@ha
@ -763,7 +763,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
break
}
if c.ctxt.Flag_shared {
if NeedTOCpointer(c.ctxt) {
q = obj.Appendp(q, c.newprog)
q.As = AMOVD
q.Pos = p.Pos
@ -1289,7 +1289,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
morestacksym = c.ctxt.Lookup("runtime.morestack")
}
if c.ctxt.Flag_shared {
if NeedTOCpointer(c.ctxt) {
// In PPC64 PIC code, R2 is used as TOC pointer derived from R12
// which is the address of function entry point when entering
// the function. We need to preserve R2 across call to morestack.
@ -1352,7 +1352,7 @@ func (c *ctxt9) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
p.To.Sym = morestacksym
}
if c.ctxt.Flag_shared {
if NeedTOCpointer(c.ctxt) {
// MOVD 8(SP), R2
p = obj.Appendp(p, c.newprog)
p.As = AMOVD

View File

@ -137,14 +137,17 @@ func putelfsym(ctxt *Link, x loader.Sym, typ elf.SymType, curbind elf.SymBind) {
// externally linking, I don't think this makes a lot of sense.
other = int(elf.STV_HIDDEN)
}
if ctxt.IsPPC64() && typ == elf.STT_FUNC && ldr.AttrShared(x) && ldr.SymName(x) != "runtime.duffzero" && ldr.SymName(x) != "runtime.duffcopy" {
// On ppc64 the top three bits of the st_other field indicate how
// many instructions separate the global and local entry points. In
// our case it is two instructions, indicated by the value 3.
// The conditions here match those in preprocess in
// cmd/internal/obj/ppc64/obj9.go, which is where the
// instructions are inserted.
other |= 3 << 5
if ctxt.IsPPC64() && typ == elf.STT_FUNC && ldr.AttrShared(x) {
// On ppc64 the top three bits of the st_other field indicate how many
// bytes separate the global and local entry points. For non-PCrel shared
// symbols this is always 8 bytes except for some special functions.
hasPCrel := buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
// This should match the preprocessing behavior in cmd/internal/obj/ppc64/obj9.go
// where the distinct global entry is inserted.
if !hasPCrel && ldr.SymName(x) != "runtime.duffzero" && ldr.SymName(x) != "runtime.duffcopy" {
other |= 3 << 5
}
}
// When dynamically linking, we create Symbols by reading the names from

View File

@ -221,10 +221,10 @@ func genpltstub(ctxt *ld.Link, ldr *loader.Loader, r loader.Reloc, ri int, s loa
// An R_PPC64_REL24_NOTOC relocation does not use or maintain
// a TOC pointer, and almost always implies a Power10 target.
//
// For dynamic calls made from a Go object, the shared attribute
// indicates a PIC symbol, which requires a TOC pointer be
// maintained. Otherwise, a simpler non-PIC stub suffices.
if (r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24)) || (!ldr.AttrExternal(s) && ldr.AttrShared(s)) {
// For dynamic calls made from a Go caller, a TOC relative stub is
// always needed when a TOC pointer is maintained (specifically, if
// the Go caller is PIC, and cannot use PCrel instructions).
if (r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24)) || (!ldr.AttrExternal(s) && ldr.AttrShared(s) && !hasPCrel) {
stubTypeStr = "_tocrel"
stubType = 1
} else {
@ -318,7 +318,7 @@ func genstubs(ctxt *ld.Link, ldr *loader.Loader) {
case sym.STEXT:
targ := r.Sym()
if (ldr.AttrExternal(targ) && ldr.SymLocalentry(targ) <= 1) || (!ldr.AttrExternal(targ) && !ldr.AttrShared(targ)) {
if (ldr.AttrExternal(targ) && ldr.SymLocalentry(targ) <= 1) || (!ldr.AttrExternal(targ) && (!ldr.AttrShared(targ) || hasPCrel)) {
// This is NOTOC to NOTOC call (st_other is 0 or 1). No call stub is needed.
} else {
// This is a NOTOC to TOC function. Generate a calling stub.
@ -387,10 +387,12 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
// runtime.addmoduledata(local.moduledata)
// }
// Regenerate TOC from R12 (the address of this function).
sz := initfunc.AddSymRef(ctxt.Arch, ctxt.DotTOC[0], 0, objabi.R_ADDRPOWER_PCREL, 8)
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c4c0000) // addis r2, r12, .TOC.-func@ha
initfunc.SetUint32(ctxt.Arch, sz-4, 0x38420000) // addi r2, r2, .TOC.-func@l
if !hasPCrel {
// Regenerate TOC from R12 (the address of this function).
sz := initfunc.AddSymRef(ctxt.Arch, ctxt.DotTOC[0], 0, objabi.R_ADDRPOWER_PCREL, 8)
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c4c0000) // addis r2, r12, .TOC.-func@ha
initfunc.SetUint32(ctxt.Arch, sz-4, 0x38420000) // addi r2, r2, .TOC.-func@l
}
// This is Go ABI. Stack a frame and save LR.
o(OP_MFLR_R0) // mflr r0
@ -407,11 +409,11 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
}
if !hasPCrel {
sz = initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT, 8)
sz := initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT, 8)
initfunc.SetUint32(ctxt.Arch, sz-8, 0x3c620000) // addis r3, r2, local.moduledata@got@ha
initfunc.SetUint32(ctxt.Arch, sz-4, 0xe8630000) // ld r3, local.moduledata@got@l(r3)
} else {
sz = initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT_PCREL34, 8)
sz := initfunc.AddSymRef(ctxt.Arch, tgt, 0, objabi.R_ADDRPOWER_GOT_PCREL34, 8)
// Note, this is prefixed instruction. It must not cross a 64B boundary.
// It is doubleword aligned here, so it will never cross (this function is 16B aligned, minimum).
initfunc.SetUint32(ctxt.Arch, sz-8, OP_PLD_PFX_PCREL)
@ -419,7 +421,7 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
}
// Call runtime.addmoduledata
sz = initfunc.AddSymRef(ctxt.Arch, addmoduledata, 0, objabi.R_CALLPOWER, 4)
sz := initfunc.AddSymRef(ctxt.Arch, addmoduledata, 0, objabi.R_CALLPOWER, 4)
initfunc.SetUint32(ctxt.Arch, sz-4, OP_BL) // bl runtime.addmoduledata
o(OP_NOP) // nop (for TOC restore)
@ -995,7 +997,12 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
if r.Size != 4 {
return false
}
out.Write64(uint64(elf.R_PPC64_REL24) | uint64(elfsym)<<32)
if !hasPCrel {
out.Write64(uint64(elf.R_PPC64_REL24) | uint64(elfsym)<<32)
} else {
// TOC is not used in PCrel compiled Go code.
out.Write64(uint64(elf.R_PPC64_REL24_NOTOC) | uint64(elfsym)<<32)
}
}
out.Write64(uint64(r.Xadd))
@ -1441,10 +1448,10 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
tgtName := ldr.SymName(rs)
// If we are linking PIE or shared code, all golang generated object files have an extra 2 instruction prologue
// If we are linking PIE or shared code, non-PCrel golang generated object files have an extra 2 instruction prologue
// to regenerate the TOC pointer from R12. The exception are two special case functions tested below. Note,
// local call offsets for externally generated objects are accounted for when converting into golang relocs.
if !ldr.AttrExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
if !hasPCrel && !ldr.AttrExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
// Furthermore, only apply the offset if the target looks like the start of a function call.
if r.Add() == 0 && ldr.SymType(rs) == sym.STEXT {
t += 8