From 9c9090eb1da540c7d757df0c60423319a28759d3 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Fri, 15 Apr 2022 17:06:48 -0500 Subject: [PATCH] cmd/link: generate PPC64 ABI register save/restore functions if needed They are usually needed when internally linking gcc code compiled with -Os. These are typically generated by ld or gold, but are missing when linking internally. The PPC64 ELF ABI describes a set of functions to save/restore non-volatile, callee-save registers using R1/R0/R12: _savegpr0_n: Save Rn-R31 relative to R1, save LR (in R0), return _restgpr0_n: Restore Rn-R31 from R1, and return to saved LR _savefpr_n: Save Fn-F31 based on R1, and save LR (in R0), return _restfpr_n: Restore Fn-F31 from R1, and return to 16(R1) _savegpr1_n: Save Rn-R31 based on R12, return _restgpr1_n: Restore Rn-R31 based on R12, return _savevr_m: Save VRm-VR31 based on R0, R12 is scratch, return _restvr_m: Restore VRm-VR31 based on R0, R12 is scratch, return m is a value 20<=m<=31 n is a value 14<=n<=31 Add several new functions similar to those suggested by the PPC64 ELFv2 ABI. And update the linker to scan external relocs for these calls, and redirect them to runtime.elf_+offset in runtime/asm_ppc64x.go. Similarly, code which generates plt stubs is moved into a dedicated function. This avoids an extra scan of relocs. fixes #52336 Change-Id: I2f0f8b5b081a7b294dff5c92b4b1db8eba9a9400 Reviewed-on: https://go-review.googlesource.com/c/go/+/400796 Reviewed-by: Cherry Mui Reviewed-by: David Chase Run-TryBot: Paul Murphy TryBot-Result: Gopher Robot --- .../script/test_ppc64_linker_funcs.txt | 49 +++++ src/cmd/link/internal/ppc64/asm.go | 182 ++++++++++++---- src/runtime/asm_ppc64x.s | 203 ++++++++++++++++++ 3 files changed, 392 insertions(+), 42 deletions(-) create mode 100644 src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt diff --git a/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt b/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt new file mode 100644 index 00000000000..a33f9df724d --- /dev/null +++ b/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt @@ -0,0 +1,49 @@ +# Tests that the linker implements the PPC64 ELFv2 ABI +# register save and restore functions as defined in +# section 2.3.3.1 of the PPC64 ELFv2 ABI when linking +# external objects most likely compiled with gcc's +# -Os option. +# +# Verifies golang.org/issue/52366 for linux/ppc64le +[!linux] skip +[!gc] skip +[!cgo] skip +[!ppc64le] skip + +go build -ldflags='-linkmode=internal' +exec ./abitest +stdout success + +-- go.mod -- +module abitest + +-- abitest.go -- +package main + +/* +#cgo CFLAGS: -Os + +int foo_fpr() { + asm volatile("":::"fr31","fr30","fr29","fr28"); +} +int foo_gpr0() { + asm volatile("":::"r30","r29","r28"); +} +int foo_gpr1() { + asm volatile("":::"fr31", "fr30","fr29","fr28","r30","r29","r28"); +} +int foo_vr() { + asm volatile("":::"v31","v30","v29","v28"); +} +*/ +import "C" + +import "fmt" + +func main() { + C.foo_fpr() + C.foo_gpr0() + C.foo_gpr1() + C.foo_vr() + fmt.Println("success") +} diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go index 73c2718a336..5d5fbe2a977 100644 --- a/src/cmd/link/internal/ppc64/asm.go +++ b/src/cmd/link/internal/ppc64/asm.go @@ -40,10 +40,11 @@ import ( "encoding/binary" "fmt" "log" + "strconv" "strings" ) -func genplt(ctxt *ld.Link, ldr *loader.Loader) { +func genpltstub(ctxt *ld.Link, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (sym loader.Sym, firstUse bool) { // The ppc64 ABI PLT has similar concepts to other // architectures, but is laid out quite differently. When we // see an R_PPC64_REL24 relocation to a dynamic symbol @@ -92,53 +93,82 @@ func genplt(ctxt *ld.Link, ldr *loader.Loader) { // // This assumes "case 1" from the ABI, where the caller needs // us to save and restore the TOC pointer. + + // Reserve PLT entry and generate symbol + // resolver + addpltsym(ctxt, ldr, r.Sym()) + + // Generate call stub. Important to note that we're looking + // up the stub using the same version as the parent symbol (s), + // needed so that symtoc() will select the right .TOC. symbol + // when processing the stub. In older versions of the linker + // this was done by setting stub.Outer to the parent, but + // if the stub has the right version initially this is not needed. + n := fmt.Sprintf("%s.%s", ldr.SymName(s), ldr.SymName(r.Sym())) + stub := ldr.CreateSymForUpdate(n, ldr.SymVersion(s)) + firstUse = stub.Size() == 0 + if firstUse { + gencallstub(ctxt, ldr, 1, stub, r.Sym()) + } + + // Update the relocation to use the call stub + r.SetSym(stub.Sym()) + + // Make the symbol writeable so we can fixup toc. + su := ldr.MakeSymbolUpdater(s) + su.MakeWritable() + p := su.Data() + + // Check for toc restore slot (a nop), and replace with toc restore. + var nop uint32 + if len(p) >= int(r.Off()+8) { + nop = ctxt.Arch.ByteOrder.Uint32(p[r.Off()+4:]) + } + if nop != 0x60000000 { + ldr.Errorf(s, "Symbol %s is missing toc restoration slot at offset %d", ldr.SymName(s), r.Off()+4) + } + const o1 = 0xe8410018 // ld r2,24(r1) + ctxt.Arch.ByteOrder.PutUint32(p[r.Off()+4:], o1) + + return stub.Sym(), firstUse +} + +// Scan relocs and generate PLT stubs and generate/fixup ABI defined functions created by the linker +func genstubs(ctxt *ld.Link, ldr *loader.Loader) { var stubs []loader.Sym + var abifuncs []loader.Sym for _, s := range ctxt.Textp { relocs := ldr.Relocs(s) for i := 0; i < relocs.Count(); i++ { - r := relocs.At(i) - if r.Type() != objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24) || ldr.SymType(r.Sym()) != sym.SDYNIMPORT { - continue + if r := relocs.At(i); r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24) { + switch ldr.SymType(r.Sym()) { + case sym.SDYNIMPORT: + // This call goes throught the PLT, generate and call through a PLT stub. + if sym, firstUse := genpltstub(ctxt, ldr, r, s); firstUse { + stubs = append(stubs, sym) + } + + case sym.SXREF: + // Is this an ELF ABI defined function which is (in practice) + // generated by the linker to save/restore callee save registers? + // These are defined similarly for both PPC64 ELF and ELFv2. + targName := ldr.SymName(r.Sym()) + if strings.HasPrefix(targName, "_save") || strings.HasPrefix(targName, "_rest") { + if sym, firstUse := rewriteABIFuncReloc(ctxt, ldr, targName, r); firstUse { + abifuncs = append(abifuncs, sym) + } + } + } } - - // Reserve PLT entry and generate symbol - // resolver - addpltsym(ctxt, ldr, r.Sym()) - - // Generate call stub. Important to note that we're looking - // up the stub using the same version as the parent symbol (s), - // needed so that symtoc() will select the right .TOC. symbol - // when processing the stub. In older versions of the linker - // this was done by setting stub.Outer to the parent, but - // if the stub has the right version initially this is not needed. - n := fmt.Sprintf("%s.%s", ldr.SymName(s), ldr.SymName(r.Sym())) - stub := ldr.CreateSymForUpdate(n, ldr.SymVersion(s)) - if stub.Size() == 0 { - stubs = append(stubs, stub.Sym()) - gencallstub(ctxt, ldr, 1, stub, r.Sym()) - } - - // Update the relocation to use the call stub - r.SetSym(stub.Sym()) - - // Make the symbol writeable so we can fixup toc. - su := ldr.MakeSymbolUpdater(s) - su.MakeWritable() - p := su.Data() - - // Check for toc restore slot (a nop), and replace with toc restore. - var nop uint32 - if len(p) >= int(r.Off()+8) { - nop = ctxt.Arch.ByteOrder.Uint32(p[r.Off()+4:]) - } - if nop != 0x60000000 { - ldr.Errorf(s, "Symbol %s is missing toc restoration slot at offset %d", ldr.SymName(s), r.Off()+4) - } - const o1 = 0xe8410018 // ld r2,24(r1) - ctxt.Arch.ByteOrder.PutUint32(p[r.Off()+4:], o1) } } - // Put call stubs at the beginning (instead of the end). + + // Append any usage of the go versions of ELF save/restore + // functions to the end of the callstub list to minimize + // chances a trampoline might be needed. + stubs = append(stubs, abifuncs...) + + // Put stubs at the beginning (instead of the end). // So when resolving the relocations to calls to the stubs, // the addresses are known and trampolines can be inserted // when necessary. @@ -202,13 +232,74 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) { o(0x4e800020) } +// Rewrite ELF (v1 or v2) calls to _savegpr0_n, _savegpr1_n, _savefpr_n, _restfpr_n, _savevr_m, or +// _restvr_m (14<=n<=31, 20<=m<=31). Redirect them to runtime.elf_restgpr0+(n-14)*4, +// runtime.elf_restvr+(m-20)*8, and similar. +// +// These functions are defined in the ELFv2 ABI (generated when using gcc -Os option) to save and +// restore callee-saved registers (as defined in the PPC64 ELF ABIs) from registers n or m to 31 of +// the named type. R12 and R0 are sometimes used in exceptional ways described in the ABI. +// +// Final note, this is only needed when linking internally. The external linker will generate these +// functions if they are used. +func rewriteABIFuncReloc(ctxt *ld.Link, ldr *loader.Loader, tname string, r loader.Reloc) (sym loader.Sym, firstUse bool) { + s := strings.Split(tname, "_") + // A valid call will split like {"", "savegpr0", "20"} + if len(s) != 3 { + return 0, false // Not an abi func. + } + minReg := 14 // _savegpr0_{n}, _savegpr1_{n}, _savefpr_{n}, 14 <= n <= 31 + offMul := 4 // 1 instruction per register op. + switch s[1] { + case "savegpr0", "savegpr1", "savefpr": + case "restgpr0", "restgpr1", "restfpr": + case "savevr", "restvr": + minReg = 20 // _savevr_{n} or _restvr_{n}, 20 <= n <= 31 + offMul = 8 // 2 instructions per register op. + default: + return 0, false // Not an abi func + } + n, e := strconv.Atoi(s[2]) + if e != nil || n < minReg || n > 31 || r.Add() != 0 { + return 0, false // Invalid register number, or non-zero addend. Not an abi func. + } + + // tname is a valid relocation to an ABI defined register save/restore function. Re-relocate + // them to a go version of these functions in runtime/asm_ppc64x.s + ts := ldr.LookupOrCreateSym("runtime.elf_"+s[1], 0) + r.SetSym(ts) + r.SetAdd(int64((n - minReg) * offMul)) + firstUse = !ldr.AttrReachable(ts) + if firstUse { + ldr.SetAttrReachable(ts, true) + // This function only becomes reachable now. It has been dropped from + // the text section (it was unreachable until now), it needs included. + // + // Similarly, TOC regeneration should not happen for these functions, + // remove it from this save/restore function. + if ldr.AttrShared(ts) { + sb := ldr.MakeSymbolUpdater(ts) + sb.SetData(sb.Data()[8:]) + sb.SetSize(sb.Size() - 8) + relocs := sb.Relocs() + // Only one PCREL reloc to .TOC. should be present. + if relocs.Count() != 1 { + log.Fatalf("Unexpected number of relocs in %s\n", ldr.SymName(ts)) + } + sb.ResetRelocs() + + } + } + return ts, firstUse +} + func gentext(ctxt *ld.Link, ldr *loader.Loader) { if ctxt.DynlinkingGo() { genaddmoduledata(ctxt, ldr) } if ctxt.LinkMode == ld.LinkInternal { - genplt(ctxt, ldr) + genstubs(ctxt, ldr) } } @@ -863,6 +954,13 @@ func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, ta o1 = uint32(0x3c000000) | 12<<21 // lis r12,targetaddr hi o2 = uint32(0x38000000) | 12<<21 | 12<<16 // addi r12,r12,targetaddr lo + // ELFv2 save/restore functions use R0/R12 in special ways, therefore trampolines + // as generated here will not always work correctly. + if strings.HasPrefix(ldr.SymName(target), "runtime.elf_") { + log.Fatalf("Internal linker does not support trampolines to ELFv2 ABI"+ + " register save/restore function %s", ldr.SymName(target)) + } + t := ldr.SymValue(target) if t == 0 || r2Valid(ctxt) || ctxt.IsExternal() { // Target address is unknown, generate relocations diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 1d292b4e602..ec17dc9ce35 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -1051,3 +1051,206 @@ TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-16 MOVD R5, R3 MOVD R6, R4 JMP runtime·goPanicSliceConvert(SB) + +// These functions are used when internal linking cgo with external +// objects compiled with the -Os on gcc. They reduce prologue/epilogue +// size by deferring preservation of callee-save registers to a shared +// function. These are defined in PPC64 ELFv2 2.3.3 (but also present +// in ELFv1) +// +// These appear unused, but the linker will redirect calls to functions +// like _savegpr0_14 or _restgpr1_14 to runtime.elf_savegpr0 or +// runtime.elf_restgpr1 with an appropriate offset based on the number +// register operations required when linking external objects which +// make these calls. For GPR/FPR saves, the minimum register value is +// 14, for VR it is 20. +// +// These are only used when linking such cgo code internally. Note, R12 +// and R0 may be used in different ways than regular ELF compliant +// functions. +TEXT runtime·elf_savegpr0(SB),NOSPLIT|NOFRAME,$0 + // R0 holds the LR of the caller's caller, R1 holds save location + MOVD R14, -144(R1) + MOVD R15, -136(R1) + MOVD R16, -128(R1) + MOVD R17, -120(R1) + MOVD R18, -112(R1) + MOVD R19, -104(R1) + MOVD R20, -96(R1) + MOVD R21, -88(R1) + MOVD R22, -80(R1) + MOVD R23, -72(R1) + MOVD R24, -64(R1) + MOVD R25, -56(R1) + MOVD R26, -48(R1) + MOVD R27, -40(R1) + MOVD R28, -32(R1) + MOVD R29, -24(R1) + MOVD g, -16(R1) + MOVD R31, -8(R1) + MOVD R0, 16(R1) + RET +TEXT runtime·elf_restgpr0(SB),NOSPLIT|NOFRAME,$0 + // R1 holds save location. This returns to the LR saved on stack (bypassing the caller) + MOVD -144(R1), R14 + MOVD -136(R1), R15 + MOVD -128(R1), R16 + MOVD -120(R1), R17 + MOVD -112(R1), R18 + MOVD -104(R1), R19 + MOVD -96(R1), R20 + MOVD -88(R1), R21 + MOVD -80(R1), R22 + MOVD -72(R1), R23 + MOVD -64(R1), R24 + MOVD -56(R1), R25 + MOVD -48(R1), R26 + MOVD -40(R1), R27 + MOVD -32(R1), R28 + MOVD -24(R1), R29 + MOVD -16(R1), g + MOVD -8(R1), R31 + MOVD 16(R1), R0 // Load and return to saved LR + MOVD R0, LR + RET +TEXT runtime·elf_savegpr1(SB),NOSPLIT|NOFRAME,$0 + // R12 holds the save location + MOVD R14, -144(R12) + MOVD R15, -136(R12) + MOVD R16, -128(R12) + MOVD R17, -120(R12) + MOVD R18, -112(R12) + MOVD R19, -104(R12) + MOVD R20, -96(R12) + MOVD R21, -88(R12) + MOVD R22, -80(R12) + MOVD R23, -72(R12) + MOVD R24, -64(R12) + MOVD R25, -56(R12) + MOVD R26, -48(R12) + MOVD R27, -40(R12) + MOVD R28, -32(R12) + MOVD R29, -24(R12) + MOVD g, -16(R12) + MOVD R31, -8(R12) + RET +TEXT runtime·elf_restgpr1(SB),NOSPLIT|NOFRAME,$0 + // R12 holds the save location + MOVD -144(R12), R14 + MOVD -136(R12), R15 + MOVD -128(R12), R16 + MOVD -120(R12), R17 + MOVD -112(R12), R18 + MOVD -104(R12), R19 + MOVD -96(R12), R20 + MOVD -88(R12), R21 + MOVD -80(R12), R22 + MOVD -72(R12), R23 + MOVD -64(R12), R24 + MOVD -56(R12), R25 + MOVD -48(R12), R26 + MOVD -40(R12), R27 + MOVD -32(R12), R28 + MOVD -24(R12), R29 + MOVD -16(R12), g + MOVD -8(R12), R31 + RET +TEXT runtime·elf_savefpr(SB),NOSPLIT|NOFRAME,$0 + // R0 holds the LR of the caller's caller, R1 holds save location + FMOVD F14, -144(R1) + FMOVD F15, -136(R1) + FMOVD F16, -128(R1) + FMOVD F17, -120(R1) + FMOVD F18, -112(R1) + FMOVD F19, -104(R1) + FMOVD F20, -96(R1) + FMOVD F21, -88(R1) + FMOVD F22, -80(R1) + FMOVD F23, -72(R1) + FMOVD F24, -64(R1) + FMOVD F25, -56(R1) + FMOVD F26, -48(R1) + FMOVD F27, -40(R1) + FMOVD F28, -32(R1) + FMOVD F29, -24(R1) + FMOVD F30, -16(R1) + FMOVD F31, -8(R1) + MOVD R0, 16(R1) + RET +TEXT runtime·elf_restfpr(SB),NOSPLIT|NOFRAME,$0 + // R1 holds save location. This returns to the LR saved on stack (bypassing the caller) + FMOVD -144(R1), F14 + FMOVD -136(R1), F15 + FMOVD -128(R1), F16 + FMOVD -120(R1), F17 + FMOVD -112(R1), F18 + FMOVD -104(R1), F19 + FMOVD -96(R1), F20 + FMOVD -88(R1), F21 + FMOVD -80(R1), F22 + FMOVD -72(R1), F23 + FMOVD -64(R1), F24 + FMOVD -56(R1), F25 + FMOVD -48(R1), F26 + FMOVD -40(R1), F27 + FMOVD -32(R1), F28 + FMOVD -24(R1), F29 + FMOVD -16(R1), F30 + FMOVD -8(R1), F31 + MOVD 16(R1), R0 // Load and return to saved LR + MOVD R0, LR + RET +TEXT runtime·elf_savevr(SB),NOSPLIT|NOFRAME,$0 + // R0 holds the save location, R12 is clobbered + MOVD $-192, R12 + STVX V20, (R0+R12) + MOVD $-176, R12 + STVX V21, (R0+R12) + MOVD $-160, R12 + STVX V22, (R0+R12) + MOVD $-144, R12 + STVX V23, (R0+R12) + MOVD $-128, R12 + STVX V24, (R0+R12) + MOVD $-112, R12 + STVX V25, (R0+R12) + MOVD $-96, R12 + STVX V26, (R0+R12) + MOVD $-80, R12 + STVX V27, (R0+R12) + MOVD $-64, R12 + STVX V28, (R0+R12) + MOVD $-48, R12 + STVX V29, (R0+R12) + MOVD $-32, R12 + STVX V30, (R0+R12) + MOVD $-16, R12 + STVX V31, (R0+R12) + RET +TEXT runtime·elf_restvr(SB),NOSPLIT|NOFRAME,$0 + // R0 holds the save location, R12 is clobbered + MOVD $-192, R12 + LVX (R0+R12), V20 + MOVD $-176, R12 + LVX (R0+R12), V21 + MOVD $-160, R12 + LVX (R0+R12), V22 + MOVD $-144, R12 + LVX (R0+R12), V23 + MOVD $-128, R12 + LVX (R0+R12), V24 + MOVD $-112, R12 + LVX (R0+R12), V25 + MOVD $-96, R12 + LVX (R0+R12), V26 + MOVD $-80, R12 + LVX (R0+R12), V27 + MOVD $-64, R12 + LVX (R0+R12), V28 + MOVD $-48, R12 + LVX (R0+R12), V29 + MOVD $-32, R12 + LVX (R0+R12), V30 + MOVD $-16, R12 + LVX (R0+R12), V31 + RET