cmd/link: support 32b TLS_LE offsets on PPC64

When using the GCC thread sanitizer, it links in additional code which
uses TLS, which causes us to exceed the range of the 16 bit TLS
relocation used by statically compiled go code.

Rewrite objabi.R_POWER_TLS_LE to handle 32b offsets when linking
internally or externally into an ELF binary. The elf relocation
translation is changed to generate a pair of R_PPC64_TPREL16_HA/LO
relocations instead of a single R_PPC64_TPREL16.

Likewise, updating the above exposed some behavioral differences in
gnu ld, which can rewrite TLS sequences. It expects the sequence to
generate a valid TLS address, not an offset. This was exposed when
compiling PIC code. The proper fix is to generate the full TLS address
in the destination register of the "MOVD tlsaddr, $Rx" pseudo-op. This
removes the need to insert special objabi.R_POWER_TLS relocations
elsewhere.

Unfortunately, XCOFF (used by aix) doesn't appear to support 32 bit
offsets, so we rewrite this back into a 16b relocation when externally
linking a static binary.

Fixes #45040

Change-Id: I1ee9afd0b427cd79888032aa1f60d3e265073e1d
Reviewed-on: https://go-review.googlesource.com/c/go/+/302209
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
parent d948b8633d
commit 3e5bba0a44
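
[Reader's note, not part of the CL] A minimal Go sketch of the arithmetic behind the new two-instruction TLS_LE form: the 32-bit thread-pointer offset is split into a biased high half for addis and a sign-extended low half for addi, mirroring the archreloc hunk below. The sample offset and variable names are illustrative only.

package main

import "fmt"

func main() {
	// Sketch only: mirrors the o1/o2 patching added to archreloc below.
	v := int64(0x12345678) // sample TP-relative offset, too large for 16 bits

	ha := uint16((v + 0x8000) >> 16) // biased high half, patched into the addis immediate
	lo := uint16(v)                  // low 16 bits, patched into the addi immediate

	// addis Rx,R13,ha adds the sign-extended high half shifted left by 16;
	// addi Rx,Rx,lo then adds the sign-extended low half, yielding R13 + v.
	got := int64(int16(ha))<<16 + int64(int16(lo))
	fmt.Printf("ha=%#x lo=%#x reconstructed=%#x ok=%v\n", ha, lo, got, got == v)
}
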
@@ -229,8 +229,8 @@ var optab = []Optab{
 	{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
 	{as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
 	{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
-	{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 4},
-	{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 8},
+	{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8},
+	{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12},
 	{as: AMOVD, a1: C_TOCADDR, a6: C_REG, type_: 95, size: 8},
 	{as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
 	{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
@@ -2500,18 +2500,6 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 			if v != 0 {
 				c.ctxt.Diag("illegal indexed instruction\n%v", p)
 			}
-			if c.ctxt.Flag_shared && r == REG_R13 {
-				rel := obj.Addrel(c.cursym)
-				rel.Off = int32(c.pc)
-				rel.Siz = 4
-				// This (and the matching part in the load case
-				// below) are the only places in the ppc64 toolchain
-				// that knows the name of the tls variable. Possibly
-				// we could add some assembly syntax so that the name
-				// of the variable does not have to be assumed.
-				rel.Sym = c.ctxt.Lookup("runtime.tls_g")
-				rel.Type = objabi.R_POWER_TLS
-			}
 			o1 = AOP_RRR(c.opstorex(p.As), uint32(p.From.Reg), uint32(p.To.Index), uint32(r))
 		} else {
 			if int32(int16(v)) != v {
@@ -2536,13 +2524,6 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 			if v != 0 {
 				c.ctxt.Diag("illegal indexed instruction\n%v", p)
 			}
-			if c.ctxt.Flag_shared && r == REG_R13 {
-				rel := obj.Addrel(c.cursym)
-				rel.Off = int32(c.pc)
-				rel.Siz = 4
-				rel.Sym = c.ctxt.Lookup("runtime.tls_g")
-				rel.Type = objabi.R_POWER_TLS
-			}
 			o1 = AOP_RRR(c.oploadx(p.As), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
 		} else {
 			if int32(int16(v)) != v {
@@ -3511,10 +3492,11 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		if p.From.Offset != 0 {
 			c.ctxt.Diag("invalid offset against tls var %v", p)
 		}
-		o1 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), REGZERO, 0)
+		o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
+		o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
 		rel := obj.Addrel(c.cursym)
 		rel.Off = int32(c.pc)
-		rel.Siz = 4
+		rel.Siz = 8
 		rel.Sym = p.From.Sym
 		rel.Type = objabi.R_POWER_TLS_LE

@@ -3524,11 +3506,17 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		}
 		o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
 		o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
+		o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
 		rel := obj.Addrel(c.cursym)
 		rel.Off = int32(c.pc)
 		rel.Siz = 8
 		rel.Sym = p.From.Sym
 		rel.Type = objabi.R_POWER_TLS_IE
+		rel = obj.Addrel(c.cursym)
+		rel.Off = int32(c.pc) + 8
+		rel.Siz = 4
+		rel.Sym = p.From.Sym
+		rel.Type = objabi.R_POWER_TLS

 	case 81:
 		v := c.vregoff(&p.To)
@@ -167,8 +167,8 @@ const (

 	// R_POWER_TLS_LE is used to implement the "local exec" model for tls
 	// access. It resolves to the offset of the thread-local symbol from the
-	// thread pointer (R13) and inserts this value into the low 16 bits of an
-	// instruction word.
+	// thread pointer (R13) and is split against a pair of instructions to
+	// support a 32 bit displacement.
 	R_POWER_TLS_LE

 	// R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It
@@ -178,10 +178,12 @@
 	// symbol from the thread pointer (R13)).
 	R_POWER_TLS_IE

-	// R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as
-	// accessing a particular thread-local symbol. It does not affect code generation
-	// but is used by the system linker when relaxing "initial exec" model code to
-	// "local exec" model code.
+	// R_POWER_TLS marks an X-form instruction such as "ADD R3,R13,R4" as completing
+	// a sequence of GOT-relative relocations to compute a TLS address. This can be
+	// used by the system linker to rewrite the GOT-relative TLS relocation into a
+	// simpler thread-pointer relative relocation. See table 3.26 and 3.28 in the
+	// ppc64 elfv2 1.4 ABI on this transformation. Likewise, the second argument
+	// (usually called RB in X-form instructions) is assumed to be R13.
 	R_POWER_TLS

 	// R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second
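
[Reader's note, not part of the CL] The code sequences these relocations describe, restated from cases 79 and 80 in the asm9.go hunks above as a small runnable sketch; Rx is a placeholder destination register and the zero immediates are what the linker later patches.

package main

import "fmt"

func main() {
	// Sketch only; restates what cases 79 and 80 above emit.
	sequences := []struct {
		model string
		insns []string
	}{
		{"local exec (case 79): R_POWER_TLS_LE over 8 bytes", []string{
			"addis Rx, R13, 0 // patched with the high half of the TP offset",
			"addi  Rx, Rx, 0  // patched with the sign-extended low half",
		}},
		{"initial exec (case 80): R_POWER_TLS_IE over 8 bytes, R_POWER_TLS on the add", []string{
			"addis Rx, R2, 0   // GOT-relative high half",
			"ld    Rx, 0(Rx)   // load the TP offset from the GOT",
			"add   Rx, Rx, R13 // complete the TLS address",
		}},
	}
	for _, seq := range sequences {
		fmt.Println(seq.model)
		for _, insn := range seq.insns {
			fmt.Println("\t" + insn)
		}
	}
}
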
@@ -418,6 +418,7 @@ func xcoffreloc1(arch *sys.Arch, out *ld.OutBuf, ldr *loader.Loader, s loader.Sy
 		emitReloc(ld.XCOFF_R_TOCU|(0x0F<<8), 2)
 		emitReloc(ld.XCOFF_R_TOCL|(0x0F<<8), 6)
 	case objabi.R_POWER_TLS_LE:
+		// This only supports 16b relocations. It is fixed up in archreloc.
 		emitReloc(ld.XCOFF_R_TLS_LE|0x0F<<8, 2)
 	case objabi.R_CALLPOWER:
 		if r.Size != 4 {
@@ -458,7 +459,10 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym,
 	case objabi.R_POWER_TLS:
 		out.Write64(uint64(elf.R_PPC64_TLS) | uint64(elfsym)<<32)
 	case objabi.R_POWER_TLS_LE:
-		out.Write64(uint64(elf.R_PPC64_TPREL16) | uint64(elfsym)<<32)
+		out.Write64(uint64(elf.R_PPC64_TPREL16_HA) | uint64(elfsym)<<32)
+		out.Write64(uint64(r.Xadd))
+		out.Write64(uint64(sectoff + 4))
+		out.Write64(uint64(elf.R_PPC64_TPREL16_LO) | uint64(elfsym)<<32)
 	case objabi.R_POWER_TLS_IE:
 		out.Write64(uint64(elf.R_PPC64_GOT_TPREL16_HA) | uint64(elfsym)<<32)
 		out.Write64(uint64(r.Xadd))
@@ -797,11 +801,25 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
 		if !target.IsAIX() {
 			return val, nExtReloc, false
 		}
-	case objabi.R_POWER_TLS, objabi.R_POWER_TLS_LE, objabi.R_POWER_TLS_IE:
-		// check Outer is nil, Type is TLSBSS?
+	case objabi.R_POWER_TLS:
 		nExtReloc = 1
-		if rt == objabi.R_POWER_TLS_IE {
-			nExtReloc = 2 // need two ELF relocations, see elfreloc1
+		return val, nExtReloc, true
+	case objabi.R_POWER_TLS_LE, objabi.R_POWER_TLS_IE:
+		if target.IsAIX() && rt == objabi.R_POWER_TLS_LE {
+			// Fixup val, an addis/addi pair of instructions, which generate a 32b displacement
+			// from the threadpointer (R13), into a 16b relocation. XCOFF only supports 16b
+			// TLS LE relocations. Likewise, verify this is an addis/addi sequence.
+			const expectedOpcodes = 0x3C00000038000000
+			const expectedOpmasks = 0xFC000000FC000000
+			if uint64(val)&expectedOpmasks != expectedOpcodes {
+				ldr.Errorf(s, "relocation for %s+%d is not an addis/addi pair: %16x", ldr.SymName(rs), r.Off(), uint64(val))
+			}
+			nval := (int64(uint32(0x380d0000)) | val&0x03e00000) << 32 // addi rX, r13, $0
+			nval |= int64(0x60000000) // nop
+			val = nval
+			nExtReloc = 1
+		} else {
+			nExtReloc = 2
 		}
 		return val, nExtReloc, true
 	case objabi.R_ADDRPOWER,
@@ -855,10 +873,26 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
 			// the TLS.
 			v -= 0x800
 		}
-		if int64(int16(v)) != v {
+
+		var o1, o2 uint32
+		if int64(int32(v)) != v {
 			ldr.Errorf(s, "TLS offset out of range %d", v)
 		}
-		return (val &^ 0xffff) | (v & 0xffff), nExtReloc, true
+		if target.IsBigEndian() {
+			o1 = uint32(val >> 32)
+			o2 = uint32(val)
+		} else {
+			o1 = uint32(val)
+			o2 = uint32(val >> 32)
+		}
+
+		o1 |= uint32(((v + 0x8000) >> 16) & 0xFFFF)
+		o2 |= uint32(v & 0xFFFF)
+
+		if target.IsBigEndian() {
+			return int64(o1)<<32 | int64(o2), nExtReloc, true
+		}
+		return int64(o2)<<32 | int64(o1), nExtReloc, true
 	}

 	return val, nExtReloc, false
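
[Reader's note, not part of the CL] A standalone sketch decoding the opcode constants used by the AIX fixup above: 0x3C000000 and 0x38000000 are the addis and addi primary opcodes (15<<26 and 14<<26), 0xFC000000 masks the 6-bit primary opcode field, 0x380d0000 is "addi rX, r13, 0", and 0x60000000 is a nop.

package main

import "fmt"

func main() {
	const (
		expectedOpcodes = 0x3C00000038000000 // addis in the high word, addi in the low word
		expectedOpmasks = 0xFC000000FC000000 // primary opcode field of each word
		opADDIS         = 15 << 26           // 0x3C000000
		opADDI          = 14 << 26           // 0x38000000
	)
	// Opcode fields of a generic addis/addi pair, packed with addis first,
	// as the AIX check above expects to see val.
	pair := uint64(opADDIS)<<32 | uint64(opADDI)
	fmt.Println(pair&expectedOpmasks == expectedOpcodes) // true

	// The replacement "addi rX, r13, $0" keeps the destination register bits
	// (val & 0x03e00000) and sets RA to r13 (13<<16 == 0x000d0000).
	fmt.Printf("%#x\n", uint32(opADDI)|13<<16) // 0x380d0000
}
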
@@ -36,9 +36,9 @@
 // racecalladdr.
 //
 // The sequence used to get the race ctx:
-// MOVD runtime·tls_g(SB), R10 // offset to TLS
-// MOVD 0(R13)(R10*1), g // R13=TLS for this thread, g = R30
-// MOVD g_racectx(g), R3 // racectx == ThreadState
+// MOVD runtime·tls_g(SB), R10 // Address of TLS variable
+// MOVD 0(R10), g // g = R30
+// MOVD g_racectx(g), R3 // racectx == ThreadState

 // func runtime·RaceRead(addr uintptr)
 // Called from instrumented Go code
@@ -137,7 +137,7 @@ TEXT runtime·racewriterangepc1(SB), NOSPLIT, $0-24
 // Otherwise, setup goroutine context and invoke racecall. Other arguments already set.
 TEXT racecalladdr<>(SB), NOSPLIT, $0-0
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_racectx(g), R3 // goroutine context
 	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
 	MOVD runtime·racearenastart(SB), R9
@@ -173,7 +173,7 @@ TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8
 // R11 = caller's return address
 TEXT racefuncenter<>(SB), NOSPLIT, $0-0
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_racectx(g), R3 // goroutine racectx aka *ThreadState
 	MOVD R8, R4 // caller pc set by caller in R8
 	// void __tsan_func_enter(ThreadState *thr, void *pc);
@@ -185,7 +185,7 @@ TEXT racefuncenter<>(SB), NOSPLIT, $0-0
 // Called from Go instrumented code.
 TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_racectx(g), R3 // goroutine racectx aka *ThreadState
 	// void __tsan_func_exit(ThreadState *thr);
 	MOVD $__tsan_func_exit(SB), R8
@@ -380,7 +380,7 @@ racecallatomic_data:
 racecallatomic_ok:
 	// Addr is within the good range, call the atomic function.
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_racectx(g), R3 // goroutine racectx aka *ThreadState
 	MOVD R8, R5 // pc is the function called
 	MOVD (R1), R4 // caller pc from stack
@@ -394,7 +394,7 @@ racecallatomic_ignore:
 	MOVD R6, R17 // save the original arg list addr
 	MOVD $__tsan_go_ignore_sync_begin(SB), R8 // func addr to call
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_racectx(g), R3 // goroutine context
 	BL racecall<>(SB)
 	MOVD R15, R8 // restore the original function
@@ -402,7 +402,7 @@ racecallatomic_ignore:
 	// Call the atomic function.
 	// racecall will call LLVM race code which might clobber r30 (g)
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g

 	MOVD g_racectx(g), R3
 	MOVD R8, R4 // pc being called same TODO as above
@@ -434,7 +434,7 @@ TEXT racecall<>(SB), NOSPLIT, $0-0
 	MOVD R10, 16(R1) // C ABI
 	// Get info from the current goroutine
 	MOVD runtime·tls_g(SB), R10 // g offset in TLS
-	MOVD 0(R13)(R10*1), g // R13 = current TLS
+	MOVD 0(R10), g
 	MOVD g_m(g), R7 // m for g
 	MOVD R1, R16 // callee-saved, preserved across C call
 	MOVD m_g0(R7), R10 // g0 for m
@@ -448,7 +448,7 @@ call:
 	XOR R0, R0 // clear R0 on return from Clang
 	MOVD R16, R1 // restore R1; R16 nonvol in Clang
 	MOVD runtime·tls_g(SB), R10 // find correct g
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD 16(R1), R10 // LR was saved away, restore for return
 	MOVD R10, LR
 	RET
@@ -469,7 +469,7 @@ TEXT runtime·racecallbackthunk(SB), NOSPLIT, $-8
 	// g0 TODO: Don't modify g here since R30 is nonvolatile
 	MOVD g, R9
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g
 	MOVD g_m(g), R3
 	MOVD m_p(R3), R3
 	MOVD p_raceprocctx(R3), R3
@@ -527,7 +527,7 @@ rest:
 	MOVD R4, FIXED_FRAME+8(R1)

 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g

 	MOVD g_m(g), R7
 	MOVD m_g0(R7), R8
@@ -540,7 +540,7 @@ rest:

 	// All registers are clobbered after Go code, reload.
 	MOVD runtime·tls_g(SB), R10
-	MOVD 0(R13)(R10*1), g
+	MOVD 0(R10), g

 	MOVD g_m(g), R7
 	MOVD m_curg(R7), g // restore g = m->curg
@@ -29,7 +29,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
 	BEQ nocgo
 #endif
 	MOVD runtime·tls_g(SB), R31
-	MOVD g, 0(R13)(R31*1)
+	MOVD g, 0(R31)

 nocgo:
 	RET
@@ -45,7 +45,7 @@ nocgo:
 // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31.
 TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD runtime·tls_g(SB), R31
-	MOVD 0(R13)(R31*1), g
+	MOVD 0(R31), g
 	RET

 GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8