From 80d9106487e451f5de5d4c4e5de97002c33a0363 Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Wed, 28 Oct 2015 15:27:51 +1300 Subject: [PATCH] cmd/internal/obj, cmd/link: support inital-exec TLS on arm64 Change-Id: Iaf9159a68fa395245bc20ccb4a2a377f89371a7e Reviewed-on: https://go-review.googlesource.com/13996 Reviewed-by: David Crawshaw --- src/cmd/internal/obj/arm64/a.out.go | 11 +++++++++- src/cmd/internal/obj/arm64/anames7.go | 3 ++- src/cmd/internal/obj/arm64/asm7.go | 30 ++++++++++++++++++++++----- src/cmd/internal/obj/link.go | 5 +++++ src/cmd/link/internal/arm64/asm.go | 9 +++++++- src/cmd/link/internal/ld/elf.go | 14 +++++++------ 6 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index 534a6d0a6d..06c4ea552d 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -321,7 +321,16 @@ const ( C_LOREG C_ADDR // TODO(aram): explain difference from C_VCONADDR - C_TLS // TLS var, i.e. memory address containing offset for the var + + // TLS "var" in local exec mode: will become a constant offset from + // thread local base that is ultimately chosen by the program linker. + C_TLS_LE + + // TLS "var" in initial exec mode: will become a memory address (chosen + // by the program linker) that the dynamic linker will fill with the + // offset from the thread local base. + C_TLS_IE + C_ROFF // register offset (including register extended) C_GOK diff --git a/src/cmd/internal/obj/arm64/anames7.go b/src/cmd/internal/obj/arm64/anames7.go index 8ea42fd5b6..f9df74ff89 100644 --- a/src/cmd/internal/obj/arm64/anames7.go +++ b/src/cmd/internal/obj/arm64/anames7.go @@ -55,7 +55,8 @@ var cnames7 = []string{ "UOREG64K", "LOREG", "ADDR", - "TLS", + "TLS_LE", + "TLS_IE", "ROFF", "GOK", "TEXTSIZE", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 49060600ea..1fd8982a98 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -270,7 +270,8 @@ var optab = []Optab{ {AMOVH, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0}, {AMOVW, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0}, {AMOVD, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0}, - {AMOVD, C_TLS, C_NONE, C_REG, 69, 4, 0, 0, 0}, + {AMOVD, C_TLS_LE, C_NONE, C_REG, 69, 4, 0, 0, 0}, + {AMOVD, C_TLS_IE, C_NONE, C_REG, 70, 8, 0, 0, 0}, {AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, {AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0}, {AMADD, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0}, @@ -970,7 +971,11 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int { ctxt.Instoffset = a.Offset if a.Sym != nil { // use relocation if a.Sym.Type == obj.STLSBSS { - return C_TLS + if ctxt.Flag_shared != 0 { + return C_TLS_IE + } else { + return C_TLS_LE + } } return C_ADDR } @@ -1045,10 +1050,12 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int { case obj.NAME_EXTERN, obj.NAME_STATIC: - s := a.Sym - if s == nil { + if a.Sym == nil { break } + if a.Sym.Type == obj.STLSBSS { + ctxt.Diag("taking address of TLS variable is not supported") + } ctxt.Instoffset = a.Offset return C_VCONADDR @@ -2757,7 +2764,7 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { rel.Add = p.From.Offset rel.Type = obj.R_ADDRARM64 - case 69: /* movd $tlsvar, reg -> movz reg, 0 + reloc */ + case 69: /* LE model movd $tlsvar, reg -> movz reg, 0 + reloc */ o1 = opirr(ctxt, AMOVZ) o1 |= uint32(p.To.Reg & 31) rel := obj.Addrel(ctxt.Cursym) @@ -2769,6 +2776,19 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ctxt.Diag("invalid offset on MOVW $tlsvar") } + case 70: /* IE model movd $tlsvar, reg -> adrp REGTMP, 0; ldr reg, [REGTMP, #0] + relocs */ + o1 = ADR(1, 0, REGTMP) + o2 = olsr12u(ctxt, int32(opldr12(ctxt, AMOVD)), 0, REGTMP, int(p.To.Reg)) + rel := obj.Addrel(ctxt.Cursym) + rel.Off = int32(ctxt.Pc) + rel.Siz = 8 + rel.Sym = p.From.Sym + rel.Add = 0 + rel.Type = obj.R_ARM64_TLS_IE + if p.From.Offset != 0 { + ctxt.Diag("invalid offset on MOVW $tlsvar") + } + // This is supposed to be something that stops execution. // It's not supposed to be reached, ever, but if it is, we'd // like to be able to tell how we got there. Assemble as diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index cddcdcef3b..6ca34f28d3 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -446,6 +446,11 @@ const ( // local base to the thread local variable defined by the referenced (thread // local) symbol. Error if the offset does not fit into 16 bits. R_ARM64_TLS_LE + + // Relocates an ADRP; LD64 instruction sequence to load the offset between + // the thread local base and the thread local variable defined by the + // referenced (thread local) symbol from the GOT. + R_ARM64_TLS_IE ) type Auto struct { diff --git a/src/cmd/link/internal/arm64/asm.go b/src/cmd/link/internal/arm64/asm.go index 3d4d8c623d..58d5236398 100644 --- a/src/cmd/link/internal/arm64/asm.go +++ b/src/cmd/link/internal/arm64/asm.go @@ -76,6 +76,12 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int { case obj.R_ARM64_TLS_LE: ld.Thearch.Vput(ld.R_AARCH64_TLSLE_MOVW_TPREL_G0 | uint64(elfsym)<<32) + case obj.R_ARM64_TLS_IE: + ld.Thearch.Vput(ld.R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 | uint64(elfsym)<<32) + ld.Thearch.Vput(uint64(r.Xadd)) + ld.Thearch.Vput(uint64(sectoff + 4)) + ld.Thearch.Vput(ld.R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC | uint64(elfsym)<<32) + case obj.R_CALLARM64: if r.Siz != 4 { return -1 @@ -229,7 +235,8 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { return 0 case obj.R_CALLARM64, - obj.R_ARM64_TLS_LE: + obj.R_ARM64_TLS_LE, + obj.R_ARM64_TLS_IE: r.Done = 0 r.Xsym = r.Sym r.Xadd = r.Add diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go index d3baf05d37..16c669e806 100644 --- a/src/cmd/link/internal/ld/elf.go +++ b/src/cmd/link/internal/ld/elf.go @@ -351,12 +351,14 @@ const ( R_X86_64_TPOFF32 = 23 R_X86_64_COUNT = 24 - R_AARCH64_ABS64 = 257 - R_AARCH64_ABS32 = 258 - R_AARCH64_CALL26 = 283 - R_AARCH64_ADR_PREL_PG_HI21 = 275 - R_AARCH64_ADD_ABS_LO12_NC = 277 - R_AARCH64_TLSLE_MOVW_TPREL_G0 = 547 + R_AARCH64_ABS64 = 257 + R_AARCH64_ABS32 = 258 + R_AARCH64_CALL26 = 283 + R_AARCH64_ADR_PREL_PG_HI21 = 275 + R_AARCH64_ADD_ABS_LO12_NC = 277 + R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 541 + R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 542 + R_AARCH64_TLSLE_MOVW_TPREL_G0 = 547 R_ALPHA_NONE = 0 R_ALPHA_REFLONG = 1