mirror of
https://github.com/golang/go
synced 2024-11-17 12:14:47 -07:00
runtime,cmd/internal/obj/x86: use TEB TLS slots on windows/amd64
This CL redesign how we get the TLS pointer on windows/amd64. We were previously reading it from the [TEB] arbitrary data slot, located at 0x28(GS), which can only hold 1 TLS pointer. With this CL, we will read the TLS pointer from the TEB TLS slot array, located at 0x1480(GS). The TLS slot array can hold multiple TLS pointers, up to 64, so multiple Go runtimes running on the same thread can coexists with different TLS. Each new TLS slot has to be allocated via [TlsAlloc], which returns the slot index. This index can then be used to get the slot offset from GS with the following formula: 0x1480 + index*8 The slot index is fixed per Go runtime, so we can store it in runtime.tls_g and use it latter on to read/update the TLS pointer. Loading the TLS pointer requires the following asm instructions: MOVQ runtime.tls_g, AX MOVQ AX(GS), AX Notice that this approach is also implemented on windows/arm64. [TEB]: https://en.wikipedia.org/wiki/Win32_Thread_Information_Block [TlsAlloc]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-tlsalloc Updates #22192 Change-Id: Idea7119fd76a3cd083979a4d57ed64b552fa101b Reviewed-on: https://go-review.googlesource.com/c/go/+/431775 Reviewed-by: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Quim Muntal <quimmuntal@gmail.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
This commit is contained in:
parent
0f0aa5d8a6
commit
da564d0006
@ -3612,7 +3612,7 @@ func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj
|
|||||||
goto bad
|
goto bad
|
||||||
}
|
}
|
||||||
|
|
||||||
if a.Index != REG_NONE && a.Index != REG_TLS {
|
if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
|
||||||
base := int(a.Reg)
|
base := int(a.Reg)
|
||||||
switch a.Name {
|
switch a.Name {
|
||||||
case obj.NAME_EXTERN,
|
case obj.NAME_EXTERN,
|
||||||
@ -3724,7 +3724,8 @@ func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj
|
|||||||
}
|
}
|
||||||
|
|
||||||
if REG_AX <= base && base <= REG_R15 {
|
if REG_AX <= base && base <= REG_R15 {
|
||||||
if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid {
|
if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
|
||||||
|
!(ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64) {
|
||||||
rel = obj.Reloc{}
|
rel = obj.Reloc{}
|
||||||
rel.Type = objabi.R_TLS_LE
|
rel.Type = objabi.R_TLS_LE
|
||||||
rel.Siz = 4
|
rel.Siz = 4
|
||||||
@ -5205,21 +5206,6 @@ func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
|
|||||||
ab.Put2(0x64, // FS
|
ab.Put2(0x64, // FS
|
||||||
0x8B)
|
0x8B)
|
||||||
ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
|
ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
|
||||||
|
|
||||||
case objabi.Hwindows:
|
|
||||||
// Windows TLS base is always 0x28(GS).
|
|
||||||
pp.From = p.From
|
|
||||||
|
|
||||||
pp.From.Type = obj.TYPE_MEM
|
|
||||||
pp.From.Name = obj.NAME_NONE
|
|
||||||
pp.From.Reg = REG_GS
|
|
||||||
pp.From.Offset = 0x28
|
|
||||||
pp.From.Index = REG_NONE
|
|
||||||
pp.From.Scale = 0
|
|
||||||
ab.rexflag |= Pw
|
|
||||||
ab.Put2(0x65, // GS
|
|
||||||
0x8B)
|
|
||||||
ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
@ -158,16 +158,33 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Android uses a tls offset determined at runtime. Rewrite
|
// Android and Win64 use a tls offset determined at runtime. Rewrite
|
||||||
// MOVQ TLS, BX
|
// MOVQ TLS, BX
|
||||||
// to
|
// to
|
||||||
// MOVQ runtime.tls_g(SB), BX
|
// MOVQ runtime.tls_g(SB), BX
|
||||||
if isAndroid && (p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
|
if (isAndroid || (ctxt.Headtype == objabi.Hwindows && ctxt.Arch.Family == sys.AMD64)) &&
|
||||||
|
(p.As == AMOVQ || p.As == AMOVL) && p.From.Type == obj.TYPE_REG && p.From.Reg == REG_TLS && p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_R15 {
|
||||||
p.From.Type = obj.TYPE_MEM
|
p.From.Type = obj.TYPE_MEM
|
||||||
p.From.Name = obj.NAME_EXTERN
|
p.From.Name = obj.NAME_EXTERN
|
||||||
p.From.Reg = REG_NONE
|
p.From.Reg = REG_NONE
|
||||||
p.From.Sym = ctxt.Lookup("runtime.tls_g")
|
p.From.Sym = ctxt.Lookup("runtime.tls_g")
|
||||||
p.From.Index = REG_NONE
|
p.From.Index = REG_NONE
|
||||||
|
if ctxt.Headtype == objabi.Hwindows {
|
||||||
|
// Win64 requires an additional indirection
|
||||||
|
// to retrieve the TLS pointer,
|
||||||
|
// as runtime.tls_g contains the TLS offset from GS.
|
||||||
|
// add
|
||||||
|
// MOVQ 0(BX)(GS*1), BX
|
||||||
|
q := obj.Appendp(p, newprog)
|
||||||
|
q.As = p.As
|
||||||
|
q.From = obj.Addr{}
|
||||||
|
q.From.Type = obj.TYPE_MEM
|
||||||
|
q.From.Reg = p.To.Reg
|
||||||
|
q.From.Index = REG_GS
|
||||||
|
q.From.Scale = 1
|
||||||
|
q.From.Offset = 0
|
||||||
|
q.To = p.To
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove.
|
// TODO: Remove.
|
||||||
|
@ -201,16 +201,16 @@ nocpuinfo:
|
|||||||
JZ needtls
|
JZ needtls
|
||||||
// arg 1: g0, already in DI
|
// arg 1: g0, already in DI
|
||||||
MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
|
MOVQ $setg_gcc<>(SB), SI // arg 2: setg_gcc
|
||||||
|
MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
|
||||||
|
MOVQ $0, CX
|
||||||
#ifdef GOOS_android
|
#ifdef GOOS_android
|
||||||
MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
|
MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
|
||||||
// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
|
// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
|
||||||
// Compensate for tls_g (+16).
|
// Compensate for tls_g (+16).
|
||||||
MOVQ -16(TLS), CX
|
MOVQ -16(TLS), CX
|
||||||
#else
|
|
||||||
MOVQ $0, DX // arg 3, 4: not used when using platform's TLS
|
|
||||||
MOVQ $0, CX
|
|
||||||
#endif
|
#endif
|
||||||
#ifdef GOOS_windows
|
#ifdef GOOS_windows
|
||||||
|
MOVQ $runtime·tls_g(SB), DX // arg 3: &tls_g
|
||||||
// Adjust for the Win64 calling convention.
|
// Adjust for the Win64 calling convention.
|
||||||
MOVQ CX, R9 // arg 4
|
MOVQ CX, R9 // arg 4
|
||||||
MOVQ DX, R8 // arg 3
|
MOVQ DX, R8 // arg 3
|
||||||
@ -251,6 +251,10 @@ needtls:
|
|||||||
JMP ok
|
JMP ok
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GOOS_windows
|
||||||
|
CALL runtime·wintls(SB)
|
||||||
|
#endif
|
||||||
|
|
||||||
LEAQ runtime·m0+m_tls(SB), DI
|
LEAQ runtime·m0+m_tls(SB), DI
|
||||||
CALL runtime·settls(SB)
|
CALL runtime·settls(SB)
|
||||||
|
|
||||||
@ -2026,6 +2030,9 @@ TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
|
|||||||
DATA runtime·tls_g+0(SB)/8, $16
|
DATA runtime·tls_g+0(SB)/8, $16
|
||||||
GLOBL runtime·tls_g+0(SB), NOPTR, $8
|
GLOBL runtime·tls_g+0(SB), NOPTR, $8
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef GOOS_windows
|
||||||
|
GLOBL runtime·tls_g+0(SB), NOPTR, $8
|
||||||
|
#endif
|
||||||
|
|
||||||
// The compiler and assembler's -spectre=ret mode rewrites
|
// The compiler and assembler's -spectre=ret mode rewrites
|
||||||
// all indirect CALL AX / JMP AX instructions to be
|
// all indirect CALL AX / JMP AX instructions to be
|
||||||
|
@ -13,11 +13,13 @@
|
|||||||
|
|
||||||
static void threadentry(void*);
|
static void threadentry(void*);
|
||||||
static void (*setg_gcc)(void*);
|
static void (*setg_gcc)(void*);
|
||||||
|
static DWORD *tls_g;
|
||||||
|
|
||||||
void
|
void
|
||||||
x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
|
x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
|
||||||
{
|
{
|
||||||
setg_gcc = setg;
|
setg_gcc = setg;
|
||||||
|
tls_g = (DWORD *)tlsg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -41,8 +43,8 @@ threadentry(void *v)
|
|||||||
* Set specific keys in thread local storage.
|
* Set specific keys in thread local storage.
|
||||||
*/
|
*/
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movq %0, %%gs:0x28\n" // MOVL tls0, 0x28(GS)
|
"movq %0, %%gs:0(%1)\n" // MOVL tls0, 0(tls_g)(GS)
|
||||||
:: "r"(ts.tls)
|
:: "r"(ts.tls), "r"(*tls_g)
|
||||||
);
|
);
|
||||||
|
|
||||||
crosscall_amd64(ts.fn, setg_gcc, (void*)ts.g);
|
crosscall_amd64(ts.fn, setg_gcc, (void*)ts.g);
|
||||||
|
@ -8,6 +8,9 @@
|
|||||||
#include "time_windows.h"
|
#include "time_windows.h"
|
||||||
#include "cgo/abi_amd64.h"
|
#include "cgo/abi_amd64.h"
|
||||||
|
|
||||||
|
// Offsets into Thread Environment Block (pointer in GS)
|
||||||
|
#define TEB_TlsSlots 0x1480
|
||||||
|
|
||||||
// void runtime·asmstdcall(void *c);
|
// void runtime·asmstdcall(void *c);
|
||||||
TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0
|
TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0
|
||||||
// asmcgocall will put first argument into CX.
|
// asmcgocall will put first argument into CX.
|
||||||
@ -303,10 +306,10 @@ TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
|
|||||||
MOVQ AX, g_stackguard1(DX)
|
MOVQ AX, g_stackguard1(DX)
|
||||||
|
|
||||||
// Set up tls.
|
// Set up tls.
|
||||||
LEAQ m_tls(CX), SI
|
LEAQ m_tls(CX), DI
|
||||||
MOVQ SI, 0x28(GS)
|
|
||||||
MOVQ CX, g_m(DX)
|
MOVQ CX, g_m(DX)
|
||||||
MOVQ DX, g(SI)
|
MOVQ DX, g(DI)
|
||||||
|
CALL runtime·settls(SB) // clobbers CX
|
||||||
|
|
||||||
CALL runtime·stackcheck(SB) // clobbers AX,CX
|
CALL runtime·stackcheck(SB) // clobbers AX,CX
|
||||||
CALL runtime·mstart(SB)
|
CALL runtime·mstart(SB)
|
||||||
@ -318,7 +321,8 @@ TEXT runtime·tstart_stdcall(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
// set tls base to DI
|
// set tls base to DI
|
||||||
TEXT runtime·settls(SB),NOSPLIT,$0
|
TEXT runtime·settls(SB),NOSPLIT,$0
|
||||||
MOVQ DI, 0x28(GS)
|
MOVQ runtime·tls_g(SB), CX
|
||||||
|
MOVQ DI, 0(CX)(GS)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// Runs on OS stack.
|
// Runs on OS stack.
|
||||||
@ -404,3 +408,32 @@ TEXT runtime·osSetupTLS(SB),NOSPLIT,$0-8
|
|||||||
LEAQ m_tls(AX), DI
|
LEAQ m_tls(AX), DI
|
||||||
CALL runtime·settls(SB)
|
CALL runtime·settls(SB)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
// This is called from rt0_go, which runs on the system stack
|
||||||
|
// using the initial stack allocated by the OS.
|
||||||
|
TEXT runtime·wintls(SB),NOSPLIT|NOFRAME,$0
|
||||||
|
// Allocate a TLS slot to hold g across calls to external code
|
||||||
|
MOVQ SP, AX
|
||||||
|
ANDQ $~15, SP // alignment as per Windows requirement
|
||||||
|
SUBQ $48, SP // room for SP and 4 args as per Windows requirement
|
||||||
|
// plus one extra word to keep stack 16 bytes aligned
|
||||||
|
MOVQ AX, 32(SP)
|
||||||
|
MOVQ runtime·_TlsAlloc(SB), AX
|
||||||
|
CALL AX
|
||||||
|
MOVQ 32(SP), SP
|
||||||
|
|
||||||
|
MOVQ AX, CX // TLS index
|
||||||
|
|
||||||
|
// Assert that slot is less than 64 so we can use _TEB->TlsSlots
|
||||||
|
CMPQ CX, $64
|
||||||
|
JB ok
|
||||||
|
CALL runtime·abort(SB)
|
||||||
|
ok:
|
||||||
|
// Convert the TLS index at CX into
|
||||||
|
// an offset from TEB_TlsSlots.
|
||||||
|
SHLQ $3, CX
|
||||||
|
|
||||||
|
// Save offset from TLS into tls_g.
|
||||||
|
ADDQ $TEB_TlsSlots, CX
|
||||||
|
MOVQ CX, runtime·tls_g(SB)
|
||||||
|
RET
|
||||||
|
Loading…
Reference in New Issue
Block a user