From 8bb8eaff626796908938e7f0da102dc51a265237 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 10 Apr 2018 18:40:01 +0000 Subject: [PATCH] Revert "runtime: use fixed TLS offsets on darwin/amd64 and darwin/386" This reverts commit 76e92d1c9e2943de7093af14d58663fa2993e608. Reason for revert: Seems to have broken the darwin/386 builder, the toolchain is barfing on the new inline assembly. Change-Id: Ic83fa3c85148946529c5fd47d1e1669898031ace Reviewed-on: https://go-review.googlesource.com/106155 Reviewed-by: Keith Randall --- src/cmd/link/internal/ld/sym.go | 20 +++---- src/runtime/cgo/gcc_darwin_386.c | 92 +++++++++++++++++++++++++++++- src/runtime/cgo/gcc_darwin_amd64.c | 63 +++++++++++++++++++- src/runtime/sys_darwin_386.s | 11 ++-- src/runtime/sys_darwin_amd64.s | 9 +-- 5 files changed, 167 insertions(+), 28 deletions(-) diff --git a/src/cmd/link/internal/ld/sym.go b/src/cmd/link/internal/ld/sym.go index 7bc57eff8d..6f019de8cc 100644 --- a/src/cmd/link/internal/ld/sym.go +++ b/src/cmd/link/internal/ld/sym.go @@ -113,30 +113,24 @@ func (ctxt *Link) computeTLSOffset() { /* * OS X system constants - offset from 0(GS) to our TLS. + * Explained in src/runtime/cgo/gcc_darwin_*.c. */ case objabi.Hdarwin: switch ctxt.Arch.Family { default: log.Fatalf("unknown thread-local storage offset for darwin/%s", ctxt.Arch.Name) - /* - * For x86, Apple has reserved a slot in the TLS for Go. See issue 23617. - * That slot is at offset 0x30 on amd64, and 0x18 on 386. - * The slot will hold the G pointer. - * These constants should match those in runtime/sys_darwin_{386,amd64}.s - * and runtime/cgo/gcc_darwin_{386,amd64}.c. - */ - case sys.I386: - ctxt.Tlsoffset = 0x18 - - case sys.AMD64: - ctxt.Tlsoffset = 0x30 - case sys.ARM: ctxt.Tlsoffset = 0 // dummy value, not needed + case sys.AMD64: + ctxt.Tlsoffset = 0x8a0 + case sys.ARM64: ctxt.Tlsoffset = 0 // dummy value, not needed + + case sys.I386: + ctxt.Tlsoffset = 0x468 } } diff --git a/src/runtime/cgo/gcc_darwin_386.c b/src/runtime/cgo/gcc_darwin_386.c index c947f23073..7d3c55cd56 100644 --- a/src/runtime/cgo/gcc_darwin_386.c +++ b/src/runtime/cgo/gcc_darwin_386.c @@ -9,6 +9,89 @@ #include "libcgo_unix.h" static void* threadentry(void*); +static pthread_key_t k1; + +#define magic1 (0x23581321U) + +static void +inittls(void) +{ + uint32 x; + pthread_key_t tofree[128], k; + int i, ntofree; + + /* + * Allocate thread-local storage slot for g. + * The key numbers start at 0x100, and we expect to be + * one of the early calls to pthread_key_create, so we + * should be able to get a pretty low number. + * + * In Darwin/386 pthreads, %gs points at the thread + * structure, and each key is an index into the thread-local + * storage array that begins at offset 0x48 within in that structure. + * It may happen that we are not quite the first function to try + * to allocate thread-local storage keys, so instead of depending + * on getting 0x100, we try for 0x108, allocating keys until + * we get the one we want and then freeing the ones we didn't want. + * + * Thus the final offset to use in %gs references is + * 0x48+4*0x108 = 0x468. + * + * The linker and runtime hard-code this constant offset + * from %gs where we expect to find g. + * Known to src/cmd/link/internal/ld/sym.go:/0x468 + * and to src/runtime/sys_darwin_386.s:/0x468 + * + * This is truly disgusting and a bit fragile, but taking care + * of it here protects the rest of the system from damage. + * The alternative would be to use a global variable that + * held the offset and refer to that variable each time we + * need a %gs variable (g). That approach would + * require an extra instruction and memory reference in + * every stack growth prolog and would also require + * rewriting the code that 8c generates for extern registers. + * + * Things get more disgusting on OS X 10.7 Lion. + * The 0x48 base mentioned above is the offset of the tsd + * array within the per-thread structure on Leopard and Snow Leopard. + * On Lion, the base moved a little, so while the math above + * still applies, the base is different. Thus, we cannot + * look for specific key values if we want to build binaries + * that run on both systems. Instead, forget about the + * specific key values and just allocate and initialize per-thread + * storage until we find a key that writes to the memory location + * we want. Then keep that key. + */ + ntofree = 0; + for(;;) { + if(pthread_key_create(&k, nil) != 0) { + fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); + abort(); + } + pthread_setspecific(k, (void*)magic1); + asm volatile("movl %%gs:0x468, %0" : "=r"(x)); + pthread_setspecific(k, 0); + if(x == magic1) { + k1 = k; + break; + } + if(ntofree >= nelem(tofree)) { + fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n"); + fprintf(stderr, "\ttried"); + for(i=0; istacklo = (uintptr)&attr - size + 4096; pthread_attr_destroy(&attr); + + inittls(); } @@ -57,9 +142,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - // Move the g pointer into the slot reserved in thread local storage. - // Constant must match the one in cmd/link/internal/ld/sym.go. - asm volatile("movq %0, %%gs:0x18" :: "r"(ts.g)); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_386(ts.fn); return nil; diff --git a/src/runtime/cgo/gcc_darwin_amd64.c b/src/runtime/cgo/gcc_darwin_amd64.c index 51410d5026..c57608c675 100644 --- a/src/runtime/cgo/gcc_darwin_amd64.c +++ b/src/runtime/cgo/gcc_darwin_amd64.c @@ -9,6 +9,60 @@ #include "libcgo_unix.h" static void* threadentry(void*); +static pthread_key_t k1; + +#define magic1 (0x23581321345589ULL) + +static void +inittls(void) +{ + uint64 x; + pthread_key_t tofree[128], k; + int i, ntofree; + + /* + * Same logic, code as darwin_386.c:/inittls, except that words + * are 8 bytes long now, and the thread-local storage starts + * at 0x60 on Leopard / Snow Leopard. So the offset is + * 0x60+8*0x108 = 0x8a0. + * + * The linker and runtime hard-code this constant offset + * from %gs where we expect to find g. + * Known to src/cmd/link/internal/ld/sym.go:/0x8a0 + * and to src/runtime/sys_darwin_amd64.s:/0x8a0 + * + * As disgusting as on the 386; same justification. + */ + ntofree = 0; + for(;;) { + if(pthread_key_create(&k, nil) != 0) { + fprintf(stderr, "runtime/cgo: pthread_key_create failed\n"); + abort(); + } + pthread_setspecific(k, (void*)magic1); + asm volatile("movq %%gs:0x8a0, %0" : "=r"(x)); + pthread_setspecific(k, 0); + if(x == magic1) { + k1 = k; + break; + } + if(ntofree >= nelem(tofree)) { + fprintf(stderr, "runtime/cgo: could not obtain pthread_keys\n"); + fprintf(stderr, "\ttried"); + for(i=0; istacklo = (uintptr)&attr - size + 4096; pthread_attr_destroy(&attr); + + inittls(); } @@ -57,9 +113,10 @@ threadentry(void *v) ts = *(ThreadStart*)v; free(v); - // Move the g pointer into the slot reserved in thread local storage. - // Constant must match the one in cmd/link/internal/ld/sym.go. - asm volatile("movq %0, %%gs:0x30" :: "r"(ts.g)); + if (pthread_setspecific(k1, (void*)ts.g) != 0) { + fprintf(stderr, "runtime/cgo: pthread_setspecific failed\n"); + abort(); + } crosscall_amd64(ts.fn); return nil; diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s index 3831ba5a79..3b5ed44724 100644 --- a/src/runtime/sys_darwin_386.s +++ b/src/runtime/sys_darwin_386.s @@ -525,14 +525,15 @@ TEXT runtime·setldt(SB),NOSPLIT,$32 * When linking against the system libraries, * we use its pthread_create and let it set up %gs * for us. When we do that, the private storage - * we get is not at 0(GS) but at 0x18(GS). - * The linker rewrites 0(TLS) into 0x18(GS) for us. + * we get is not at 0(GS) but at 0x468(GS). + * 8l rewrites 0(TLS) into 0x468(GS) for us. * To accommodate that rewrite, we translate the - * address here so that 0x18(GS) maps to 0(address). + * address and limit here so that 0x468(GS) maps to 0(address). * - * Constant must match the one in cmd/link/internal/ld/sym.go. + * See cgo/gcc_darwin_386.c:/468 for the derivation + * of the constant. */ - SUBL $0x18, BX + SUBL $0x468, BX /* * Must set up as USER_CTHREAD segment because diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s index 7a4f8ba39a..ab57843dfa 100644 --- a/src/runtime/sys_darwin_amd64.s +++ b/src/runtime/sys_darwin_amd64.s @@ -569,10 +569,11 @@ TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0 // set tls base to DI TEXT runtime·settls(SB),NOSPLIT,$32 /* - * Same as in sys_darwin_386.s, but a different constant. - * Constant must match the one in cmd/link/internal/ld/sym.go. - */ - SUBQ $0x30, DI + * Same as in sys_darwin_386.s:/ugliness, different constant. + * See cgo/gcc_darwin_amd64.c for the derivation + * of the constant. + */ + SUBQ $0x8a0, DI MOVL $(0x3000000+3), AX // thread_fast_set_cthread_self - machdep call #3 SYSCALL