// Source: go/src/pkg/runtime/sys_darwin_386.s
// (taken from a mirror of https://github.com/golang/go, synced 2024-10-05)

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// System calls and other sys.stuff for 386, Darwin
// See http://fxr.watson.org/fxr/source/bsd/kern/syscalls.c?v=xnu-1228
// or /usr/include/sys/syscall.h (on a Mac) for system call numbers.
#include "zasm_GOOS_GOARCH.h"
// runtime·exit ends the whole program, like C exit.
// The stub has no frame of its own ($0); it only loads the system
// call number and traps with INT $0x80.
#define SYS_exit 1
TEXT runtime·exit(SB),7,$0
	MOVL	$SYS_exit, AX
	INT	$0x80
	// Only reached if the system call comes back: fault on purpose.
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·exit1 ends only the calling OS thread. This is the job
// pthread_exit eventually hands to __bsdthread_terminate.
#define SYS_bsdthread_terminate 361
TEXT runtime·exit1(SB),7,$0
	MOVL	$SYS_bsdthread_terminate, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: step over the crash
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·open traps into BSD system call 5 and returns with
// whatever the kernel left in AX; no error translation is done here.
#define SYS_open 5
TEXT runtime·open(SB),7,$0
	MOVL	$SYS_open, AX
	INT	$0x80
	RET
// runtime·close traps into BSD system call 6 and returns with
// whatever the kernel left in AX; no error translation is done here.
#define SYS_close 6
TEXT runtime·close(SB),7,$0
	MOVL	$SYS_close, AX
	INT	$0x80
	RET
// runtime·read traps into BSD system call 3 and returns with
// whatever the kernel left in AX; no error translation is done here.
#define SYS_read 3
TEXT runtime·read(SB),7,$0
	MOVL	$SYS_read, AX
	INT	$0x80
	RET
// runtime·write traps into BSD system call 4 and returns with
// whatever the kernel left in AX; no error translation is done here.
#define SYS_write 4
TEXT runtime·write(SB),7,$0
	MOVL	$SYS_write, AX
	INT	$0x80
	RET
// runtime·raisesigpipe sends SIGPIPE to the calling thread by
// invoking __pthread_kill(m->procid, SIGPIPE).
//
// Every other stub in this file that builds its own argument frame
// leaves 0(SP) as the slot where the kernel expects the caller PC
// (and skips it), with the real arguments starting at 4(SP). The
// arguments are laid out the same way here so that the kernel does not
// take the signal number for the thread port.
//
// Frame ($12):
//	0(SP)	caller-PC slot; skipped by the kernel
//	4(SP)	thread_port (m->procid)
//	8(SP)	signal number
TEXT runtime·raisesigpipe(SB),7,$12
	get_tls(CX)
	MOVL	m(CX), DX
	MOVL	m_procid(DX), DX
	// 0(SP) is where the caller PC would be; kernel skips it
	MOVL	DX, 4(SP)	// thread_port
	MOVL	$13, 8(SP)	// signal: SIGPIPE
	MOVL	$328, AX	// __pthread_kill
	INT	$0x80
	RET
// runtime·mmap traps into BSD system call 197 and returns with
// whatever the kernel left in AX; the carry flag is not examined.
#define SYS_mmap 197
TEXT runtime·mmap(SB),7,$0
	MOVL	$SYS_mmap, AX
	INT	$0x80
	RET
// runtime·madvise traps into BSD system call 75.
// A failure is deliberately not checked: the pages may be locked.
#define SYS_madvise 75
TEXT runtime·madvise(SB),7,$0
	MOVL	$SYS_madvise, AX
	INT	$0x80
	// ignore failure - maybe pages are locked
	RET
// runtime·munmap traps into BSD system call 73 and crashes the
// program if the call comes back with the carry flag set.
#define SYS_munmap 73
TEXT runtime·munmap(SB),7,$0
	MOVL	$SYS_munmap, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: step over the crash
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·setitimer traps into BSD system call 83 and returns with
// whatever the kernel left in AX; the carry flag is not examined.
#define SYS_setitimer 83
TEXT runtime·setitimer(SB),7,$0
	MOVL	$SYS_setitimer, AX
	INT	$0x80
	RET
// OS X comm page time offsets
// http://www.opensource.apple.com/source/xnu/xnu-1699.26.8/osfmk/i386/cpu_capabilities.h
//
// Each value is a byte offset that runtime·now applies to the comm
// page base address it keeps in BP (0xffff0000).
//   cpu_capabilities  word tested against 0x4000 to detect the "slow CPU" case
//   nt_*              parameters for the TSC -> nanoseconds conversion,
//                     re-read until nt_generation is stable
//   gtod_*            bases used to turn that into unix time,
//                     re-read until gtod_generation is stable
// nt_shift is not referenced by the code in this part of the file.
#define cpu_capabilities 0x20
#define nt_tsc_base 0x50
#define nt_scale 0x58
#define nt_shift 0x5c
#define nt_ns_base 0x60
#define nt_generation 0x68
#define gtod_generation 0x6c
#define gtod_ns_base 0x70
#define gtod_sec_base 0x78
// called from assembly
// 64-bit unix nanoseconds returned in DX:AX.
// I'd much rather write this in C but we need
// assembly for the 96-bit multiply and RDTSC.
//
// Scratch layout of the $40 frame on the fast (comm page) path:
//	0(SP)	nt_tsc_base, low word (high word at 4(SP))
//	8(SP)	nt_scale
//	12(SP)	nt_ns_base, low word (high word at 16(SP))
//	20(SP)	gtod_ns_base, low word (high word at 24(SP))
//	28(SP)	gtod_sec_base, low word (high word at 32(SP))
// On the systime path 4(SP) and 8(SP) hold the two system call
// arguments and 12(SP) is the buffer the first argument points at.
// AX, BX, CX, DX, SI, DI and BP are all overwritten.
TEXT runtime·now(SB),7,$40
MOVL $0xffff0000, BP /* comm page base */
// Test for slow CPU. If so, the math is completely
// different, and unimplemented here, so use the
// system call.
MOVL cpu_capabilities(BP), AX
TESTL $0x4000, AX
JNZ systime
// Loop trying to take a consistent snapshot
// of the time parameters.
// BX remembers gtod_generation and CX remembers nt_generation; each
// group of fields is copied to the frame and then its generation word
// is compared again, restarting the loop if it changed meanwhile.
timeloop:
MOVL gtod_generation(BP), BX
TESTL BX, BX
// a zero gtod_generation sends us to the system call
JZ systime
MOVL nt_generation(BP), CX
TESTL CX, CX
// a zero nt_generation just retries
JZ timeloop
// DX:AX = time stamp counter; it stays there, untouched, until the
// subtraction below (only SI and DI are used for the copying).
RDTSC
MOVL nt_tsc_base(BP), SI
MOVL (nt_tsc_base+4)(BP), DI
MOVL SI, 0(SP)
MOVL DI, 4(SP)
MOVL nt_scale(BP), SI
MOVL SI, 8(SP)
MOVL nt_ns_base(BP), SI
MOVL (nt_ns_base+4)(BP), DI
MOVL SI, 12(SP)
MOVL DI, 16(SP)
CMPL nt_generation(BP), CX
JNE timeloop
MOVL gtod_ns_base(BP), SI
MOVL (gtod_ns_base+4)(BP), DI
MOVL SI, 20(SP)
MOVL DI, 24(SP)
MOVL gtod_sec_base(BP), SI
MOVL (gtod_sec_base+4)(BP), DI
MOVL SI, 28(SP)
MOVL DI, 32(SP)
CMPL gtod_generation(BP), BX
JNE timeloop
// Gathered all the data we need. Compute time.
// (((tsc - nt_tsc_base) * nt_scale) >> 32) + nt_ns_base - gtod_ns_base + gtod_sec_base*1e9
// The multiply and shift extracts the top 64 bits of the 96-bit product.
SUBL 0(SP), AX // DX:AX = (tsc - nt_tsc_base)
SBBL 4(SP), DX
// We have x = tsc - nt_tsc_base in DX:AX, to be
// multiplied by y = nt_scale = 8(SP), keeping the top 64 bits of the 96-bit product.
// x*y = (x&0xffffffff)*y + (x&0xffffffff00000000)*y
// (x*y)>>32 = ((x&0xffffffff)*y)>>32 + (x>>32)*y
// CX parks the high word of x while the low word is multiplied.
MOVL DX, CX // SI = (x&0xffffffff)*y >> 32
MOVL $0, DX
MULL 8(SP)
MOVL DX, SI
MOVL CX, AX // DX:AX = (x>>32)*y
MOVL $0, DX
MULL 8(SP)
ADDL SI, AX // DX:AX += (x&0xffffffff)*y >> 32
ADCL $0, DX
// DX:AX is now ((tsc - nt_tsc_base) * nt_scale) >> 32.
ADDL 12(SP), AX // DX:AX += nt_ns_base
ADCL 16(SP), DX
SUBL 20(SP), AX // DX:AX -= gtod_ns_base
SBBL 24(SP), DX
MOVL AX, SI // DI:SI = DX:AX
MOVL DX, DI
// NOTE(review): MULL CX multiplies AX by CX and overwrites DX with the
// high half of the product, so the word loaded from 32(SP) is discarded
// and only the low 32 bits of gtod_sec_base take part.
MOVL 28(SP), AX // DX:AX = gtod_sec_base*1e9
MOVL 32(SP), DX
MOVL $1000000000, CX
MULL CX
ADDL SI, AX // DX:AX += DI:SI
ADCL DI, DX
RET
systime:
// Fall back to system call (usually first call in this thread)
// 0(SP) is left alone; the arguments start at 4(SP).
LEAL 12(SP), AX // must be non-nil, unused
MOVL AX, 4(SP)
MOVL $0, 8(SP) // time zone pointer
MOVL $116, AX
INT $0x80
// sec is in AX, usec in DX
// convert to DX:AX nsec
// BX keeps usec because MULL overwrites DX.
MOVL DX, BX
MOVL $1000000000, CX
MULL CX
IMULL $1000, BX
ADDL BX, AX
ADCL $0, DX
RET
// func now() (sec int64, nsec int32)
// Splits the 64-bit nanosecond count that runtime·now leaves in DX:AX
// into whole seconds and leftover nanoseconds with one 64/32 divide.
#define NSEC_PER_SEC 1000000000
TEXT time·now(SB),7,$0
	CALL	runtime·now(SB)	// DX:AX = unix nanoseconds
	MOVL	$NSEC_PER_SEC, CX
	DIVL	CX	// AX = quotient, DX = remainder
	MOVL	AX, sec+0(FP)	// low word of sec
	MOVL	$0, sec+4(FP)	// high word of sec is stored as zero
	MOVL	DX, nsec+8(FP)
	RET
// int64 nanotime(void), which in practice is
// void nanotime(int64 *nsec):
// the first argument word is the address that receives the result.
TEXT runtime·nanotime(SB),7,$0
	CALL	runtime·now(SB)	// DX:AX = nanoseconds
	MOVL	ret+0(FP), DI	// DI = where to store the result
	MOVL	AX, 0(DI)	// low word
	MOVL	DX, 4(DI)	// high word
	RET
// runtime·sigprocmask is implemented with pthread_sigmask, because on
// OS X sigprocmask applies to the entire process. Crashes the program
// if the call comes back with the carry flag set.
#define SYS_pthread_sigmask 329
TEXT runtime·sigprocmask(SB),7,$0
	MOVL	$SYS_pthread_sigmask, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: step over the crash
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·sigaction traps into BSD system call 46 and crashes the
// program if the call comes back with the carry flag set.
#define SYS_sigaction 46
TEXT runtime·sigaction(SB),7,$0
	MOVL	$SYS_sigaction, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: step over the crash
	MOVL	$0xf1, 0xf1	// crash
	RET
// Sigtramp's job is to call the actual signal handler.
// It is called with the following arguments on the stack
// (offsets as the code below addresses them):
//	0(FP)	actual handler
//	4(FP)	siginfo style
//	8(FP)	signal number
//	12(FP)	siginfo
//	16(FP)	context
//
// Frame ($40) usage:
//	0(SP)..12(SP)	outgoing arguments (sig, info, context, saved g)
//	20(SP)		saved g across the handler call
// After the handler returns, 0(SP)..8(SP) are reused for the
// sigreturn system call.
TEXT runtime·sigtramp(SB),7,$40
	get_tls(CX)

	// check that m exists
	MOVL	m(CX), BP
	CMPL	BP, $0
	JNE	5(PC)
	// No m: this is not a Go thread. Pass the signal number to
	// runtime·badsignal; if it returns, the signal is simply dropped.
	MOVL	sig+8(FP), BX
	MOVL	BX, 0(SP)
	CALL	runtime·badsignal(SB)
	RET

	// save g
	MOVL	g(CX), DI
	MOVL	DI, 20(SP)

	// g = m->gsignal
	MOVL	m_gsignal(BP), BP
	MOVL	BP, g(CX)

	// copy arguments to sighandler
	MOVL	sig+8(FP), BX
	MOVL	BX, 0(SP)
	MOVL	info+12(FP), BX
	MOVL	BX, 4(SP)
	MOVL	context+16(FP), BX
	MOVL	BX, 8(SP)
	MOVL	DI, 12(SP)	// the g that was current when the signal arrived

	MOVL	handler+0(FP), BX
	CALL	BX

	// restore g
	get_tls(CX)
	MOVL	20(SP), DI
	MOVL	DI, g(CX)

	// call sigreturn
	MOVL	context+16(FP), CX
	MOVL	style+4(FP), BX
	MOVL	$0, 0(SP)	// "caller PC" - ignored
	MOVL	CX, 4(SP)
	MOVL	BX, 8(SP)
	MOVL	$184, AX	// sigreturn(ucontext, infostyle)
	INT	$0x80
	// sigreturn is not expected to come back here.
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·sigaltstack traps into BSD system call 53 and crashes the
// program if the call comes back with the carry flag set.
#define SYS_sigaltstack 53
TEXT runtime·sigaltstack(SB),7,$0
	MOVL	$SYS_sigaltstack, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: step over the crash
	MOVL	$0xf1, 0xf1	// crash
	RET
// runtime·usleep sleeps for usec microseconds by calling
// select(0, 0, 0, 0, &tv) with no descriptors and only a timeout.
//
// Frame ($32):
//	0(SP)		"return PC" slot, ignored by the kernel
//	4(SP)..20(SP)	the five select arguments
//	24(SP), 28(SP)	tv: seconds, microseconds
#define SYS_select 93
TEXT runtime·usleep(SB),7,$32
	// Split usec into seconds (AX) and leftover microseconds (DX).
	MOVL	$0, DX
	MOVL	usec+0(FP), AX
	MOVL	$1000000, CX
	DIVL	CX
	MOVL	AX, 24(SP)	// sec
	MOVL	DX, 28(SP)	// usec

	// select(0, 0, 0, 0, &tv)
	MOVL	$0, 0(SP)	// "return PC" - ignored
	MOVL	$0, 4(SP)
	MOVL	$0, 8(SP)
	MOVL	$0, 12(SP)
	MOVL	$0, 16(SP)
	LEAL	24(SP), AX	// AX = &tv
	MOVL	AX, 20(SP)
	MOVL	$SYS_select, AX
	INT	$0x80
	RET
// bsdthread_create(void *stk, M *mp, G *gp, void (*fn)(void))
// System call args are: func arg stack pthread flags.
// AX is 0 on return when the kernel reports success (carry clear);
// otherwise it is the negation of the value the kernel left in AX.
#define SYS_bsdthread_create 360
TEXT runtime·bsdthread_create(SB),7,$32
	MOVL	$SYS_bsdthread_create, AX
	// 0(SP) is where the caller PC would be; kernel skips it
	MOVL	func+12(FP), BX
	MOVL	BX, 4(SP)	// func
	MOVL	mm+4(FP), BX
	MOVL	BX, 8(SP)	// arg
	MOVL	stk+0(FP), BX
	MOVL	BX, 12(SP)	// stack
	MOVL	gg+8(FP), BX
	MOVL	BX, 16(SP)	// pthread
	MOVL	$0x1000000, 20(SP)	// flags = PTHREAD_START_CUSTOM
	INT	$0x80
	JAE	3(PC)	// carry clear: go to the success return
	NEGL	AX
	RET
	MOVL	$0, AX
	RET
// The thread that bsdthread_create creates starts executing here,
// because we registered this function using bsdthread_register
// at startup.
// AX = "pthread" (= g)
// BX = mach thread port
// CX = "func" (= fn)
// DX = "arg" (= m)
// DI = stack top
// SI = flags (= 0x1000000)
// SP = stack - C_32_STK_ALIGN
//
// The routine sets up thread-local storage through runtime·setldt,
// records g and m in it, stores the thread port in m->procid, then
// calls fn and finally runtime·exit1.
TEXT runtime·bsdthread_start(SB),7,$0
// set up ldt 7+id to point at m->tls.
// m->tls is addressed as m_tls(DX) (an older note here said m+40;
// the offset presumably comes from the included generated header).
// newosproc left the m->id in tls[0].
// Note that runtime·setldt in this file documents its entry and
// limit arguments as ignored.
LEAL m_tls(DX), BP
MOVL 0(BP), DI
ADDL $7, DI // m0 is LDT#7. count up.
// setldt(tls#, &tls, sizeof tls)
// PUSHAL/POPAL bracket the call so that the incoming register values
// listed above are intact again afterwards.
PUSHAL // save registers
PUSHL $32 // sizeof tls
PUSHL BP // &tls
PUSHL DI // tls #
CALL runtime·setldt(SB)
// discard the three pushed arguments
POPL AX
POPL AX
POPL AX
POPAL
// Now segment is established. Initialize m, g.
get_tls(BP)
MOVL AX, g(BP)
MOVL DX, m(BP)
MOVL BX, m_procid(DX) // m->procid = thread port (for debuggers)
CALL runtime·stackcheck(SB) // smashes AX
CALL CX // fn()
// fn returned: terminate this OS thread.
CALL runtime·exit1(SB)
RET
// bsdthread_register(void)
// registers callbacks for threadstart (see bsdthread_create above)
// and wqthread and pthsize (not used). returns 0 on success;
// otherwise AX holds the negation of the value the kernel left there.
//
// The last three argument slots are extra zeros for arguments that
// Snow Leopard added (apparently unnecessary, but being careful).
TEXT runtime·bsdthread_register(SB),7,$40
	MOVL	$366, AX
	// 0(SP) is where kernel expects caller PC; ignored
	MOVL	$runtime·bsdthread_start(SB), 4(SP)	// threadstart
	MOVL	$0, 8(SP)	// wqthread, not used by us
	MOVL	$0, 12(SP)	// pthsize, not used by us
	MOVL	$0, 16(SP)	// dummy_value [sic]
	MOVL	$0, 20(SP)	// targetconc_ptr
	MOVL	$0, 24(SP)	// dispatchqueue_offset
	INT	$0x80
	JAE	3(PC)	// carry clear: go to the success return
	NEGL	AX
	RET
	MOVL	$0, AX
	RET
// Invoke Mach system call.
// Assumes system call number in AX,
// caller PC on stack, caller's caller PC next,
// and then the system call arguments.
//
// Can be used for BSD too, but we don't,
// because if you use this interface the BSD
// system call numbers need an extra field
// in the high 16 bits that seems to be the
// argument count in bytes but is not always.
// INT $0x80 works fine for those.
//
// This routine has no RET of its own: it pops its own return
// address into DX, passes the resulting stack pointer in CX, and
// control comes back to the calling stub through SYSENTER.
TEXT runtime·sysenter(SB),7,$0
// DX = address to resume at (the instruction after the stub's CALL)
POPL DX
// CX = stack pointer to resume with
MOVL SP, CX
BYTE $0x0F; BYTE $0x34; // SYSENTER
// returns to DX with SP set to CX
// runtime·mach_msg_trap issues Mach trap -31 through the shared
// runtime·sysenter helper, which expects the trap number in AX.
#define MACH_TRAP_mach_msg -31
TEXT runtime·mach_msg_trap(SB),7,$0
	MOVL	$MACH_TRAP_mach_msg, AX
	CALL	runtime·sysenter(SB)
	RET
// runtime·mach_reply_port issues Mach trap -26 through the shared
// runtime·sysenter helper, which expects the trap number in AX.
#define MACH_TRAP_mach_reply_port -26
TEXT runtime·mach_reply_port(SB),7,$0
	MOVL	$MACH_TRAP_mach_reply_port, AX
	CALL	runtime·sysenter(SB)
	RET
// runtime·mach_task_self issues Mach trap -28 through the shared
// runtime·sysenter helper, which expects the trap number in AX.
#define MACH_TRAP_mach_task_self -28
TEXT runtime·mach_task_self(SB),7,$0
	MOVL	$MACH_TRAP_mach_task_self, AX
	CALL	runtime·sysenter(SB)
	RET
// The semaphore operations are available from Mach as direct traps,
// so no RPC is needed for them.

// uint32 mach_semaphore_wait(uint32)
// Issues Mach trap -36 through runtime·sysenter.
#define MACH_TRAP_semaphore_wait -36
TEXT runtime·mach_semaphore_wait(SB),7,$0
	MOVL	$MACH_TRAP_semaphore_wait, AX
	CALL	runtime·sysenter(SB)
	RET
// uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
// Issues Mach trap -38 through runtime·sysenter.
#define MACH_TRAP_semaphore_timedwait -38
TEXT runtime·mach_semaphore_timedwait(SB),7,$0
	MOVL	$MACH_TRAP_semaphore_timedwait, AX
	CALL	runtime·sysenter(SB)
	RET
// uint32 mach_semaphore_signal(uint32)
// Issues Mach trap -33 through runtime·sysenter.
#define MACH_TRAP_semaphore_signal -33
TEXT runtime·mach_semaphore_signal(SB),7,$0
	MOVL	$MACH_TRAP_semaphore_signal, AX
	CALL	runtime·sysenter(SB)
	RET
// uint32 mach_semaphore_signal_all(uint32)
// Issues Mach trap -34 through runtime·sysenter.
#define MACH_TRAP_semaphore_signal_all -34
TEXT runtime·mach_semaphore_signal_all(SB),7,$0
	MOVL	$MACH_TRAP_semaphore_signal_all, AX
	CALL	runtime·sysenter(SB)
	RET
// setldt(int entry, int address, int limit)
// entry and limit are ignored.
// On return AX holds the current GS selector, zero-extended.
TEXT runtime·setldt(SB),7,$32
	MOVL	address+4(FP), BX	// aka base

	/*
	 * When linking against the system libraries,
	 * we use its pthread_create and let it set up %gs
	 * for us.  When we do that, the private storage
	 * we get is not at 0(GS) but at 0x468(GS).
	 * To insulate the rest of the tool chain from this ugliness,
	 * 8l rewrites 0(GS) into 0x468(GS) for us.
	 * To accommodate that rewrite, we translate the
	 * address and limit here so that 0x468(GS) maps to 0(address).
	 *
	 * See cgo/gcc_darwin_386.c:/468 for the derivation
	 * of the constant.
	 */
	SUBL	$0x468, BX

	/*
	 * Must set up as USER_CTHREAD segment because
	 * Darwin forces that value into %gs for signal handlers,
	 * and if we don't set one up, we'll get a recursive
	 * fault trying to get into the signal handler.
	 * Since we have to set one up anyway, it might as
	 * well be the value we want.  So don't bother with
	 * i386_set_ldt.
	 */
	// 0(SP) is left as the skipped caller-PC slot; the single
	// argument (the adjusted base) goes in 4(SP).
	MOVL	BX, 4(SP)
	MOVL	$3, AX	// thread_fast_set_cthread_self - machdep call #3
	INT	$0x82	// sic: 0x82, not 0x80, for machdep call

	// Clear AX first so the 16-bit selector move leaves the
	// upper half zero.
	XORL	AX, AX
	MOVW	GS, AX
	RET
// runtime·sysctl traps into BSD system call 202.
// AX is 0 on return when the call comes back with carry clear;
// otherwise it is the negation of the value the kernel left in AX.
#define SYS_sysctl 202
TEXT runtime·sysctl(SB),7,$0
	MOVL	$SYS_sysctl, AX
	INT	$0x80
	JAE	3(PC)	// carry clear: go to the success return
	NEGL	AX
	RET
	MOVL	$0, AX
	RET
// int32 runtime·kqueue(void);
// Traps into BSD system call 362. With carry clear the kernel's AX is
// returned untouched; with carry set AX is negated first.
#define SYS_kqueue 362
TEXT runtime·kqueue(SB),7,$0
	MOVL	$SYS_kqueue, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: keep AX as is
	NEGL	AX
	RET
// int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
// Traps into BSD system call 363. With carry clear the kernel's AX is
// returned untouched; with carry set AX is negated first.
#define SYS_kevent 363
TEXT runtime·kevent(SB),7,$0
	MOVL	$SYS_kevent, AX
	INT	$0x80
	JAE	2(PC)	// carry clear: keep AX as is
	NEGL	AX
	RET
// int32 runtime·closeonexec(int32 fd);
// Marks fd close-on-exec by calling fcntl(fd, F_SETFD, FD_CLOEXEC).
// With carry clear the kernel's AX is returned untouched; with carry
// set AX is negated first.
//
// Frame ($32):
//	0(SP)	caller-PC slot; skipped by the kernel
//	4(SP)	fd
//	8(SP)	command
//	12(SP)	command argument
#define SYS_fcntl 92
TEXT runtime·closeonexec(SB),7,$32
	MOVL	$SYS_fcntl, AX	// fcntl
	// 0(SP) is where the caller PC would be; kernel skips it
	MOVL	fd+0(FP), BX
	MOVL	BX, 4(SP)	// fd
	MOVL	$2, 8(SP)	// F_SETFD
	MOVL	$1, 12(SP)	// FD_CLOEXEC
	INT	$0x80
	JAE	2(PC)	// carry clear: keep AX as is
	NEGL	AX
	RET