1
0
mirror of https://github.com/golang/go synced 2024-11-17 22:14:43 -07:00

runtime: handle volatility of CX when dynamically linking on linux/386

Mostly by avoiding CX entirely, sometimes by reloading it.

I also vetted the assembly in other packages, it's all fine.

Change-Id: I50059669aaaa04efa303cf22ac228f9d14d83db0
Reviewed-on: https://go-review.googlesource.com/16386
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Michael Hudson-Doyle 2015-10-28 12:10:28 +13:00
parent c8abb02178
commit 6056cc5df6
2 changed files with 48 additions and 44 deletions

View File

@ -45,7 +45,8 @@ notintel:
MOVL $1, AX
CPUID
MOVL CX, runtime·cpuid_ecx(SB)
MOVL CX, AX // Move to global variable clobbers CX when generating PIC
MOVL AX, runtime·cpuid_ecx(SB)
MOVL DX, runtime·cpuid_edx(SB)
nocpuinfo:
@ -90,14 +91,14 @@ needtls:
ok:
// set up m and g "registers"
get_tls(BX)
LEAL runtime·g0(SB), CX
MOVL CX, g(BX)
LEAL runtime·g0(SB), DX
MOVL DX, g(BX)
LEAL runtime·m0(SB), AX
// save m->g0 = g0
MOVL CX, m_g0(AX)
MOVL DX, m_g0(AX)
// save g0->m = m0
MOVL AX, g_m(CX)
MOVL AX, g_m(DX)
CALL runtime·emptyfunc(SB) // fault if stack check is wrong
@ -187,9 +188,9 @@ TEXT runtime·gogo(SB), NOSPLIT, $0-4
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
MOVL fn+0(FP), DI
get_tls(CX)
MOVL g(CX), AX // save state in g->sched
get_tls(DX)
MOVL g(DX), AX // save state in g->sched
MOVL 0(SP), BX // caller's PC
MOVL BX, (g_sched+gobuf_pc)(AX)
LEAL fn+0(FP), BX // caller's SP
@ -197,14 +198,14 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-4
MOVL AX, (g_sched+gobuf_g)(AX)
// switch to m->g0 & its stack, call fn
MOVL g(CX), BX
MOVL g(DX), BX
MOVL g_m(BX), BX
MOVL m_g0(BX), SI
CMPL SI, AX // if g == m->g0 call badmcall
JNE 3(PC)
MOVL $runtime·badmcall(SB), AX
JMP AX
MOVL SI, g(CX) // g = m->g0
MOVL SI, g(DX) // g = m->g0
MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
PUSHL AX
MOVL DI, DX
@ -255,6 +256,7 @@ switch:
MOVL AX, (g_sched+gobuf_g)(AX)
// switch to g0
get_tls(CX)
MOVL DX, g(CX)
MOVL (g_sched+gobuf_sp)(DX), BX
// make it look like mstart called systemstack on g0, to stop traceback
@ -530,11 +532,13 @@ TEXT ·asmcgocall(SB),NOSPLIT,$0-12
MOVL m_g0(BP), SI
MOVL g(CX), DI
CMPL SI, DI
JEQ 4(PC)
JEQ noswitch
CALL gosave<>(SB)
get_tls(CX)
MOVL SI, g(CX)
MOVL (g_sched+gobuf_sp)(SI), SP
noswitch:
// Now on a scheduling stack (a pthread-created stack).
SUBL $32, SP
ANDL $~15, SP // alignment, perhaps unnecessary
@ -732,8 +736,8 @@ nobar:
TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
MOVL argp+0(FP),AX // addr of first arg
MOVL pc+4(FP), BX
MOVL -4(AX), CX
CMPL CX, runtime·stackBarrierPC(SB)
MOVL -4(AX), DX
CMPL DX, runtime·stackBarrierPC(SB)
JEQ setbar
MOVL BX, -4(AX) // set calling pc
RET
@ -801,39 +805,39 @@ TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
MOVL p+0(FP), AX // ptr to data
MOVL s+8(FP), CX // size
MOVL s+8(FP), BX // size
LEAL ret+12(FP), DX
JMP runtime·aeshashbody(SB)
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
MOVL p+0(FP), AX // ptr to string object
MOVL 4(AX), CX // length of string
MOVL 4(AX), BX // length of string
MOVL (AX), AX // string data
LEAL ret+8(FP), DX
JMP runtime·aeshashbody(SB)
// AX: data
// CX: length
// BX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
MOVL h+4(FP), X0 // 32 bits of per-table hash seed
PINSRW $4, CX, X0 // 16 bits of length
PINSRW $4, BX, X0 // 16 bits of length
PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
MOVO X0, X1 // save unscrambled seed
PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
AESENC X0, X0 // scramble seed
CMPL CX, $16
CMPL BX, $16
JB aes0to15
JE aes16
CMPL CX, $32
CMPL BX, $32
JBE aes17to32
CMPL CX, $64
CMPL BX, $64
JBE aes33to64
JMP aes65plus
aes0to15:
TESTL CX, CX
TESTL BX, BX
JE aes0
ADDL $16, AX
@ -843,8 +847,8 @@ aes0to15:
// 16 bytes loaded at this address won't cross
// a page boundary, so we can load it directly.
MOVOU -16(AX), X1
ADDL CX, CX
PAND masks<>(SB)(CX*8), X1
ADDL BX, BX
PAND masks<>(SB)(BX*8), X1
final1:
AESENC X0, X1 // scramble input, xor in seed
@ -857,9 +861,9 @@ endofpage:
// address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -32(AX)(CX*1), X1
ADDL CX, CX
PSHUFB shifts<>(SB)(CX*8), X1
MOVOU -32(AX)(BX*1), X1
ADDL BX, BX
PSHUFB shifts<>(SB)(BX*8), X1
JMP final1
aes0:
@ -879,7 +883,7 @@ aes17to32:
// load data to be hashed
MOVOU (AX), X2
MOVOU -16(AX)(CX*1), X3
MOVOU -16(AX)(BX*1), X3
// scramble 3 times
AESENC X0, X2
@ -907,8 +911,8 @@ aes33to64:
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
MOVOU -32(AX)(BX*1), X6
MOVOU -16(AX)(BX*1), X7
AESENC X0, X4
AESENC X1, X5
@ -943,10 +947,10 @@ aes65plus:
AESENC X3, X3
// start with last (possibly overlapping) block
MOVOU -64(AX)(CX*1), X4
MOVOU -48(AX)(CX*1), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
MOVOU -64(AX)(BX*1), X4
MOVOU -48(AX)(BX*1), X5
MOVOU -32(AX)(BX*1), X6
MOVOU -16(AX)(BX*1), X7
// scramble state once
AESENC X0, X4
@ -955,8 +959,8 @@ aes65plus:
AESENC X3, X7
// compute number of remaining 64-byte blocks
DECL CX
SHRL $6, CX
DECL BX
SHRL $6, BX
aesloop:
// scramble state, xor in a block
@ -976,7 +980,7 @@ aesloop:
AESENC X7, X7
ADDL $64, AX
DECL CX
DECL BX
JNE aesloop
// 2 more scrambles to finish

View File

@ -408,7 +408,7 @@ TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
// setldt(int entry, int address, int limit)
TEXT runtime·setldt(SB),NOSPLIT,$32
MOVL entry+0(FP), BX // entry
MOVL address+4(FP), CX // base address
MOVL address+4(FP), DX // base address
#ifdef GOOS_android
/*
@ -416,8 +416,8 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
* address currently holds m->tls, which must be %gs:0xf8.
* See cgo/gcc_android_386.c for the derivation of the constant.
*/
SUBL $0xf8, CX
MOVL CX, 0(CX)
SUBL $0xf8, DX
MOVL DX, 0(DX)
#else
/*
* When linking against the system libraries,
@ -429,23 +429,23 @@ TEXT runtime·setldt(SB),NOSPLIT,$32
* To accommodate that rewrite, we translate
* the address here and bump the limit to 0xffffffff (no limit)
* so that -4(GS) maps to 0(address).
* Also, the final 0(GS) (current 4(CX)) has to point
* Also, the final 0(GS) (current 4(DX)) has to point
* to itself, to mimic ELF.
*/
ADDL $0x4, CX // address
MOVL CX, 0(CX)
ADDL $0x4, DX // address
MOVL DX, 0(DX)
// We copy the glibc dynamic linker behaviour of storing the
// __kernel_vsyscall entry point at 0x10(GS) so that it can be invoked
// by "CALL 0x10(GS)" in all situations, not only those where the
// binary is actually dynamically linked.
MOVL runtime·_vdso(SB), AX
MOVL AX, 0x10(CX)
MOVL AX, 0x10(DX)
#endif
// set up user_desc
LEAL 16(SP), AX // struct user_desc
MOVL BX, 0(AX)
MOVL CX, 4(AX)
MOVL DX, 4(AX)
MOVL $0xfffff, 8(AX)
MOVL $(SEG_32BIT|LIMIT_IN_PAGES|USEABLE|CONTENTS_DATA), 12(AX) // flag bits