mirror of
https://github.com/golang/go
synced 2024-11-26 07:27:59 -07:00
runtime: save R15 before checking AVX state
When in dynlink mode, reading a global can clobber R15. Just to be safe, save R15 before checking the AVX state to see if we need to VZEROUPPER or not. This could cause a problem in buildmodes that aren't supported yet. Change-Id: I8fda62d3fbe808584774fa5e8d9810a4612a84e5 Reviewed-on: https://go-review.googlesource.com/c/go/+/288452 Trust: Keith Randall <khr@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
d2911d7612
commit
c49c7a675a
@ -230,12 +230,16 @@ func genAMD64() {
|
|||||||
if reg == "SP" || reg == "BP" {
|
if reg == "SP" || reg == "BP" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(reg, "X") {
|
if !strings.HasPrefix(reg, "X") {
|
||||||
l.add("MOVUPS", reg, 16)
|
|
||||||
} else {
|
|
||||||
l.add("MOVQ", reg, 8)
|
l.add("MOVQ", reg, 8)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
lSSE := layout{stack: l.stack, sp: "SP"}
|
||||||
|
for _, reg := range regNamesAMD64 {
|
||||||
|
if strings.HasPrefix(reg, "X") {
|
||||||
|
lSSE.add("MOVUPS", reg, 16)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: MXCSR register?
|
// TODO: MXCSR register?
|
||||||
|
|
||||||
@ -244,10 +248,12 @@ func genAMD64() {
|
|||||||
p("// Save flags before clobbering them")
|
p("// Save flags before clobbering them")
|
||||||
p("PUSHFQ")
|
p("PUSHFQ")
|
||||||
p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
|
p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
|
||||||
p("ADJSP $%d", l.stack)
|
p("ADJSP $%d", lSSE.stack)
|
||||||
p("// But vet doesn't know ADJSP, so suppress vet stack checking")
|
p("// But vet doesn't know ADJSP, so suppress vet stack checking")
|
||||||
p("NOP SP")
|
p("NOP SP")
|
||||||
|
|
||||||
|
l.save()
|
||||||
|
|
||||||
// Apparently, the signal handling code path in darwin kernel leaves
|
// Apparently, the signal handling code path in darwin kernel leaves
|
||||||
// the upper bits of Y registers in a dirty state, which causes
|
// the upper bits of Y registers in a dirty state, which causes
|
||||||
// many SSE operations (128-bit and narrower) become much slower.
|
// many SSE operations (128-bit and narrower) become much slower.
|
||||||
@ -259,10 +265,11 @@ func genAMD64() {
|
|||||||
p("VZEROUPPER")
|
p("VZEROUPPER")
|
||||||
p("#endif")
|
p("#endif")
|
||||||
|
|
||||||
l.save()
|
lSSE.save()
|
||||||
p("CALL ·asyncPreempt2(SB)")
|
p("CALL ·asyncPreempt2(SB)")
|
||||||
|
lSSE.restore()
|
||||||
l.restore()
|
l.restore()
|
||||||
p("ADJSP $%d", -l.stack)
|
p("ADJSP $%d", -lSSE.stack)
|
||||||
p("POPFQ")
|
p("POPFQ")
|
||||||
p("POPQ BP")
|
p("POPQ BP")
|
||||||
p("RET")
|
p("RET")
|
||||||
|
@ -13,11 +13,6 @@ TEXT ·asyncPreempt<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
|
|||||||
ADJSP $368
|
ADJSP $368
|
||||||
// But vet doesn't know ADJSP, so suppress vet stack checking
|
// But vet doesn't know ADJSP, so suppress vet stack checking
|
||||||
NOP SP
|
NOP SP
|
||||||
#ifdef GOOS_darwin
|
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
|
|
||||||
JE 2(PC)
|
|
||||||
VZEROUPPER
|
|
||||||
#endif
|
|
||||||
MOVQ AX, 0(SP)
|
MOVQ AX, 0(SP)
|
||||||
MOVQ CX, 8(SP)
|
MOVQ CX, 8(SP)
|
||||||
MOVQ DX, 16(SP)
|
MOVQ DX, 16(SP)
|
||||||
@ -32,6 +27,11 @@ TEXT ·asyncPreempt<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
|
|||||||
MOVQ R13, 88(SP)
|
MOVQ R13, 88(SP)
|
||||||
MOVQ R14, 96(SP)
|
MOVQ R14, 96(SP)
|
||||||
MOVQ R15, 104(SP)
|
MOVQ R15, 104(SP)
|
||||||
|
#ifdef GOOS_darwin
|
||||||
|
CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
|
||||||
|
JE 2(PC)
|
||||||
|
VZEROUPPER
|
||||||
|
#endif
|
||||||
MOVUPS X0, 112(SP)
|
MOVUPS X0, 112(SP)
|
||||||
MOVUPS X1, 128(SP)
|
MOVUPS X1, 128(SP)
|
||||||
MOVUPS X2, 144(SP)
|
MOVUPS X2, 144(SP)
|
||||||
|
Loading…
Reference in New Issue
Block a user