1
0
mirror of https://github.com/golang/go synced 2024-11-26 07:27:59 -07:00

runtime: save R15 before checking AVX state

When in dynlink mode, reading a global can clobber R15.
Just to be safe, save R15 before checking the AVX state to see
if we need to VZEROUPPER or not.

This could cause a problem in buildmodes that aren't supported yet.

Change-Id: I8fda62d3fbe808584774fa5e8d9810a4612a84e5
Reviewed-on: https://go-review.googlesource.com/c/go/+/288452
Trust: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
Keith Randall 2021-02-01 00:45:41 -08:00
parent d2911d7612
commit c49c7a675a
2 changed files with 18 additions and 11 deletions

View File

@ -230,12 +230,16 @@ func genAMD64() {
if reg == "SP" || reg == "BP" { if reg == "SP" || reg == "BP" {
continue continue
} }
if strings.HasPrefix(reg, "X") { if !strings.HasPrefix(reg, "X") {
l.add("MOVUPS", reg, 16)
} else {
l.add("MOVQ", reg, 8) l.add("MOVQ", reg, 8)
} }
} }
lSSE := layout{stack: l.stack, sp: "SP"}
for _, reg := range regNamesAMD64 {
if strings.HasPrefix(reg, "X") {
lSSE.add("MOVUPS", reg, 16)
}
}
// TODO: MXCSR register? // TODO: MXCSR register?
@ -244,10 +248,12 @@ func genAMD64() {
p("// Save flags before clobbering them") p("// Save flags before clobbering them")
p("PUSHFQ") p("PUSHFQ")
p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP") p("// obj doesn't understand ADD/SUB on SP, but does understand ADJSP")
p("ADJSP $%d", l.stack) p("ADJSP $%d", lSSE.stack)
p("// But vet doesn't know ADJSP, so suppress vet stack checking") p("// But vet doesn't know ADJSP, so suppress vet stack checking")
p("NOP SP") p("NOP SP")
l.save()
// Apparently, the signal handling code path in darwin kernel leaves // Apparently, the signal handling code path in darwin kernel leaves
// the upper bits of Y registers in a dirty state, which causes // the upper bits of Y registers in a dirty state, which causes
// many SSE operations (128-bit and narrower) become much slower. // many SSE operations (128-bit and narrower) become much slower.
@ -259,10 +265,11 @@ func genAMD64() {
p("VZEROUPPER") p("VZEROUPPER")
p("#endif") p("#endif")
l.save() lSSE.save()
p("CALL ·asyncPreempt2(SB)") p("CALL ·asyncPreempt2(SB)")
lSSE.restore()
l.restore() l.restore()
p("ADJSP $%d", -l.stack) p("ADJSP $%d", -lSSE.stack)
p("POPFQ") p("POPFQ")
p("POPQ BP") p("POPQ BP")
p("RET") p("RET")

View File

@ -13,11 +13,6 @@ TEXT ·asyncPreempt<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
ADJSP $368 ADJSP $368
// But vet doesn't know ADJSP, so suppress vet stack checking // But vet doesn't know ADJSP, so suppress vet stack checking
NOP SP NOP SP
#ifdef GOOS_darwin
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $0
JE 2(PC)
VZEROUPPER
#endif
MOVQ AX, 0(SP) MOVQ AX, 0(SP)
MOVQ CX, 8(SP) MOVQ CX, 8(SP)
MOVQ DX, 16(SP) MOVQ DX, 16(SP)
@ -32,6 +27,11 @@ TEXT ·asyncPreempt<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
MOVQ R13, 88(SP) MOVQ R13, 88(SP)
MOVQ R14, 96(SP) MOVQ R14, 96(SP)
MOVQ R15, 104(SP) MOVQ R15, 104(SP)
#ifdef GOOS_darwin
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $0
JE 2(PC)
VZEROUPPER
#endif
MOVUPS X0, 112(SP) MOVUPS X0, 112(SP)
MOVUPS X1, 128(SP) MOVUPS X1, 128(SP)
MOVUPS X2, 144(SP) MOVUPS X2, 144(SP)