From c46ffdd2eca339918ed30b6ba9d4715ba769d35d Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Tue, 25 Feb 2020 20:30:37 -0500 Subject: [PATCH] runtime: guard VZEROUPPER on CPU feature In CL 219131 we inserted a VZEROUPPER instruction on darwin/amd64. The instruction is not available on pre-AVX machines. Guard it with CPU feature. Fixes #37459. Change-Id: I9a064df277d091be4ee594eda5c7fd8ee323102b Reviewed-on: https://go-review.googlesource.com/c/go/+/221057 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/runtime/cpuflags.go | 1 + src/runtime/mkpreempt.go | 21 ++++++++++++--------- src/runtime/preempt_amd64.s | 8 +++++--- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/runtime/cpuflags.go b/src/runtime/cpuflags.go index 94f9331d15..4bd894d984 100644 --- a/src/runtime/cpuflags.go +++ b/src/runtime/cpuflags.go @@ -11,6 +11,7 @@ import ( // Offsets into internal/cpu records for use in assembly. const ( + offsetX86HasAVX = unsafe.Offsetof(cpu.X86.HasAVX) offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS) offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2) diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 31b6f5cbac..c26406e55f 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -244,15 +244,6 @@ func genAMD64() { // TODO: MXCSR register? - // Apparently, the signal handling code path in darwin kernel leaves - // the upper bits of Y registers in a dirty state, which causes - // many SSE operations (128-bit and narrower) become much slower. - // Clear the upper bits to get to a clean state. See issue #37174. - // It is safe here as Go code don't use the upper bits of Y registers. - p("#ifdef GOOS_darwin") - p("VZEROUPPER") - p("#endif") - p("PUSHQ BP") p("MOVQ SP, BP") p("// Save flags before clobbering them") @@ -261,6 +252,18 @@ func genAMD64() { p("ADJSP $%d", l.stack) p("// But vet doesn't know ADJSP, so suppress vet stack checking") p("NOP SP") + + // Apparently, the signal handling code path in darwin kernel leaves + // the upper bits of Y registers in a dirty state, which causes + // many SSE operations (128-bit and narrower) become much slower. + // Clear the upper bits to get to a clean state. See issue #37174. + // It is safe here as Go code don't use the upper bits of Y registers. + p("#ifdef GOOS_darwin") + p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0") + p("JE 2(PC)") + p("VZEROUPPER") + p("#endif") + l.save() p("CALL ·asyncPreempt2(SB)") l.restore() diff --git a/src/runtime/preempt_amd64.s b/src/runtime/preempt_amd64.s index 0f2fd7d8dd..4765e9f448 100644 --- a/src/runtime/preempt_amd64.s +++ b/src/runtime/preempt_amd64.s @@ -4,9 +4,6 @@ #include "textflag.h" TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 - #ifdef GOOS_darwin - VZEROUPPER - #endif PUSHQ BP MOVQ SP, BP // Save flags before clobbering them @@ -15,6 +12,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 ADJSP $368 // But vet doesn't know ADJSP, so suppress vet stack checking NOP SP + #ifdef GOOS_darwin + CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0 + JE 2(PC) + VZEROUPPER + #endif MOVQ AX, 0(SP) MOVQ CX, 8(SP) MOVQ DX, 16(SP)