1
0
mirror of https://github.com/golang/go synced 2024-11-17 15:44:40 -07:00

runtime: guard VZEROUPPER on CPU feature

In CL 219131 we inserted a VZEROUPPER instruction on darwin/amd64.
The instruction is not available on pre-AVX machines, so guard it
with a check of the AVX CPU feature flag (cpu.X86.HasAVX).

Fixes #37459.

Change-Id: I9a064df277d091be4ee594eda5c7fd8ee323102b
Reviewed-on: https://go-review.googlesource.com/c/go/+/221057
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Cherry Zhang 2020-02-25 20:30:37 -05:00
parent 089e482b3d
commit c46ffdd2ec
3 changed files with 18 additions and 12 deletions

View File

@ -11,6 +11,7 @@ import (
// Offsets into internal/cpu records for use in assembly.
const (
offsetX86HasAVX = unsafe.Offsetof(cpu.X86.HasAVX)
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)

View File

@ -244,15 +244,6 @@ func genAMD64() {
// TODO: MXCSR register?
// Apparently, the signal handling code path in the darwin kernel leaves
// the upper bits of Y registers in a dirty state, which causes
// many SSE operations (128-bit and narrower) to become much slower.
// Clear the upper bits to get to a clean state. See issue #37174.
// It is safe here as Go code doesn't use the upper bits of Y registers.
p("#ifdef GOOS_darwin")
p("VZEROUPPER")
p("#endif")
p("PUSHQ BP")
p("MOVQ SP, BP")
p("// Save flags before clobbering them")
@ -261,6 +252,18 @@ func genAMD64() {
p("ADJSP $%d", l.stack)
p("// But vet doesn't know ADJSP, so suppress vet stack checking")
p("NOP SP")
// Apparently, the signal handling code path in the darwin kernel leaves
// the upper bits of Y registers in a dirty state, which causes
// many SSE operations (128-bit and narrower) to become much slower.
// Clear the upper bits to get to a clean state. See issue #37174.
// It is safe here as Go code doesn't use the upper bits of Y registers.
p("#ifdef GOOS_darwin")
p("CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $0")
p("JE 2(PC)")
p("VZEROUPPER")
p("#endif")
l.save()
p("CALL ·asyncPreempt2(SB)")
l.restore()

View File

@ -4,9 +4,6 @@
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
#ifdef GOOS_darwin
VZEROUPPER
#endif
PUSHQ BP
MOVQ SP, BP
// Save flags before clobbering them
@ -15,6 +12,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
ADJSP $368
// But vet doesn't know ADJSP, so suppress vet stack checking
NOP SP
#ifdef GOOS_darwin
CMPB internalcpu·X86+const_offsetX86HasAVX(SB), $0
JE 2(PC)
VZEROUPPER
#endif
MOVQ AX, 0(SP)
MOVQ CX, 8(SP)
MOVQ DX, 16(SP)