mirror of
https://github.com/golang/go
synced 2024-11-23 03:50:03 -07:00
all: replace runtime SSE2 detection with GO386 setting
When GO386=sse2 is set, we can assume SSE2 is present without a runtime check. If GO386=softfloat is set, we can avoid using SSE2 even if it is detected. This might slow down memcpy, memclr and bytealg in Go binaries compiled with softfloat on machines that support SSE2. Such setups are rare and should use GO386=sse2 instead if performance matters. On targets that support SSE2 we avoid the runtime overhead of dynamic CPU feature dispatch. The removal of the runtime SSE2 checks also allows internal/cpu to be simplified further, by removing the handling of the required feature option, as a follow-up to this CL. Change-Id: I90a853a8853a405cb665497c6d1a86556947ba17 Reviewed-on: https://go-review.googlesource.com/c/go/+/344350 Trust: Martin Möhrmann <martin@golang.org> Run-TryBot: Martin Möhrmann <martin@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
22540abf76
commit
8157960d7f
@ -374,6 +374,11 @@ func asmArgs(a *Action, p *load.Package) []interface{} {
|
|||||||
args = append(args, "-compiling-runtime")
|
args = append(args, "-compiling-runtime")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cfg.Goarch == "386" {
|
||||||
|
// Define GO386_value from cfg.GO386.
|
||||||
|
args = append(args, "-D", "GO386_"+cfg.GO386)
|
||||||
|
}
|
||||||
|
|
||||||
if cfg.Goarch == "mips" || cfg.Goarch == "mipsle" {
|
if cfg.Goarch == "mips" || cfg.Goarch == "mipsle" {
|
||||||
// Define GOMIPS_value from cfg.GOMIPS.
|
// Define GOMIPS_value from cfg.GOMIPS.
|
||||||
args = append(args, "-D", "GOMIPS_"+cfg.GOMIPS)
|
args = append(args, "-D", "GOMIPS_"+cfg.GOMIPS)
|
||||||
|
@ -11,7 +11,6 @@ import (
|
|||||||
|
|
||||||
// Offsets into internal/cpu records for use in assembly.
|
// Offsets into internal/cpu records for use in assembly.
|
||||||
const (
|
const (
|
||||||
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
|
|
||||||
offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
|
offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
|
||||||
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
||||||
offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
|
offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
|
||||||
|
@ -36,8 +36,9 @@ TEXT cmpbody<>(SB),NOSPLIT,$0-0
|
|||||||
JEQ allsame
|
JEQ allsame
|
||||||
CMPL BP, $4
|
CMPL BP, $4
|
||||||
JB small
|
JB small
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifdef GO386_softfloat
|
||||||
JNE mediumloop
|
JMP mediumloop
|
||||||
|
#endif
|
||||||
largeloop:
|
largeloop:
|
||||||
CMPL BP, $16
|
CMPL BP, $16
|
||||||
JB mediumloop
|
JB mediumloop
|
||||||
|
@ -43,8 +43,9 @@ TEXT memeqbody<>(SB),NOSPLIT,$0-0
|
|||||||
hugeloop:
|
hugeloop:
|
||||||
CMPL BX, $64
|
CMPL BX, $64
|
||||||
JB bigloop
|
JB bigloop
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifdef GO386_softfloat
|
||||||
JNE bigloop
|
JMP bigloop
|
||||||
|
#endif
|
||||||
MOVOU (SI), X0
|
MOVOU (SI), X0
|
||||||
MOVOU (DI), X1
|
MOVOU (DI), X1
|
||||||
MOVOU 16(SI), X2
|
MOVOU 16(SI), X2
|
||||||
|
@ -37,7 +37,6 @@ var X86 struct {
|
|||||||
HasPCLMULQDQ bool
|
HasPCLMULQDQ bool
|
||||||
HasPOPCNT bool
|
HasPOPCNT bool
|
||||||
HasRDTSCP bool
|
HasRDTSCP bool
|
||||||
HasSSE2 bool
|
|
||||||
HasSSE3 bool
|
HasSSE3 bool
|
||||||
HasSSSE3 bool
|
HasSSSE3 bool
|
||||||
HasSSE41 bool
|
HasSSE41 bool
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
// Copyright 2018 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package cpu
|
|
||||||
|
|
||||||
const GOARCH = "386"
|
|
@ -1,7 +0,0 @@
|
|||||||
// Copyright 2018 The Go Authors. All rights reserved.
|
|
||||||
// Use of this source code is governed by a BSD-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package cpu
|
|
||||||
|
|
||||||
const GOARCH = "amd64"
|
|
@ -61,9 +61,6 @@ func doinit() {
|
|||||||
{Name: "sse41", Feature: &X86.HasSSE41},
|
{Name: "sse41", Feature: &X86.HasSSE41},
|
||||||
{Name: "sse42", Feature: &X86.HasSSE42},
|
{Name: "sse42", Feature: &X86.HasSSE42},
|
||||||
{Name: "ssse3", Feature: &X86.HasSSSE3},
|
{Name: "ssse3", Feature: &X86.HasSSSE3},
|
||||||
|
|
||||||
// These capabilities should always be enabled on amd64:
|
|
||||||
{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
maxID, _, _, _ := cpuid(0, 0)
|
maxID, _, _, _ := cpuid(0, 0)
|
||||||
@ -74,8 +71,7 @@ func doinit() {
|
|||||||
|
|
||||||
maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
|
maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
|
||||||
|
|
||||||
_, _, ecx1, edx1 := cpuid(1, 0)
|
_, _, ecx1, _ := cpuid(1, 0)
|
||||||
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
|
|
||||||
|
|
||||||
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
|
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
|
||||||
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
|
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)
|
||||||
|
@ -10,7 +10,6 @@ package cpu_test
|
|||||||
import (
|
import (
|
||||||
. "internal/cpu"
|
. "internal/cpu"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -20,23 +19,6 @@ func TestX86ifAVX2hasAVX(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDisableSSE2(t *testing.T) {
|
|
||||||
runDebugOptionsTest(t, "TestSSE2DebugOption", "cpu.sse2=off")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSSE2DebugOption(t *testing.T) {
|
|
||||||
MustHaveDebugOptionsSupport(t)
|
|
||||||
|
|
||||||
if os.Getenv("GODEBUG") != "cpu.sse2=off" {
|
|
||||||
t.Skipf("skipping test: GODEBUG=cpu.sse2=off not set")
|
|
||||||
}
|
|
||||||
|
|
||||||
want := runtime.GOARCH != "386" // SSE2 can only be disabled on 386.
|
|
||||||
if got := X86.HasSSE2; got != want {
|
|
||||||
t.Errorf("X86.HasSSE2 on %s expected %v, got %v", runtime.GOARCH, want, got)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDisableSSE3(t *testing.T) {
|
func TestDisableSSE3(t *testing.T) {
|
||||||
runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
|
runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
|
||||||
}
|
}
|
||||||
|
@ -838,8 +838,9 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8
|
|||||||
// When no SSE2 support is present do not enforce any serialization
|
// When no SSE2 support is present do not enforce any serialization
|
||||||
// since using CPUID to serialize the instruction stream is
|
// since using CPUID to serialize the instruction stream is
|
||||||
// very costly.
|
// very costly.
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifdef GO386_softfloat
|
||||||
JNE rdtsc
|
JMP rdtsc // no fence instructions available
|
||||||
|
#endif
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
|
CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
|
||||||
JNE fences
|
JNE fences
|
||||||
// Instruction stream serializing RDTSCP is supported.
|
// Instruction stream serializing RDTSCP is supported.
|
||||||
|
@ -15,7 +15,6 @@ const (
|
|||||||
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
|
||||||
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
|
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
|
||||||
offsetX86HasRDTSCP = unsafe.Offsetof(cpu.X86.HasRDTSCP)
|
offsetX86HasRDTSCP = unsafe.Offsetof(cpu.X86.HasRDTSCP)
|
||||||
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
|
|
||||||
|
|
||||||
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
|
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
|
||||||
|
|
||||||
|
@ -30,8 +30,9 @@ tail:
|
|||||||
JBE _5through8
|
JBE _5through8
|
||||||
CMPL BX, $16
|
CMPL BX, $16
|
||||||
JBE _9through16
|
JBE _9through16
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifdef GO386_softfloat
|
||||||
JNE nosse2
|
JMP nosse2
|
||||||
|
#endif
|
||||||
PXOR X0, X0
|
PXOR X0, X0
|
||||||
CMPL BX, $32
|
CMPL BX, $32
|
||||||
JBE _17through32
|
JBE _17through32
|
||||||
|
@ -55,8 +55,9 @@ tail:
|
|||||||
JBE move_5through8
|
JBE move_5through8
|
||||||
CMPL BX, $16
|
CMPL BX, $16
|
||||||
JBE move_9through16
|
JBE move_9through16
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifdef GO386_softfloat
|
||||||
JNE nosse2
|
JMP nosse2
|
||||||
|
#endif
|
||||||
CMPL BX, $32
|
CMPL BX, $32
|
||||||
JBE move_17through32
|
JBE move_17through32
|
||||||
CMPL BX, $64
|
CMPL BX, $64
|
||||||
|
@ -200,6 +200,8 @@ func gen386() {
|
|||||||
l.add("MOVL", reg, 4)
|
l.add("MOVL", reg, 4)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
softfloat := "GO386_softfloat"
|
||||||
|
|
||||||
// Save SSE state only if supported.
|
// Save SSE state only if supported.
|
||||||
lSSE := layout{stack: l.stack, sp: "SP"}
|
lSSE := layout{stack: l.stack, sp: "SP"}
|
||||||
for i := 0; i < 8; i++ {
|
for i := 0; i < 8; i++ {
|
||||||
@ -209,13 +211,13 @@ func gen386() {
|
|||||||
p("ADJSP $%d", lSSE.stack)
|
p("ADJSP $%d", lSSE.stack)
|
||||||
p("NOP SP")
|
p("NOP SP")
|
||||||
l.save()
|
l.save()
|
||||||
p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
|
p("#ifndef %s", softfloat)
|
||||||
lSSE.save()
|
lSSE.save()
|
||||||
label("nosse:")
|
p("#endif")
|
||||||
p("CALL ·asyncPreempt2(SB)")
|
p("CALL ·asyncPreempt2(SB)")
|
||||||
p("CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
|
p("#ifndef %s", softfloat)
|
||||||
lSSE.restore()
|
lSSE.restore()
|
||||||
label("nosse2:")
|
p("#endif")
|
||||||
l.restore()
|
l.restore()
|
||||||
p("ADJSP $%d", -lSSE.stack)
|
p("ADJSP $%d", -lSSE.stack)
|
||||||
|
|
||||||
|
@ -14,8 +14,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||||||
MOVL BP, 16(SP)
|
MOVL BP, 16(SP)
|
||||||
MOVL SI, 20(SP)
|
MOVL SI, 20(SP)
|
||||||
MOVL DI, 24(SP)
|
MOVL DI, 24(SP)
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifndef GO386_softfloat
|
||||||
JNE nosse
|
|
||||||
MOVUPS X0, 28(SP)
|
MOVUPS X0, 28(SP)
|
||||||
MOVUPS X1, 44(SP)
|
MOVUPS X1, 44(SP)
|
||||||
MOVUPS X2, 60(SP)
|
MOVUPS X2, 60(SP)
|
||||||
@ -24,10 +23,9 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
|
|||||||
MOVUPS X5, 108(SP)
|
MOVUPS X5, 108(SP)
|
||||||
MOVUPS X6, 124(SP)
|
MOVUPS X6, 124(SP)
|
||||||
MOVUPS X7, 140(SP)
|
MOVUPS X7, 140(SP)
|
||||||
nosse:
|
#endif
|
||||||
CALL ·asyncPreempt2(SB)
|
CALL ·asyncPreempt2(SB)
|
||||||
CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
|
#ifndef GO386_softfloat
|
||||||
JNE nosse2
|
|
||||||
MOVUPS 140(SP), X7
|
MOVUPS 140(SP), X7
|
||||||
MOVUPS 124(SP), X6
|
MOVUPS 124(SP), X6
|
||||||
MOVUPS 108(SP), X5
|
MOVUPS 108(SP), X5
|
||||||
@ -36,7 +34,7 @@ nosse:
|
|||||||
MOVUPS 60(SP), X2
|
MOVUPS 60(SP), X2
|
||||||
MOVUPS 44(SP), X1
|
MOVUPS 44(SP), X1
|
||||||
MOVUPS 28(SP), X0
|
MOVUPS 28(SP), X0
|
||||||
nosse2:
|
#endif
|
||||||
MOVL 24(SP), DI
|
MOVL 24(SP), DI
|
||||||
MOVL 20(SP), SI
|
MOVL 20(SP), SI
|
||||||
MOVL 16(SP), BP
|
MOVL 16(SP), BP
|
||||||
|
Loading…
Reference in New Issue
Block a user