mirror of
https://github.com/golang/go
synced 2024-11-05 17:36:15 -07:00
runtime: simplify detection of preference to use AVX memmove
Reduces cmd/go by 4464 bytes on amd64. Removes the duplicate detection of AVX support and presence of Intel processors. Change-Id: I4670189951a63760fae217708f68d65e94a30dc5 Reviewed-on: https://go-review.googlesource.com/41570 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
16271b8b52
commit
b64e817853
@ -67,14 +67,16 @@ has_cpuid:
|
||||
JNE notintel
|
||||
CMPL CX, $0x6C65746E // "ntel"
|
||||
JNE notintel
|
||||
MOVB $1, runtime·isIntel(SB)
|
||||
MOVB $1, runtime·lfenceBeforeRdtsc(SB)
|
||||
notintel:
|
||||
|
||||
// Load EAX=1 cpuid flags
|
||||
MOVL $1, AX
|
||||
CPUID
|
||||
MOVL CX, AX // Move to global variable clobbers CX when generating PIC
|
||||
MOVL AX, runtime·cpuid_ecx(SB)
|
||||
MOVL CX, DI // Move to global variable clobbers CX when generating PIC
|
||||
MOVL AX, runtime·cpuid_eax(SB)
|
||||
MOVL DI, runtime·cpuid_ecx(SB)
|
||||
MOVL DX, runtime·cpuid_edx(SB)
|
||||
|
||||
// Check for MMX support
|
||||
|
@ -41,12 +41,14 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
|
||||
JNE notintel
|
||||
CMPL CX, $0x6C65746E // "ntel"
|
||||
JNE notintel
|
||||
MOVB $1, runtime·isIntel(SB)
|
||||
MOVB $1, runtime·lfenceBeforeRdtsc(SB)
|
||||
notintel:
|
||||
|
||||
// Load EAX=1 cpuid flags
|
||||
MOVQ $1, AX
|
||||
CPUID
|
||||
MOVL AX, runtime·cpuid_eax(SB)
|
||||
MOVL CX, runtime·cpuid_ecx(SB)
|
||||
MOVL DX, runtime·cpuid_edx(SB)
|
||||
|
||||
|
@ -32,8 +32,19 @@ TEXT runtime·rt0_go(SB),NOSPLIT,$0
|
||||
CPUID
|
||||
CMPQ AX, $0
|
||||
JE nocpuinfo
|
||||
|
||||
CMPL BX, $0x756E6547 // "Genu"
|
||||
JNE notintel
|
||||
CMPL DX, $0x49656E69 // "ineI"
|
||||
JNE notintel
|
||||
CMPL CX, $0x6C65746E // "ntel"
|
||||
JNE notintel
|
||||
MOVB $1, runtime·isIntel(SB)
|
||||
notintel:
|
||||
|
||||
MOVQ $1, AX
|
||||
CPUID
|
||||
MOVL AX, runtime·cpuid_eax(SB)
|
||||
MOVL CX, runtime·cpuid_ecx(SB)
|
||||
MOVL DX, runtime·cpuid_edx(SB)
|
||||
nocpuinfo:
|
||||
|
@ -4,72 +4,17 @@
|
||||
|
||||
package runtime
|
||||
|
||||
var vendorStringBytes [12]byte
|
||||
var maxInputValue uint32
|
||||
var featureFlags uint32
|
||||
var processorVersionInfo uint32
|
||||
|
||||
var useRepMovs = true
|
||||
|
||||
func hasFeature(feature uint32) bool {
|
||||
return (featureFlags & feature) != 0
|
||||
}
|
||||
|
||||
func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) // implemented in cpuidlow_amd64.s
|
||||
func xgetbv_low(arg1 uint32) (eax, edx uint32) // implemented in cpuidlow_amd64.s
|
||||
var useAVXmemmove bool
|
||||
|
||||
func init() {
|
||||
const cfOSXSAVE uint32 = 1 << 27
|
||||
const cfAVX uint32 = 1 << 28
|
||||
|
||||
leaf0()
|
||||
leaf1()
|
||||
|
||||
enabledAVX := false
|
||||
// Let's check if OS has set CR4.OSXSAVE[bit 18]
|
||||
// to enable XGETBV instruction.
|
||||
if hasFeature(cfOSXSAVE) {
|
||||
eax, _ := xgetbv_low(0)
|
||||
// Let's check that XCR0[2:1] = ‘11b’
|
||||
// i.e. XMM state and YMM state are enabled by OS.
|
||||
enabledAVX = (eax & 0x6) == 0x6
|
||||
}
|
||||
|
||||
isIntelBridgeFamily := (processorVersionInfo == 0x206A0 ||
|
||||
processorVersionInfo == 0x206D0 ||
|
||||
processorVersionInfo == 0x306A0 ||
|
||||
processorVersionInfo == 0x306E0) &&
|
||||
isIntel()
|
||||
|
||||
useRepMovs = !(hasFeature(cfAVX) && enabledAVX) || isIntelBridgeFamily
|
||||
}
|
||||
|
||||
func leaf0() {
|
||||
eax, ebx, ecx, edx := cpuid_low(0, 0)
|
||||
maxInputValue = eax
|
||||
int32ToBytes(ebx, vendorStringBytes[0:4])
|
||||
int32ToBytes(edx, vendorStringBytes[4:8])
|
||||
int32ToBytes(ecx, vendorStringBytes[8:12])
|
||||
}
|
||||
|
||||
func leaf1() {
|
||||
if maxInputValue < 1 {
|
||||
return
|
||||
}
|
||||
eax, _, ecx, _ := cpuid_low(1, 0)
|
||||
// Let's remove stepping and reserved fields
|
||||
processorVersionInfo = eax & 0x0FFF3FF0
|
||||
featureFlags = ecx
|
||||
}
|
||||
processorVersionInfo := cpuid_eax & 0x0FFF3FF0
|
||||
|
||||
func int32ToBytes(arg uint32, buffer []byte) {
|
||||
buffer[3] = byte(arg >> 24)
|
||||
buffer[2] = byte(arg >> 16)
|
||||
buffer[1] = byte(arg >> 8)
|
||||
buffer[0] = byte(arg)
|
||||
}
|
||||
isIntelBridgeFamily := isIntel &&
|
||||
(processorVersionInfo == 0x206A0 ||
|
||||
processorVersionInfo == 0x206D0 ||
|
||||
processorVersionInfo == 0x306A0 ||
|
||||
processorVersionInfo == 0x306E0)
|
||||
|
||||
func isIntel() bool {
|
||||
intelSignature := [12]byte{'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l'}
|
||||
return vendorStringBytes == intelSignature
|
||||
useAVXmemmove = support_avx && !isIntelBridgeFamily
|
||||
}
|
||||
|
@ -1,22 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// func cpuid_low(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·cpuid_low(SB), 4, $0-24
|
||||
MOVL arg1+0(FP), AX
|
||||
MOVL arg2+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
// func xgetbv_low(arg1 uint32) (eax, edx uint32)
|
||||
TEXT ·xgetbv_low(SB), 4, $0-16
|
||||
MOVL arg1+0(FP), CX
|
||||
// XGETBV
|
||||
BYTE $0x0F; BYTE $0x01; BYTE $0xD0
|
||||
MOVL AX,eax+8(FP)
|
||||
MOVL DX,edx+12(FP)
|
||||
RET
|
@ -64,8 +64,8 @@ tail:
|
||||
JBE move_129through256
|
||||
// TODO: use branch table and BSR to make this just a single dispatch
|
||||
|
||||
TESTB $1, runtime·useRepMovs(SB)
|
||||
JZ avxUnaligned
|
||||
TESTB $1, runtime·useAVXmemmove(SB)
|
||||
JNZ avxUnaligned
|
||||
|
||||
/*
|
||||
* check and set for backwards
|
||||
|
@ -727,10 +727,12 @@ var (
|
||||
newprocs int32
|
||||
|
||||
// Information about what cpu features are available.
|
||||
// Set on startup in asm_{x86,amd64}.s.
|
||||
// Set on startup in asm_{386,amd64,amd64p32}.s.
|
||||
cpuid_eax uint32
|
||||
cpuid_ecx uint32
|
||||
cpuid_edx uint32
|
||||
cpuid_ebx7 uint32
|
||||
cpuid_ebx7 uint32 // not set on amd64p32
|
||||
isIntel bool
|
||||
lfenceBeforeRdtsc bool
|
||||
support_avx bool
|
||||
support_avx2 bool
|
||||
|
Loading…
Reference in New Issue
Block a user