From 7289607b1bee260912311b4eb407d9764d9a3687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= Date: Sun, 4 Feb 2018 20:39:39 +0100 Subject: [PATCH] internal/cpu: align capability definitions for x86 with other architectures Use constant masks and align the definition of isSet with arm64 and ppc64x. Change-Id: I0c6eae30da5e3ce797cde0dab4a39855d4d245d9 Reviewed-on: https://go-review.googlesource.com/94759 Reviewed-by: Brad Fitzpatrick --- src/internal/cpu/cpu_x86.go | 62 +++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/internal/cpu/cpu_x86.go b/src/internal/cpu/cpu_x86.go index 239e728900..17be6eed26 100644 --- a/src/internal/cpu/cpu_x86.go +++ b/src/internal/cpu/cpu_x86.go @@ -14,6 +14,30 @@ func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) // xgetbv with ecx = 0 is implemented in cpu_x86.s. func xgetbv() (eax, edx uint32) +const ( + // edx bits + cpuid_SSE2 = 1 << 26 + + // ecx bits + cpuid_SSE3 = 1 << 0 + cpuid_PCLMULQDQ = 1 << 1 + cpuid_SSSE3 = 1 << 9 + cpuid_FMA = 1 << 12 + cpuid_SSE41 = 1 << 19 + cpuid_SSE42 = 1 << 20 + cpuid_POPCNT = 1 << 23 + cpuid_AES = 1 << 25 + cpuid_OSXSAVE = 1 << 27 + cpuid_AVX = 1 << 28 + + // ebx bits + cpuid_BMI1 = 1 << 3 + cpuid_AVX2 = 1 << 5 + cpuid_BMI2 = 1 << 8 + cpuid_ERMS = 1 << 9 + cpuid_ADX = 1 << 19 +) + func doinit() { maxID, _, _, _ := cpuid(0, 0) @@ -22,40 +46,40 @@ func doinit() { } _, _, ecx1, edx1 := cpuid(1, 0) - X86.HasSSE2 = isSet(26, edx1) + X86.HasSSE2 = isSet(edx1, cpuid_SSE2) - X86.HasSSE3 = isSet(0, ecx1) - X86.HasPCLMULQDQ = isSet(1, ecx1) - X86.HasSSSE3 = isSet(9, ecx1) - X86.HasFMA = isSet(12, ecx1) - X86.HasSSE41 = isSet(19, ecx1) - X86.HasSSE42 = isSet(20, ecx1) - X86.HasPOPCNT = isSet(23, ecx1) - X86.HasAES = isSet(25, ecx1) - X86.HasOSXSAVE = isSet(27, ecx1) + X86.HasSSE3 = isSet(ecx1, cpuid_SSE3) + X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ) + X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3) + X86.HasFMA = isSet(ecx1, cpuid_FMA) + X86.HasSSE41 = isSet(ecx1, cpuid_SSE41) + X86.HasSSE42 = isSet(ecx1, cpuid_SSE42) + X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT) + X86.HasAES = isSet(ecx1, cpuid_AES) + X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE) osSupportsAVX := false // For XGETBV, OSXSAVE bit is required and sufficient. if X86.HasOSXSAVE { eax, _ := xgetbv() // Check if XMM and YMM registers have OS support. - osSupportsAVX = isSet(1, eax) && isSet(2, eax) + osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2) } - X86.HasAVX = isSet(28, ecx1) && osSupportsAVX + X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX if maxID < 7 { return } _, ebx7, _, _ := cpuid(7, 0) - X86.HasBMI1 = isSet(3, ebx7) - X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX - X86.HasBMI2 = isSet(8, ebx7) - X86.HasERMS = isSet(9, ebx7) - X86.HasADX = isSet(19, ebx7) + X86.HasBMI1 = isSet(ebx7, cpuid_BMI1) + X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX + X86.HasBMI2 = isSet(ebx7, cpuid_BMI2) + X86.HasERMS = isSet(ebx7, cpuid_ERMS) + X86.HasADX = isSet(ebx7, cpuid_ADX) } -func isSet(bitpos uint, value uint32) bool { - return value&(1<