From f1ad4863aae1fd5cd5d0e3e4e6cb6bfae62951a6 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Mon, 17 Oct 2016 17:10:24 -0400 Subject: [PATCH] runtime: get s390x vector facility availability from AT_HWCAP This is a more robust method for obtaining the availability of vx. Since this variable may be checked frequently I've also now padded it so that it will be in its own cache line. I've kept the other check (in hash/crc32) the same for now until I can figure out the best way to update it. Updates #15403. Change-Id: I74eed651afc6f6a9c5fa3b88fa6a2b0c9ecf5875 Reviewed-on: https://go-review.googlesource.com/31149 Reviewed-by: Austin Clements --- src/runtime/asm_s390x.s | 57 ++++------------------------------ src/runtime/os_linux.go | 1 + src/runtime/os_linux_arm.go | 1 - src/runtime/os_linux_noauxv.go | 2 +- src/runtime/os_linux_s390x.go | 32 +++++++++++++++++++ 5 files changed, 40 insertions(+), 53 deletions(-) create mode 100644 src/runtime/os_linux_s390x.go diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index 9ed4d91868..d8f529ef90 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -7,44 +7,6 @@ #include "funcdata.h" #include "textflag.h" -// Indicate the status of vector facility -// -1: init value -// 0: vector not installed -// 1: vector installed and enabled -// 2: vector installed but not enabled - -DATA runtime·vectorfacility+0x00(SB)/4, $-1 -GLOBL runtime·vectorfacility(SB), NOPTR, $4 - -TEXT runtime·checkvectorfacility(SB),NOSPLIT,$32-0 - MOVD $2, R0 - MOVD R1, tmp-32(SP) - MOVD $x-24(SP), R1 - XC $24, 0(R1), 0(R1) -// STFLE 0(R1) - WORD $0xB2B01000 - MOVBZ z-8(SP), R1 - AND $0x40, R1 - BNE vectorinstalled - MOVB $0, runtime·vectorfacility(SB) //Vector not installed - MOVD tmp-32(SP), R1 - MOVD $0, R0 - RET -vectorinstalled: - // check if the vector instruction has been enabled - VLEIB $0, $0xF, V16 - VLGVB $0, V16, R0 - CMPBEQ R0, $0xF, vectorenabled - MOVB $2, runtime·vectorfacility(SB) //Vector installed but not enabled - MOVD tmp-32(SP), R1 - MOVD $0, R0 - RET -vectorenabled: - MOVB $1, runtime·vectorfacility(SB) //Vector installed and enabled - MOVD tmp-32(SP), R1 - MOVD $0, R0 - RET - TEXT runtime·rt0_go(SB),NOSPLIT,$0 // R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer // C TLS base pointer in AR0:AR1 @@ -888,14 +850,14 @@ TEXT runtime·fastrand(SB), NOSPLIT, $0-4 MOVW R3, ret+0(FP) RET -TEXT bytes·IndexByte(SB),NOSPLIT,$0-40 +TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40 MOVD s+0(FP), R3 // s => R3 MOVD s_len+8(FP), R4 // s_len => R4 MOVBZ c+24(FP), R5 // c => R5 MOVD $ret+32(FP), R2 // &ret => R9 BR runtime·indexbytebody(SB) -TEXT strings·IndexByte(SB),NOSPLIT,$0-32 +TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32 MOVD s+0(FP), R3 // s => R3 MOVD s_len+8(FP), R4 // s_len => R4 MOVBZ c+16(FP), R5 // c => R5 @@ -907,7 +869,7 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0-32 // R4: s_len // R5: c -- byte sought // R2: &ret -- address to put index into -TEXT runtime·indexbytebody(SB),NOSPLIT,$0 +TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0 CMPBEQ R4, $0, notfound MOVD R3, R6 // store base for later ADD R3, R4, R8 // the address after the end of the string @@ -931,12 +893,10 @@ notfound: RET large: - MOVB runtime·vectorfacility(SB), R1 - CMPBEQ R1, $-1, checkvector // vectorfacility = -1, vector not checked yet -vectorchecked: - CMPBEQ R1, $1, vectorimpl // vectorfacility = 1, vector supported + MOVBZ ·cpu+facilities_hasVX(SB), R1 + CMPBNE R1, $0, vectorimpl -srstimpl: // vectorfacility != 1, not support or enable vector +srstimpl: // no vector facility MOVBZ R5, R0 // c needs to be in R0, leave until last minute as currently R0 is expected to be 0 srstloop: WORD $0xB25E0083 // srst %r8, %r3 (search the range [R3, R8)) @@ -989,11 +949,6 @@ notalignedloop: CMPBNE R7, R5, notalignedloop BR found -checkvector: - CALL runtime·checkvectorfacility(SB) - MOVB runtime·vectorfacility(SB), R1 - BR vectorchecked - TEXT runtime·return0(SB), NOSPLIT, $0 MOVW $0, R3 RET diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go index 1adabe1a42..4fae7aafcb 100644 --- a/src/runtime/os_linux.go +++ b/src/runtime/os_linux.go @@ -182,6 +182,7 @@ var failthreadcreate = []byte("runtime: failed to create new OS thread\n") const ( _AT_NULL = 0 // End of vector _AT_PAGESZ = 6 // System physical page size + _AT_HWCAP = 16 // hardware capability bit vector _AT_RANDOM = 25 // introduced in 2.6.29 ) diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go index d7d734bc7f..2b0834a5aa 100644 --- a/src/runtime/os_linux_arm.go +++ b/src/runtime/os_linux_arm.go @@ -8,7 +8,6 @@ import "unsafe" const ( _AT_PLATFORM = 15 // introduced in at least 2.6.11 - _AT_HWCAP = 16 // introduced in at least 2.6.11 _HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11 _HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30 diff --git a/src/runtime/os_linux_noauxv.go b/src/runtime/os_linux_noauxv.go index 22522dd803..8f26589e0b 100644 --- a/src/runtime/os_linux_noauxv.go +++ b/src/runtime/os_linux_noauxv.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build !amd64,!arm,!arm64,!mips64,!mips64le +// +build !amd64,!arm,!arm64,!mips64,!mips64le,!s390x package runtime diff --git a/src/runtime/os_linux_s390x.go b/src/runtime/os_linux_s390x.go new file mode 100644 index 0000000000..3ca6d4c8c8 --- /dev/null +++ b/src/runtime/os_linux_s390x.go @@ -0,0 +1,32 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "runtime/internal/sys" +) + +const ( + // bit masks taken from bits/hwcap.h + _HWCAP_S390_VX = 2048 // vector facility +) + +// facilities is padded to avoid false sharing. +type facilities struct { + _ [sys.CacheLineSize]byte + hasVX bool // vector facility + _ [sys.CacheLineSize]byte +} + +// cpu indicates the availability of s390x facilities that can be used in +// Go assembly but are optional on models supported by Go. +var cpu facilities + +func archauxv(tag, val uintptr) { + switch tag { + case _AT_HWCAP: // CPU capability bit flags + cpu.hasVX = val&_HWCAP_S390_VX != 0 + } +}