mirror of
https://github.com/golang/go
synced 2024-11-17 22:24:47 -07:00
967465da29
This change adds rules to find pairs of instructions that can be combined into a single shift. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger <laboger@linux.vnet.ibm.com> Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
230 lines
5.6 KiB
Go
230 lines
5.6 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// ------------------ //
|
|
// masked shifts //
|
|
// ------------------ //
|
|
|
|
// lshMask64x64 shifts a signed 64-bit value left by a 64-bit unsigned count
// that is masked into the range 0..63. The directives inside the body are
// assembly checks for the shift statement that follows them; a pattern
// prefixed with '-' must not appear in the generated code.
func lshMask64x64(v int64, s uint64) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v << (s & 63)
}
|
|
|
|
// rshMask64Ux64 shifts an unsigned 64-bit value right (logically) by a 64-bit
// unsigned count that is masked into the range 0..63. The directives inside
// the body are assembly checks for the shift statement that follows them; a
// pattern prefixed with '-' must not appear in the generated code.
func rshMask64Ux64(v uint64, s uint64) uint64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> (s & 63)
}
|
|
|
|
// rshMask64x64 shifts a signed 64-bit value right (arithmetically) by a
// 64-bit unsigned count that is masked into the range 0..63. The directives
// inside the body are assembly checks for the shift statement that follows
// them; a pattern prefixed with '-' must not appear in the generated code.
func rshMask64x64(v int64, s uint64) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> (s & 63)
}
|
|
|
|
// lshMask32x64 shifts a signed 32-bit value left by a 64-bit unsigned count
// masked into the range 0..63. The mask is wider than the operand, so counts
// of 32..63 remain possible and yield zero. The directives inside the body
// are assembly checks for the shift statement that follows them; a pattern
// prefixed with '-' must not appear in the generated code.
func lshMask32x64(v int32, s uint64) int32 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ISEL",-"ORN"
	// ppc64:"ISEL",-"ORN"
	return v << (s & 63)
}
|
|
|
|
// rshMask32Ux64 shifts an unsigned 32-bit value right (logically) by a 64-bit
// unsigned count masked into the range 0..63. The mask is wider than the
// operand, so counts of 32..63 remain possible and yield zero. The directives
// inside the body are assembly checks for the shift statement that follows
// them; a pattern prefixed with '-' must not appear in the generated code.
func rshMask32Ux64(v uint32, s uint64) uint32 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ISEL",-"ORN"
	// ppc64:"ISEL",-"ORN"
	return v >> (s & 63)
}
|
|
|
|
// rshMask32x64 shifts a signed 32-bit value right (arithmetically) by a
// 64-bit unsigned count masked into the range 0..63. The mask is wider than
// the operand, so counts of 32..63 remain possible and yield 0 or -1
// depending on the sign. The directives inside the body are assembly checks
// for the shift statement that follows them; a pattern prefixed with '-' must
// not appear in the generated code.
func rshMask32x64(v int32, s uint64) int32 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ISEL",-"ORN"
	// ppc64:"ISEL",-"ORN"
	return v >> (s & 63)
}
|
|
|
|
// lshMask64x32 shifts a signed 64-bit value left by a 32-bit unsigned count
// that is masked into the range 0..63. The directives inside the body are
// assembly checks for the shift statement that follows them; a pattern
// prefixed with '-' must not appear in the generated code.
func lshMask64x32(v int64, s uint32) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN"
	// ppc64:"ANDCC",-"ORN"
	return v << (s & 63)
}
|
|
|
|
// rshMask64Ux32 shifts an unsigned 64-bit value right (logically) by a 32-bit
// unsigned count that is masked into the range 0..63. The directives inside
// the body are assembly checks for the shift statement that follows them; a
// pattern prefixed with '-' must not appear in the generated code.
func rshMask64Ux32(v uint64, s uint32) uint64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN"
	// ppc64:"ANDCC",-"ORN"
	return v >> (s & 63)
}
|
|
|
|
// rshMask64x32 shifts a signed 64-bit value right (arithmetically) by a
// 32-bit unsigned count that is masked into the range 0..63. The directives
// inside the body are assembly checks for the shift statement that follows
// them; a pattern prefixed with '-' must not appear in the generated code.
func rshMask64x32(v int64, s uint32) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> (s & 63)
}
|
|
|
|
// lshMask64x32Ext shifts a signed 64-bit value left by a signed 32-bit count
// that is first masked into the range 0..63 and then converted to uint. The
// directives inside the body are assembly checks for the shift statement
// that follows them; a pattern prefixed with '-' must not appear in the
// generated code.
func lshMask64x32Ext(v int64, s int32) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v << uint(s&63)
}
|
|
|
|
// rshMask64Ux32Ext shifts an unsigned 64-bit value right (logically) by a
// signed 32-bit count that is first masked into the range 0..63 and then
// converted to uint. The directives inside the body are assembly checks for
// the shift statement that follows them; a pattern prefixed with '-' must not
// appear in the generated code.
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> uint(s&63)
}
|
|
|
|
// rshMask64x32Ext shifts a signed 64-bit value right (arithmetically) by a
// signed 32-bit count that is first masked into the range 0..63 and then
// converted to uint. The directives inside the body are assembly checks for
// the shift statement that follows them; a pattern prefixed with '-' must not
// appear in the generated code.
func rshMask64x32Ext(v int64, s int32) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> uint(s&63)
}
|
|
|
|
// --------------- //
|
|
// signed shifts //
|
|
// --------------- //
|
|
|
|
// lshSigned shifts x left by signed counts of every width and discards the
// results. The counts are unconstrained, so a negative count is possible.
// We do want to generate a test + panicshift for these cases.
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:"TESTB"
	_ = x << v8
	// amd64:"TESTW"
	_ = x << v16
	// amd64:"TESTL"
	_ = x << v32
	// amd64:"TESTQ"
	_ = x << v64
}
|
|
|
|
// lshSignedMasked shifts x left by signed counts of every width after masking
// each count with a non-negative constant, then discards the results. The
// mask makes every count non-negative.
// We want to avoid generating a test + panicshift for these cases.
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
	// amd64:-"TESTB"
	_ = x << (v8 & 7)
	// amd64:-"TESTW"
	_ = x << (v16 & 15)
	// amd64:-"TESTL"
	_ = x << (v32 & 31)
	// amd64:-"TESTQ"
	_ = x << (v64 & 63)
}
|
|
|
|
// ------------------ //
|
|
// bounded shifts //
|
|
// ------------------ //
|
|
|
|
// rshGuarded64 returns v shifted right (arithmetically) by s when s is below
// 64 and panics with "shift too large" otherwise. The directive inside the
// guarded branch is an assembly check for the shift statement that follows
// it; a pattern prefixed with '-' must not appear in the generated code.
func rshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
		return v >> s
	}
	panic("shift too large")
}
|
|
|
|
// rshGuarded64U returns v shifted right (logically) by s when s is below 64
// and panics with "shift too large" otherwise. The directive inside the
// guarded branch is an assembly check for the shift statement that follows
// it; a pattern prefixed with '-' must not appear in the generated code.
func rshGuarded64U(v uint64, s uint) uint64 {
	if s < 64 {
		// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
		return v >> s
	}
	panic("shift too large")
}
|
|
|
|
// lshGuarded64 returns v shifted left by s when s is below 64 and panics with
// "shift too large" otherwise. The directive inside the guarded branch is an
// assembly check for the shift statement that follows it; a pattern prefixed
// with '-' must not appear in the generated code.
func lshGuarded64(v int64, s uint) int64 {
	if s < 64 {
		// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
		return v << s
	}
	panic("shift too large")
}
|
|
|
|
// checkUnneededTrunc indexes tab with values that were already narrowed by a
// conversion or a mask, and doubles narrowed copies of d. It returns the sum
// of the looked-up entries plus 2*uint32(uint16(d)) as the first result and
// 2*uint64(uint32(d)) as the second. Each pair of directives is an assembly
// check for the statement that follows it; a pattern prefixed with '-' must
// not appear in the generated code.
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f := tab[byte(v)^b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)&b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[byte(v)|b]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)&h]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)^h]
	// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
	f += tab[uint16(v)|h]
	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
	f += tab[v&0xff]
	// ppc64le:-".*AND",".*CLRLSLWI"
	// ppc64:-".*AND",".*CLRLSLWI"
	f += 2 * uint32(uint16(d))
	// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
	// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
	g := 2 * uint64(uint32(d))
	return f, g
}
|
|
|
|
// checkCombinedShifts applies a mask or narrowing conversion followed by a
// constant left shift to operands of each unsigned width and returns the
// 8-, 16-, 32- and 64-bit results. Each pair of directives is an assembly
// check for the statement that follows it; a pattern prefixed with '-' must
// not appear in the generated code.
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) {
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	f := (v8 & 0xF) << 2
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	f += byte(v16) << 3
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	g := (v16 & 0xFF) << 3
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	h := (v32 & 0xFFFFF) << 2
	// ppc64le:-"AND","CLRLSLWI"
	// ppc64:-"AND","CLRLSLWI"
	h += uint32(v64) << 4
	// ppc64le:-"AND","CLRLSLDI"
	// ppc64:-"AND","CLRLSLDI"
	i := (v64 & 0xFFFFFFFF) << 5
	return f, g, h, i
}
|
|
|
|
// checkWidenAfterShift narrows right-shifted copies of v (and of the
// intermediate 32-bit sum f) to 32, 16 and 8 bits and accumulates them. It
// returns the 8-bit sum widened to int64 and the 16-bit sum converted to
// uint64. The parameter u is not used. Each directive is an assembly check
// for the statement that follows it; a pattern prefixed with '-' must not
// appear in the generated code.
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
	// ppc64le:-".*MOVW"
	f := int32(v >> 32)
	// ppc64le:".*MOVW"
	f += int32(v >> 31)
	// ppc64le:-".*MOVH"
	g := int16(v >> 48)
	// ppc64le:".*MOVH"
	g += int16(v >> 30)
	// ppc64le:-".*MOVH"
	g += int16(f >> 16)
	// ppc64le:-".*MOVB"
	h := int8(v >> 56)
	// ppc64le:".*MOVB"
	h += int8(v >> 28)
	// ppc64le:-".*MOVB"
	h += int8(f >> 24)
	// ppc64le:".*MOVB"
	h += int8(f >> 16)
	return int64(h), uint64(g)
}
|