1
0
mirror of https://github.com/golang/go synced 2024-11-17 02:14:42 -07:00
go/test/codegen/shift.go
Lynn Boger 967465da29 cmd/compile: use combined shifts to improve array addressing on ppc64x
This change adds rules to find pairs of instructions that can
be combined into a single shift. These instruction sequences
are common in array addressing within loops. Improvements can
be seen in many crypto packages and the hash packages.

These are based on the extended mnemonics found in the ISA
sections C.8.1 and C.8.2.

Some rules in PPC64.rules were moved because their ordering prevented
some of them from matching.

The following results were generated on power9.

hash/crc32:
    CRC32/poly=Koopman/size=40/align=0          195ns ± 0%     163ns ± 0%  -16.41%
    CRC32/poly=Koopman/size=40/align=1          200ns ± 0%     163ns ± 0%  -18.50%
    CRC32/poly=Koopman/size=512/align=0        1.98µs ± 0%    1.67µs ± 0%  -15.46%
    CRC32/poly=Koopman/size=512/align=1        1.98µs ± 0%    1.69µs ± 0%  -14.80%
    CRC32/poly=Koopman/size=1kB/align=0        3.90µs ± 0%    3.31µs ± 0%  -15.27%
    CRC32/poly=Koopman/size=1kB/align=1        3.85µs ± 0%    3.31µs ± 0%  -14.15%
    CRC32/poly=Koopman/size=4kB/align=0        15.3µs ± 0%    13.1µs ± 0%  -14.22%
    CRC32/poly=Koopman/size=4kB/align=1        15.4µs ± 0%    13.1µs ± 0%  -14.79%
    CRC32/poly=Koopman/size=32kB/align=0        137µs ± 0%     105µs ± 0%  -23.56%
    CRC32/poly=Koopman/size=32kB/align=1        137µs ± 0%     105µs ± 0%  -23.53%

crypto/rc4:
    RC4_128    733ns ± 0%    650ns ± 0%  -11.32%  (p=1.000 n=1+1)
    RC4_1K    5.80µs ± 0%   5.17µs ± 0%  -10.89%  (p=1.000 n=1+1)
    RC4_8K    45.7µs ± 0%   40.8µs ± 0%  -10.73%  (p=1.000 n=1+1)

crypto/sha1:
    Hash8Bytes       635ns ± 0%     613ns ± 0%   -3.46%  (p=1.000 n=1+1)
    Hash320Bytes    2.30µs ± 0%    2.18µs ± 0%   -5.38%  (p=1.000 n=1+1)
    Hash1K          5.88µs ± 0%    5.38µs ± 0%   -8.62%  (p=1.000 n=1+1)
    Hash8K          42.0µs ± 0%    37.9µs ± 0%   -9.75%  (p=1.000 n=1+1)

There are other improvements found in golang.org/x/crypto which are all in the
range of 5-15%.

Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c
Reviewed-on: https://go-review.googlesource.com/c/go/+/252097
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
2020-09-17 12:37:40 +00:00

230 lines
5.6 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// ------------------ //
// masked shifts //
// ------------------ //
// lshMask64x64 shifts the signed 64-bit value v left by the 64-bit count s
// after masking s with 63, so the effective count is always in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func lshMask64x64(v int64, s uint64) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << (s & 63)
}
// rshMask64Ux64 shifts the unsigned 64-bit value v right by the 64-bit
// count s after masking s with 63, so the effective count is always in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func rshMask64Ux64(v uint64, s uint64) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
// rshMask64x64 shifts the signed 64-bit value v right (sign-propagating) by
// the 64-bit count s after masking s with 63, so the effective count is
// always in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
// Every pattern must be a complete quoted string, otherwise the remaining
// checks on that line are not picked up.
func rshMask64x64(v int64, s uint64) int64 {
	// s390x:-".*AND",-".*MOVDGE"
	// ppc64le:"ANDCC",-"ORN",-"ISEL"
	// ppc64:"ANDCC",-"ORN",-"ISEL"
	return v >> (s & 63)
}
// lshMask32x64 shifts the signed 32-bit value v left by the 64-bit count s
// masked with 63. The mask keeps the count in 0..63, which can still reach
// or exceed the 32-bit operand width.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
// Unlike the 64-bit operand variants, the ppc64 lines here expect an ISEL
// match to be present.
func lshMask32x64(v int32, s uint64) int32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v << (s & 63)
}
// rshMask32Ux64 shifts the unsigned 32-bit value v right by the 64-bit
// count s masked with 63. The mask keeps the count in 0..63, which can still
// reach or exceed the 32-bit operand width.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
// Unlike the 64-bit operand variants, the ppc64 lines here expect an ISEL
// match to be present.
func rshMask32Ux64(v uint32, s uint64) uint32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
// rshMask32x64 shifts the signed 32-bit value v right (sign-propagating) by
// the 64-bit count s masked with 63. The mask keeps the count in 0..63, which
// can still reach or exceed the 32-bit operand width.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
// Unlike the 64-bit operand variants, the ppc64 lines here expect an ISEL
// match to be present.
func rshMask32x64(v int32, s uint64) int32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
// lshMask64x32 shifts the signed 64-bit value v left by the unsigned 32-bit
// count s after masking s with 63, so the effective count is always in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func lshMask64x32(v int64, s uint32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v << (s & 63)
}
// rshMask64Ux32 shifts the unsigned 64-bit value v right by the unsigned
// 32-bit count s after masking s with 63, so the effective count is always
// in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func rshMask64Ux32(v uint64, s uint32) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v >> (s & 63)
}
// rshMask64x32 shifts the signed 64-bit value v right (sign-propagating) by
// the unsigned 32-bit count s after masking s with 63, so the effective
// count is always in 0..63.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func rshMask64x32(v int64, s uint32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
// lshMask64x32Ext shifts the signed 64-bit value v left by a signed 32-bit
// count. The count is masked with 63 first, giving a value in 0..63, and
// only then converted to uint for use as the shift amount.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func lshMask64x32Ext(v int64, s int32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << uint(s&63)
}
// rshMask64Ux32Ext shifts the unsigned 64-bit value v right by a signed
// 32-bit count. The count is masked with 63 first, giving a value in 0..63,
// and only then converted to uint for use as the shift amount.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
// rshMask64x32Ext shifts the signed 64-bit value v right (sign-propagating)
// by a signed 32-bit count. The count is masked with 63 first, giving a
// value in 0..63, and only then converted to uint for use as the shift
// amount.
// The per-architecture comments in the body are asmcheck expectations for
// the return statement; a pattern prefixed with '-' is expected to be absent.
func rshMask64x32Ext(v int64, s int32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
// --------------- //
// signed shifts //
// --------------- //
// We do want to generate a test + panicshift for these cases.
//
// lshSigned shifts x left by a signed count of each integer width (int8,
// int16, int32, int64). The results are discarded; only the code generated
// for each statement matters. Each amd64 directive expects the TEST
// instruction whose size suffix corresponds to the count's type.
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
// amd64:"TESTB"
_ = x << v8
// amd64:"TESTW"
_ = x << v16
// amd64:"TESTL"
_ = x << v32
// amd64:"TESTQ"
_ = x << v64
}
// We want to avoid generating a test + panicshift for these cases.
//
// lshSignedMasked shifts x left by a signed count of each integer width
// after masking the count with a small positive constant (7, 15, 31, 63),
// which makes the count non-negative. The results are discarded; only the
// code generated for each statement matters. Each amd64 directive expects
// the corresponding TEST instruction to be absent.
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
// amd64:-"TESTB"
_ = x << (v8 & 7)
// amd64:-"TESTW"
_ = x << (v16 & 15)
// amd64:-"TESTL"
_ = x << (v32 & 31)
// amd64:-"TESTQ"
_ = x << (v64 & 63)
}
// ------------------ //
// bounded shifts //
// ------------------ //
// rshGuarded64 returns v shifted right (sign-propagating) by s when s is
// below 64 and panics for any larger count. The shift sits inside the bound
// check, so the count is known to be in 0..63 at that point. The directive
// on the shift expects the listed AND and MOVDGE patterns to be absent on
// s390x and the Select and LtU patterns to be absent on wasm.
func rshGuarded64(v int64, s uint) int64 {
if s < 64 {
// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
return v >> s
}
// Reached only when s is 64 or more.
panic("shift too large")
}
// rshGuarded64U returns the unsigned value v shifted right by s when s is
// below 64 and panics for any larger count. The shift sits inside the bound
// check, so the count is known to be in 0..63 at that point. The directive
// on the shift expects the listed AND and MOVDGE patterns to be absent on
// s390x and the Select and LtU patterns to be absent on wasm.
func rshGuarded64U(v uint64, s uint) uint64 {
if s < 64 {
// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
return v >> s
}
// Reached only when s is 64 or more.
panic("shift too large")
}
// lshGuarded64 returns v shifted left by s when s is below 64 and panics for
// any larger count. The shift sits inside the bound check, so the count is
// known to be in 0..63 at that point. The directive on the shift expects the
// listed AND and MOVDGE patterns to be absent on s390x and the Select and
// LtU patterns to be absent on wasm.
func lshGuarded64(v int64, s uint) int64 {
if s < 64 {
// s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
return v << s
}
// Reached only when s is 64 or more.
panic("shift too large")
}
// checkUnneededTrunc exercises array indexing and doubling on values that
// have been truncated to a narrower unsigned type. It sums elements of tab
// selected by byte- and uint16-sized indices derived from v, b and h, adds
// twice the low 16 bits of d, and separately computes twice the low 32 bits
// of d. It returns the 32-bit sum and the 64-bit doubled value.
//
// Each statement is preceded by ppc64le and ppc64 asmcheck expectations; a
// pattern prefixed with '-' is expected to be absent. The indexing
// statements expect a CLRLSLDI match and no RLWINM, RLDICR or AND match as
// listed on each line.
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
// Indices formed from byte-sized operands.
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f := tab[byte(v)^b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)&b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)|b]
// Indices formed from uint16-sized operands.
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)&h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)^h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)|h]
// Index formed by masking with a constant instead of a conversion.
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
f += tab[v&0xff]
// Doubling a truncated value; the 32-bit result expects CLRLSLWI.
// ppc64le:-".*AND",".*CLRLSLWI"
// ppc64:-".*AND",".*CLRLSLWI"
f += 2*uint32(uint16(d))
// Same pattern with a 64-bit result, which expects CLRLSLDI.
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
g := 2*uint64(uint32(d))
return f, g
}
// checkCombinedShifts masks or truncates each argument and then shifts the
// result left by a constant, once per operand width. It returns the 8-, 16-,
// 32- and 64-bit results in that order.
//
// Each statement is preceded by ppc64le and ppc64 asmcheck expectations; a
// pattern prefixed with '-' is expected to be absent. Every statement
// expects no AND match, plus a CLRLSLWI match for the 8-, 16- and 32-bit
// cases and a CLRLSLDI match for the 64-bit case.
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) {
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
f := (v8 &0xF) << 2
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
f += byte(v16)<<3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
g := (v16 & 0xFF) << 3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
h := (v32 & 0xFFFFF) << 2
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
h += uint32(v64)<<4
// ppc64le:-"AND","CLRLSLDI"
// ppc64:-"AND","CLRLSLDI"
i := (v64 & 0xFFFFFFFF) << 5
return f, g, h, i
}
// checkWidenAfterShift right-shifts a signed value and converts the result
// to a narrower signed type, alternating between shift counts that leave a
// value already fitting the target width (for example v>>32 into int32) and
// counts that do not (for example v>>31 into int32). It returns the
// accumulated int8 result widened to int64 and the accumulated int16 result
// converted to uint64. The parameter u is not used in the body.
//
// Only ppc64le expectations are given. Statements whose shifted value
// already fits expect the MOVW, MOVH or MOVB pattern to be absent (prefix
// '-'); the others expect it to be present.
// NOTE(review): presumably these MOV patterns are the sign-extending moves
// used for the narrowing conversion; confirm against the ppc64 backend.
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
// ppc64le:-".*MOVW"
f := int32(v>>32)
// ppc64le:".*MOVW"
f += int32(v>>31)
// ppc64le:-".*MOVH"
g := int16(v>>48)
// ppc64le:".*MOVH"
g += int16(v>>30)
// Here the shifted operand is the int32 value f, not v.
// ppc64le:-".*MOVH"
g += int16(f>>16)
// ppc64le:-".*MOVB"
h := int8(v>>56)
// ppc64le:".*MOVB"
h += int8(v>>28)
// ppc64le:-".*MOVB"
h += int8(f>>24)
// ppc64le:".*MOVB"
h += int8(f>>16)
return int64(h),uint64(g)
}