1
0
mirror of https://github.com/golang/go synced 2024-11-17 01:04:50 -07:00

cmd/compile: use isel with variable shifts on ppc64x

This changes the code generated for variable length shift
counts to use isel instead of instructions that set and
read the carry flag.

This reduces the generated code for shifts like this
by 1 instruction and avoids the use of instructions to
set and read the carry flag.

This sequence can be found in strconv with these results
on power9:

Atof64Decimal                          71.6ns ± 0%  68.3ns ± 0%   -4.61%
Atof64Float                            95.3ns ± 0%  90.9ns ± 0%   -4.62%
Atof64FloatExp                          153ns ± 0%   149ns ± 0%   -2.61%
Atof64Big                               234ns ± 0%   232ns ± 0%   -0.85%
Atof64RandomBits                        348ns ± 0%   369ns ± 0%   +6.03%
Atof64RandomFloats                      262ns ± 0%   262ns ± 0%     ~
Atof32Decimal                          72.0ns ± 0%  68.2ns ± 0%   -5.28%
Atof32Float                            92.1ns ± 0%  87.1ns ± 0%   -5.43%
Atof32FloatExp                          159ns ± 0%   158ns ± 0%   -0.63%
Atof32Random                            194ns ± 0%   191ns ± 0%   -1.55%

Some tests in codegen/shift.go are enabled to verify the
expected instructions are generated.

Change-Id: I968715d10ada405a8c46132bf19b8ed9b85796d1
Reviewed-on: https://go-review.googlesource.com/c/go/+/227337
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
Lynn Boger 2020-04-01 10:30:05 -04:00
parent 7c0ee1127b
commit a1550d3ca3
3 changed files with 606 additions and 457 deletions

View File

@ -191,9 +191,9 @@
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Rsh64Ux64 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Lsh64x64 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Lsh64x64 x y) -> (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Rsh64x64 x y) -> (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Rsh64Ux64 x y) -> (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Lsh32x64 x (AND y (MOVDconst [31]))) -> (SLW x (ANDconst <typ.Int32> [31] y))
(Lsh32x64 x (ANDconst <typ.Int32> [31] y)) -> (SLW x (ANDconst <typ.Int32> [31] y))
@ -208,70 +208,71 @@
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Rsh32Ux64 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Lsh32x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Rsh32x64 x y) -> (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Rsh32Ux64 x y) -> (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Lsh32x64 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Rsh16x64 x y) -> (SRAW (SignExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
(Rsh16Ux64 x y) -> (SRW (ZeroExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
(Lsh16x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] y))))
(Rsh16x64 x y) -> (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Rsh16Ux64 x y) -> (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Lsh16x64 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Rsh8x64 x y) -> (SRAW (SignExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Rsh8Ux64 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Lsh8x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Rsh8x64 x y) -> (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Rsh8Ux64 x y) -> (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Lsh8x64 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Rsh64x32 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Rsh64Ux32 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Lsh64x32 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Rsh64x32 x y) -> (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Rsh64Ux32 x y) -> (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Lsh64x32 x y) -> (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Rsh32x32 x y) -> (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Rsh32Ux32 x y) -> (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Lsh32x32 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Rsh32x32 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
(Rsh32Ux32 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
(Lsh32x32 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt32to64 y)))))
(Rsh16x32 x y) -> (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Rsh16Ux32 x y) -> (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Lsh16x32 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16]))))
(Rsh16x32 x y) -> (SRAW (SignExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
(Rsh16Ux32 x y) -> (SRW (ZeroExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
(Lsh16x32 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt32to64 y)))))
(Rsh8x32 x y) -> (SRAW (SignExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
(Rsh8Ux32 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
(Lsh8x32 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt32to64 y)))))
(Rsh8x32 x y) -> (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Rsh8Ux32 x y) -> (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Lsh8x32 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
(Rsh64x16 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
(Rsh64Ux16 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
(Lsh64x16 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt16to64 y)))))
(Rsh64x16 x y) -> (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
(Rsh64Ux16 x y) -> (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
(Lsh64x16 x y) -> (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
(Rsh32x16 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
(Rsh32Ux16 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
(Lsh32x16 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt16to64 y)))))
(Rsh32x16 x y) -> (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32]))))
(Rsh32Ux16 x y) -> (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32]))))
(Lsh32x16 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32]))))
(Rsh16x16 x y) -> (SRAW (SignExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
(Rsh16Ux16 x y) -> (SRW (ZeroExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
(Lsh16x16 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt16to64 y)))))
(Rsh16x16 x y) -> (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16]))))
(Rsh16Ux16 x y) -> (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16]))))
(Lsh16x16 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16]))))
(Rsh8x16 x y) -> (SRAW (SignExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
(Rsh8Ux16 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
(Lsh8x16 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt16to64 y)))))
(Rsh8x16 x y) -> (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
(Rsh8Ux16 x y) -> (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
(Lsh8x16 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
(Rsh64x8 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
(Rsh64Ux8 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
(Lsh64x8 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt8to64 y)))))
(Rsh64x8 x y) -> (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
(Rsh64Ux8 x y) -> (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
(Lsh64x8 x y) -> (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
(Rsh32x8 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
(Rsh32Ux8 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
(Lsh32x8 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] (ZeroExt8to64 y)))))
(Rsh32x8 x y) -> (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32]))))
(Rsh32Ux8 x y) -> (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32]))))
(Lsh32x8 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32]))))
(Rsh16x8 x y) -> (SRAW (SignExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
(Rsh16Ux8 x y) -> (SRW (ZeroExt16to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
(Lsh16x8 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-16] (ZeroExt8to64 y)))))
(Rsh16x8 x y) -> (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16]))))
(Rsh16Ux8 x y) -> (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16]))))
(Lsh16x8 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16]))))
(Rsh8x8 x y) -> (SRAW (SignExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
(Rsh8Ux8 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
(Lsh8x8 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] (ZeroExt8to64 y)))))
(Rsh8x8 x y) -> (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
(Rsh8Ux8 x y) -> (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
(Lsh8x8 x y) -> (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
// Cleaning up shift ops when input is masked
(MaskIfNotCarry (ADDconstForCarry [c] (ANDconst [d] _))) && c < 0 && d > 0 && c + d < 0 -> (MOVDconst [-1])
(ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPU (ANDconst [d] y) (MOVDconst [c]))) && c >= d -> (ANDconst [d] y)
(ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPUconst [c] (ANDconst [d] y))) && c >= d -> (ANDconst [d] y)
(ORN x (MOVDconst [-1])) -> x
(ADDconstForCarry [c] (MOVDconst [d])) && int64(int16(c)) < 0 && (int64(int16(c)) < 0 || int64(int16(c)) + d >= 0) -> (FlagCarryClear)

File diff suppressed because it is too large Load Diff

View File

@ -12,61 +12,85 @@ package codegen
func lshMask64x64(v int64, s uint64) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << (s & 63)
}
func rshMask64Ux64(v uint64, s uint64) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func rshMask64x64(v int64, s uint64) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask32x64(v int32, s uint64) int32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v << (s & 63)
}
func rshMask32Ux64(v uint32, s uint64) uint32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func rshMask32x64(v int32, s uint64) int32 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func lshMask64x32(v int64, s uint32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v << (s & 63)
}
func rshMask64Ux32(v uint64, s uint32) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v >> (s & 63)
}
func rshMask64x32(v int64, s uint32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask64x32Ext(v int64, s int32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << uint(s&63)
}
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
func rshMask64x32Ext(v int64, s int32) int64 {
// s390x:-".*AND",-".*MOVDGE"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}