cmd/compile, math/bits: add rotate rules to PPC64.rules
This adds rules to match the code in math/bits RotateLeft, RotateLeft32,
and RotateLeft64 to allow them to be inlined. The rules are complicated
because the code in these functions uses different types, and the
non-const versions of these shifts generate Mask and Carry instructions
that become subexpressions during the match process.

Also adds a testcase to asm_test.go.

Improvement in math/bits:

BenchmarkRotateLeft-16     1.57   1.32   -15.92%
BenchmarkRotateLeft32-16   1.60   1.37   -14.37%
BenchmarkRotateLeft64-16   1.57   1.32   -15.92%

Updates #21390

Change-Id: Ib6f17669ecc9cab54f18d690be27e2225ca654a4
Reviewed-on: https://go-review.googlesource.com/59932
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
commit fa3fe2e3c6
parent 4768408e5f
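For context, here is a minimal sketch (not part of the commit; the function names are made up) of the two code shapes the PPC64 rewrite rules target. `rotVar` follows the shift-and-mask shape used by math/bits.RotateLeft64, whose "& 63" mask and "64 - s" subexpressions are what the new non-const rules have to match; the constant form corresponds to the existing const-shift rules.

```go
// rotate_sketch.go - illustrative only, not part of this commit.
package main

import (
	"fmt"
	"math/bits"
)

// Constant rotate: matched by the existing const-shift rules
// (SLDconst/SRDconst combined into ROTLconst).
func rotConst(x uint64) uint64 {
	return x<<9 | x>>(64-9)
}

// Variable rotate, written the way math/bits.RotateLeft64 is: the
// "& 63" mask and the "64 - s" expression become the ANDconst/SUB
// subexpressions that the new non-const rules recognize.
func rotVar(x uint64, k int) uint64 {
	s := uint(k) & 63
	return x<<s | x>>(64-s)
}

func main() {
	fmt.Println(rotConst(1))              // 512
	fmt.Println(rotVar(1, 37))            // 1 << 37
	fmt.Println(bits.RotateLeft64(1, 37)) // with these rules, a single ROTL on ppc64le
}
```

With the new rules, the variable form lowers to a single ROTL/ROTLW instead of a shift pair guarded by the mask-and-carry sequence.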
@@ -265,7 +265,7 @@ var allAsmTests = []*asmTests{
	{
		arch:    "ppc64le",
		os:      "linux",
		imports: []string{"math"},
		imports: []string{"math", "math/bits"},
		tests:   linuxPPC64LETests,
	},
	{
@@ -2004,6 +2004,23 @@ var linuxPPC64LETests = []*asmTest{
		`,
		pos: []string{"\tROTL\t"},
	},
	{
		fn: `
		func f10(a uint32) uint32 {
			return bits.RotateLeft32(a, 9)
		}
		`,
		pos: []string{"\tROTLW\t"},
	},
	{
		fn: `
		func f11(a uint64) uint64 {
			return bits.RotateLeft64(a, 37)
		}
		`,
		pos: []string{"\tROTL\t"},
	},

	{
		// check that stack store is optimized away
		fn: `
@@ -37,6 +37,7 @@ func initssaconfig() {
		Float32: types.Types[TFLOAT32],
		Float64: types.Types[TFLOAT64],
		Int:     types.Types[TINT],
		UInt:    types.Types[TUINT],
		Uintptr: types.Types[TUINTPTR],
		String:  types.Types[TSTRING],
		BytePtr: types.NewPtr(types.Types[TUINT8]),
@@ -542,6 +542,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
		ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
		ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
		ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
		ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
		ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS,
		ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:
@@ -59,6 +59,7 @@ type Types struct {
	Int     *types.Type
	Float32 *types.Type
	Float64 *types.Type
	UInt    *types.Type
	Uintptr *types.Type
	String  *types.Type
	BytePtr *types.Type // TODO: use unsafe.Pointer instead?
@@ -88,7 +88,7 @@
(ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVDconst [b])

// Rotate generation
// Rotate generation with const shift
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
@@ -97,6 +97,16 @@
( OR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
(XOR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)

// Rotate generation with non-const shift
// these match patterns from math/bits/RotateLeft[32|64], but there could be others
(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)

(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)

(Lsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SLDconst x [c])
(Rsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SRADconst x [c])
(Rsh64Ux64 x (Const64 [c])) && uint64(c) < 64 -> (SRDconst x [c])
@@ -166,10 +176,38 @@
(Rsh8x32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRAWconst (SignExt8to32 x) [c])
(Rsh8Ux32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRWconst (ZeroExt8to32 x) [c])

// non-constant rotates
// These are subexpressions found in statements that can become rotates
// In these cases the shift count is known to be < 64 so the more complicated expressions
// with Mask & Carry is not needed
(Lsh64x64 x (AND y (MOVDconst [63]))) -> (SLD x (ANDconst <typ.Int64> [63] y))
(Lsh64x64 x (ANDconst <typ.Int64> [63] y)) -> (SLD x (ANDconst <typ.Int64> [63] y))
(Rsh64Ux64 x (AND y (MOVDconst [63]))) -> (SRD x (ANDconst <typ.Int64> [63] y))
(Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) -> (SRD x (ANDconst <typ.UInt> [63] y))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (AND y (MOVDconst [63]))) -> (SRAD x (ANDconst <typ.Int64> [63] y))
(Rsh64x64 x (ANDconst <typ.UInt> [63] y)) -> (SRAD x (ANDconst <typ.UInt> [63] y))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))

(Rsh64x64 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Rsh64Ux64 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Lsh64x64 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))

(Lsh32x64 x (AND y (MOVDconst [31]))) -> (SLW x (ANDconst <typ.Int32> [31] y))
(Lsh32x64 x (ANDconst <typ.Int32> [31] y)) -> (SLW x (ANDconst <typ.Int32> [31] y))

(Rsh32Ux64 x (AND y (MOVDconst [31]))) -> (SRW x (ANDconst <typ.Int32> [31] y))
(Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) -> (SRW x (ANDconst <typ.UInt> [31] y))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))

(Rsh32x64 x (AND y (MOVDconst [31]))) -> (SRAW x (ANDconst <typ.Int32> [31] y))
(Rsh32x64 x (ANDconst <typ.UInt> [31] y)) -> (SRAW x (ANDconst <typ.UInt> [31] y))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))

(Rsh32x64 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Rsh32Ux64 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Lsh32x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
@@ -182,7 +220,6 @@
(Rsh8Ux64 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Lsh8x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))

(Rsh64x32 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Rsh64Ux32 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Lsh64x32 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
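The rule comments above note that when the shift count is already masked with 63 (or 31), the Mask and Carry guard is unnecessary. A small Go-level sketch of that reasoning (illustrative only, not part of the commit; function names are made up):

```go
// shift_semantics_sketch.go - illustrative only, not part of this commit.
package main

import "fmt"

// Go semantics: shifting a uint64 by 64 or more must yield 0, which is why
// the fallback rules wrap the shift count in ORN/MaskIfNotCarry.
func shiftAny(x uint64, s uint) uint64 { return x << s }

// When the count is already s&63 it is provably < 64, so the rules can emit
// a bare SLD/SRD (and, paired with the matching right shift, a ROTL).
func shiftMasked(x uint64, s uint) uint64 { return x << (s & 63) }

func main() {
	fmt.Println(shiftAny(1, 70))    // 0
	fmt.Println(shiftMasked(1, 70)) // 64 (70 & 63 == 6)
}
```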
@@ -186,6 +186,9 @@ func init() {
		{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << arg1, 64 bits (0 if arg1 & 64 != 0)
		{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << arg1, 32 bits (0 if arg1 & 32 != 0)

		{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"},   // arg0 rotate left by arg1 mod 64
		{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32

		{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
		{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)
@@ -1307,6 +1307,8 @@ const (
	OpPPC64SRW
	OpPPC64SLD
	OpPPC64SLW
	OpPPC64ROTL
	OpPPC64ROTLW
	OpPPC64ADDconstForCarry
	OpPPC64MaskIfNotCarry
	OpPPC64SRADconst
@@ -16732,6 +16734,34 @@ var opcodeTable = [...]opInfo{
			},
		},
	},
	{
		name:   "ROTL",
		argLen: 2,
		asm:    ppc64.AROTL,
		reg: regInfo{
			inputs: []inputInfo{
				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
				{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
			},
			outputs: []outputInfo{
				{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
			},
		},
	},
	{
		name:   "ROTLW",
		argLen: 2,
		asm:    ppc64.AROTLW,
		reg: regInfo{
			inputs: []inputInfo{
				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
				{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
			},
			outputs: []outputInfo{
				{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
			},
		},
	},
	{
		name:    "ADDconstForCarry",
		auxType: auxInt16,
File diff suppressed because it is too large