1
0
mirror of https://github.com/golang/go synced 2024-11-20 06:54:42 -07:00

cmd/compile, math/bits: add rotate rules to PPC64.rules

This adds rules to match the code in math/bits RotateLeft,
RotateLeft32, and RotateLef64 to allow them to be inlined.

The rules are complicated because the code in these function
use different types, and the non-const version of these
shifts generate Mask and Carry instructions that become
subexpressions during the match process.

Also adds a testcase to asm_test.go.

Improvement in math/bits:

BenchmarkRotateLeft-16       1.57     1.32      -15.92%
BenchmarkRotateLeft32-16     1.60     1.37      -14.37%
BenchmarkRotateLeft64-16     1.57     1.32      -15.92%

Updates #21390

Change-Id: Ib6f17669ecc9cab54f18d690be27e2225ca654a4
Reviewed-on: https://go-review.googlesource.com/59932
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Lynn Boger 2017-08-29 11:49:08 -04:00
parent 4768408e5f
commit fa3fe2e3c6
8 changed files with 1923 additions and 5 deletions

View File

@ -265,7 +265,7 @@ var allAsmTests = []*asmTests{
{
arch: "ppc64le",
os: "linux",
imports: []string{"math"},
imports: []string{"math", "math/bits"},
tests: linuxPPC64LETests,
},
{
@ -2004,6 +2004,23 @@ var linuxPPC64LETests = []*asmTest{
`,
pos: []string{"\tROTL\t"},
},
{
fn: `
func f10(a uint32) uint32 {
return bits.RotateLeft32(a, 9)
}
`,
pos: []string{"\tROTLW\t"},
},
{
fn: `
func f11(a uint64) uint64 {
return bits.RotateLeft64(a, 37)
}
`,
pos: []string{"\tROTL\t"},
},
{
// check that stack store is optimized away
fn: `

View File

@ -37,6 +37,7 @@ func initssaconfig() {
Float32: types.Types[TFLOAT32],
Float64: types.Types[TFLOAT64],
Int: types.Types[TINT],
UInt: types.Types[TUINT],
Uintptr: types.Types[TUINTPTR],
String: types.Types[TSTRING],
BytePtr: types.NewPtr(types.Types[TUINT8]),

View File

@ -542,6 +542,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpPPC64ADD, ssa.OpPPC64FADD, ssa.OpPPC64FADDS, ssa.OpPPC64SUB, ssa.OpPPC64FSUB, ssa.OpPPC64FSUBS,
ssa.OpPPC64MULLD, ssa.OpPPC64MULLW, ssa.OpPPC64DIVDU, ssa.OpPPC64DIVWU,
ssa.OpPPC64SRAD, ssa.OpPPC64SRAW, ssa.OpPPC64SRD, ssa.OpPPC64SRW, ssa.OpPPC64SLD, ssa.OpPPC64SLW,
ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS,
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV:

View File

@ -59,6 +59,7 @@ type Types struct {
Int *types.Type
Float32 *types.Type
Float64 *types.Type
UInt *types.Type
Uintptr *types.Type
String *types.Type
BytePtr *types.Type // TODO: use unsafe.Pointer instead?

View File

@ -88,7 +88,7 @@
(ConstNil) -> (MOVDconst [0])
(ConstBool [b]) -> (MOVDconst [b])
// Rotate generation
// Rotate generation with const shift
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
@ -97,6 +97,16 @@
( OR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
(XOR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
// Rotate generation with non-const shift
// these match patterns from math/bits/RotateLeft[32|64], but there could be others
(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) -> (ROTL x y)
(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) -> (ROTLW x y)
(Lsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SLDconst x [c])
(Rsh64x64 x (Const64 [c])) && uint64(c) < 64 -> (SRADconst x [c])
(Rsh64Ux64 x (Const64 [c])) && uint64(c) < 64 -> (SRDconst x [c])
@ -166,10 +176,38 @@
(Rsh8x32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRAWconst (SignExt8to32 x) [c])
(Rsh8Ux32 x (MOVDconst [c])) && uint32(c) < 8 -> (SRWconst (ZeroExt8to32 x) [c])
// non-constant rotates
// These are subexpressions found in statements that can become rotates
// In these cases the shift count is known to be < 64 so the more complicated expressions
// with Mask & Carry is not needed
(Lsh64x64 x (AND y (MOVDconst [63]))) -> (SLD x (ANDconst <typ.Int64> [63] y))
(Lsh64x64 x (ANDconst <typ.Int64> [63] y)) -> (SLD x (ANDconst <typ.Int64> [63] y))
(Rsh64Ux64 x (AND y (MOVDconst [63]))) -> (SRD x (ANDconst <typ.Int64> [63] y))
(Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) -> (SRD x (ANDconst <typ.UInt> [63] y))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (AND y (MOVDconst [63]))) -> (SRAD x (ANDconst <typ.Int64> [63] y))
(Rsh64x64 x (ANDconst <typ.UInt> [63] y)) -> (SRAD x (ANDconst <typ.UInt> [63] y))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) -> (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Rsh64Ux64 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Lsh64x64 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] y))))
(Lsh32x64 x (AND y (MOVDconst [31]))) -> (SLW x (ANDconst <typ.Int32> [31] y))
(Lsh32x64 x (ANDconst <typ.Int32> [31] y)) -> (SLW x (ANDconst <typ.Int32> [31] y))
(Rsh32Ux64 x (AND y (MOVDconst [31]))) -> (SRW x (ANDconst <typ.Int32> [31] y))
(Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) -> (SRW x (ANDconst <typ.UInt> [31] y))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (AND y (MOVDconst [31]))) -> (SRAW x (ANDconst <typ.Int32> [31] y))
(Rsh32x64 x (ANDconst <typ.UInt> [31] y)) -> (SRAW x (ANDconst <typ.UInt> [31] y))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) -> (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x y) -> (SRAW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Rsh32Ux64 x y) -> (SRW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
(Lsh32x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-32] y))))
@ -182,7 +220,6 @@
(Rsh8Ux64 x y) -> (SRW (ZeroExt8to32 x) (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Lsh8x64 x y) -> (SLW x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-8] y))))
(Rsh64x32 x y) -> (SRAD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Rsh64Ux32 x y) -> (SRD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))
(Lsh64x32 x y) -> (SLD x (ORN y <typ.Int64> (MaskIfNotCarry (ADDconstForCarry [-64] (ZeroExt32to64 y)))))

View File

@ -186,6 +186,9 @@ func init() {
{name: "SLD", argLength: 2, reg: gp21, asm: "SLD"}, // arg0 << arg1, 64 bits (0 if arg1 & 64 != 0)
{name: "SLW", argLength: 2, reg: gp21, asm: "SLW"}, // arg0 << arg1, 32 bits (0 if arg1 & 32 != 0)
{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64
{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
{name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + aux
{name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)

View File

@ -1307,6 +1307,8 @@ const (
OpPPC64SRW
OpPPC64SLD
OpPPC64SLW
OpPPC64ROTL
OpPPC64ROTLW
OpPPC64ADDconstForCarry
OpPPC64MaskIfNotCarry
OpPPC64SRADconst
@ -16732,6 +16734,34 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ROTL",
argLen: 2,
asm: ppc64.AROTL,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "ROTLW",
argLen: 2,
asm: ppc64.AROTLW,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "ADDconstForCarry",
auxType: auxInt16,

File diff suppressed because it is too large Load Diff