mirror of
https://github.com/golang/go
synced 2024-11-23 07:20:06 -07:00
cmd/compile: instrinsify math/bits.Mul on ppc64x
Add SSA rules to intrinsify Mul/Mul64 on ppc64x. benchmark old ns/op new ns/op delta BenchmarkMul-40 8.80 0.93 -89.43% BenchmarkMul32-40 1.39 1.39 +0.00% BenchmarkMul64-40 5.39 0.93 -82.75% Updates #24813 Change-Id: I6e95bfbe976a2278bd17799df184a7fbc0e57829 Reviewed-on: https://go-review.googlesource.com/138917 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
f5e58442da
commit
9aed4cc395
@ -3435,12 +3435,12 @@ func init() {
|
||||
addF("math/bits", "OnesCount",
|
||||
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
|
||||
sys.AMD64)
|
||||
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64)
|
||||
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
|
||||
addF("math/bits", "Mul64",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
|
||||
},
|
||||
sys.AMD64, sys.ARM64)
|
||||
sys.AMD64, sys.ARM64, sys.PPC64)
|
||||
|
||||
/******** sync/atomic ********/
|
||||
|
||||
|
@ -153,6 +153,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
p.To.Reg = y
|
||||
}
|
||||
|
||||
case ssa.OpPPC64LoweredMuluhilo:
|
||||
// MULHDU Rarg1, Rarg0, Reg0
|
||||
// MULLD Rarg1, Rarg0, Reg1
|
||||
r0 := v.Args[0].Reg()
|
||||
r1 := v.Args[1].Reg()
|
||||
p := s.Prog(ppc64.AMULHDU)
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = r1
|
||||
p.Reg = r0
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg0()
|
||||
p1 := s.Prog(ppc64.AMULLD)
|
||||
p1.From.Type = obj.TYPE_REG
|
||||
p1.From.Reg = r1
|
||||
p1.Reg = r0
|
||||
p1.To.Type = obj.TYPE_REG
|
||||
p1.To.Reg = v.Reg1()
|
||||
|
||||
case ssa.OpPPC64LoweredAtomicAnd8,
|
||||
ssa.OpPPC64LoweredAtomicOr8:
|
||||
// LWSYNC
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
(Mul64 x y) -> (MULLD x y)
|
||||
(Mul(32|16|8) x y) -> (MULLW x y)
|
||||
(Mul64uhilo x y) -> (LoweredMuluhilo x y)
|
||||
|
||||
(Div64 x y) -> (DIVD x y)
|
||||
(Div64u x y) -> (DIVDU x y)
|
||||
|
@ -135,6 +135,7 @@ func init() {
|
||||
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
|
||||
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
|
||||
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
|
||||
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
||||
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
|
||||
gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}}
|
||||
crgp = regInfo{inputs: nil, outputs: []regMask{gp}}
|
||||
@ -170,6 +171,7 @@ func init() {
|
||||
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
|
||||
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
|
||||
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
|
||||
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
|
||||
|
||||
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
|
||||
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
|
||||
|
@ -1581,6 +1581,7 @@ const (
|
||||
OpPPC64MULHW
|
||||
OpPPC64MULHDU
|
||||
OpPPC64MULHWU
|
||||
OpPPC64LoweredMuluhilo
|
||||
OpPPC64FMUL
|
||||
OpPPC64FMULS
|
||||
OpPPC64FMADD
|
||||
@ -21029,6 +21030,21 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredMuluhilo",
|
||||
argLen: 2,
|
||||
resultNotInArgs: true,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
{1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FMUL",
|
||||
argLen: 2,
|
||||
|
@ -337,6 +337,8 @@ func rewriteValuePPC64(v *Value) bool {
|
||||
return rewriteValuePPC64_OpMul64_0(v)
|
||||
case OpMul64F:
|
||||
return rewriteValuePPC64_OpMul64F_0(v)
|
||||
case OpMul64uhilo:
|
||||
return rewriteValuePPC64_OpMul64uhilo_0(v)
|
||||
case OpMul8:
|
||||
return rewriteValuePPC64_OpMul8_0(v)
|
||||
case OpNeg16:
|
||||
@ -4809,6 +4811,20 @@ func rewriteValuePPC64_OpMul64F_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpMul64uhilo_0(v *Value) bool {
|
||||
// match: (Mul64uhilo x y)
|
||||
// cond:
|
||||
// result: (LoweredMuluhilo x y)
|
||||
for {
|
||||
_ = v.Args[1]
|
||||
x := v.Args[0]
|
||||
y := v.Args[1]
|
||||
v.reset(OpPPC64LoweredMuluhilo)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpMul8_0(v *Value) bool {
|
||||
// match: (Mul8 x y)
|
||||
// cond:
|
||||
|
@ -310,11 +310,13 @@ func IterateBits8(n uint8) int {
|
||||
func Mul(x, y uint) (hi, lo uint) {
|
||||
// amd64:"MULQ"
|
||||
// arm64:"UMULH","MUL"
|
||||
// ppc64: "MULHDU", "MULLD"
|
||||
return bits.Mul(x, y)
|
||||
}
|
||||
|
||||
func Mul64(x, y uint64) (hi, lo uint64) {
|
||||
// amd64:"MULQ"
|
||||
// arm64:"UMULH","MUL"
|
||||
// ppc64: "MULHDU", "MULLD"
|
||||
return bits.Mul64(x, y)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user