mirror of
https://github.com/golang/go
synced 2024-11-26 02:17:58 -07:00
cmd/compile: intrinsify math.MulUintptr on PPC64
This can be done efficiently with few instructions. This also adds MULHDUCC for further codegen improvement. Change-Id: I06320ba4383a679341b911a237a360ef07b19168 Reviewed-on: https://go-review.googlesource.com/c/go/+/605975 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Archana Ravindar <aravinda@redhat.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
7c7d6d31f3
commit
2b0a157d68
@ -594,7 +594,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||||||
p.To.Reg = r
|
p.To.Reg = r
|
||||||
|
|
||||||
case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
|
case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
|
||||||
ssa.OpPPC64ANDNCC:
|
ssa.OpPPC64ANDNCC, ssa.OpPPC64MULHDUCC:
|
||||||
r1 := v.Args[0].Reg()
|
r1 := v.Args[0].Reg()
|
||||||
r2 := v.Args[1].Reg()
|
r2 := v.Args[1].Reg()
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
|
@ -40,6 +40,8 @@
|
|||||||
(Mul(32|16|8) ...) => (MULLW ...)
|
(Mul(32|16|8) ...) => (MULLW ...)
|
||||||
(Select0 (Mul64uhilo x y)) => (MULHDU x y)
|
(Select0 (Mul64uhilo x y)) => (MULHDU x y)
|
||||||
(Select1 (Mul64uhilo x y)) => (MULLD x y)
|
(Select1 (Mul64uhilo x y)) => (MULLD x y)
|
||||||
|
(Select0 (Mul64uover x y)) => (MULLD x y)
|
||||||
|
(Select1 (Mul64uover x y)) => (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
|
||||||
|
|
||||||
(Div64 [false] x y) => (DIVD x y)
|
(Div64 [false] x y) => (DIVD x y)
|
||||||
(Div64u ...) => (DIVDU ...)
|
(Div64u ...) => (DIVDU ...)
|
||||||
|
@ -199,10 +199,11 @@ func init() {
|
|||||||
{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
|
||||||
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
|
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
|
||||||
|
|
||||||
{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
|
{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
|
||||||
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
|
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
|
||||||
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
|
{name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
|
||||||
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
|
{name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
|
||||||
|
{name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
|
||||||
|
|
||||||
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
|
{name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1
|
||||||
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
|
{name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
|
||||||
|
@ -43,13 +43,13 @@
|
|||||||
// 2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...))
|
// 2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...))
|
||||||
// Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if
|
// Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if
|
||||||
// both ops are in the same block.
|
// both ops are in the same block.
|
||||||
(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
(CMPconst [0] z:((NEG|CNTLZD|RLDICL) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
(CMPconst [0] z:((NEG|CNTLZD|RLDICL) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
// Note: ADDCCconst only assembles to 1 instruction for int16 constants.
|
// Note: ADDCCconst only assembles to 1 instruction for int16 constants.
|
||||||
(CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
(CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
(CMPconst [0] z:(ANDconst [c] x)) && int64(uint16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
(CMPconst [0] z:(ANDconst [c] x)) && int64(uint16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
// And finally, fixup the flag user.
|
// And finally, fixup the flag user.
|
||||||
(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR)CC x y))) => (Select1 <t> z)
|
(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU)CC x y))) => (Select1 <t> z)
|
||||||
(CMPconst <t> [0] (Select0 z:((ADDCCconst|ANDCCconst|NEGCC|CNTLZDCC|RLDICLCC) y))) => (Select1 <t> z)
|
(CMPconst <t> [0] (Select0 z:((ADDCCconst|ANDCCconst|NEGCC|CNTLZDCC|RLDICLCC) y))) => (Select1 <t> z)
|
||||||
|
|
||||||
// After trying to convert ANDconst to ANDCCconst above, if the CC result is not needed, try to avoid using
|
// After trying to convert ANDconst to ANDCCconst above, if the CC result is not needed, try to avoid using
|
||||||
|
@ -2142,6 +2142,7 @@ const (
|
|||||||
OpPPC64MULHD
|
OpPPC64MULHD
|
||||||
OpPPC64MULHW
|
OpPPC64MULHW
|
||||||
OpPPC64MULHDU
|
OpPPC64MULHDU
|
||||||
|
OpPPC64MULHDUCC
|
||||||
OpPPC64MULHWU
|
OpPPC64MULHWU
|
||||||
OpPPC64FMUL
|
OpPPC64FMUL
|
||||||
OpPPC64FMULS
|
OpPPC64FMULS
|
||||||
@ -28869,6 +28870,21 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "MULHDUCC",
|
||||||
|
argLen: 2,
|
||||||
|
commutative: true,
|
||||||
|
asm: ppc64.AMULHDUCC,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "MULHWU",
|
name: "MULHWU",
|
||||||
argLen: 2,
|
argLen: 2,
|
||||||
|
@ -1786,14 +1786,15 @@ func convertPPC64OpToOpCC(op *Value) *Value {
|
|||||||
OpPPC64ADD: OpPPC64ADDCC,
|
OpPPC64ADD: OpPPC64ADDCC,
|
||||||
OpPPC64ADDconst: OpPPC64ADDCCconst,
|
OpPPC64ADDconst: OpPPC64ADDCCconst,
|
||||||
OpPPC64AND: OpPPC64ANDCC,
|
OpPPC64AND: OpPPC64ANDCC,
|
||||||
OpPPC64ANDconst: OpPPC64ANDCCconst,
|
|
||||||
OpPPC64ANDN: OpPPC64ANDNCC,
|
OpPPC64ANDN: OpPPC64ANDNCC,
|
||||||
|
OpPPC64ANDconst: OpPPC64ANDCCconst,
|
||||||
OpPPC64CNTLZD: OpPPC64CNTLZDCC,
|
OpPPC64CNTLZD: OpPPC64CNTLZDCC,
|
||||||
|
OpPPC64MULHDU: OpPPC64MULHDUCC,
|
||||||
|
OpPPC64NEG: OpPPC64NEGCC,
|
||||||
|
OpPPC64NOR: OpPPC64NORCC,
|
||||||
OpPPC64OR: OpPPC64ORCC,
|
OpPPC64OR: OpPPC64ORCC,
|
||||||
OpPPC64RLDICL: OpPPC64RLDICLCC,
|
OpPPC64RLDICL: OpPPC64RLDICLCC,
|
||||||
OpPPC64SUB: OpPPC64SUBCC,
|
OpPPC64SUB: OpPPC64SUBCC,
|
||||||
OpPPC64NEG: OpPPC64NEGCC,
|
|
||||||
OpPPC64NOR: OpPPC64NORCC,
|
|
||||||
OpPPC64XOR: OpPPC64XORCC,
|
OpPPC64XOR: OpPPC64XORCC,
|
||||||
}
|
}
|
||||||
b := op.Block
|
b := op.Block
|
||||||
|
@ -14498,6 +14498,18 @@ func rewriteValuePPC64_OpSelect0(v *Value) bool {
|
|||||||
v.AddArg2(x, y)
|
v.AddArg2(x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (Select0 (Mul64uover x y))
|
||||||
|
// result: (MULLD x y)
|
||||||
|
for {
|
||||||
|
if v_0.Op != OpMul64uover {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
y := v_0.Args[1]
|
||||||
|
x := v_0.Args[0]
|
||||||
|
v.reset(OpPPC64MULLD)
|
||||||
|
v.AddArg2(x, y)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (Select0 (Add64carry x y c))
|
// match: (Select0 (Add64carry x y c))
|
||||||
// result: (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))
|
// result: (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))
|
||||||
for {
|
for {
|
||||||
@ -14558,6 +14570,24 @@ func rewriteValuePPC64_OpSelect1(v *Value) bool {
|
|||||||
v.AddArg2(x, y)
|
v.AddArg2(x, y)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (Select1 (Mul64uover x y))
|
||||||
|
// result: (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
|
||||||
|
for {
|
||||||
|
if v_0.Op != OpMul64uover {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
y := v_0.Args[1]
|
||||||
|
x := v_0.Args[0]
|
||||||
|
v.reset(OpPPC64SETBCR)
|
||||||
|
v.AuxInt = int32ToAuxInt(2)
|
||||||
|
v0 := b.NewValue0(v.Pos, OpPPC64CMPconst, types.TypeFlags)
|
||||||
|
v0.AuxInt = int64ToAuxInt(0)
|
||||||
|
v1 := b.NewValue0(v.Pos, OpPPC64MULHDU, x.Type)
|
||||||
|
v1.AddArg2(x, y)
|
||||||
|
v0.AddArg(v1)
|
||||||
|
v.AddArg(v0)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (Select1 (Add64carry x y c))
|
// match: (Select1 (Add64carry x y c))
|
||||||
// result: (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))))
|
// result: (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))))
|
||||||
for {
|
for {
|
||||||
|
@ -296,6 +296,25 @@ func rewriteValuePPC64latelower_OpPPC64CMPconst(v *Value) bool {
|
|||||||
v.AddArg(convertPPC64OpToOpCC(z))
|
v.AddArg(convertPPC64OpToOpCC(z))
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (CMPconst [0] z:(MULHDU x y))
|
||||||
|
// cond: v.Block == z.Block
|
||||||
|
// result: (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
|
for {
|
||||||
|
if auxIntToInt64(v.AuxInt) != 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
z := v_0
|
||||||
|
if z.Op != OpPPC64MULHDU {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !(v.Block == z.Block) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpPPC64CMPconst)
|
||||||
|
v.AuxInt = int64ToAuxInt(0)
|
||||||
|
v.AddArg(convertPPC64OpToOpCC(z))
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (CMPconst [0] z:(NEG x))
|
// match: (CMPconst [0] z:(NEG x))
|
||||||
// cond: v.Block == z.Block
|
// cond: v.Block == z.Block
|
||||||
// result: (CMPconst [0] convertPPC64OpToOpCC(z))
|
// result: (CMPconst [0] convertPPC64OpToOpCC(z))
|
||||||
@ -505,6 +524,22 @@ func rewriteValuePPC64latelower_OpPPC64CMPconst(v *Value) bool {
|
|||||||
v.AddArg(z)
|
v.AddArg(z)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// match: (CMPconst <t> [0] (Select0 z:(MULHDUCC x y)))
|
||||||
|
// result: (Select1 <t> z)
|
||||||
|
for {
|
||||||
|
t := v.Type
|
||||||
|
if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpSelect0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
z := v_0.Args[0]
|
||||||
|
if z.Op != OpPPC64MULHDUCC {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v.reset(OpSelect1)
|
||||||
|
v.Type = t
|
||||||
|
v.AddArg(z)
|
||||||
|
return true
|
||||||
|
}
|
||||||
// match: (CMPconst <t> [0] (Select0 z:(ADDCCconst y)))
|
// match: (CMPconst <t> [0] (Select0 z:(ADDCCconst y)))
|
||||||
// result: (Select1 <t> z)
|
// result: (Select1 <t> z)
|
||||||
for {
|
for {
|
||||||
|
@ -91,7 +91,7 @@ func initIntrinsics() {
|
|||||||
}
|
}
|
||||||
return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
|
return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
|
||||||
},
|
},
|
||||||
sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.RISCV64, sys.ARM64)
|
sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
|
||||||
add("runtime", "KeepAlive",
|
add("runtime", "KeepAlive",
|
||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
|
data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
|
||||||
|
@ -782,6 +782,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
|||||||
{"ppc64", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
|
{"ppc64", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
|
||||||
{"ppc64", "internal/runtime/math", "Add64"}: struct{}{},
|
{"ppc64", "internal/runtime/math", "Add64"}: struct{}{},
|
||||||
{"ppc64", "internal/runtime/math", "Mul64"}: struct{}{},
|
{"ppc64", "internal/runtime/math", "Mul64"}: struct{}{},
|
||||||
|
{"ppc64", "internal/runtime/math", "MulUintptr"}: struct{}{},
|
||||||
{"ppc64", "internal/runtime/sys", "Len64"}: struct{}{},
|
{"ppc64", "internal/runtime/sys", "Len64"}: struct{}{},
|
||||||
{"ppc64", "internal/runtime/sys", "Len8"}: struct{}{},
|
{"ppc64", "internal/runtime/sys", "Len8"}: struct{}{},
|
||||||
{"ppc64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
|
{"ppc64", "internal/runtime/sys", "OnesCount64"}: struct{}{},
|
||||||
@ -896,6 +897,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
|||||||
{"ppc64le", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
|
{"ppc64le", "internal/runtime/atomic", "Xchguintptr"}: struct{}{},
|
||||||
{"ppc64le", "internal/runtime/math", "Add64"}: struct{}{},
|
{"ppc64le", "internal/runtime/math", "Add64"}: struct{}{},
|
||||||
{"ppc64le", "internal/runtime/math", "Mul64"}: struct{}{},
|
{"ppc64le", "internal/runtime/math", "Mul64"}: struct{}{},
|
||||||
|
{"ppc64le", "internal/runtime/math", "MulUintptr"}: struct{}{},
|
||||||
{"ppc64le", "internal/runtime/sys", "Len64"}: struct{}{},
|
{"ppc64le", "internal/runtime/sys", "Len64"}: struct{}{},
|
||||||
{"ppc64le", "internal/runtime/sys", "Len8"}: struct{}{},
|
{"ppc64le", "internal/runtime/sys", "Len8"}: struct{}{},
|
||||||
{"ppc64le", "internal/runtime/sys", "OnesCount64"}: struct{}{},
|
{"ppc64le", "internal/runtime/sys", "OnesCount64"}: struct{}{},
|
||||||
|
@ -273,6 +273,12 @@ func TestLogicalCompareZero(x *[64]uint64) {
|
|||||||
x[12] = uint64(c)
|
x[12] = uint64(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ppc64x:"MULHDUCC",^"MULHDU"
|
||||||
|
hi, _ := bits.Mul64(x[13], x[14])
|
||||||
|
if hi != 0 {
|
||||||
|
x[14] = hi
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func constantWrite(b bool, p *bool) {
|
func constantWrite(b bool, p *bool) {
|
||||||
|
Loading…
Reference in New Issue
Block a user