mirror of
https://github.com/golang/go
synced 2024-11-23 19:30:05 -07:00
cmd/compile: intrinsify TrailingZeros16, OnesCount{8,16} for ppc64x
This change implements TrailingZeros16, OnesCount8 and OnesCount16 as intrinsics for ppc64x.

benchmark                      old ns/op     new ns/op     delta
BenchmarkTrailingZeros16-40    2.16          1.61          -25.46%

benchmark                      old ns/op     new ns/op     delta
BenchmarkOnesCount-40          0.71          0.71          +0.00%
BenchmarkOnesCount8-40         0.93          0.69          -25.81%
BenchmarkOnesCount16-40        1.54          0.75          -51.30%
BenchmarkOnesCount32-40        0.75          0.74          -1.33%
BenchmarkOnesCount64-40        0.71          0.71          +0.00%

Change-Id: I010fa9c0ef596a09362870d81193c633e70da637
Reviewed-on: https://go-review.googlesource.com/c/139137
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
parent
c96c2a39bb
commit
23578f9d00
@ -3236,7 +3236,7 @@ func init() {
|
||||
y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c)
|
||||
return s.newValue1(ssa.OpCtz64, types.Types[TINT], y)
|
||||
},
|
||||
sys.ARM64, sys.S390X)
|
||||
sys.ARM64, sys.S390X, sys.PPC64)
|
||||
addF("math/bits", "TrailingZeros8",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0])
|
||||
@ -3427,12 +3427,12 @@ func init() {
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpPopCount16, types.Types[TINT], args[0])
|
||||
},
|
||||
sys.ARM64, sys.S390X)
|
||||
sys.ARM64, sys.S390X, sys.PPC64)
|
||||
addF("math/bits", "OnesCount8",
|
||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||
return s.newValue1(ssa.OpPopCount8, types.Types[TINT], args[0])
|
||||
},
|
||||
sys.S390X)
|
||||
sys.S390X, sys.PPC64)
|
||||
addF("math/bits", "OnesCount",
|
||||
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
|
||||
sys.AMD64)
|
||||
|
@ -297,6 +297,8 @@
|
||||
|
||||
(Ctz64 x) -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
|
||||
(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
|
||||
(Ctz16 x) -> (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
|
||||
(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
|
||||
|
||||
(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
|
||||
(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
|
||||
@ -304,7 +306,7 @@
|
||||
(PopCount64 x) -> (POPCNTD x)
|
||||
(PopCount32 x) -> (POPCNTW (MOVWZreg x))
|
||||
(PopCount16 x) -> (POPCNTW (MOVHZreg x))
|
||||
(PopCount8 x) -> (POPCNTB (MOVBreg x))
|
||||
(PopCount8 x) -> (POPCNTB (MOVBZreg x))
|
||||
|
||||
(And(64|32|16|8) x y) -> (AND x y)
|
||||
(Or(64|32|16|8) x y) -> (OR x y)
|
||||
|
@ -105,6 +105,8 @@ func rewriteValuePPC64(v *Value) bool {
|
||||
return rewriteValuePPC64_OpConstNil_0(v)
|
||||
case OpCopysign:
|
||||
return rewriteValuePPC64_OpCopysign_0(v)
|
||||
case OpCtz16:
|
||||
return rewriteValuePPC64_OpCtz16_0(v)
|
||||
case OpCtz32:
|
||||
return rewriteValuePPC64_OpCtz32_0(v)
|
||||
case OpCtz32NonZero:
|
||||
@ -113,6 +115,8 @@ func rewriteValuePPC64(v *Value) bool {
|
||||
return rewriteValuePPC64_OpCtz64_0(v)
|
||||
case OpCtz64NonZero:
|
||||
return rewriteValuePPC64_OpCtz64NonZero_0(v)
|
||||
case OpCtz8:
|
||||
return rewriteValuePPC64_OpCtz8_0(v)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValuePPC64_OpCvt32Fto32_0(v)
|
||||
case OpCvt32Fto64:
|
||||
@ -1323,6 +1327,29 @@ func rewriteValuePPC64_OpCopysign_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValuePPC64_OpCtz16_0 rewrites a Ctz16 value in place into
// POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x)) and returns true. The rule
// has no condition, so the rewrite is applied unconditionally.
// NOTE(review): presumably ANDN computes arg0 &^ arg1, making (x-1) &^ x a
// mask of the bits below the lowest set bit of x, whose population count is
// the trailing-zero count — confirm against the PPC64 op definitions.
func rewriteValuePPC64_OpCtz16_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz16 x)
|
||||
// cond:
|
||||
// result: (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
|
||||
for {
|
||||
// Capture the operand before v is reset to the new opcode.
x := v.Args[0]
|
||||
v.reset(OpPPC64POPCNTW)
|
||||
// The MOVHZreg wrapper is typed Int64; the inner ANDN and ADDconst keep the
// Int16 type spelled out in the rule.
v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
|
||||
v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.Int16)
|
||||
v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.Int16)
|
||||
// v2 is (ADDconst [-1] x).
v2.AuxInt = -1
|
||||
v2.AddArg(x)
|
||||
// v1 is (ANDN v2 x): argument order follows the rule text.
v1.AddArg(v2)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
@ -1389,6 +1416,29 @@ func rewriteValuePPC64_OpCtz64NonZero_0(v *Value) bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// rewriteValuePPC64_OpCtz8_0 rewrites a Ctz8 value in place into
// POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x)) and returns true. The rule
// has no condition, so the rewrite is applied unconditionally.
// NOTE(review): presumably ANDN computes arg0 &^ arg1, making (x-1) &^ x a
// mask of the bits below the lowest set bit of x, whose population count is
// the trailing-zero count — confirm against the PPC64 op definitions.
func rewriteValuePPC64_OpCtz8_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
typ := &b.Func.Config.Types
|
||||
_ = typ
|
||||
// match: (Ctz8 x)
|
||||
// cond:
|
||||
// result: (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
|
||||
for {
|
||||
// Capture the operand before v is reset to the new opcode.
x := v.Args[0]
|
||||
v.reset(OpPPC64POPCNTB)
|
||||
// The MOVBZreg wrapper is typed Int64; the inner ANDN and ADDconst keep the
// UInt8 type spelled out in the rule.
v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
|
||||
v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.UInt8)
|
||||
v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.UInt8)
|
||||
// v2 is (ADDconst [-1] x).
v2.AuxInt = -1
|
||||
v2.AddArg(x)
|
||||
// v1 is (ANDN v2 x): argument order follows the rule text.
v1.AddArg(v2)
|
||||
v1.AddArg(x)
|
||||
v0.AddArg(v1)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
}
|
||||
}
|
||||
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
@ -26653,11 +26703,11 @@ func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
|
||||
_ = typ
|
||||
// match: (PopCount8 x)
|
||||
// cond:
|
||||
// result: (POPCNTB (MOVBreg x))
|
||||
// result: (POPCNTB (MOVBZreg x))
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpPPC64POPCNTB)
|
||||
v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
|
||||
v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
return true
|
||||
|
Loading…
Reference in New Issue
Block a user