1
0
mirror of https://github.com/golang/go synced 2024-11-23 19:30:05 -07:00

cmd/compile: intrinsify TrailingZeros16, OnesCount{8,16} for ppc64x

This change implements TrailingZeros16, OnesCount8 and OnesCount16
as intrinsics for ppc64x.

benchmark                       old ns/op     new ns/op     delta
BenchmarkTrailingZeros16-40     2.16          1.61          -25.46%

benchmark                   old ns/op     new ns/op     delta
BenchmarkOnesCount-40       0.71          0.71          +0.00%
BenchmarkOnesCount8-40      0.93          0.69          -25.81%
BenchmarkOnesCount16-40     1.54          0.75          -51.30%
BenchmarkOnesCount32-40     0.75          0.74          -1.33%
BenchmarkOnesCount64-40     0.71          0.71          +0.00%

Change-Id: I010fa9c0ef596a09362870d81193c633e70da637
Reviewed-on: https://go-review.googlesource.com/c/139137
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
Carlos Eduardo Seo 2018-10-01 23:37:00 -03:00 committed by Lynn Boger
parent c96c2a39bb
commit 23578f9d00
3 changed files with 58 additions and 6 deletions

View File

@ -3236,7 +3236,7 @@ func init() {
y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c)
return s.newValue1(ssa.OpCtz64, types.Types[TINT], y)
},
sys.ARM64, sys.S390X)
sys.ARM64, sys.S390X, sys.PPC64)
addF("math/bits", "TrailingZeros8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0])
@ -3427,12 +3427,12 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount16, types.Types[TINT], args[0])
},
sys.ARM64, sys.S390X)
sys.ARM64, sys.S390X, sys.PPC64)
addF("math/bits", "OnesCount8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpPopCount8, types.Types[TINT], args[0])
},
sys.S390X)
sys.S390X, sys.PPC64)
addF("math/bits", "OnesCount",
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
sys.AMD64)

View File

@ -297,6 +297,8 @@
(Ctz64 x) -> (POPCNTD (ANDN <typ.Int64> (ADDconst <typ.Int64> [-1] x) x))
(Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN <typ.Int> (ADDconst <typ.Int> [-1] x) x)))
(Ctz16 x) -> (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
(BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
(BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
@ -304,7 +306,7 @@
(PopCount64 x) -> (POPCNTD x)
(PopCount32 x) -> (POPCNTW (MOVWZreg x))
(PopCount16 x) -> (POPCNTW (MOVHZreg x))
(PopCount8 x) -> (POPCNTB (MOVBreg x))
(PopCount8 x) -> (POPCNTB (MOVBZreg x))
(And(64|32|16|8) x y) -> (AND x y)
(Or(64|32|16|8) x y) -> (OR x y)

View File

@ -105,6 +105,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpConstNil_0(v)
case OpCopysign:
return rewriteValuePPC64_OpCopysign_0(v)
case OpCtz16:
return rewriteValuePPC64_OpCtz16_0(v)
case OpCtz32:
return rewriteValuePPC64_OpCtz32_0(v)
case OpCtz32NonZero:
@ -113,6 +115,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpCtz64_0(v)
case OpCtz64NonZero:
return rewriteValuePPC64_OpCtz64NonZero_0(v)
case OpCtz8:
return rewriteValuePPC64_OpCtz8_0(v)
case OpCvt32Fto32:
return rewriteValuePPC64_OpCvt32Fto32_0(v)
case OpCvt32Fto64:
@ -1323,6 +1327,29 @@ func rewriteValuePPC64_OpCopysign_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpCtz16_0 rewrites a Ctz16 value in place into a tree of
// PPC64 ops and reports that the rewrite happened (it always returns true).
//
// match: (Ctz16 x)
// cond:
// result: (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
//
// NOTE(review): the ANDN of (x-1) with x presumably isolates the trailing-zero
// bits so that the population count yields the trailing-zero count — confirm
// against the PPC64 op definitions.
func rewriteValuePPC64_OpCtz16_0(v *Value) bool {
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types
	arg := v.Args[0]

	// v itself becomes the outermost op; the inner values are allocated
	// outermost-first, matching the nesting of the result pattern.
	v.reset(OpPPC64POPCNTW)
	zeroExt := blk.NewValue0(v.Pos, OpPPC64MOVHZreg, cfgTypes.Int64)
	andNot := blk.NewValue0(v.Pos, OpPPC64ANDN, cfgTypes.Int16)
	minusOne := blk.NewValue0(v.Pos, OpPPC64ADDconst, cfgTypes.Int16)

	// Wire the tree together from the innermost value outwards.
	minusOne.AuxInt = -1
	minusOne.AddArg(arg)
	andNot.AddArg(minusOne)
	andNot.AddArg(arg)
	zeroExt.AddArg(andNot)
	v.AddArg(zeroExt)
	return true
}
func rewriteValuePPC64_OpCtz32_0(v *Value) bool {
b := v.Block
_ = b
@ -1389,6 +1416,29 @@ func rewriteValuePPC64_OpCtz64NonZero_0(v *Value) bool {
return true
}
}
// rewriteValuePPC64_OpCtz8_0 rewrites a Ctz8 value in place into a tree of
// PPC64 ops and reports that the rewrite happened (it always returns true).
//
// match: (Ctz8 x)
// cond:
// result: (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
//
// NOTE(review): the ANDN of (x-1) with x presumably isolates the trailing-zero
// bits so that the population count yields the trailing-zero count — confirm
// against the PPC64 op definitions.
func rewriteValuePPC64_OpCtz8_0(v *Value) bool {
	blk := v.Block
	cfgTypes := &blk.Func.Config.Types
	arg := v.Args[0]

	// v itself becomes the outermost op; the inner values are allocated
	// outermost-first, matching the nesting of the result pattern.
	v.reset(OpPPC64POPCNTB)
	zeroExt := blk.NewValue0(v.Pos, OpPPC64MOVBZreg, cfgTypes.Int64)
	andNot := blk.NewValue0(v.Pos, OpPPC64ANDN, cfgTypes.UInt8)
	minusOne := blk.NewValue0(v.Pos, OpPPC64ADDconst, cfgTypes.UInt8)

	// Wire the tree together from the innermost value outwards.
	minusOne.AuxInt = -1
	minusOne.AddArg(arg)
	andNot.AddArg(minusOne)
	andNot.AddArg(arg)
	zeroExt.AddArg(andNot)
	v.AddArg(zeroExt)
	return true
}
func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool {
b := v.Block
_ = b
@ -26653,11 +26703,11 @@ func rewriteValuePPC64_OpPopCount8_0(v *Value) bool {
_ = typ
// match: (PopCount8 x)
// cond:
// result: (POPCNTB (MOVBreg x))
// result: (POPCNTB (MOVBZreg x))
for {
x := v.Args[0]
v.reset(OpPPC64POPCNTB)
v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
v0.AddArg(x)
v.AddArg(v0)
return true