From 23578f9d00bb0d6e92f18d7d6ea961d7c6c52260 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Seo Date: Mon, 1 Oct 2018 23:37:00 -0300 Subject: [PATCH] cmd/compile: intrinsify TrailingZeros16, OnesCount{8,16} for ppc64x This change implements TrailingZeros16, OnesCount8 and OnesCount16 as intrinsics for ppc64x. benchmark old ns/op new ns/op delta BenchmarkTrailingZeros16-40 2.16 1.61 -25.46% benchmark old ns/op new ns/op delta BenchmarkOnesCount-40 0.71 0.71 +0.00% BenchmarkOnesCount8-40 0.93 0.69 -25.81% BenchmarkOnesCount16-40 1.54 0.75 -51.30% BenchmarkOnesCount32-40 0.75 0.74 -1.33% BenchmarkOnesCount64-40 0.71 0.71 +0.00% Change-Id: I010fa9c0ef596a09362870d81193c633e70da637 Reviewed-on: https://go-review.googlesource.com/c/139137 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Lynn Boger --- src/cmd/compile/internal/gc/ssa.go | 6 +-- src/cmd/compile/internal/ssa/gen/PPC64.rules | 4 +- src/cmd/compile/internal/ssa/rewritePPC64.go | 54 +++++++++++++++++++- 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 469fbb8c969..5b11e15655f 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -3236,7 +3236,7 @@ func init() { y := s.newValue2(ssa.OpOr64, types.Types[TUINT64], x, c) return s.newValue1(ssa.OpCtz64, types.Types[TINT], y) }, - sys.ARM64, sys.S390X) + sys.ARM64, sys.S390X, sys.PPC64) addF("math/bits", "TrailingZeros8", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { x := s.newValue1(ssa.OpZeroExt8to32, types.Types[TUINT32], args[0]) @@ -3427,12 +3427,12 @@ func init() { func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpPopCount16, types.Types[TINT], args[0]) }, - sys.ARM64, sys.S390X) + sys.ARM64, sys.S390X, sys.PPC64) addF("math/bits", "OnesCount8", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpPopCount8, types.Types[TINT], args[0]) }, - sys.S390X) 
+ sys.S390X, sys.PPC64) addF("math/bits", "OnesCount", makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32), sys.AMD64) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 21c12591c53..be1bd6de0bf 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -297,6 +297,8 @@ (Ctz64 x) -> (POPCNTD (ANDN (ADDconst [-1] x) x)) (Ctz32 x) -> (POPCNTW (MOVWZreg (ANDN (ADDconst [-1] x) x))) +(Ctz16 x) -> (POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x))) +(Ctz8 x) -> (POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x))) (BitLen64 x) -> (SUB (MOVDconst [64]) (CNTLZD x)) (BitLen32 x) -> (SUB (MOVDconst [32]) (CNTLZW x)) @@ -304,7 +306,7 @@ (PopCount64 x) -> (POPCNTD x) (PopCount32 x) -> (POPCNTW (MOVWZreg x)) (PopCount16 x) -> (POPCNTW (MOVHZreg x)) -(PopCount8 x) -> (POPCNTB (MOVBreg x)) +(PopCount8 x) -> (POPCNTB (MOVBZreg x)) (And(64|32|16|8) x y) -> (AND x y) (Or(64|32|16|8) x y) -> (OR x y) diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 9aff3106dba..8f100c1a38c 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -105,6 +105,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpConstNil_0(v) case OpCopysign: return rewriteValuePPC64_OpCopysign_0(v) + case OpCtz16: + return rewriteValuePPC64_OpCtz16_0(v) case OpCtz32: return rewriteValuePPC64_OpCtz32_0(v) case OpCtz32NonZero: @@ -113,6 +115,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpCtz64_0(v) case OpCtz64NonZero: return rewriteValuePPC64_OpCtz64NonZero_0(v) + case OpCtz8: + return rewriteValuePPC64_OpCtz8_0(v) case OpCvt32Fto32: return rewriteValuePPC64_OpCvt32Fto32_0(v) case OpCvt32Fto64: @@ -1323,6 +1327,29 @@ func rewriteValuePPC64_OpCopysign_0(v *Value) bool { return true } } +func rewriteValuePPC64_OpCtz16_0(v *Value) bool { + b := v.Block + _ = b 
+ typ := &b.Func.Config.Types + _ = typ + // match: (Ctz16 x) + // cond: + // result: (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x))) + for { + x := v.Args[0] + v.reset(OpPPC64POPCNTW) + v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.Int16) + v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.Int16) + v2.AuxInt = -1 + v2.AddArg(x) + v1.AddArg(v2) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} func rewriteValuePPC64_OpCtz32_0(v *Value) bool { b := v.Block _ = b @@ -1389,6 +1416,29 @@ func rewriteValuePPC64_OpCtz64NonZero_0(v *Value) bool { return true } } +func rewriteValuePPC64_OpCtz8_0(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ + // match: (Ctz8 x) + // cond: + // result: (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x))) + for { + x := v.Args[0] + v.reset(OpPPC64POPCNTB) + v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.UInt8) + v2 := b.NewValue0(v.Pos, OpPPC64ADDconst, typ.UInt8) + v2.AuxInt = -1 + v2.AddArg(x) + v1.AddArg(v2) + v1.AddArg(x) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} func rewriteValuePPC64_OpCvt32Fto32_0(v *Value) bool { b := v.Block _ = b @@ -26653,11 +26703,11 @@ func rewriteValuePPC64_OpPopCount8_0(v *Value) bool { _ = typ // match: (PopCount8 x) // cond: - // result: (POPCNTB (MOVBreg x)) + // result: (POPCNTB (MOVBZreg x)) for { x := v.Args[0] v.reset(OpPPC64POPCNTB) - v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64) + v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) v0.AddArg(x) v.AddArg(v0) return true