From 20046020c4f0b69e89411d9db35b78942392033e Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Thu, 8 Mar 2018 10:33:37 +0000 Subject: [PATCH] cmd/compile: fix an issue in MNEG of ARM64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My previous CL https://go-review.googlesource.com/c/go/+/95075 introduced two suboptimal SSA rules (MNEG/MNEGW by a constant that is 3 or 7 times a power of two). This CL fixes them, and a test case gets about a 10% performance improvement. name old time/op new time/op delta MNEG-4 263µs ± 3% 235µs ± 3% -10.53% (p=0.000 n=20+20) (https://github.com/benshi001/ugo1/blob/master/mneg_7_test.go) Change-Id: I30087097e281dd9d9d1c870d32e13b4ef4a96ad3 Reviewed-on: https://go-review.googlesource.com/99495 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/gen/ARM64.rules | 10 +- src/cmd/compile/internal/ssa/rewriteARM64.go | 146 +++++++++---------- 2 files changed, 70 insertions(+), 86 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index c72ab54032..72458ca71f 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -862,9 +862,9 @@ (MNEG x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst [log2(c)] x)) (MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (NEG (ADDshiftLL x x [log2(c-1)])) (MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) -(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (NEG (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))) +(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) (MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) -(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) +(MNEG x (MOVDconst [c])) && c%7 == 0 && 
isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) (MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) (MNEGW x (MOVDconst [c])) && int32(c)==-1 -> x @@ -873,9 +873,9 @@ (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst [log2(c)] x)) (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (NEG (ADDshiftLL x x [log2(c-1)])) (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (NEG (ADDshiftLL (NEG x) x [log2(c+1)])) -(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (NEG (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))) +(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) (MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))) -(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) +(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))) // div by constant @@ -1667,4 +1667,4 @@ (FSUBS a (FNMULS x y)) -> (FMADDS a x y) (FSUBD a (FNMULD x y)) -> (FMADDD a x y) (FSUBS (FNMULS x y) a) -> (FNMADDS a x y) -(FSUBD (FNMULD x y) a) -> (FNMADDD a x y) +(FSUBD (FNMULD x y) a) -> (FNMADDD a x y) \ No newline at end of file diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index ee92c2f809..a5ff6637cf 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -4828,7 +4828,7 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { } // match: (MNEG x (MOVDconst [c])) // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (NEG (SLLconst [log2(c/3)] 
(ADDshiftLL x x [1]))) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { _ = v.Args[1] x := v.Args[0] @@ -4840,20 +4840,19 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 3) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 1 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } // match: (MNEG (MOVDconst [c]) x) // cond: c%3 == 0 && isPowerOfTwo(c/3) - // result: (NEG (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { _ = v.Args[1] v_0 := v.Args[0] @@ -4865,14 +4864,13 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { if !(c%3 == 0 && isPowerOfTwo(c/3)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 3) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 1 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } @@ -4928,7 +4926,7 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { } // match: (MNEG x (MOVDconst [c])) // cond: c%7 == 0 && isPowerOfTwo(c/7) - // result: (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) for { _ = v.Args[1] x := v.Args[0] @@ -4940,22 +4938,19 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 7) - v1 := b.NewValue0(v.Pos, 
OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v2 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } // match: (MNEG (MOVDconst [c]) x) // cond: c%7 == 0 && isPowerOfTwo(c/7) - // result: (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) for { _ = v.Args[1] v_0 := v.Args[0] @@ -4967,16 +4962,13 @@ func rewriteValueARM64_OpARM64MNEG_10(v *Value) bool { if !(c%7 == 0 && isPowerOfTwo(c/7)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 7) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v2 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } @@ -5325,7 +5317,7 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { } // match: (MNEGW x (MOVDconst [c])) // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { _ = v.Args[1] x := v.Args[0] @@ -5337,20 +5329,19 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 3) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 1 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + 
v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } // match: (MNEGW (MOVDconst [c]) x) // cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))) + // result: (SLLconst [log2(c/3)] (SUBshiftLL x x [2])) for { _ = v.Args[1] v_0 := v.Args[0] @@ -5362,14 +5353,13 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 3) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 1 - v1.AddArg(x) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 3) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 2 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } @@ -5425,7 +5415,7 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { } // match: (MNEGW x (MOVDconst [c])) // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) + // result: (SLLconst [log2(c/7)] (SUBshiftLL x x [3])) for { _ = v.Args[1] x := v.Args[0] @@ -5437,22 +5427,19 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 7) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v2 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } // match: (MNEGW (MOVDconst [c]) x) // cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) - // result: (NEG (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))) + // result: (SLLconst 
[log2(c/7)] (SUBshiftLL x x [3])) for { _ = v.Args[1] v_0 := v.Args[0] @@ -5464,16 +5451,13 @@ func rewriteValueARM64_OpARM64MNEGW_10(v *Value) bool { if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) { break } - v.reset(OpARM64NEG) - v0 := b.NewValue0(v.Pos, OpARM64SLLconst, x.Type) - v0.AuxInt = log2(c / 7) - v1 := b.NewValue0(v.Pos, OpARM64ADDshiftLL, x.Type) - v1.AuxInt = 3 - v2 := b.NewValue0(v.Pos, OpARM64NEG, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v1.AddArg(x) - v0.AddArg(v1) + v.reset(OpARM64SLLconst) + v.Type = x.Type + v.AuxInt = log2(c / 7) + v0 := b.NewValue0(v.Pos, OpARM64SUBshiftLL, x.Type) + v0.AuxInt = 3 + v0.AddArg(x) + v0.AddArg(x) v.AddArg(v0) return true } @@ -22073,4 +22057,4 @@ func rewriteBlockARM64(b *Block) bool { } } return false -} +} \ No newline at end of file