From 2e5c32518ce6facc507862f4156d4e6ac776754f Mon Sep 17 00:00:00 2001
From: fanzha02 <fannie.zhang@arm.com>
Date: Tue, 21 Aug 2018 04:57:03 +0000
Subject: [PATCH] cmd/compile: optimize math.Copysign on arm64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add rewrite rules to optimize math.Copysign() when the second
argument is a negative floating-point constant.

For example, math.Copysign(c, -2):
the previous compiled output is
  "AND $9223372036854775807, R0, R0; ORR $-9223372036854775808, R0, R0"
the optimized compiled output is
  "ORR $-9223372036854775808, R0, R0"

Math package benchmark results:

name             old time/op  new time/op  delta
Copysign-8       2.61ns ± 2%  2.49ns ± 0%  -4.55%  (p=0.000 n=10+10)
Cos-8            43.0ns ± 0%  41.5ns ± 0%  -3.49%  (p=0.000 n=10+10)
Cosh-8           98.6ns ± 0%  98.1ns ± 0%  -0.51%  (p=0.000 n=10+10)
ExpGo-8           107ns ± 0%   105ns ± 0%  -1.87%  (p=0.000 n=10+10)
Exp2Go-8          100ns ± 0%   100ns ± 0%  +0.39%  (p=0.000 n=10+8)
Max-8            6.56ns ± 2%  6.45ns ± 1%  -1.63%  (p=0.002 n=10+10)
Min-8            6.66ns ± 3%  6.47ns ± 2%  -2.82%  (p=0.006 n=10+10)
Mod-8             107ns ± 1%   104ns ± 1%  -2.72%  (p=0.000 n=10+10)
Frexp-8          11.5ns ± 1%  11.0ns ± 0%  -4.56%  (p=0.000 n=8+10)
HypotGo-8        19.4ns ± 0%  19.4ns ± 0%  +0.36%  (p=0.019 n=10+10)
Ilogb-8          8.63ns ± 0%  8.51ns ± 0%  -1.36%  (p=0.000 n=10+10)
Jn-8              584ns ± 0%   585ns ± 0%  +0.17%  (p=0.000 n=7+8)
Ldexp-8          13.8ns ± 0%  13.5ns ± 0%  -2.17%  (p=0.002 n=8+10)
Logb-8           10.2ns ± 0%   9.9ns ± 0%  -2.65%  (p=0.000 n=10+7)
Nextafter64-8    7.54ns ± 0%  7.51ns ± 0%  -0.37%  (p=0.000 n=10+10)
Remainder-8      73.5ns ± 1%  70.4ns ± 1%  -4.27%  (p=0.000 n=10+10)
SqrtGoLatency-8  79.6ns ± 0%  76.2ns ± 0%  -4.30%  (p=0.000 n=9+10)
Yn-8              582ns ± 0%   579ns ± 0%  -0.52%  (p=0.000 n=10+10)

Change-Id: I0c9cd1ea87435e7b8bab94b4e79e6e29785f25b1
Reviewed-on: https://go-review.googlesource.com/132915
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/compile/internal/ssa/gen/ARM64.rules |  5 +++
 src/cmd/compile/internal/ssa/rewriteARM64.go | 45 ++++++++++++++++++++
 test/codegen/math.go                         |  1 +
 3 files changed, 51 insertions(+)

diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index ede7ed3d7a0..6c8f3860d18 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -101,6 +101,8 @@
 
 // Load args directly into the register class where it will be used.
 (FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
+(FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
+
 // Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
 (MOVDstore ptr (FMOVDfpgp val) mem) -> (FMOVDstore ptr val mem)
 (FMOVDstore ptr (FMOVDgpfp val) mem) -> (MOVDstore ptr val mem)
@@ -1626,6 +1628,9 @@
 
 (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x)
 (SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x)
+
+(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0 -> (ORconst [c1] x)
+
 // bitfield ops
 
 // sbfiz
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index fbdf3529981..219bc3676d8 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -87,6 +87,8 @@ func rewriteValueARM64(v *Value) bool {
 		return rewriteValueARM64_OpARM64FADDD_0(v)
 	case OpARM64FADDS:
 		return rewriteValueARM64_OpARM64FADDS_0(v)
+	case OpARM64FMOVDfpgp:
+		return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
 	case OpARM64FMOVDgpfp:
 		return rewriteValueARM64_OpARM64FMOVDgpfp_0(v)
 	case OpARM64FMOVDload:
@@ -3960,6 +3962,30 @@ func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	// match: (FMOVDfpgp <t> (Arg [off] {sym}))
+	// cond:
+	// result: @b.Func.Entry (Arg <t> [off] {sym})
+	for {
+		t := v.Type
+		v_0 := v.Args[0]
+		if v_0.Op != OpArg {
+			break
+		}
+		off := v_0.AuxInt
+		sym := v_0.Aux
+		b = b.Func.Entry
+		v0 := b.NewValue0(v.Pos, OpArg, t)
+		v.reset(OpCopy)
+		v.AddArg(v0)
+		v0.AuxInt = off
+		v0.Aux = sym
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
 	b := v.Block
 	_ = b
@@ -21834,6 +21860,25 @@ func rewriteValueARM64_OpARM64ORconst_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (ORconst [c1] (ANDconst [c2] x))
+	// cond: c2|c1 == ^0
+	// result: (ORconst [c1] x)
+	for {
+		c1 := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ANDconst {
+			break
+		}
+		c2 := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c2|c1 == ^0) {
+			break
+		}
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c1
+		v.AddArg(x)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
diff --git a/test/codegen/math.go b/test/codegen/math.go
index 1ecba26847a..99335d2efc7 100644
--- a/test/codegen/math.go
+++ b/test/codegen/math.go
@@ -74,6 +74,7 @@ func copysign(a, b, c float64) {
 	// amd64:"BTSQ\t[$]63"
 	// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
 	// ppc64le:"FCPSGN"
+	// arm64:"ORR\t[$]-9223372036854775808"
 	sink64[1] = math.Copysign(c, -1)

 	// Like math.Copysign(c, -1), but with integer operations. Useful
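
Editor's note (not part of the patch): the new ORconst rule is sound because whenever c2|c1 == ^0, every bit that the AND mask c2 clears is set again by the OR mask c1, so (x & c2) | c1 == x | c1. For math.Copysign(c, -2), the AND mask 9223372036854775807 (0x7fffffffffffffff) and the OR mask -9223372036854775808 (0x8000000000000000) together cover all 64 bits, leaving a single ORR. The sketch below checks that identity at the bit level in Go; copysignNeg and signBit are illustrative names invented here, not identifiers from the Go tree.

package main

import (
	"fmt"
	"math"
)

// signBit is the IEEE 754 float64 sign-bit mask, i.e. the ORR immediate
// -9223372036854775808 reinterpreted as a uint64 (illustrative constant).
const signBit = 1 << 63

// copysignNeg mimics what math.Copysign(c, -2) now compiles to on arm64:
// the second argument is a negative constant, so its sign bit is known to
// be 1 and a single OR of the sign bit suffices. The AND that would clear
// c's old sign bit is redundant, since the OR forces that bit to 1 anyway.
func copysignNeg(c float64) float64 {
	return math.Float64frombits(math.Float64bits(c) | signBit)
}

func main() {
	// Spot-check the identity against the library function.
	for _, c := range []float64{3.5, -3.5, 0, math.Inf(1)} {
		fmt.Println(math.Copysign(c, -2) == copysignNeg(c)) // true for each c
	}
}

This single-OR form is exactly what the new line in test/codegen/math.go asserts: on arm64, math.Copysign with a negative constant second argument emits ORR $-9223372036854775808 with no preceding AND.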