mirror of
https://github.com/golang/go
synced 2024-11-11 22:20:22 -07:00
cmd/compile: optimize ARM's math.Abs
This CL optimizes math.Abs to an inline ABSD instruction on ARM. The benchmark results of src/math/ show big improvements. name old time/op new time/op delta Acos-4 181ns ± 0% 182ns ± 0% +0.30% (p=0.000 n=40+40) Acosh-4 202ns ± 0% 202ns ± 0% ~ (all equal) Asin-4 163ns ± 0% 163ns ± 0% ~ (all equal) Asinh-4 242ns ± 0% 242ns ± 0% ~ (all equal) Atan-4 120ns ± 0% 121ns ± 0% +0.83% (p=0.000 n=40+40) Atanh-4 202ns ± 0% 202ns ± 0% ~ (all equal) Atan2-4 173ns ± 0% 173ns ± 0% ~ (all equal) Cbrt-4 1.06µs ± 0% 1.06µs ± 0% +0.09% (p=0.000 n=39+37) Ceil-4 72.9ns ± 0% 72.8ns ± 0% ~ (p=0.237 n=40+40) Copysign-4 13.2ns ± 0% 13.2ns ± 0% ~ (all equal) Cos-4 193ns ± 0% 183ns ± 0% -5.18% (p=0.000 n=40+40) Cosh-4 254ns ± 0% 239ns ± 0% -5.91% (p=0.000 n=40+40) Erf-4 112ns ± 0% 112ns ± 0% ~ (all equal) Erfc-4 117ns ± 0% 117ns ± 0% ~ (all equal) Erfinv-4 127ns ± 0% 127ns ± 1% ~ (p=0.492 n=40+40) Erfcinv-4 128ns ± 0% 128ns ± 0% ~ (all equal) Exp-4 212ns ± 0% 206ns ± 0% -3.05% (p=0.000 n=40+40) ExpGo-4 216ns ± 0% 209ns ± 0% -3.24% (p=0.000 n=40+40) Expm1-4 142ns ± 0% 142ns ± 0% ~ (all equal) Exp2-4 191ns ± 0% 184ns ± 0% -3.45% (p=0.000 n=40+40) Exp2Go-4 194ns ± 0% 187ns ± 0% -3.61% (p=0.000 n=40+40) Abs-4 14.4ns ± 0% 6.3ns ± 0% -56.39% (p=0.000 n=38+39) Dim-4 12.6ns ± 0% 12.6ns ± 0% ~ (all equal) Floor-4 49.6ns ± 0% 49.6ns ± 0% ~ (all equal) Max-4 27.6ns ± 0% 27.6ns ± 0% ~ (all equal) Min-4 27.0ns ± 0% 27.0ns ± 0% ~ (all equal) Mod-4 349ns ± 0% 305ns ± 1% -12.55% (p=0.000 n=33+40) Frexp-4 54.0ns ± 0% 47.1ns ± 0% -12.78% (p=0.000 n=38+38) Gamma-4 242ns ± 0% 234ns ± 0% -3.16% (p=0.000 n=36+40) Hypot-4 84.8ns ± 0% 67.8ns ± 0% -20.05% (p=0.000 n=31+35) HypotGo-4 88.5ns ± 0% 71.6ns ± 0% -19.12% (p=0.000 n=40+38) Ilogb-4 45.8ns ± 0% 38.9ns ± 0% -15.12% (p=0.000 n=40+32) J0-4 821ns ± 0% 802ns ± 0% -2.33% (p=0.000 n=33+40) J1-4 816ns ± 0% 807ns ± 0% -1.05% (p=0.000 n=40+29) Jn-4 1.67µs ± 0% 1.65µs ± 0% -1.45% (p=0.000 n=40+39) Ldexp-4 61.5ns ± 0% 54.6ns ± 0% -11.27% (p=0.000 n=40+32) Lgamma-4 188ns ± 0% 188ns ± 0% ~ (all equal) Log-4 154ns ± 0% 147ns ± 0% -4.78% (p=0.000 n=40+40) Logb-4 50.9ns ± 0% 42.7ns ± 0% -16.11% (p=0.000 n=34+39) Log1p-4 160ns ± 0% 159ns ± 0% ~ (p=0.828 n=40+40) Log10-4 173ns ± 0% 166ns ± 0% -4.05% (p=0.000 n=40+40) Log2-4 65.3ns ± 0% 58.4ns ± 0% -10.57% (p=0.000 n=37+37) Modf-4 36.4ns ± 0% 36.4ns ± 0% ~ (all equal) Nextafter32-4 36.4ns ± 0% 36.4ns ± 0% ~ (all equal) Nextafter64-4 32.7ns ± 0% 32.6ns ± 0% ~ (p=0.375 n=40+40) PowInt-4 300ns ± 0% 277ns ± 0% -7.78% (p=0.000 n=40+40) PowFrac-4 676ns ± 0% 635ns ± 0% -6.00% (p=0.000 n=40+35) Pow10Pos-4 17.6ns ± 0% 17.6ns ± 0% ~ (all equal) Pow10Neg-4 22.0ns ± 0% 22.0ns ± 0% ~ (all equal) Round-4 30.1ns ± 0% 30.1ns ± 0% ~ (all equal) RoundToEven-4 38.9ns ± 0% 38.9ns ± 0% ~ (all equal) Remainder-4 291ns ± 0% 263ns ± 0% -9.62% (p=0.000 n=40+40) Signbit-4 11.3ns ± 0% 11.3ns ± 0% ~ (all equal) Sin-4 185ns ± 0% 185ns ± 0% ~ (all equal) Sincos-4 230ns ± 0% 230ns ± 0% ~ (all equal) Sinh-4 253ns ± 0% 246ns ± 0% -2.77% (p=0.000 n=39+39) SqrtIndirect-4 41.4ns ± 0% 41.4ns ± 0% ~ (all equal) SqrtLatency-4 13.8ns ± 0% 13.8ns ± 0% ~ (all equal) SqrtIndirectLatency-4 37.0ns ± 0% 37.0ns ± 0% ~ (p=0.632 n=40+40) SqrtGoLatency-4 911ns ± 0% 911ns ± 0% +0.08% (p=0.000 n=40+40) SqrtPrime-4 13.2µs ± 0% 13.2µs ± 0% +0.01% (p=0.038 n=38+40) Tan-4 205ns ± 0% 205ns ± 0% ~ (all equal) Tanh-4 264ns ± 0% 247ns ± 0% -6.44% (p=0.000 n=39+32) Trunc-4 45.2ns ± 0% 45.2ns ± 0% ~ (all equal) Y0-4 796ns ± 0% 792ns ± 0% -0.55% (p=0.000 n=35+40) Y1-4 804ns ± 0% 797ns ± 0% -0.82% (p=0.000 n=24+40) Yn-4 1.64µs ± 0% 1.62µs ± 0% -1.27% (p=0.000 n=40+39) Float64bits-4 8.16ns ± 0% 8.16ns ± 0% +0.04% (p=0.000 n=35+40) Float64frombits-4 10.7ns ± 0% 10.7ns ± 0% ~ (all equal) Float32bits-4 7.53ns ± 0% 7.53ns ± 0% ~ (p=0.760 n=40+40) Float32frombits-4 6.91ns ± 0% 6.91ns ± 0% -0.04% (p=0.002 n=32+38) [Geo mean] 111ns 106ns -3.98% Change-Id: I54f4fd7f5160db020b430b556bde59cc0fdb996d Reviewed-on: https://go-review.googlesource.com/c/go/+/188678 Run-TryBot: Ben Shi <powerman1st@163.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
8403d4ea90
commit
c683ab8128
@ -655,6 +655,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||||||
ssa.OpARMSQRTD,
|
ssa.OpARMSQRTD,
|
||||||
ssa.OpARMNEGF,
|
ssa.OpARMNEGF,
|
||||||
ssa.OpARMNEGD,
|
ssa.OpARMNEGD,
|
||||||
|
ssa.OpARMABSD,
|
||||||
ssa.OpARMMOVWF,
|
ssa.OpARMMOVWF,
|
||||||
ssa.OpARMMOVWD,
|
ssa.OpARMMOVWD,
|
||||||
ssa.OpARMMOVFW,
|
ssa.OpARMMOVFW,
|
||||||
|
@ -3297,7 +3297,7 @@ func init() {
|
|||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
|
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.ARM64, sys.PPC64, sys.Wasm)
|
sys.ARM64, sys.ARM, sys.PPC64, sys.Wasm)
|
||||||
addF("math", "Copysign",
|
addF("math", "Copysign",
|
||||||
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
|
return s.newValue2(ssa.OpCopysign, types.Types[TFLOAT64], args[0], args[1])
|
||||||
|
@ -56,6 +56,7 @@
|
|||||||
(Com(32|16|8) x) -> (MVN x)
|
(Com(32|16|8) x) -> (MVN x)
|
||||||
|
|
||||||
(Sqrt x) -> (SQRTD x)
|
(Sqrt x) -> (SQRTD x)
|
||||||
|
(Abs x) -> (ABSD x)
|
||||||
|
|
||||||
// TODO: optimize this for ARMv5 and ARMv6
|
// TODO: optimize this for ARMv5 and ARMv6
|
||||||
(Ctz32NonZero x) -> (Ctz32 x)
|
(Ctz32NonZero x) -> (Ctz32 x)
|
||||||
|
@ -211,6 +211,7 @@ func init() {
|
|||||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||||
|
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||||
|
|
||||||
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
|
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
|
||||||
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
|
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
|
||||||
|
@ -926,6 +926,7 @@ const (
|
|||||||
OpARMNEGF
|
OpARMNEGF
|
||||||
OpARMNEGD
|
OpARMNEGD
|
||||||
OpARMSQRTD
|
OpARMSQRTD
|
||||||
|
OpARMABSD
|
||||||
OpARMCLZ
|
OpARMCLZ
|
||||||
OpARMREV
|
OpARMREV
|
||||||
OpARMREV16
|
OpARMREV16
|
||||||
@ -12298,6 +12299,19 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ABSD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: arm.AABSD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "CLZ",
|
name: "CLZ",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
@ -420,6 +420,8 @@ func rewriteValueARM(v *Value) bool {
|
|||||||
return rewriteValueARM_OpARMXORshiftRLreg_0(v)
|
return rewriteValueARM_OpARMXORshiftRLreg_0(v)
|
||||||
case OpARMXORshiftRR:
|
case OpARMXORshiftRR:
|
||||||
return rewriteValueARM_OpARMXORshiftRR_0(v)
|
return rewriteValueARM_OpARMXORshiftRR_0(v)
|
||||||
|
case OpAbs:
|
||||||
|
return rewriteValueARM_OpAbs_0(v)
|
||||||
case OpAdd16:
|
case OpAdd16:
|
||||||
return rewriteValueARM_OpAdd16_0(v)
|
return rewriteValueARM_OpAdd16_0(v)
|
||||||
case OpAdd32:
|
case OpAdd32:
|
||||||
@ -17179,6 +17181,17 @@ func rewriteValueARM_OpARMXORshiftRR_0(v *Value) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
func rewriteValueARM_OpAbs_0(v *Value) bool {
|
||||||
|
// match: (Abs x)
|
||||||
|
// cond:
|
||||||
|
// result: (ABSD x)
|
||||||
|
for {
|
||||||
|
x := v.Args[0]
|
||||||
|
v.reset(OpARMABSD)
|
||||||
|
v.AddArg(x)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
func rewriteValueARM_OpAdd16_0(v *Value) bool {
|
func rewriteValueARM_OpAdd16_0(v *Value) bool {
|
||||||
// match: (Add16 x y)
|
// match: (Add16 x y)
|
||||||
// cond:
|
// cond:
|
||||||
|
@ -63,6 +63,7 @@ func abs(x, y float64) {
|
|||||||
// ppc64:"FABS\t"
|
// ppc64:"FABS\t"
|
||||||
// ppc64le:"FABS\t"
|
// ppc64le:"FABS\t"
|
||||||
// wasm:"F64Abs"
|
// wasm:"F64Abs"
|
||||||
|
// arm/6:"ABSD\t"
|
||||||
sink64[0] = math.Abs(x)
|
sink64[0] = math.Abs(x)
|
||||||
|
|
||||||
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
||||||
|
Loading…
Reference in New Issue
Block a user