mirror of
https://github.com/golang/go
synced 2024-11-16 16:14:40 -07:00
math: optimize math.Abs on mipsx
This commit optimizes the math.Abs function implementation on mipsx.
Tested on Loongson 3A2000.

goos: linux
goarch: mipsle
pkg: math
                      │ oldmath      │ newmath                              │
                      │ sec/op       │ sec/op        vs base                │
Acos-4                  282.6n ± 0%    282.3n ± 0%    ~ (p=0.140 n=7)
Acosh-4                 506.1n ± 0%    451.8n ± 0%    -10.73% (p=0.001 n=7)
Asin-4                  272.3n ± 0%    272.2n ± 0%    ~ (p=0.808 n=7)
Asinh-4                 529.7n ± 0%    475.3n ± 0%    -10.27% (p=0.001 n=7)
Atan-4                  208.2n ± 0%    207.9n ± 0%    ~ (p=0.134 n=7)
Atanh-4                 503.4n ± 1%    449.7n ± 0%    -10.67% (p=0.001 n=7)
Atan2-4                 310.5n ± 0%    310.5n ± 0%    ~ (p=0.928 n=7)
Cbrt-4                  359.3n ± 0%    358.8n ± 0%    ~ (p=0.121 n=7)
Ceil-4                  203.9n ± 0%    204.0n ± 0%    ~ (p=0.600 n=7)
Compare-4               23.11n ± 0%    23.11n ± 0%    ~ (p=0.702 n=7)
Compare32-4             19.09n ± 0%    19.12n ± 0%    ~ (p=0.070 n=7)
Copysign-4              33.20n ± 0%    34.02n ± 0%    +2.47% (p=0.001 n=7)
Cos-4                   422.5n ± 0%    385.4n ± 1%    -8.78% (p=0.001 n=7)
Cosh-4                  628.0n ± 0%    545.5n ± 0%    -13.14% (p=0.001 n=7)
Erf-4                   193.7n ± 2%    192.7n ± 1%    ~ (p=0.430 n=7)
Erfc-4                  192.8n ± 1%    193.0n ± 0%    ~ (p=0.245 n=7)
Erfinv-4                220.7n ± 1%    221.5n ± 2%    ~ (p=0.272 n=7)
Erfcinv-4               221.3n ± 1%    220.4n ± 2%    ~ (p=0.738 n=7)
Exp-4                   471.4n ± 0%    435.1n ± 0%    -7.70% (p=0.001 n=7)
ExpGo-4                 470.6n ± 0%    434.0n ± 0%    -7.78% (p=0.001 n=7)
Expm1-4                 243.1n ± 0%    243.4n ± 0%    ~ (p=0.417 n=7)
Exp2-4                  463.1n ± 0%    427.0n ± 0%    -7.80% (p=0.001 n=7)
Exp2Go-4                462.4n ± 0%    426.2n ± 5%    -7.83% (p=0.001 n=7)
Abs-4                   37.000n ± 0%   8.039n ± 9%    -78.27% (p=0.001 n=7)
Dim-4                   18.09n ± 0%    18.11n ± 0%    ~ (p=0.094 n=7)
Floor-4                 151.9n ± 0%    151.8n ± 0%    ~ (p=0.190 n=7)
Max-4                   116.7n ± 1%    116.7n ± 1%    ~ (p=0.842 n=7)
Min-4                   116.6n ± 1%    116.6n ± 0%    ~ (p=0.464 n=7)
Mod-4                   1244.0n ± 0%   980.9n ± 0%    -21.15% (p=0.001 n=7)
Frexp-4                 199.0n ± 0%    146.7n ± 0%    -26.28% (p=0.001 n=7)
Gamma-4                 516.4n ± 0%    479.3n ± 1%    -7.18% (p=0.001 n=7)
Hypot-4                 169.8n ± 0%    117.8n ± 2%    -30.62% (p=0.001 n=7)
HypotGo-4               170.8n ± 0%    117.5n ± 0%    -31.21% (p=0.001 n=7)
Ilogb-4                 160.8n ± 0%    109.5n ± 0%    -31.90% (p=0.001 n=7)
J0-4                    1.359µ ± 0%    1.305µ ± 0%    -3.97% (p=0.001 n=7)
J1-4                    1.386µ ± 0%    1.334µ ± 0%    -3.75% (p=0.001 n=7)
Jn-4                    2.864µ ± 0%    2.758µ ± 0%    -3.70% (p=0.001 n=7)
Ldexp-4                 202.9n ± 0%    151.7n ± 0%    -25.23% (p=0.001 n=7)
Lgamma-4                234.0n ± 0%    234.3n ± 0%    ~ (p=0.199 n=7)
Log-4                   444.1n ± 0%    407.9n ± 0%    -8.15% (p=0.001 n=7)
Logb-4                  157.8n ± 0%    121.6n ± 0%    -22.94% (p=0.001 n=7)
Log1p-4                 354.8n ± 0%    315.4n ± 0%    -11.10% (p=0.001 n=7)
Log10-4                 453.9n ± 0%    417.9n ± 0%    -7.93% (p=0.001 n=7)
Log2-4                  245.3n ± 0%    209.1n ± 0%    -14.76% (p=0.001 n=7)
Modf-4                  126.6n ± 0%    126.6n ± 0%    ~ (p=0.126 n=7)
Nextafter32-4           112.5n ± 0%    112.5n ± 0%    ~ (p=0.853 n=7)
Nextafter64-4           141.7n ± 0%    141.6n ± 0%    ~ (p=0.331 n=7)
PowInt-4                878.8n ± 1%    758.3n ± 1%    -13.71% (p=0.001 n=7)
PowFrac-4               1.809µ ± 0%    1.615µ ± 0%    -10.72% (p=0.001 n=7)
Pow10Pos-4              18.10n ± 0%    18.12n ± 0%    ~ (p=0.464 n=7)
Pow10Neg-4              17.09n ± 0%    17.09n ± 0%    ~ (p=0.263 n=7)
Round-4                 68.36n ± 0%    68.33n ± 0%    ~ (p=0.325 n=7)
RoundToEven-4           78.40n ± 0%    78.40n ± 0%    ~ (p=0.934 n=7)
Remainder-4             894.0n ± 1%    753.4n ± 1%    -15.73% (p=0.001 n=7)
Signbit-4               18.09n ± 0%    18.09n ± 0%    ~ (p=0.761 n=7)
Sin-4                   389.8n ± 1%    389.8n ± 0%    ~ (p=0.995 n=7)
Sincos-4                416.0n ± 0%    415.9n ± 0%    ~ (p=0.361 n=7)
Sinh-4                  634.6n ± 4%    585.6n ± 1%    -7.72% (p=0.001 n=7)
SqrtIndirect-4          8.035n ± 0%    8.036n ± 0%    ~ (p=0.523 n=7)
SqrtLatency-4           8.039n ± 0%    8.037n ± 0%    ~ (p=0.218 n=7)
SqrtIndirectLatency-4   8.040n ± 0%    8.040n ± 0%    ~ (p=0.652 n=7)
SqrtGoLatency-4         895.7n ± 0%    896.6n ± 0%    +0.10% (p=0.004 n=7)
SqrtPrime-4             5.406µ ± 0%    5.407µ ± 0%    ~ (p=0.592 n=7)
Tan-4                   406.1n ± 0%    405.8n ± 1%    ~ (p=0.435 n=7)
Tanh-4                  627.6n ± 0%    545.5n ± 0%    -13.08% (p=0.001 n=7)
Trunc-4                 146.7n ± 1%    146.7n ± 0%    ~ (p=0.755 n=7)
Y0-4                    1.359µ ± 0%    1.310µ ± 0%    -3.61% (p=0.001 n=7)
Y1-4                    1.351µ ± 0%    1.301µ ± 0%    -3.70% (p=0.001 n=7)
Yn-4                    2.829µ ± 0%    2.729µ ± 0%    -3.53% (p=0.001 n=7)
Float64bits-4           14.08n ± 0%    14.07n ± 0%    ~ (p=0.069 n=7)
Float64frombits-4       19.09n ± 0%    19.10n ± 0%    ~ (p=0.755 n=7)
Float32bits-4           13.06n ± 0%    13.07n ± 1%    ~ (p=0.586 n=7)
Float32frombits-4       13.06n ± 0%    13.06n ± 0%    ~ (p=0.853 n=7)
FMA-4                   606.9n ± 0%    606.8n ± 0%    ~ (p=0.393 n=7)
geomean                 201.1n         185.4n         -7.81%

Change-Id: I6d41a97ad3789ed5731588588859ac0b8b13b664
Reviewed-on: https://go-review.googlesource.com/c/go/+/484675
Reviewed-by: Rong Zhang <rongrong@oss.cipunited.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Than McIntosh <thanm@google.com>
This commit is contained in:
parent
9fa81a8827
commit
5cad8d41ca
@ -363,6 +363,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||||||
ssa.OpMIPSMOVDF,
|
ssa.OpMIPSMOVDF,
|
||||||
ssa.OpMIPSNEGF,
|
ssa.OpMIPSNEGF,
|
||||||
ssa.OpMIPSNEGD,
|
ssa.OpMIPSNEGD,
|
||||||
|
ssa.OpMIPSABSD,
|
||||||
ssa.OpMIPSSQRTF,
|
ssa.OpMIPSSQRTF,
|
||||||
ssa.OpMIPSSQRTD,
|
ssa.OpMIPSSQRTD,
|
||||||
ssa.OpMIPSCLZ:
|
ssa.OpMIPSCLZ:
|
||||||
|
@ -37,6 +37,9 @@
|
|||||||
(Mod8 x y) => (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y)))
|
(Mod8 x y) => (Select0 (DIV (SignExt8to32 x) (SignExt8to32 y)))
|
||||||
(Mod8u x y) => (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
(Mod8u x y) => (Select0 (DIVU (ZeroExt8to32 x) (ZeroExt8to32 y)))
|
||||||
|
|
||||||
|
// math package intrinsics
|
||||||
|
(Abs ...) => (ABSD ...)
|
||||||
|
|
||||||
// (x + y) / 2 with x>=y becomes (x - y) / 2 + y
|
// (x + y) / 2 with x>=y becomes (x - y) / 2 + y
|
||||||
(Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
|
(Avg32u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
|
||||||
|
|
||||||
|
@ -179,6 +179,7 @@ func init() {
|
|||||||
{name: "NEG", argLength: 1, reg: gp11}, // -arg0
|
{name: "NEG", argLength: 1, reg: gp11}, // -arg0
|
||||||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||||
|
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||||
|
|
||||||
|
@ -1872,6 +1872,7 @@ const (
|
|||||||
OpMIPSNEG
|
OpMIPSNEG
|
||||||
OpMIPSNEGF
|
OpMIPSNEGF
|
||||||
OpMIPSNEGD
|
OpMIPSNEGD
|
||||||
|
OpMIPSABSD
|
||||||
OpMIPSSQRTD
|
OpMIPSSQRTD
|
||||||
OpMIPSSQRTF
|
OpMIPSSQRTF
|
||||||
OpMIPSSLL
|
OpMIPSSLL
|
||||||
@ -25054,6 +25055,19 @@ var opcodeTable = [...]opInfo{
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ABSD",
|
||||||
|
argLen: 1,
|
||||||
|
asm: mips.AABSD,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "SQRTD",
|
name: "SQRTD",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
@ -6,6 +6,9 @@ import "cmd/compile/internal/types"
|
|||||||
|
|
||||||
func rewriteValueMIPS(v *Value) bool {
|
func rewriteValueMIPS(v *Value) bool {
|
||||||
switch v.Op {
|
switch v.Op {
|
||||||
|
case OpAbs:
|
||||||
|
v.Op = OpMIPSABSD
|
||||||
|
return true
|
||||||
case OpAdd16:
|
case OpAdd16:
|
||||||
v.Op = OpMIPSADD
|
v.Op = OpMIPSADD
|
||||||
return true
|
return true
|
||||||
|
@ -4369,7 +4369,7 @@ func InitTables() {
|
|||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
|
return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
|
||||||
},
|
},
|
||||||
sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS64)
|
sys.ARM64, sys.ARM, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
|
||||||
addF("math", "Copysign",
|
addF("math", "Copysign",
|
||||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||||
return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
|
return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
|
||||||
|
@ -82,6 +82,7 @@ func abs(x, y float64) {
|
|||||||
// wasm:"F64Abs"
|
// wasm:"F64Abs"
|
||||||
// arm/6:"ABSD\t"
|
// arm/6:"ABSD\t"
|
||||||
// mips64/hardfloat:"ABSD\t"
|
// mips64/hardfloat:"ABSD\t"
|
||||||
|
// mips/hardfloat:"ABSD\t"
|
||||||
sink64[0] = math.Abs(x)
|
sink64[0] = math.Abs(x)
|
||||||
|
|
||||||
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
|
||||||
|
Loading…
Reference in New Issue
Block a user