From 5f8a2fdf096037dbb34a1b14a2f0e77e40f5042b Mon Sep 17 00:00:00 2001 From: Junxian Zhu Date: Fri, 26 May 2023 13:26:51 +0800 Subject: [PATCH] cmd/compile: intrinsify Add64 on mips64 This CL intrinsifies Add64 on mips64. pkg: math/bits │ sec/op │ sec/op vs base │ Add64-4 2.783n ± 0% 1.950n ± 0% -29.93% (p=0.000 n=8) Add64multiple-4 5.713n ± 0% 3.063n ± 0% -46.38% (p=0.000 n=8) pkg: crypto/elliptic │ sec/op │ sec/op vs base │ ScalarBaseMult/P256-4 353.7µ ± 0% 282.7µ ± 0% -20.09% (p=0.000 n=8) ScalarBaseMult/P224-4 330.5µ ± 0% 250.0µ ± 0% -24.37% (p=0.000 n=8) ScalarBaseMult/P384-4 1228.8µ ± 0% 791.5µ ± 0% -35.59% (p=0.000 n=8) ScalarBaseMult/P521-4 15.412m ± 0% 2.438m ± 0% -84.18% (p=0.000 n=8) ScalarMult/P256-4 1189.4µ ± 0% 904.2µ ± 0% -23.98% (p=0.000 n=8) ScalarMult/P224-4 1138.8µ ± 0% 813.8µ ± 0% -28.54% (p=0.000 n=8) ScalarMult/P384-4 4.419m ± 0% 2.692m ± 0% -39.08% (p=0.000 n=8) ScalarMult/P521-4 59.768m ± 0% 8.773m ± 0% -85.32% (p=0.000 n=8) MarshalUnmarshal/P256/Uncompressed-4 8.697µ ± 1% 7.923µ ± 1% -8.91% (p=0.000 n=8) MarshalUnmarshal/P256/Compressed-4 104.75µ ± 0% 66.29µ ± 0% -36.72% (p=0.000 n=8) MarshalUnmarshal/P224/Uncompressed-4 8.728µ ± 1% 7.823µ ± 1% -10.37% (p=0.000 n=8) MarshalUnmarshal/P224/Compressed-4 1035.7µ ± 0% 676.5µ ± 2% -34.69% (p=0.000 n=8) MarshalUnmarshal/P384/Uncompressed-4 15.32µ ± 1% 11.81µ ± 1% -22.90% (p=0.000 n=8) MarshalUnmarshal/P384/Compressed-4 399.8µ ± 0% 217.4µ ± 0% -45.62% (p=0.000 n=8) MarshalUnmarshal/P521/Uncompressed-4 96.79µ ± 0% 20.32µ ± 0% -79.01% (p=0.000 n=8) MarshalUnmarshal/P521/Compressed-4 6640.4µ ± 0% 790.8µ ± 0% -88.09% (p=0.000 n=8) Change-Id: I8a0960b9665720c1d3e57dce36386e74db37fefa Reviewed-on: https://go-review.googlesource.com/c/go/+/498496 Reviewed-by: Heschi Kreinick Run-TryBot: Keith Randall TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Reviewed-by: Joel Sing Reviewed-by: Keith Randall --- .../compile/internal/ssa/_gen/MIPS64.rules | 8 +++ src/cmd/compile/internal/ssa/rewriteMIPS64.go | 60 
+++++++++++++++++++ src/cmd/compile/internal/ssagen/ssa.go | 2 +- test/codegen/mathbits.go | 7 +++ 4 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules index 4628e2a0242..5c07abc79b3 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules +++ b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules @@ -38,6 +38,10 @@ (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) +(Select0 (Add64carry x y c)) => (ADDV (ADDV x y) c) +(Select1 (Add64carry x y c)) => + (OR (SGTU x s:(ADDV x y)) (SGTU s (ADDV s c))) + // math package intrinsics (Abs ...) => (ABSD ...) @@ -798,6 +802,10 @@ (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no) (GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes) +// SGT/SGTU with known outcomes. +(SGT x x) => (MOVVconst [0]) +(SGTU x x) => (MOVVconst [0]) + // fold readonly sym load (MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read8(sym, int64(off)))]) (MOVHload [off] {sym} (SB) _) && symIsRO(sym) => (MOVVconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go index de316e9678e..4e7b51b3514 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go @@ -4797,6 +4797,17 @@ func rewriteValueMIPS64_OpMIPS64SGT(v *Value) bool { v.AddArg(x) return true } + // match: (SGT x x) + // result: (MOVVconst [0]) + for { + x := v_0 + if x != v_1 { + break + } + v.reset(OpMIPS64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } return false } func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool { @@ -4819,6 +4830,17 @@ func rewriteValueMIPS64_OpMIPS64SGTU(v *Value) bool { v.AddArg(x) return true } + // match: (SGTU x x) + // result: (MOVVconst [0]) + for { + x := v_0 + if x != v_1 { + 
break + } + v.reset(OpMIPS64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } return false } func rewriteValueMIPS64_OpMIPS64SGTUconst(v *Value) bool { @@ -7315,6 +7337,22 @@ func rewriteValueMIPS64_OpSelect0(v *Value) bool { v.AddArg(v0) return true } + // match: (Select0 (Add64carry x y c)) + // result: (ADDV (ADDV x y) c) + for { + t := v.Type + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPS64ADDV) + v0 := b.NewValue0(v.Pos, OpMIPS64ADDV, t) + v0.AddArg2(x, y) + v.AddArg2(v0, c) + return true + } // match: (Select0 (DIVVU _ (MOVVconst [1]))) // result: (MOVVconst [0]) for { @@ -7427,6 +7465,28 @@ func rewriteValueMIPS64_OpSelect1(v *Value) bool { v.AddArg2(v0, v2) return true } + // match: (Select1 (Add64carry x y c)) + // result: (OR (SGTU x s:(ADDV x y)) (SGTU s (ADDV s c))) + for { + t := v.Type + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPS64OR) + v0 := b.NewValue0(v.Pos, OpMIPS64SGTU, t) + s := b.NewValue0(v.Pos, OpMIPS64ADDV, t) + s.AddArg2(x, y) + v0.AddArg2(x, s) + v2 := b.NewValue0(v.Pos, OpMIPS64SGTU, t) + v3 := b.NewValue0(v.Pos, OpMIPS64ADDV, t) + v3.AddArg2(s, c) + v2.AddArg2(s, v3) + v.AddArg2(v0, v2) + return true + } // match: (Select1 (MULVU x (MOVVconst [-1]))) // result: (NEGV x) for { diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 678e1ebc11e..09b20b726e9 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -4843,7 +4843,7 @@ func InitTables() { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) }, - sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64) + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64) 
alias("math/bits", "Add", "math/bits", "Add64", p8...) addF("math/bits", "Sub64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 8c971cf760c..85c7d24f7de 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -434,6 +434,7 @@ func AddC(x, ci uint) (r, co uint) { // loong64: "ADDV", "SGTU" // ppc64x: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // mips64:"ADDV","SGTU" // riscv64: "ADD","SLTU" return bits.Add(x, 7, ci) } @@ -444,6 +445,7 @@ func AddZ(x, y uint) (r, co uint) { // loong64: "ADDV", "SGTU" // ppc64x: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // mips64:"ADDV","SGTU" // riscv64: "ADD","SLTU" return bits.Add(x, y, 0) } @@ -454,6 +456,7 @@ func AddR(x, y, ci uint) uint { // loong64: "ADDV", -"SGTU" // ppc64x: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // mips64:"ADDV",-"SGTU" // riscv64: "ADD",-"SLTU" r, _ := bits.Add(x, y, ci) return r @@ -475,6 +478,7 @@ func Add64(x, y, ci uint64) (r, co uint64) { // loong64: "ADDV", "SGTU" // ppc64x: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // mips64:"ADDV","SGTU" // riscv64: "ADD","SLTU" return bits.Add64(x, y, ci) } @@ -485,6 +489,7 @@ func Add64C(x, ci uint64) (r, co uint64) { // loong64: "ADDV", "SGTU" // ppc64x: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // mips64:"ADDV","SGTU" // riscv64: "ADD","SLTU" return bits.Add64(x, 7, ci) } @@ -495,6 +500,7 @@ func Add64Z(x, y uint64) (r, co uint64) { // loong64: "ADDV", "SGTU" // ppc64x: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," + // mips64:"ADDV","SGTU" // riscv64: "ADD","SLTU" return bits.Add64(x, y, 0) } @@ -505,6 +511,7 @@ func Add64R(x, y, ci uint64) uint64 { // loong64: "ADDV", -"SGTU" // ppc64x: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," + // mips64:"ADDV",-"SGTU" // riscv64: "ADD",-"SLTU" r, _ := bits.Add64(x, y, ci) return r