From a07176b45a105a8e3bd9685c8e2208f4838c7621 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Thu, 14 Sep 2017 06:52:51 +0000 Subject: [PATCH] cmd/compile: optimize ARM code with MULAF/MULSF/MULAD/MULSD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Go compiler can generate better ARM code with those more efficient FP instructions. There is little improvement overall, but a big improvement in special cases. 1. The size of pkg/linux_arm/math.a shrinks by 2.4%. 2. There is neither improvement nor regression in the compilecmp benchmark. name old time/op new time/op delta Template 2.32s ± 2% 2.32s ± 1% ~ (p=1.000 n=9+10) Unicode 1.32s ± 4% 1.32s ± 4% ~ (p=0.912 n=10+10) GoTypes 7.76s ± 1% 7.79s ± 1% ~ (p=0.447 n=9+10) Compiler 37.4s ± 2% 37.2s ± 2% ~ (p=0.218 n=10+10) SSA 84.8s ± 2% 85.0s ± 1% ~ (p=0.604 n=10+9) Flate 1.45s ± 2% 1.44s ± 2% ~ (p=0.075 n=10+10) GoParser 1.82s ± 1% 1.81s ± 1% ~ (p=0.190 n=10+10) Reflect 5.06s ± 1% 5.05s ± 1% ~ (p=0.315 n=10+9) Tar 2.37s ± 1% 2.37s ± 2% ~ (p=0.912 n=10+10) XML 2.56s ± 1% 2.58s ± 2% ~ (p=0.089 n=10+10) [Geo mean] 4.77s 4.77s -0.08% name old user-time/op new user-time/op delta Template 2.74s ± 2% 2.75s ± 2% ~ (p=0.856 n=9+10) Unicode 1.61s ± 4% 1.62s ± 3% ~ (p=0.693 n=10+10) GoTypes 9.55s ± 1% 9.49s ± 2% ~ (p=0.056 n=9+10) Compiler 45.9s ± 1% 45.8s ± 1% ~ (p=0.345 n=9+10) SSA 110s ± 1% 110s ± 1% ~ (p=0.763 n=9+10) Flate 1.68s ± 2% 1.68s ± 3% ~ (p=0.616 n=10+10) GoParser 2.14s ± 4% 2.14s ± 1% ~ (p=0.825 n=10+9) Reflect 5.95s ± 1% 5.97s ± 3% ~ (p=0.951 n=9+10) Tar 2.94s ± 3% 2.93s ± 2% ~ (p=0.359 n=10+10) XML 3.03s ± 3% 3.07s ± 6% ~ (p=0.166 n=10+10) [Geo mean] 5.76s 5.77s +0.12% name old text-bytes new text-bytes delta HelloSize 588kB ± 0% 588kB ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.46kB ± 0% 5.46kB ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 72.9kB ± 0% 72.9kB ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 
1.03MB ± 0% 1.03MB ± 0% ~ (all equal) 3. The performance of Mandelbrot200 improves by 15%, though there is little improvement overall. name old time/op new time/op delta BinaryTree17-4 41.7s ± 1% 41.7s ± 1% ~ (p=0.264 n=29+23) Fannkuch11-4 24.2s ± 0% 24.1s ± 1% -0.13% (p=0.050 n=30+30) FmtFprintfEmpty-4 826ns ± 1% 824ns ± 1% -0.24% (p=0.038 n=25+30) FmtFprintfString-4 1.38µs ± 1% 1.38µs ± 0% -0.42% (p=0.000 n=27+25) FmtFprintfInt-4 1.46µs ± 1% 1.46µs ± 0% ~ (p=0.060 n=30+23) FmtFprintfIntInt-4 2.11µs ± 1% 2.08µs ± 0% -1.04% (p=0.000 n=30+30) FmtFprintfPrefixedInt-4 2.23µs ± 1% 2.22µs ± 1% -0.51% (p=0.000 n=30+30) FmtFprintfFloat-4 4.49µs ± 1% 4.48µs ± 1% -0.22% (p=0.004 n=26+30) FmtManyArgs-4 8.06µs ± 1% 8.12µs ± 1% +0.68% (p=0.000 n=25+30) GobDecode-4 104ms ± 1% 104ms ± 2% ~ (p=0.362 n=29+29) GobEncode-4 92.9ms ± 1% 92.8ms ± 2% ~ (p=0.786 n=30+30) Gzip-4 4.12s ± 1% 4.12s ± 1% ~ (p=0.314 n=30+30) Gunzip-4 602ms ± 1% 603ms ± 1% ~ (p=0.164 n=30+30) HTTPClientServer-4 659µs ± 1% 655µs ± 2% -0.64% (p=0.006 n=25+28) JSONEncode-4 234ms ± 1% 235ms ± 1% +0.29% (p=0.050 n=30+30) JSONDecode-4 912ms ± 0% 911ms ± 0% ~ (p=0.385 n=18+24) Mandelbrot200-4 49.2ms ± 0% 41.7ms ± 0% -15.35% (p=0.000 n=25+27) GoParse-4 46.3ms ± 1% 46.3ms ± 2% ~ (p=0.572 n=30+30) RegexpMatchEasy0_32-4 1.29µs ± 1% 1.27µs ± 0% -1.59% (p=0.000 n=30+30) RegexpMatchEasy0_1K-4 7.62µs ± 4% 7.71µs ± 3% ~ (p=0.074 n=30+30) RegexpMatchEasy1_32-4 1.31µs ± 0% 1.30µs ± 1% -0.71% (p=0.000 n=23+30) RegexpMatchEasy1_1K-4 10.3µs ± 3% 10.3µs ± 5% ~ (p=0.105 n=30+30) RegexpMatchMedium_32-4 2.06µs ± 1% 2.06µs ± 1% ~ (p=0.100 n=30+30) RegexpMatchMedium_1K-4 533µs ± 1% 534µs ± 1% ~ (p=0.254 n=29+30) RegexpMatchHard_32-4 28.9µs ± 0% 28.9µs ± 0% ~ (p=0.154 n=30+30) RegexpMatchHard_1K-4 868µs ± 1% 867µs ± 0% ~ (p=0.729 n=30+23) Revcomp-4 66.9ms ± 1% 67.2ms ± 2% ~ (p=0.102 n=28+29) Template-4 1.07s ± 1% 1.06s ± 1% -0.53% (p=0.000 n=30+30) TimeParse-4 7.07µs ± 1% 7.01µs ± 0% -0.85% (p=0.000 n=30+25) TimeFormat-4 13.1µs ± 0% 13.2µs ± 1% 
+0.77% (p=0.000 n=27+27) [Geo mean] 721µs 716µs -0.70% name old speed new speed delta GobDecode-4 7.38MB/s ± 1% 7.37MB/s ± 2% ~ (p=0.399 n=29+29) GobEncode-4 8.26MB/s ± 1% 8.27MB/s ± 2% ~ (p=0.790 n=30+30) Gzip-4 4.71MB/s ± 1% 4.71MB/s ± 1% ~ (p=0.885 n=30+30) Gunzip-4 32.2MB/s ± 1% 32.2MB/s ± 1% ~ (p=0.190 n=30+30) JSONEncode-4 8.28MB/s ± 1% 8.25MB/s ± 1% ~ (p=0.053 n=30+30) JSONDecode-4 2.13MB/s ± 0% 2.12MB/s ± 1% ~ (p=0.072 n=18+30) GoParse-4 1.25MB/s ± 1% 1.25MB/s ± 2% ~ (p=0.863 n=30+30) RegexpMatchEasy0_32-4 24.8MB/s ± 0% 25.2MB/s ± 1% +1.61% (p=0.000 n=30+30) RegexpMatchEasy0_1K-4 134MB/s ± 4% 133MB/s ± 3% ~ (p=0.074 n=30+30) RegexpMatchEasy1_32-4 24.5MB/s ± 0% 24.6MB/s ± 1% +0.72% (p=0.000 n=23+30) RegexpMatchEasy1_1K-4 99.1MB/s ± 3% 99.8MB/s ± 5% ~ (p=0.105 n=30+30) RegexpMatchMedium_32-4 483kB/s ± 1% 487kB/s ± 1% +0.83% (p=0.002 n=30+30) RegexpMatchMedium_1K-4 1.92MB/s ± 1% 1.92MB/s ± 1% ~ (p=0.058 n=30+30) RegexpMatchHard_32-4 1.10MB/s ± 0% 1.11MB/s ± 0% ~ (p=0.804 n=30+30) RegexpMatchHard_1K-4 1.18MB/s ± 0% 1.18MB/s ± 0% ~ (all equal) Revcomp-4 38.0MB/s ± 1% 37.8MB/s ± 2% ~ (p=0.098 n=28+29) Template-4 1.82MB/s ± 1% 1.83MB/s ± 1% +0.55% (p=0.000 n=29+29) [Geo mean] 6.79MB/s 6.79MB/s +0.09% Change-Id: Ia91991c2c5c59c5df712de85a83b13a21c0a554b Reviewed-on: https://go-review.googlesource.com/63770 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/arm/ssa.go | 14 + src/cmd/compile/internal/ssa/gen/ARM.rules | 10 + src/cmd/compile/internal/ssa/gen/ARMOps.go | 6 + src/cmd/compile/internal/ssa/opGen.go | 68 +++++ src/cmd/compile/internal/ssa/rewriteARM.go | 284 +++++++++++++++++++++ 5 files changed, 382 insertions(+) diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go index 5525197d31..a70df6dd0e 100644 --- a/src/cmd/compile/internal/arm/ssa.go +++ b/src/cmd/compile/internal/arm/ssa.go @@ -197,6 +197,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.Reg = r1 
p.To.Type = obj.TYPE_REG p.To.Reg = r + case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD: + r := v.Reg() + r0 := v.Args[0].Reg() + r1 := v.Args[1].Reg() + r2 := v.Args[2].Reg() + if r != r0 { + v.Fatalf("result and addend are not in the same register: %v", v.LongString()) + } + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r2 + p.Reg = r1 + p.To.Type = obj.TYPE_REG + p.To.Reg = r case ssa.OpARMADDS, ssa.OpARMSUBS: r := v.Reg0() diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules index 197f9540d3..b21cd6f9f3 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM.rules @@ -1248,6 +1248,16 @@ (NMULF (NEGF x) y) -> (MULF x y) (NMULD (NEGD x) y) -> (MULD x y) +// the result will overwrite the addend, since they are in the same register +(ADDF a (MULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAF a x y) +(ADDF a (NMULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSF a x y) +(ADDD a (MULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAD a x y) +(ADDD a (NMULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSD a x y) +(SUBF a (MULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSF a x y) +(SUBF a (NMULF x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAF a x y) +(SUBD a (MULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULSD a x y) +(SUBD a (NMULD x y)) && a.Uses == 1 && objabi.GOARM >= 6 -> (MULAD a x y) + (AND x (MVN y)) -> (BIC x y) // simplification with *shift ops diff --git a/src/cmd/compile/internal/ssa/gen/ARMOps.go b/src/cmd/compile/internal/ssa/gen/ARMOps.go index f94ef53258..93b50135d4 100644 --- a/src/cmd/compile/internal/ssa/gen/ARMOps.go +++ b/src/cmd/compile/internal/ssa/gen/ARMOps.go @@ -122,6 +122,7 @@ func init() { fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, 
clobbers: buildReg("F15")} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} + fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} fp2flags = regInfo{inputs: []regMask{fp, fp}} fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} @@ -181,6 +182,11 @@ func init() { {name: "DIVF", argLength: 2, reg: fp21, asm: "DIVF"}, // arg0 / arg1 {name: "DIVD", argLength: 2, reg: fp21, asm: "DIVD"}, // arg0 / arg1 + {name: "MULAF", argLength: 3, reg: fp31, asm: "MULAF", resultInArg0: true}, // arg0 + (arg1 * arg2) + {name: "MULAD", argLength: 3, reg: fp31, asm: "MULAD", resultInArg0: true}, // arg0 + (arg1 * arg2) + {name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2) + {name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2) + {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt {name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c5d43f4611..4493759ae9 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -707,6 +707,10 @@ const ( OpARMNMULD OpARMDIVF OpARMDIVD + OpARMMULAF + OpARMMULAD + OpARMMULSF + OpARMMULSD OpARMAND OpARMANDconst OpARMOR @@ -8655,6 +8659,70 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MULAF", + argLen: 3, + resultInArg0: true, + asm: arm.AMULAF, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 
F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "MULAD", + argLen: 3, + resultInArg0: true, + asm: arm.AMULAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "MULSF", + argLen: 3, + resultInArg0: true, + asm: arm.AMULSF, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, + { + name: "MULSD", + argLen: 3, + resultInArg0: true, + asm: arm.AMULSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + outputs: []outputInfo{ + {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 + }, + }, + }, { name: "AND", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index 1c7eb39236..38695c503d 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -33,6 +33,10 @@ func rewriteValueARM(v *Value) bool { return rewriteValueARM_OpARMADCshiftRLreg_0(v) case OpARMADD: return rewriteValueARM_OpARMADD_0(v) || rewriteValueARM_OpARMADD_10(v) + case OpARMADDD: + return rewriteValueARM_OpARMADDD_0(v) + case OpARMADDF: + return 
rewriteValueARM_OpARMADDF_0(v) case OpARMADDS: return rewriteValueARM_OpARMADDS_0(v) || rewriteValueARM_OpARMADDS_10(v) case OpARMADDSshiftLL: @@ -321,6 +325,10 @@ func rewriteValueARM(v *Value) bool { return rewriteValueARM_OpARMSRLconst_0(v) case OpARMSUB: return rewriteValueARM_OpARMSUB_0(v) || rewriteValueARM_OpARMSUB_10(v) + case OpARMSUBD: + return rewriteValueARM_OpARMSUBD_0(v) + case OpARMSUBF: + return rewriteValueARM_OpARMSUBF_0(v) case OpARMSUBS: return rewriteValueARM_OpARMSUBS_0(v) || rewriteValueARM_OpARMSUBS_10(v) case OpARMSUBSshiftLL: @@ -2039,6 +2047,188 @@ func rewriteValueARM_OpARMADD_10(v *Value) bool { } return false } +func rewriteValueARM_OpARMADDD_0(v *Value) bool { + // match: (ADDD a (MULD x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMMULD { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDD (MULD x y) a) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAD a x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARMMULD { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + a := v.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDD a (NMULD x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMNMULD { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDD (NMULD x y) a) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSD a x y) + for { 
+ _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARMNMULD { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + a := v.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueARM_OpARMADDF_0(v *Value) bool { + // match: (ADDF a (MULF x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAF a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMMULF { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDF (MULF x y) a) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAF a x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARMMULF { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + a := v.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDF a (NMULF x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSF a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMNMULF { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (ADDF (NMULF x y) a) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSF a x y) + for { + _ = v.Args[1] + v_0 := v.Args[0] + if v_0.Op != OpARMNMULF { + break + } + _ = v_0.Args[1] + x := v_0.Args[0] + y := v_0.Args[1] + a := v.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} func rewriteValueARM_OpARMADDS_0(v *Value) 
bool { // match: (ADDS x (MOVWconst [c])) // cond: @@ -13627,6 +13817,100 @@ func rewriteValueARM_OpARMSUB_10(v *Value) bool { } return false } +func rewriteValueARM_OpARMSUBD_0(v *Value) bool { + // match: (SUBD a (MULD x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMMULD { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (SUBD a (NMULD x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAD a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMNMULD { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAD) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} +func rewriteValueARM_OpARMSUBF_0(v *Value) bool { + // match: (SUBF a (MULF x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULSF a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMMULF { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULSF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + // match: (SUBF a (NMULF x y)) + // cond: a.Uses == 1 && objabi.GOARM >= 6 + // result: (MULAF a x y) + for { + _ = v.Args[1] + a := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpARMNMULF { + break + } + _ = v_1.Args[1] + x := v_1.Args[0] + y := v_1.Args[1] + if !(a.Uses == 1 && objabi.GOARM >= 6) { + break + } + v.reset(OpARMMULAF) + v.AddArg(a) + v.AddArg(x) + v.AddArg(y) + return true + } + return false +} func rewriteValueARM_OpARMSUBS_0(v *Value) bool { // match: (SUBS x (MOVWconst [c])) // cond: