From 20cf5c49623530b9f4511fceae7f2ca8c0a29809 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Thu, 22 Mar 2018 02:18:50 +0000
Subject: [PATCH] cmd/compile: optimize 386 binary operations with a memory operand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some integer/float binary operations on 386 can take a memory operand
directly, which is more efficient than first loading the value into a
register. This CL implements that optimization by copying the similar
solution used on amd64.

The go1 benchmark shows some improvements (excluding noise), especially
in the Template test case.

name                     old time/op    new time/op    delta
BinaryTree17-4              3.42s ± 2%     3.40s ± 2%    ~     (p=0.069 n=38+39)
Fannkuch11-4                3.48s ± 1%     3.53s ± 1%  +1.59%  (p=0.000 n=40+40)
FmtFprintfEmpty-4          46.7ns ± 4%    46.3ns ± 3%  -1.03%  (p=0.001 n=40+40)
FmtFprintfString-4         80.1ns ± 3%    80.6ns ± 3%  +0.56%  (p=0.029 n=40+40)
FmtFprintfInt-4            92.4ns ± 2%    92.3ns ± 3%    ~     (p=0.847 n=40+40)
FmtFprintfIntInt-4          147ns ± 3%     144ns ± 3%  -1.87%  (p=0.000 n=40+40)
FmtFprintfPrefixedInt-4     182ns ± 2%     184ns ± 3%  +0.99%  (p=0.002 n=40+40)
FmtFprintfFloat-4           387ns ± 3%     384ns ± 3%    ~     (p=0.069 n=40+40)
FmtManyArgs-4               619ns ± 3%     616ns ± 3%    ~     (p=0.320 n=40+40)
GobDecode-4                7.28ms ± 6%    7.27ms ± 5%    ~     (p=0.897 n=40+40)
GobEncode-4                7.33ms ± 6%    7.21ms ± 6%  -1.56%  (p=0.022 n=38+40)
Gzip-4                      357ms ± 4%     357ms ± 4%    ~     (p=0.071 n=40+40)
Gunzip-4                   45.3ms ± 3%    45.4ms ± 3%    ~     (p=0.452 n=40+40)
HTTPClientServer-4         63.0µs ± 2%    62.9µs ± 3%    ~     (p=0.760 n=38+39)
JSONEncode-4               22.0ms ± 4%    21.7ms ± 4%  -1.49%  (p=0.000 n=40+40)
JSONDecode-4               67.7ms ± 4%    68.3ms ± 3%  +0.86%  (p=0.039 n=40+40)
Mandelbrot200-4            5.16ms ± 3%    5.17ms ± 3%    ~     (p=0.418 n=40+40)
GoParse-4                  3.30ms ± 2%    3.32ms ± 3%  +0.55%  (p=0.017 n=40+40)
RegexpMatchEasy0_32-4       104ns ± 3%     104ns ± 4%    ~     (p=0.992 n=40+40)
RegexpMatchEasy0_1K-4       852ns ± 3%     851ns ± 2%    ~     (p=0.344 n=40+40)
RegexpMatchEasy1_32-4       113ns ± 4%     113ns ± 5%    ~     (p=0.937 n=40+40)
RegexpMatchEasy1_1K-4      1.03µs ± 5%    1.04µs ± 4%    ~     (p=0.430 n=40+40)
RegexpMatchMedium_32-4      132ns ± 4%     131ns ± 3%  -1.06%  (p=0.027 n=40+40)
RegexpMatchMedium_1K-4     43.0µs ± 3%    43.2µs ± 3%    ~     (p=0.122 n=40+40)
RegexpMatchHard_32-4       2.21µs ± 4%    2.20µs ± 4%    ~     (p=0.146 n=40+40)
RegexpMatchHard_1K-4       67.1µs ± 4%    67.2µs ± 3%    ~     (p=0.859 n=40+40)
Revcomp-4                   1.85s ± 2%     1.85s ± 3%    ~     (p=0.184 n=40+40)
Template-4                 70.1ms ± 4%    67.5ms ± 3%  -3.65%  (p=0.000 n=40+40)
TimeParse-4                 457ns ±16%     439ns ± 4%    ~     (p=0.683 n=40+34)
TimeFormat-4                413ns ± 3%     414ns ± 3%    ~     (p=0.850 n=40+40)
[Geo mean]                 67.5µs         67.3µs       -0.38%

name                     old speed      new speed      delta
GobDecode-4               105MB/s ± 6%   106MB/s ± 5%    ~     (p=0.893 n=40+40)
GobEncode-4               105MB/s ± 6%   107MB/s ± 7%  +1.60%  (p=0.023 n=38+40)
Gzip-4                   54.4MB/s ± 4%  54.5MB/s ± 4%    ~     (p=0.073 n=40+40)
Gunzip-4                  429MB/s ± 3%   428MB/s ± 3%    ~     (p=0.453 n=40+40)
JSONEncode-4             88.3MB/s ± 5%  89.6MB/s ± 4%  +1.51%  (p=0.000 n=40+40)
JSONDecode-4             28.7MB/s ± 4%  28.4MB/s ± 3%  -0.87%  (p=0.039 n=40+40)
GoParse-4                17.6MB/s ± 3%  17.5MB/s ± 3%  -0.55%  (p=0.020 n=40+40)
RegexpMatchEasy0_32-4     308MB/s ± 4%   308MB/s ± 5%    ~     (p=0.988 n=40+40)
RegexpMatchEasy0_1K-4    1.20GB/s ± 3%  1.20GB/s ± 2%    ~     (p=0.329 n=40+40)
RegexpMatchEasy1_32-4     283MB/s ± 4%   283MB/s ± 4%    ~     (p=0.507 n=40+40)
RegexpMatchEasy1_1K-4     991MB/s ± 5%   987MB/s ± 4%    ~     (p=0.446 n=40+40)
RegexpMatchMedium_32-4   7.54MB/s ± 4%  7.63MB/s ± 3%  +1.26%  (p=0.004 n=40+40)
RegexpMatchMedium_1K-4   23.8MB/s ± 3%  23.7MB/s ± 4%    ~     (p=0.121 n=40+40)
RegexpMatchHard_32-4     14.5MB/s ± 4%  14.6MB/s ± 4%    ~     (p=0.145 n=40+40)
RegexpMatchHard_1K-4     15.3MB/s ± 4%  15.2MB/s ± 3%    ~     (p=0.874 n=40+40)
Revcomp-4                 137MB/s ± 2%   137MB/s ± 3%
~ (p=0.179 n=40+40) Template-4 27.7MB/s ± 4% 28.7MB/s ± 3% +3.78% (p=0.000 n=40+40) [Geo mean] 78.9MB/s 79.2MB/s +0.38% Change-Id: I3ba688c253b665485c1ebdf5a75f4ce82cc3def3 Reviewed-on: https://go-review.googlesource.com/102036 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Ilya Tocar Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/gen/386.rules | 24 +- src/cmd/compile/internal/ssa/gen/386Ops.go | 17 +- src/cmd/compile/internal/ssa/opGen.go | 214 ++++ src/cmd/compile/internal/ssa/rewrite386.go | 1245 +++++++++++++++++++- src/cmd/compile/internal/x86/ssa.go | 11 + 5 files changed, 1476 insertions(+), 35 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules index 35db2e216d6..d6533e1772f 100644 --- a/src/cmd/compile/internal/ssa/gen/386.rules +++ b/src/cmd/compile/internal/ssa/gen/386.rules @@ -636,6 +636,13 @@ (MOVSSstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSSstore [off1+off2] {sym} ptr val mem) (MOVSDstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVSDstore [off1+off2] {sym} ptr val mem) +((ADD|SUB|AND|OR|XOR)Lmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> + ((ADD|SUB|AND|OR|XOR)Lmem [off1+off2] {sym} val base mem) +((ADD|SUB|MUL)SSmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> + ((ADD|SUB|MUL)SSmem [off1+off2] {sym} val base mem) +((ADD|SUB|MUL)SDmem [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) -> + ((ADD|SUB|MUL)SDmem [off1+off2] {sym} val base mem) + // Fold constants into stores. (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) -> (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) @@ -748,6 +755,16 @@ (MOVSDstore [off1] {sym1} (LEAL8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) +((ADD|SUB|AND|OR|XOR)Lmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|SUB|AND|OR|XOR)Lmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) +((ADD|SUB|MUL)SSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|SUB|MUL)SSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) +((ADD|SUB|MUL)SDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) -> + ((ADD|SUB|MUL)SDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + (MOVBload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx1 [off] {sym} ptr idx mem) (MOVWload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVWloadidx1 [off] {sym} ptr idx mem) (MOVLload [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOVLloadidx1 [off] {sym} ptr idx mem) @@ -823,6 +840,11 @@ (MOVSDstoreidx1 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx1 [int64(int32(c+d))] {sym} ptr idx val mem) (MOVSDstoreidx8 [c] {sym} ptr (ADDLconst [d] idx) val mem) -> (MOVSDstoreidx8 [int64(int32(c+8*d))] {sym} ptr idx val mem) +// Merge load to op +((ADD|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR)Lmem x [off] {sym} ptr mem) +((ADD|SUB|MUL)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && 
clobber(l) -> ((ADD|SUB|MUL)SDmem x [off] {sym} ptr mem) +((ADD|SUB|MUL)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL)SSmem x [off] {sym} ptr mem) + (MOVBstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) -> (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) (MOVWstoreconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) -> @@ -1231,4 +1253,4 @@ // For PIC, break floating-point constant loading into two instructions so we have // a register to use for holding the address of the constant pool entry. (MOVSSconst [c]) && config.ctxt.Flag_shared -> (MOVSSconst2 (MOVSSconst1 [c])) -(MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c])) +(MOVSDconst [c]) && config.ctxt.Flag_shared -> (MOVSDconst2 (MOVSDconst1 [c])) \ No newline at end of file diff --git a/src/cmd/compile/internal/ssa/gen/386Ops.go b/src/cmd/compile/internal/ssa/gen/386Ops.go index d1f8bc97889..6fed5807834 100644 --- a/src/cmd/compile/internal/ssa/gen/386Ops.go +++ b/src/cmd/compile/internal/ssa/gen/386Ops.go @@ -125,6 +125,7 @@ func init() { flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}} gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly} + gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly} gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly} gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}} @@ -134,6 +135,7 @@ func init() { fp01 = regInfo{inputs: nil, outputs: fponly} fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly} + fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly} fpgp = regInfo{inputs: fponly, outputs: gponly} gpfp = regInfo{inputs: gponly, outputs: fponly} fp11 = regInfo{inputs: fponly, outputs: fponly} @@ -173,6 +175,13 @@ func init() { {name: "MOVSDstoreidx1", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"}, // fp64 indexed by i store {name: "MOVSDstoreidx8", argLength: 4, reg: fpstoreidx, asm: "MOVSD", aux: "SymOff", symEffect: "Write"}, // fp64 indexed by 8i store + {name: "ADDSSmem", argLength: 3, reg: fp21load, asm: "ADDSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "ADDSDmem", argLength: 3, reg: fp21load, asm: "ADDSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "SUBSSmem", argLength: 3, reg: fp21load, asm: "SUBSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "SUBSDmem", argLength: 3, reg: fp21load, asm: "SUBSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "MULSSmem", argLength: 3, reg: fp21load, asm: "MULSS", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp32 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "MULSDmem", argLength: 3, reg: fp21load, asm: "MULSD", aux: "SymOff", resultInArg0: true, faultOnNilArg1: true, symEffect: "Read"}, // fp64 arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + // binary ops {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: 
true}, // arg0 + auxint @@ -258,6 +267,12 @@ func init() { {name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15 {name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7 + {name: "ADDLmem", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "SUBLmem", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "ANDLmem", argLength: 3, reg: gp21load, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "ORLmem", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + {name: "XORLmem", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem + // unary ops {name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0 @@ -516,4 +531,4 @@ func init() { framepointerreg: int8(num["BP"]), linkreg: -1, // not used }) -} +} \ No newline at end of file diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ef3875ec028..4b782acfa60 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -255,6 +255,12 @@ const ( Op386MOVSSstoreidx4 Op386MOVSDstoreidx1 Op386MOVSDstoreidx8 + Op386ADDSSmem + Op386ADDSDmem + Op386SUBSSmem + Op386SUBSDmem + Op386MULSSmem + Op386MULSDmem Op386ADDL Op386ADDLconst Op386ADDLcarry @@ -318,6 +324,11 @@ const ( Op386ROLLconst Op386ROLWconst Op386ROLBconst + Op386ADDLmem + Op386SUBLmem + Op386ANDLmem + Op386ORLmem + Op386XORLmem Op386NEGL Op386NOTL Op386BSFL @@ -2487,6 +2498,114 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDSSmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AADDSS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, + { + name: "ADDSDmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AADDSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, + { + name: "SUBSSmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.ASUBSS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, + { + name: "SUBSDmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + 
faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.ASUBSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, + { + name: "MULSSmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AMULSS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, + { + name: "MULSDmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AMULSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7 + }, + }, + }, { name: "ADDL", argLen: 2, @@ -3437,6 +3556,101 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDLmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + clobberFlags: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AADDL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, + { + name: "SUBLmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + clobberFlags: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.ASUBL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, + { + name: "ANDLmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + clobberFlags: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AANDL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, + { + name: "ORLmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + clobberFlags: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AORL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, + { + name: "XORLmem", + auxType: auxSymOff, + argLen: 3, + resultInArg0: true, + clobberFlags: true, + faultOnNilArg1: true, + symEffect: SymRead, + asm: x86.AXORL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 65791}, // AX CX DX BX SP BP SI DI SB + }, + outputs: []outputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, { name: "NEGL", argLen: 1, diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index a1ea307121f..cebc016486f 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -23,10 +23,22 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ADDLcarry_0(v) case Op386ADDLconst: return rewriteValue386_Op386ADDLconst_0(v) + case Op386ADDLmem: + return rewriteValue386_Op386ADDLmem_0(v) + case Op386ADDSD: + return rewriteValue386_Op386ADDSD_0(v) + case Op386ADDSDmem: + return rewriteValue386_Op386ADDSDmem_0(v) + case Op386ADDSS: + return 
rewriteValue386_Op386ADDSS_0(v) + case Op386ADDSSmem: + return rewriteValue386_Op386ADDSSmem_0(v) case Op386ANDL: return rewriteValue386_Op386ANDL_0(v) case Op386ANDLconst: return rewriteValue386_Op386ANDLconst_0(v) + case Op386ANDLmem: + return rewriteValue386_Op386ANDLmem_0(v) case Op386CMPB: return rewriteValue386_Op386CMPB_0(v) case Op386CMPBconst: @@ -141,6 +153,14 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386MULL_0(v) case Op386MULLconst: return rewriteValue386_Op386MULLconst_0(v) || rewriteValue386_Op386MULLconst_10(v) || rewriteValue386_Op386MULLconst_20(v) || rewriteValue386_Op386MULLconst_30(v) + case Op386MULSD: + return rewriteValue386_Op386MULSD_0(v) + case Op386MULSDmem: + return rewriteValue386_Op386MULSDmem_0(v) + case Op386MULSS: + return rewriteValue386_Op386MULSS_0(v) + case Op386MULSSmem: + return rewriteValue386_Op386MULSSmem_0(v) case Op386NEGL: return rewriteValue386_Op386NEGL_0(v) case Op386NOTL: @@ -149,6 +169,8 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386ORL_0(v) || rewriteValue386_Op386ORL_10(v) || rewriteValue386_Op386ORL_20(v) || rewriteValue386_Op386ORL_30(v) || rewriteValue386_Op386ORL_40(v) || rewriteValue386_Op386ORL_50(v) case Op386ORLconst: return rewriteValue386_Op386ORLconst_0(v) + case Op386ORLmem: + return rewriteValue386_Op386ORLmem_0(v) case Op386ROLBconst: return rewriteValue386_Op386ROLBconst_0(v) case Op386ROLLconst: @@ -213,10 +235,22 @@ func rewriteValue386(v *Value) bool { return rewriteValue386_Op386SUBLcarry_0(v) case Op386SUBLconst: return rewriteValue386_Op386SUBLconst_0(v) + case Op386SUBLmem: + return rewriteValue386_Op386SUBLmem_0(v) + case Op386SUBSD: + return rewriteValue386_Op386SUBSD_0(v) + case Op386SUBSDmem: + return rewriteValue386_Op386SUBSDmem_0(v) + case Op386SUBSS: + return rewriteValue386_Op386SUBSS_0(v) + case Op386SUBSSmem: + return rewriteValue386_Op386SUBSSmem_0(v) case Op386XORL: - return rewriteValue386_Op386XORL_0(v) + return rewriteValue386_Op386XORL_0(v) || rewriteValue386_Op386XORL_10(v) case Op386XORLconst: return rewriteValue386_Op386XORLconst_0(v) + case Op386XORLmem: + return rewriteValue386_Op386XORLmem_0(v) case OpAdd16: return rewriteValue386_OpAdd16_0(v) case OpAdd32: @@ -1194,6 +1228,58 @@ func rewriteValue386_Op386ADDL_20(v *Value) bool { v.AddArg(y) return true } + // match: (ADDL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ADDL l:(MOVLload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (ADDL x (NEGL y)) // cond: // result: (SUBL x y) @@ -1445,6 +1531,296 @@ func rewriteValue386_Op386ADDLconst_0(v *Value) bool { } return false } +func rewriteValue386_Op386ADDLmem_0(v 
*Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (ADDLmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ADDLmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ADDLmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (ADDLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDLmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386ADDSD_0(v *Value) bool { + // match: (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDSDmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSDload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDSDmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ADDSD l:(MOVSDload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDSDmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVSDload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDSDmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386ADDSDmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (ADDSDmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ADDSDmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ADDSDmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (ADDSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] 
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDSDmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386ADDSS_0(v *Value) bool { + // match: (ADDSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDSSmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSSload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDSSmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ADDSS l:(MOVSSload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ADDSSmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVSSload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ADDSSmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386ADDSSmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (ADDSSmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ADDSSmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ADDSSmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (ADDSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ADDSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ADDSSmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} func rewriteValue386_Op386ANDL_0(v *Value) bool { // match: (ANDL x (MOVLconst [c])) // cond: @@ -1478,6 +1854,58 @@ func rewriteValue386_Op386ANDL_0(v *Value) bool { v.AddArg(x) return true } + // match: (ANDL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ANDLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ANDLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ANDL l:(MOVLload [off] {sym} ptr mem) x) + // cond: 
canMergeLoad(v, l, x) && clobber(l) + // result: (ANDLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ANDLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } // match: (ANDL x x) // cond: // result: x @@ -1553,6 +1981,66 @@ func rewriteValue386_Op386ANDLconst_0(v *Value) bool { } return false } +func rewriteValue386_Op386ANDLmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (ANDLmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ANDLmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ANDLmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (ANDLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ANDLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ANDLmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} func rewriteValue386_Op386CMPB_0(v *Value) bool { b := v.Block _ = b @@ -8825,6 +9313,236 @@ func rewriteValue386_Op386MULLconst_30(v *Value) bool { } return false } +func rewriteValue386_Op386MULSD_0(v *Value) bool { + // match: (MULSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (MULSDmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSDload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386MULSDmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MULSD l:(MOVSDload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (MULSDmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVSDload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386MULSDmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MULSDmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MULSDmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (MULSDmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = 
v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MULSDmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MULSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MULSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MULSDmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MULSS_0(v *Value) bool { + // match: (MULSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (MULSSmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSSload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386MULSSmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MULSS l:(MOVSSload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (MULSSmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVSSload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386MULSSmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386MULSSmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (MULSSmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (MULSSmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386MULSSmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MULSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (MULSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386MULSSmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + 
return false +} func rewriteValue386_Op386NEGL_0(v *Value) bool { // match: (NEGL (MOVLconst [c])) // cond: @@ -8858,10 +9576,6 @@ func rewriteValue386_Op386NOTL_0(v *Value) bool { return false } func rewriteValue386_Op386ORL_0(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (ORL x (MOVLconst [c])) // cond: // result: (ORLconst [c] x) @@ -9060,6 +9774,65 @@ func rewriteValue386_Op386ORL_0(v *Value) bool { v.AddArg(x) return true } + // match: (ORL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ORLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ORLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (ORL l:(MOVLload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (ORLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386ORLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386ORL_10(v *Value) bool { + b := v.Block + _ = b + typ := &b.Func.Config.Types + _ = typ // match: (ORL x x) // cond: // result: x @@ -9123,13 +9896,6 @@ func rewriteValue386_Op386ORL_0(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386ORL_10(v *Value) bool { - b := v.Block - _ = b - typ := &b.Func.Config.Types - _ = typ // match: (ORL s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)) x0:(MOVBload [i0] {s} p mem)) // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) @@ -9645,6 +10411,11 @@ func rewriteValue386_Op386ORL_10(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValue386_Op386ORL_20(v *Value) bool { + b := v.Block + _ = b // match: (ORL x0:(MOVBloadidx1 [i0] {s} idx p mem) s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem))) // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) @@ -9753,11 +10524,6 @@ func rewriteValue386_Op386ORL_10(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386ORL_20(v *Value) bool { - b := v.Block - _ = b // match: (ORL s0:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} idx p mem)) x0:(MOVBloadidx1 [i0] {s} p idx mem)) // cond: i1==i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0) && clobber(x1) && clobber(s0) // result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) @@ -10340,6 +11106,11 @@ func rewriteValue386_Op386ORL_20(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValue386_Op386ORL_30(v *Value) bool { + b := v.Block + _ = b // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem)) s1:(SHLLconst [24] 
x2:(MOVBloadidx1 [i3] {s} p idx mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -10508,11 +11279,6 @@ func rewriteValue386_Op386ORL_20(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386ORL_30(v *Value) bool { - b := v.Block - _ = b // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} p idx mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -11185,6 +11951,11 @@ func rewriteValue386_Op386ORL_30(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValue386_Op386ORL_40(v *Value) bool { + b := v.Block + _ = b // match: (ORL o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} idx p mem)) s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -11353,11 +12124,6 @@ func rewriteValue386_Op386ORL_30(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386ORL_40(v *Value) bool { - b := v.Block - _ = b // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -12030,6 +12796,11 @@ func rewriteValue386_Op386ORL_40(v *Value) bool { v0.AddArg(mem) return true } + return false +} +func rewriteValue386_Op386ORL_50(v *Value) bool { + b := v.Block + _ = b // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} p idx mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -12198,11 +12969,6 @@ func rewriteValue386_Op386ORL_40(v *Value) bool { v0.AddArg(mem) return true } - return false -} -func rewriteValue386_Op386ORL_50(v *Value) bool { - b := v.Block - _ = b // match: (ORL s1:(SHLLconst [24] x2:(MOVBloadidx1 [i3] {s} idx p mem)) o0:(ORL s0:(SHLLconst [16] x1:(MOVBloadidx1 [i2] {s} idx p mem)) x0:(MOVWloadidx1 [i0] {s} p idx mem))) // cond: i2 == i0+2 && i3 == i0+3 && x0.Uses 
== 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0) && clobber(x1) && clobber(x2) && clobber(s0) && clobber(s1) && clobber(o0) // result: @mergePoint(b,x0,x1,x2) (MOVLloadidx1 [i0] {s} p idx mem) @@ -12668,6 +13434,66 @@ func rewriteValue386_Op386ORLconst_0(v *Value) bool { } return false } +func rewriteValue386_Op386ORLmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (ORLmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (ORLmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386ORLmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (ORLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (ORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386ORLmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} func rewriteValue386_Op386ROLBconst_0(v *Value) bool { // match: (ROLBconst [c] (ROLBconst [d] x)) // cond: @@ -14090,6 +14916,244 @@ func rewriteValue386_Op386SUBLconst_0(v *Value) bool { return true } } +func rewriteValue386_Op386SUBLmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (SUBLmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (SUBLmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBLmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (SUBLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (SUBLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386SUBLmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386SUBSD_0(v *Value) bool { + // match: (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (SUBSDmem x [off] {sym} ptr mem) + for { + 
_ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSDload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386SUBSDmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386SUBSDmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (SUBSDmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (SUBSDmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBSDmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (SUBSDmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (SUBSDmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386SUBSDmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386SUBSS_0(v *Value) bool { + // match: (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (SUBSSmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVSSload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386SUBSSmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386SUBSSmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (SUBSSmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (SUBSSmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386SUBSSmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (SUBSSmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (SUBSSmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || 
!config.ctxt.Flag_shared)) { + break + } + v.reset(Op386SUBSSmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} func rewriteValue386_Op386XORL_0(v *Value) bool { // match: (XORL x (MOVLconst [c])) // cond: @@ -14289,6 +15353,61 @@ func rewriteValue386_Op386XORL_0(v *Value) bool { v.AddArg(x) return true } + // match: (XORL x l:(MOVLload [off] {sym} ptr mem)) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (XORLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + x := v.Args[0] + l := v.Args[1] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386XORLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (XORL l:(MOVLload [off] {sym} ptr mem) x) + // cond: canMergeLoad(v, l, x) && clobber(l) + // result: (XORLmem x [off] {sym} ptr mem) + for { + _ = v.Args[1] + l := v.Args[0] + if l.Op != Op386MOVLload { + break + } + off := l.AuxInt + sym := l.Aux + _ = l.Args[1] + ptr := l.Args[0] + mem := l.Args[1] + x := v.Args[1] + if !(canMergeLoad(v, l, x) && clobber(l)) { + break + } + v.reset(Op386XORLmem) + v.AuxInt = off + v.Aux = sym + v.AddArg(x) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + return false +} +func rewriteValue386_Op386XORL_10(v *Value) bool { // match: (XORL x x) // cond: // result: (MOVLconst [0]) @@ -14351,6 +15470,66 @@ func rewriteValue386_Op386XORLconst_0(v *Value) bool { } return false } +func rewriteValue386_Op386XORLmem_0(v *Value) bool { + b := v.Block + _ = b + config := b.Func.Config + _ = config + // match: (XORLmem [off1] {sym} val (ADDLconst [off2] base) mem) + // cond: is32Bit(off1+off2) + // result: (XORLmem [off1+off2] {sym} val base mem) + for { + off1 := v.AuxInt + sym := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386ADDLconst { + break + } + off2 := v_1.AuxInt + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(Op386XORLmem) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (XORLmem [off1] {sym1} val (LEAL [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) + // result: (XORLmem [off1+off2] {mergeSym(sym1,sym2)} val base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + _ = v.Args[2] + val := v.Args[0] + v_1 := v.Args[1] + if v_1.Op != Op386LEAL { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + base := v_1.Args[0] + mem := v.Args[2] + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared)) { + break + } + v.reset(Op386XORLmem) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(val) + v.AddArg(base) + v.AddArg(mem) + return true + } + return false +} func rewriteValue386_OpAdd16_0(v *Value) bool { // match: (Add16 x y) // cond: @@ -20230,4 +21409,4 @@ func rewriteBlock386(b *Block) bool { } } return false -} +} \ No newline at end of file diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go index 5f456a146d4..91cf70ff892 100644 --- a/src/cmd/compile/internal/x86/ssa.go +++ b/src/cmd/compile/internal/x86/ssa.go @@ -510,6 +510,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG 
p.To.Reg = v.Reg() + case ssa.Op386ADDLmem, ssa.Op386SUBLmem, ssa.Op386ANDLmem, ssa.Op386ORLmem, ssa.Op386XORLmem, + ssa.Op386ADDSDmem, ssa.Op386ADDSSmem, ssa.Op386SUBSDmem, ssa.Op386SUBSSmem, ssa.Op386MULSDmem, ssa.Op386MULSSmem: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_MEM + p.From.Reg = v.Args[1].Reg() + gc.AddAux(&p.From, v) + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + if v.Reg() != v.Args[0].Reg() { + v.Fatalf("input[0] and output not in same register %s", v.LongString()) + } case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG
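
As a rough illustration (not part of the patch itself), the Go code below is the kind of source that can benefit from the new memory-operand ops on GOARCH=386. The package and function names are invented for the example, and the registers mentioned in the comments are only assumptions; the actual choice depends on register allocation and on whether canMergeLoad accepts the load.

package demo

// addInt is a hypothetical candidate for the new ADDLmem op: the load of *p
// may be folded into the add, giving roughly "ADDL (BX), AX" instead of
// "MOVL (BX), CX; ADDL CX, AX".
func addInt(a int32, p *int32) int32 {
	return a + *p
}

// mulFloat is the floating-point analogue, a candidate for the new MULSDmem op.
func mulFloat(x float64, p *float64) float64 {
	return x * *p
}

Comparing the assembly before and after this change, for example with GOARCH=386 go build -gcflags=-S, is one way to see whether the load was folded into the arithmetic instruction.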