1
0
mirror of https://github.com/golang/go synced 2024-11-18 18:04:46 -07:00

cmd/compile: switch to typed aux for 386 optimization rules

Convert first section of 386 optimization rules to the typed aux form.

Adds addOffset{32,64} functions that returns ValAndOffs and a
ValAndOff.canAdd32 function that takes an int32.

Passes

  GOARCH=386 gotip build -toolexec 'toolstash -cmp' -a std

Change-Id: I69d2a8ace6936d5e8ba6ba047183002bf07dd5be
Reviewed-on: https://go-review.googlesource.com/c/go/+/228825
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Alberto Donizetti 2020-04-19 10:45:04 +02:00
parent 4974ac6874
commit 17fbc818ff
4 changed files with 1614 additions and 1588 deletions

View File

@ -272,8 +272,8 @@
(MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%4 != 0 && s > 4 ->
(Zero [s-s%4] (ADDLconst destptr [s%4])
(Zero [s] destptr mem) && s%4 != 0 && s > 4 =>
(Zero [s-s%4] (ADDLconst destptr [int32(s%4)])
(MOVLstoreconst [0] destptr mem))
// Zero small numbers of words directly.
@ -370,113 +370,114 @@
// TODO: Should the optimizations be a separate pass?
// Fold boolean tests into blocks
(NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
(NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
(NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
(NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)
(NE (TESTB (SETL cmp) (SETL cmp)) yes no) => (LT cmp yes no)
(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE cmp yes no)
(NE (TESTB (SETG cmp) (SETG cmp)) yes no) => (GT cmp yes no)
(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE cmp yes no)
(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ cmp yes no)
(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE cmp yes no)
(NE (TESTB (SETB cmp) (SETB cmp)) yes no) => (ULT cmp yes no)
(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no)
(NE (TESTB (SETA cmp) (SETA cmp)) yes no) => (UGT cmp yes no)
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no)
(NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no)
// Special case for floating point - LF/LEF not generated
(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)
(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) => (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF cmp yes no)
// fold constants into instructions
(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
(ADDLcarry x (MOVLconst [c])) -> (ADDLconstcarry [c] x)
(ADCL x (MOVLconst [c]) f) -> (ADCLconst [c] x f)
(ADDL x (MOVLconst [c])) => (ADDLconst [c] x)
(ADDLcarry x (MOVLconst [c])) => (ADDLconstcarry [c] x)
(ADCL x (MOVLconst [c]) f) => (ADCLconst [c] x f)
(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
(SUBLcarry x (MOVLconst [c])) -> (SUBLconstcarry [c] x)
(SBBL x (MOVLconst [c]) f) -> (SBBLconst [c] x f)
(SUBL x (MOVLconst [c])) => (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c]))
(SUBLcarry x (MOVLconst [c])) => (SUBLconstcarry [c] x)
(SBBL x (MOVLconst [c]) f) => (SBBLconst [c] x f)
(MULL x (MOVLconst [c])) -> (MULLconst [c] x)
(MULL x (MOVLconst [c])) => (MULLconst [c] x)
(ANDL x (MOVLconst [c])) => (ANDLconst [c] x)
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
(ANDLconst [c] (ANDLconst [d] x)) => (ANDLconst [c & d] x)
(XORLconst [c] (XORLconst [d] x)) => (XORLconst [c ^ d] x)
(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
(ANDLconst [c] (ANDLconst [d] x)) -> (ANDLconst [c & d] x)
(ORL x (MOVLconst [c])) => (ORLconst [c] x)
(XORL x (MOVLconst [c])) => (XORLconst [c] x)
(XORLconst [c] (XORLconst [d] x)) -> (XORLconst [c ^ d] x)
(SHLL x (MOVLconst [c])) => (SHLLconst [c&31] x)
(SHRL x (MOVLconst [c])) => (SHRLconst [c&31] x)
(SHRW x (MOVLconst [c])) && c&31 < 16 => (SHRWconst [int16(c&31)] x)
(SHRW _ (MOVLconst [c])) && c&31 >= 16 => (MOVLconst [0])
(SHRB x (MOVLconst [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
(SHRB _ (MOVLconst [c])) && c&31 >= 8 => (MOVLconst [0])
(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(SARL x (MOVLconst [c])) => (SARLconst [c&31] x)
(SARW x (MOVLconst [c])) => (SARWconst [int16(min(int64(c&31),15))] x)
(SARB x (MOVLconst [c])) => (SARBconst [int8(min(int64(c&31),7))] x)
(ORL x (MOVLconst [c])) -> (ORLconst [c] x)
(XORL x (MOVLconst [c])) -> (XORLconst [c] x)
(SHLL x (MOVLconst [c])) -> (SHLLconst [c&31] x)
(SHRL x (MOVLconst [c])) -> (SHRLconst [c&31] x)
(SHRW x (MOVLconst [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
(SHRW _ (MOVLconst [c])) && c&31 >= 16 -> (MOVLconst [0])
(SHRB x (MOVLconst [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
(SHRB _ (MOVLconst [c])) && c&31 >= 8 -> (MOVLconst [0])
(SARL x (MOVLconst [c])) -> (SARLconst [c&31] x)
(SARW x (MOVLconst [c])) -> (SARWconst [min(c&31,15)] x)
(SARB x (MOVLconst [c])) -> (SARBconst [min(c&31,7)] x)
(SARL x (ANDLconst [31] y)) -> (SARL x y)
(SHLL x (ANDLconst [31] y)) -> (SHLL x y)
(SHRL x (ANDLconst [31] y)) -> (SHRL x y)
(SARL x (ANDLconst [31] y)) => (SARL x y)
(SHLL x (ANDLconst [31] y)) => (SHLL x y)
(SHRL x (ANDLconst [31] y)) => (SHRL x y)
// Rotate instructions
(ADDL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
( ORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
(XORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c -> (ROLLconst [c] x)
(ADDL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c => (ROLLconst [c] x)
( ORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c => (ROLLconst [c] x)
(XORL (SHLLconst [c] x) (SHRLconst [d] x)) && d == 32-c => (ROLLconst [c] x)
(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == 16-c && t.Size() == 2 -> (ROLWconst x [c])
(ADDL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == int16(16-c) && t.Size() == 2
=> (ROLWconst x [int16(c)])
( ORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == int16(16-c) && t.Size() == 2
=> (ROLWconst x [int16(c)])
(XORL <t> (SHLLconst x [c]) (SHRWconst x [d])) && c < 16 && d == int16(16-c) && t.Size() == 2
=> (ROLWconst x [int16(c)])
(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == 8-c && t.Size() == 1 -> (ROLBconst x [c])
(ADDL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == int8(8-c) && t.Size() == 1
=> (ROLBconst x [int8(c)])
( ORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == int8(8-c) && t.Size() == 1
=> (ROLBconst x [int8(c)])
(XORL <t> (SHLLconst x [c]) (SHRBconst x [d])) && c < 8 && d == int8(8-c) && t.Size() == 1
=> (ROLBconst x [int8(c)])
(ROLLconst [c] (ROLLconst [d] x)) => (ROLLconst [(c+d)&31] x)
(ROLWconst [c] (ROLWconst [d] x)) => (ROLWconst [(c+d)&15] x)
(ROLBconst [c] (ROLBconst [d] x)) => (ROLBconst [(c+d)& 7] x)
(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
// Constant shift simplifications
(SHLLconst x [0]) -> x
(SHRLconst x [0]) -> x
(SARLconst x [0]) -> x
(SHLLconst x [0]) => x
(SHRLconst x [0]) => x
(SARLconst x [0]) => x
(SHRWconst x [0]) -> x
(SARWconst x [0]) -> x
(SHRWconst x [0]) => x
(SARWconst x [0]) => x
(SHRBconst x [0]) -> x
(SARBconst x [0]) -> x
(SHRBconst x [0]) => x
(SARBconst x [0]) => x
(ROLLconst [0] x) -> x
(ROLWconst [0] x) -> x
(ROLBconst [0] x) -> x
(ROLLconst [0] x) => x
(ROLWconst [0] x) => x
(ROLBconst [0] x) => x
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
// for the small shifts. I don't think we'll ever generate a weird shift (e.g.
// (SHRW x (MOVLconst [24])), but just in case.
(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
(CMPL x (MOVLconst [c])) => (CMPLconst x [c])
(CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c]))
(CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)])
(CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)]))
(CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)])
(CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
(CMP(L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(L|W|B) y x))
(CMP(L|W|B) x y) && x.ID > y.ID => (InvertFlags (CMP(L|W|B) y x))
// strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
@ -487,86 +488,86 @@
// which can require a register-register move
// to preserve the original value,
// so it must be used with care.
(MULLconst [-9] x) -> (NEGL (LEAL8 <v.Type> x x))
(MULLconst [-5] x) -> (NEGL (LEAL4 <v.Type> x x))
(MULLconst [-3] x) -> (NEGL (LEAL2 <v.Type> x x))
(MULLconst [-1] x) -> (NEGL x)
(MULLconst [0] _) -> (MOVLconst [0])
(MULLconst [1] x) -> x
(MULLconst [3] x) -> (LEAL2 x x)
(MULLconst [5] x) -> (LEAL4 x x)
(MULLconst [7] x) -> (LEAL2 x (LEAL2 <v.Type> x x))
(MULLconst [9] x) -> (LEAL8 x x)
(MULLconst [11] x) -> (LEAL2 x (LEAL4 <v.Type> x x))
(MULLconst [13] x) -> (LEAL4 x (LEAL2 <v.Type> x x))
(MULLconst [19] x) -> (LEAL2 x (LEAL8 <v.Type> x x))
(MULLconst [21] x) -> (LEAL4 x (LEAL4 <v.Type> x x))
(MULLconst [25] x) -> (LEAL8 x (LEAL2 <v.Type> x x))
(MULLconst [27] x) -> (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
(MULLconst [37] x) -> (LEAL4 x (LEAL8 <v.Type> x x))
(MULLconst [41] x) -> (LEAL8 x (LEAL4 <v.Type> x x))
(MULLconst [45] x) -> (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
(MULLconst [73] x) -> (LEAL8 x (LEAL8 <v.Type> x x))
(MULLconst [81] x) -> (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
(MULLconst [-9] x) => (NEGL (LEAL8 <v.Type> x x))
(MULLconst [-5] x) => (NEGL (LEAL4 <v.Type> x x))
(MULLconst [-3] x) => (NEGL (LEAL2 <v.Type> x x))
(MULLconst [-1] x) => (NEGL x)
(MULLconst [0] _) => (MOVLconst [0])
(MULLconst [1] x) => x
(MULLconst [3] x) => (LEAL2 x x)
(MULLconst [5] x) => (LEAL4 x x)
(MULLconst [7] x) => (LEAL2 x (LEAL2 <v.Type> x x))
(MULLconst [9] x) => (LEAL8 x x)
(MULLconst [11] x) => (LEAL2 x (LEAL4 <v.Type> x x))
(MULLconst [13] x) => (LEAL4 x (LEAL2 <v.Type> x x))
(MULLconst [19] x) => (LEAL2 x (LEAL8 <v.Type> x x))
(MULLconst [21] x) => (LEAL4 x (LEAL4 <v.Type> x x))
(MULLconst [25] x) => (LEAL8 x (LEAL2 <v.Type> x x))
(MULLconst [27] x) => (LEAL8 (LEAL2 <v.Type> x x) (LEAL2 <v.Type> x x))
(MULLconst [37] x) => (LEAL4 x (LEAL8 <v.Type> x x))
(MULLconst [41] x) => (LEAL8 x (LEAL4 <v.Type> x x))
(MULLconst [45] x) => (LEAL8 (LEAL4 <v.Type> x x) (LEAL4 <v.Type> x x))
(MULLconst [73] x) => (LEAL8 x (LEAL8 <v.Type> x x))
(MULLconst [81] x) => (LEAL8 (LEAL8 <v.Type> x x) (LEAL8 <v.Type> x x))
(MULLconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBL (SHLLconst <v.Type> [log2(c+1)] x) x)
(MULLconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAL1 (SHLLconst <v.Type> [log2(c-1)] x) x)
(MULLconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAL2 (SHLLconst <v.Type> [log2(c-2)] x) x)
(MULLconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAL4 (SHLLconst <v.Type> [log2(c-4)] x) x)
(MULLconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAL8 (SHLLconst <v.Type> [log2(c-8)] x) x)
(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLLconst [log2(c/3)] (LEAL2 <v.Type> x x))
(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLLconst [log2(c/5)] (LEAL4 <v.Type> x x))
(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLLconst [log2(c/9)] (LEAL8 <v.Type> x x))
(MULLconst [c] x) && isPowerOfTwo32(c+1) && c >= 15 => (SUBL (SHLLconst <v.Type> [int32(log32(c+1))] x) x)
(MULLconst [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEAL1 (SHLLconst <v.Type> [int32(log32(c-1))] x) x)
(MULLconst [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEAL2 (SHLLconst <v.Type> [int32(log32(c-2))] x) x)
(MULLconst [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEAL4 (SHLLconst <v.Type> [int32(log32(c-4))] x) x)
(MULLconst [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEAL8 (SHLLconst <v.Type> [int32(log32(c-8))] x) x)
(MULLconst [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHLLconst [int32(log32(c/3))] (LEAL2 <v.Type> x x))
(MULLconst [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHLLconst [int32(log32(c/5))] (LEAL4 <v.Type> x x))
(MULLconst [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHLLconst [int32(log32(c/9))] (LEAL8 <v.Type> x x))
// combine add/shift into LEAL
(ADDL x (SHLLconst [3] y)) -> (LEAL8 x y)
(ADDL x (SHLLconst [2] y)) -> (LEAL4 x y)
(ADDL x (SHLLconst [1] y)) -> (LEAL2 x y)
(ADDL x (ADDL y y)) -> (LEAL2 x y)
(ADDL x (ADDL x y)) -> (LEAL2 y x)
(ADDL x (SHLLconst [3] y)) => (LEAL8 x y)
(ADDL x (SHLLconst [2] y)) => (LEAL4 x y)
(ADDL x (SHLLconst [1] y)) => (LEAL2 x y)
(ADDL x (ADDL y y)) => (LEAL2 x y)
(ADDL x (ADDL x y)) => (LEAL2 y x)
// combine ADDL/ADDLconst into LEAL1
(ADDLconst [c] (ADDL x y)) -> (LEAL1 [c] x y)
(ADDL (ADDLconst [c] x) y) -> (LEAL1 [c] x y)
(ADDLconst [c] (ADDL x y)) => (LEAL1 [c] x y)
(ADDL (ADDLconst [c] x) y) => (LEAL1 [c] x y)
// fold ADDL into LEAL
(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x)
(LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
(ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEAL1 [c] {s} x y)
(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEAL [c+d] {s} x)
(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(int64(c)+int64(d)) => (LEAL [c+d] {s} x)
(LEAL [c] {s} (ADDL x y)) && x.Op != OpSB && y.Op != OpSB => (LEAL1 [c] {s} x y)
(ADDL x (LEAL [c] {s} y)) && x.Op != OpSB && y.Op != OpSB => (LEAL1 [c] {s} x y)
// fold ADDLconst into LEALx
(ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(c+d) -> (LEAL1 [c+d] {s} x y)
(ADDLconst [c] (LEAL2 [d] {s} x y)) && is32Bit(c+d) -> (LEAL2 [c+d] {s} x y)
(ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(c+d) -> (LEAL4 [c+d] {s} x y)
(ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(c+d) -> (LEAL8 [c+d] {s} x y)
(LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL1 [c+d] {s} x y)
(LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL2 [c+d] {s} x y)
(LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEAL2 [c+2*d] {s} x y)
(LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL4 [c+d] {s} x y)
(LEAL4 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEAL4 [c+4*d] {s} x y)
(LEAL8 [c] {s} (ADDLconst [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEAL8 [c+d] {s} x y)
(LEAL8 [c] {s} x (ADDLconst [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEAL8 [c+8*d] {s} x y)
(ADDLconst [c] (LEAL1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL1 [c+d] {s} x y)
(ADDLconst [c] (LEAL2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL2 [c+d] {s} x y)
(ADDLconst [c] (LEAL4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL4 [c+d] {s} x y)
(ADDLconst [c] (LEAL8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEAL8 [c+d] {s} x y)
(LEAL1 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL1 [c+d] {s} x y)
(LEAL2 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL2 [c+d] {s} x y)
(LEAL2 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEAL2 [c+2*d] {s} x y)
(LEAL4 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL4 [c+d] {s} x y)
(LEAL4 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEAL4 [c+4*d] {s} x y)
(LEAL8 [c] {s} (ADDLconst [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEAL8 [c+d] {s} x y)
(LEAL8 [c] {s} x (ADDLconst [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEAL8 [c+8*d] {s} x y)
// fold shifts into LEALx
(LEAL1 [c] {s} x (SHLLconst [1] y)) -> (LEAL2 [c] {s} x y)
(LEAL1 [c] {s} x (SHLLconst [2] y)) -> (LEAL4 [c] {s} x y)
(LEAL1 [c] {s} x (SHLLconst [3] y)) -> (LEAL8 [c] {s} x y)
(LEAL2 [c] {s} x (SHLLconst [1] y)) -> (LEAL4 [c] {s} x y)
(LEAL2 [c] {s} x (SHLLconst [2] y)) -> (LEAL8 [c] {s} x y)
(LEAL4 [c] {s} x (SHLLconst [1] y)) -> (LEAL8 [c] {s} x y)
(LEAL1 [c] {s} x (SHLLconst [1] y)) => (LEAL2 [c] {s} x y)
(LEAL1 [c] {s} x (SHLLconst [2] y)) => (LEAL4 [c] {s} x y)
(LEAL1 [c] {s} x (SHLLconst [3] y)) => (LEAL8 [c] {s} x y)
(LEAL2 [c] {s} x (SHLLconst [1] y)) => (LEAL4 [c] {s} x y)
(LEAL2 [c] {s} x (SHLLconst [2] y)) => (LEAL8 [c] {s} x y)
(LEAL4 [c] {s} x (SHLLconst [1] y)) => (LEAL8 [c] {s} x y)
// reverse ordering of compare instruction
(SETL (InvertFlags x)) -> (SETG x)
(SETG (InvertFlags x)) -> (SETL x)
(SETB (InvertFlags x)) -> (SETA x)
(SETA (InvertFlags x)) -> (SETB x)
(SETLE (InvertFlags x)) -> (SETGE x)
(SETGE (InvertFlags x)) -> (SETLE x)
(SETBE (InvertFlags x)) -> (SETAE x)
(SETAE (InvertFlags x)) -> (SETBE x)
(SETEQ (InvertFlags x)) -> (SETEQ x)
(SETNE (InvertFlags x)) -> (SETNE x)
(SETL (InvertFlags x)) => (SETG x)
(SETG (InvertFlags x)) => (SETL x)
(SETB (InvertFlags x)) => (SETA x)
(SETA (InvertFlags x)) => (SETB x)
(SETLE (InvertFlags x)) => (SETGE x)
(SETGE (InvertFlags x)) => (SETLE x)
(SETBE (InvertFlags x)) => (SETAE x)
(SETAE (InvertFlags x)) => (SETBE x)
(SETEQ (InvertFlags x)) => (SETEQ x)
(SETNE (InvertFlags x)) => (SETNE x)
// sign extended loads
// Note: The combined instruction must end up in the same block
@ -576,58 +577,60 @@
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
(MOVBLSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBLSXload <v.Type> [off] {sym} ptr mem)
(MOVBLZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
(MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVBLSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBLSXload <v.Type> [off] {sym} ptr mem)
(MOVBLZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVWLSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWLSXload <v.Type> [off] {sym} ptr mem)
(MOVWLZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLZX x)
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
(MOVBLSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBLSX x)
(MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWLSX x)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBLZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWLZX x)
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x
(MOVBLSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBLSX x)
(MOVWLSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWLSX x)
// Fold extensions and ANDs together.
(MOVBLZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
(MOVWLZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
(MOVBLSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
(MOVWLSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
(MOVBLZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x)
(MOVWLZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x)
(MOVBLSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x)
(MOVWLSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x)
// Don't extend before storing
(MOVWstore [off] {sym} ptr (MOVWL(S|Z)X x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBL(S|Z)X x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWL(S|Z)X x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBL(S|Z)X x) mem) => (MOVBstore [off] {sym} ptr x mem)
// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDLconst get eliminated, we still have to compute the ADDLconst and we now
// have potentially two live values (ptr and (ADDLconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
(MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
(MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)
(MOV(L|W|B|SS|SD)load [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
(MOV(L|W|B|SS|SD)load [off1+off2] {sym} ptr mem)
(MOV(L|W|B|SS|SD)store [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) =>
(MOV(L|W|B|SS|SD)store [off1+off2] {sym} ptr val mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && valoff1.canAdd32(off2) =>
((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {sym} base mem)
// Fold constants into stores.
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
(MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
(MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(int64(off)) =>
(MOVLstoreconst [makeValAndOff32(c,off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(int64(off)) =>
(MOVWstoreconst [makeValAndOff32(c,off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(int64(off)) =>
(MOVBstoreconst [makeValAndOff32(c,off)] {sym} ptr mem)
// Fold address offsets into constant stores.
(MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOV(L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
(MOV(L|W|B)storeconst [sc.addOffset32(off)] {s} ptr mem)
// We need to fold LEAL into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
@ -637,111 +640,113 @@
// a separate instruction gives us that register. Having the LEAL be
// a separate instruction also allows it to be CSEd (which is good because
// it compiles to a thunk call).
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
(MOV(L|W|B|SS|SD)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOV(L|W|B|SS|SD)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(L|W|B|SS|SD)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOV(L|W|B|SS|SD)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
(MOV(L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOV(L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOV(L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOV(L|W|B)storeconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
&& valoff1.canAdd32(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
// Merge load/store to op
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
&& y.Uses==1 && l.Uses==1 && clobber(y, l) && validValAndOff(int64(c),int64(off)) =>
((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff32(c,off)] {sym} ptr mem)
// fold LEALs together
(LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL [off1+off2] {mergeSym(sym1,sym2)} x)
(LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL [off1+off2] {mergeSymTyped(sym1,sym2)} x)
// LEAL into LEAL1
(LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
(LEAL1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
// LEAL1 into LEAL
(LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
// LEAL into LEAL[248]
(LEAL2 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL4 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL8 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
(LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL2 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
(LEAL2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
(LEAL4 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
(LEAL4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
(LEAL8 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
(LEAL8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
// LEAL[248] into LEAL
(LEAL [off1] {sym1} (LEAL2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
(LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+2*off2) ->
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL2 [off1+off2] {mergeSymTyped(sym1, sym2)} x y)
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
(LEAL2 [off1+off2] {mergeSymTyped(sym1, sym2)} y x)
(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+2*int64(off2)) =>
(LEAL4 [off1+2*off2] {sym} x y)
(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(off1+4*off2) ->
(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+4*int64(off2)) =>
(LEAL8 [off1+4*off2] {sym} x y)
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) => (NE cmp yes no)
// Constant comparisons.
(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && x==y => (FlagEQ)
(CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)<uint32(y) => (FlagLT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)>uint32(y) => (FlagLT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)<uint32(y) => (FlagGT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)>uint32(y) => (FlagGT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)==y => (FlagEQ)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)<uint16(y) => (FlagLT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)>uint16(y) => (FlagLT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)<uint16(y) => (FlagGT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)>uint16(y) => (FlagGT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)==y => (FlagEQ)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)<uint8(y) => (FlagLT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)>uint8(y) => (FlagLT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)<uint8(y) => (FlagGT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)>uint8(y) => (FlagGT_UGT)
// Other known comparisons.
(CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT)

View File

@ -190,6 +190,11 @@ func (x ValAndOff) canAdd(off int64) bool {
return newoff == int64(int32(newoff))
}
func (x ValAndOff) canAdd32(off int32) bool {
newoff := x.Off() + int64(off)
return newoff == int64(int32(newoff))
}
func (x ValAndOff) add(off int64) int64 {
if !x.canAdd(off) {
panic("invalid ValAndOff.add")
@ -197,6 +202,20 @@ func (x ValAndOff) add(off int64) int64 {
return makeValAndOff(x.Val(), x.Off()+off)
}
func (x ValAndOff) addOffset32(off int32) ValAndOff {
if !x.canAdd32(off) {
panic("invalid ValAndOff.add")
}
return ValAndOff(makeValAndOff(x.Val(), x.Off()+int64(off)))
}
func (x ValAndOff) addOffset64(off int64) ValAndOff {
if !x.canAdd(off) {
panic("invalid ValAndOff.add")
}
return ValAndOff(makeValAndOff(x.Val(), x.Off()+off))
}
// int128 is a type that stores a 128-bit constant.
// The only allowed constant right now is 0, so we can cheat quite a bit.
type int128 int64

View File

@ -207,9 +207,11 @@ func mergeSym(x, y interface{}) interface{} {
}
panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
}
func canMergeSym(x, y interface{}) bool {
return x == nil || y == nil
}
func mergeSymTyped(x, y Sym) Sym {
if x == nil {
return y

File diff suppressed because it is too large Load Diff