From e81cc9119f7906ebded91f4cdc149866ac2acc0d Mon Sep 17 00:00:00 2001
From: erifan01
Date: Tue, 7 Mar 2023 08:49:11 +0800
Subject: [PATCH] cmd/compile: combine similar optimization rules on arm64

This CL combines some rules that share the same structure. To avoid
extremely long rules, some rules are intentionally left unmerged. In
addition, this CL aligns the components of some rules for better
readability.

Change-Id: I4ba1493251ace00b10591e3c8eef4b6277a4b226
Reviewed-on: https://go-review.googlesource.com/c/go/+/476115
TryBot-Result: Gopher Robot
Reviewed-by: Heschi Kreinick
Run-TryBot: Eric Fang
Reviewed-by: Cherry Mui
---
 src/cmd/compile/internal/ssa/_gen/ARM64.rules | 935 ++++++++----------
 src/cmd/compile/internal/ssa/rewriteARM64.go  |  16 +-
 2 files changed, 402 insertions(+), 549 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index fdfd7858fb0..747bd020f15 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -3,70 +3,68 @@
 // license that can be found in the LICENSE file.

 (Add(Ptr|64|32|16|8) ...) => (ADD ...)
-(Add(32F|64F) ...) => (FADD(S|D) ...)
+(Add(32|64)F ...) => (FADD(S|D) ...)
 (Sub(Ptr|64|32|16|8) ...) => (SUB ...)
-(Sub(32F|64F) ...) => (FSUB(S|D) ...)
+(Sub(32|64)F ...) => (FSUB(S|D) ...)
 (Mul64 ...) => (MUL ...)
 (Mul(32|16|8) ...) => (MULW ...)
-(Mul(32F|64F) ...) => (FMUL(S|D) ...)
+(Mul(32|64)F ...) => (FMUL(S|D) ...)

-(Hmul64 ...) => (MULH ...)
+(Hmul64 ...)  => (MULH ...)
 (Hmul64u ...) => (UMULH ...)
-(Hmul32 x y) => (SRAconst (MULL x y) [32])
+(Hmul32 x y)  => (SRAconst (MULL x y) [32])
 (Hmul32u x y) => (SRAconst (UMULL x y) [32])
 (Select0 (Mul64uhilo x y)) => (UMULH x y)
 (Select1 (Mul64uhilo x y)) => (MUL x y)

 (Div64 [false] x y) => (DIV x y)
-(Div64u ...) => (UDIV ...)
 (Div32 [false] x y) => (DIVW x y)
-(Div32u ...) => (UDIVW ...)
 (Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y))
 (Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y))
-(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Div8 x y)  => (DIVW (SignExt8to32 x) (SignExt8to32 y))
+(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Div64u ...) => (UDIV ...)
+(Div32u ...) => (UDIVW ...)
 (Div32F ...) => (FDIVS ...)
 (Div64F ...) => (FDIVD ...)

 (Mod64 x y) => (MOD x y)
-(Mod64u ...) => (UMOD ...)
 (Mod32 x y) => (MODW x y)
+(Mod64u ...) => (UMOD ...)
 (Mod32u ...) => (UMODW ...)
-(Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y))
-(Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod(16|8) x y)  => (MODW (SignExt(16|8)to32 x) (SignExt(16|8)to32 y))
+(Mod(16|8)u x y) => (UMODW (ZeroExt(16|8)to32 x) (ZeroExt(16|8)to32 y))

 // (x + y) / 2 with x>=y => (x - y) / 2 + y
 (Avg64u x y) => (ADD (SRLconst (SUB x y) [1]) y)

 (And(64|32|16|8) ...) => (AND ...)
-(Or(64|32|16|8) ...) => (OR ...)
+(Or(64|32|16|8) ...)  => (OR ...)
 (Xor(64|32|16|8) ...) => (XOR ...)

 // unary ops
 (Neg(64|32|16|8) ...) => (NEG ...)
-(Neg(32F|64F) ...) => (FNEG(S|D) ...)
+(Neg(32|64)F ...) => (FNEG(S|D) ...)
 (Com(64|32|16|8) ...) => (MVN ...)

 // math package intrinsics
-(Abs ...) => (FABSD ...)
-(Sqrt ...) => (FSQRTD ...)
-(Ceil ...) => (FRINTPD ...)
-(Floor ...) => (FRINTMD ...)
-(Round ...) => (FRINTAD ...)
+(Abs ...)         => (FABSD ...)
+(Sqrt ...)        => (FSQRTD ...)
+(Ceil ...)
=> (FRINTPD ...)
+(Floor ...)       => (FRINTMD ...)
+(Round ...)       => (FRINTAD ...)
 (RoundToEven ...) => (FRINTND ...)
-(Trunc ...) => (FRINTZD ...)
-(FMA x y z) => (FMADDD z x y)
+(Trunc ...)       => (FRINTZD ...)
+(FMA x y z)       => (FMADDD z x y)
 (Sqrt32 ...) => (FSQRTS ...)

 // lowering rotates
 // we do rotate detection in generic rules; if the following rules need to be changed, check the generic rules first.
-(RotateLeft8 x (MOVDconst [c])) => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7])))
-(RotateLeft8 x y) => (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y))))
+(RotateLeft8 x (MOVDconst [c]))  => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7])))
+(RotateLeft8 x y)  => (OR (SLL x (ANDconst [7] y)) (SRL (ZeroExt8to64 x) (ANDconst [7] (NEG y))))
 (RotateLeft16 x (MOVDconst [c])) => (Or16 (Lsh16x64 x (MOVDconst [c&15])) (Rsh16Ux64 x (MOVDconst [-c&15])))
 (RotateLeft16 x y) => (RORW (ORshiftLL (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG y))
 (RotateLeft32 x y) => (RORW x (NEG y))
@@ -74,10 +72,10 @@
 (Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...)

-(Ctz64 x) => (CLZ (RBIT x))
+(Ctz64 x) => (CLZ (RBIT x))
 (Ctz32 x) => (CLZW (RBITW x))
 (Ctz16 x) => (CLZW (RBITW (ORconst [0x10000] x)))
-(Ctz8 x) => (CLZW (RBITW (ORconst [0x100] x)))
+(Ctz8 x)  => (CLZW (RBITW (ORconst [0x100] x)))

 (PopCount64 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp x))))
 (PopCount32 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt32to64 x)))))
@@ -88,17 +86,17 @@
 (FMOVDfpgp (Arg [off] {sym})) => @b.Func.Entry (Arg [off] {sym})

 // Similarly for stores: if we see a store after an FPR <=> GPR move, redirect the store to use the other register set.
-(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem)
+(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem)  => (FMOVDstore [off] {sym} ptr val mem)
 (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem)
-(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem)
+(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem)  => (FMOVSstore [off] {sym} ptr val mem)
 (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)

 // float <=> int register moves, with no conversion.
 // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
-(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val)
-(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val)
+(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _))  => (FMOVDfpgp val)
+(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _))  => (FMOVDgpfp val)
 (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val)
-(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val)
+(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _))  => (FMOVSgpfp val)

 (BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x))
 (BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x))
@@ -108,14 +106,14 @@
 (BitRev64 ...) => (RBIT ...)
 (BitRev32 ...) => (RBITW ...)
-(BitRev16 x) => (SRLconst [48] (RBIT x))
-(BitRev8 x) => (SRLconst [56] (RBIT x))
+(BitRev16 x) => (SRLconst [48] (RBIT x))
+(BitRev8 x)  => (SRLconst [56] (RBIT x))

 // In fact, UMOD will be translated into a UREM instruction, and UREM is in turn translated into
 // UDIV and MSUB instructions.
But if there is already an identical UDIV instruction just before or // after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant. // The purpose of this rule is to have this extra UDIV instruction removed in CSE pass. -(UMOD x y) => (MSUB x y (UDIV x y)) +(UMOD x y) => (MSUB x y (UDIV x y)) (UMODW x y) => (MSUBW x y (UDIVW x y)) // 64-bit addition with carry. @@ -128,10 +126,10 @@ // boolean ops -- booleans are represented with 0=false, 1=true (AndB ...) => (AND ...) -(OrB ...) => (OR ...) -(EqB x y) => (XOR (MOVDconst [1]) (XOR x y)) +(OrB ...) => (OR ...) +(EqB x y) => (XOR (MOVDconst [1]) (XOR x y)) (NeqB ...) => (XOR ...) -(Not x) => (XOR (MOVDconst [1]) x) +(Not x) => (XOR (MOVDconst [1]) x) // shifts // hardware instruction uses only the low 6 bits of the shift @@ -145,7 +143,7 @@ (Lsh(64|32|16|8)x64 x y) && shiftIsBounded(v) => (SLL x y) (Lsh(64|32|16|8)x32 x y) && shiftIsBounded(v) => (SLL x y) (Lsh(64|32|16|8)x16 x y) && shiftIsBounded(v) => (SLL x y) -(Lsh(64|32|16|8)x8 x y) && shiftIsBounded(v) => (SLL x y) +(Lsh(64|32|16|8)x8 x y) && shiftIsBounded(v) => (SLL x y) // signed right shift (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y) @@ -160,69 +158,45 @@ (Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL (ZeroExt8to64 x) y) // shift value may be out of range, use CMP + CSEL instead -(Lsh64x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh64x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh64x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh64x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh64x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh64x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh32x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh32x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh32x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh32x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh32x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh32x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh16x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh16x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh16x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh16x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh16x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh16x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 
[0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Lsh8x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) -(Lsh8x32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Lsh8x16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Lsh8x8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Lsh8x64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] y)) +(Lsh8x(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh64Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] y)) -(Rsh64Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh64Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh64Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh64Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] y)) +(Rsh64Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL x y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh32Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh32Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh32Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh32Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh32Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh32Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt32to64 x) y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh16Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh16Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) -(Rsh16Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh16Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh16Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh16Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt16to64 x) y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh8Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] y)) -(Rsh8Ux32 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt32to64 y))) 
-(Rsh8Ux16 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt16to64 y))) -(Rsh8Ux8 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] (ZeroExt8to64 y))) +(Rsh8Ux64 x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] y)) +(Rsh8Ux(32|16|8) x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL (ZeroExt8to64 x) y) (Const64 [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))) -(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh64x32 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh64x16 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh64x8 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh64x(32|16|8) x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh32x8 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh32x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh16x32 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh16x16 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh16x8 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) +(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh16x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) -(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) -(Rsh8x32 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt32to64 y)))) -(Rsh8x16 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] (ZeroExt16to64 y)))) -(Rsh8x8 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) 
(CMPconst [64] (ZeroExt8to64 y)))) +(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] y))) +(Rsh8x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] y (Const64 [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))) // constants (Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) -(Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)]) +(Const(32|64)F [val]) => (FMOV(S|D)const [float64(val)]) (ConstNil) => (MOVDconst [0]) (ConstBool [t]) => (MOVDconst [b2i(t)]) @@ -230,41 +204,41 @@ // truncations // Because we ignore high parts of registers, truncates are just copies. -(Trunc16to8 ...) => (Copy ...) -(Trunc32to8 ...) => (Copy ...) +(Trunc16to8 ...) => (Copy ...) +(Trunc32to8 ...) => (Copy ...) (Trunc32to16 ...) => (Copy ...) -(Trunc64to8 ...) => (Copy ...) +(Trunc64to8 ...) => (Copy ...) (Trunc64to16 ...) => (Copy ...) (Trunc64to32 ...) => (Copy ...) // Zero-/Sign-extensions -(ZeroExt8to16 ...) => (MOVBUreg ...) -(ZeroExt8to32 ...) => (MOVBUreg ...) +(ZeroExt8to16 ...) => (MOVBUreg ...) +(ZeroExt8to32 ...) => (MOVBUreg ...) (ZeroExt16to32 ...) => (MOVHUreg ...) -(ZeroExt8to64 ...) => (MOVBUreg ...) +(ZeroExt8to64 ...) => (MOVBUreg ...) (ZeroExt16to64 ...) => (MOVHUreg ...) (ZeroExt32to64 ...) => (MOVWUreg ...) -(SignExt8to16 ...) => (MOVBreg ...) -(SignExt8to32 ...) => (MOVBreg ...) +(SignExt8to16 ...) => (MOVBreg ...) +(SignExt8to32 ...) => (MOVBreg ...) (SignExt16to32 ...) => (MOVHreg ...) -(SignExt8to64 ...) => (MOVBreg ...) +(SignExt8to64 ...) => (MOVBreg ...) (SignExt16to64 ...) => (MOVHreg ...) (SignExt32to64 ...) => (MOVWreg ...) // float <=> int conversion -(Cvt32to32F ...) => (SCVTFWS ...) -(Cvt32to64F ...) => (SCVTFWD ...) -(Cvt64to32F ...) => (SCVTFS ...) -(Cvt64to64F ...) => (SCVTFD ...) +(Cvt32to32F ...) => (SCVTFWS ...) +(Cvt32to64F ...) => (SCVTFWD ...) +(Cvt64to32F ...) => (SCVTFS ...) +(Cvt64to64F ...) => (SCVTFD ...) (Cvt32Uto32F ...) => (UCVTFWS ...) (Cvt32Uto64F ...) => (UCVTFWD ...) (Cvt64Uto32F ...) => (UCVTFS ...) (Cvt64Uto64F ...) => (UCVTFD ...) -(Cvt32Fto32 ...) => (FCVTZSSW ...) -(Cvt64Fto32 ...) => (FCVTZSDW ...) -(Cvt32Fto64 ...) => (FCVTZSS ...) -(Cvt64Fto64 ...) => (FCVTZSD ...) +(Cvt32Fto32 ...) => (FCVTZSSW ...) +(Cvt64Fto32 ...) => (FCVTZSDW ...) +(Cvt32Fto64 ...) => (FCVTZSS ...) +(Cvt64Fto64 ...) => (FCVTZSD ...) (Cvt32Fto32U ...) => (FCVTZUSW ...) (Cvt64Fto32U ...) => (FCVTZUDW ...) (Cvt32Fto64U ...) => (FCVTZUS ...) @@ -278,26 +252,24 @@ (Round64F ...) => (LoweredRound64F ...) 
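For reference, the !shiftIsBounded rules above encode Go's shift semantics, which the hardware shift instructions alone do not provide: as the comment in the shifts section notes, the hardware uses only the low 6 bits of the count, while Go requires a logical shift by 64 or more to produce 0 and a signed right shift to behave as if the count were clamped to 63. A minimal Go sketch of that contract, with illustrative helper names that are not compiler code:

package shiftdemo

// lsh64 mirrors (Lsh64x64 x y) when !shiftIsBounded(v).
func lsh64(x, s uint64) uint64 {
	if s < 64 {
		return x << s // count in range: the (SLL x y) arm of the CSEL
	}
	return 0 // count out of range: the (Const64 [0]) arm
}

// rsh64 mirrors (Rsh64x64 x y): here the count is clamped, not the result.
func rsh64(x int64, s uint64) int64 {
	if s >= 64 {
		s = 63 // the (Const64 [63]) arm feeds SRA a clamped count
	}
	return x >> s // sign bits fill in; rsh64(-8, 70) == -1
}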
// comparisons -(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Eq32 x y) => (Equal (CMPW x y)) -(Eq64 x y) => (Equal (CMP x y)) -(EqPtr x y) => (Equal (CMP x y)) +(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Eq32 x y) => (Equal (CMPW x y)) +(Eq64 x y) => (Equal (CMP x y)) +(EqPtr x y) => (Equal (CMP x y)) (Eq32F x y) => (Equal (FCMPS x y)) (Eq64F x y) => (Equal (FCMPD x y)) -(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Neq32 x y) => (NotEqual (CMPW x y)) -(Neq64 x y) => (NotEqual (CMP x y)) -(NeqPtr x y) => (NotEqual (CMP x y)) -(Neq32F x y) => (NotEqual (FCMPS x y)) -(Neq64F x y) => (NotEqual (FCMPD x y)) +(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Neq32 x y) => (NotEqual (CMPW x y)) +(Neq64 x y) => (NotEqual (CMP x y)) +(NeqPtr x y) => (NotEqual (CMP x y)) +(Neq(32|64)F x y) => (NotEqual (FCMP(S|D) x y)) -(Less8 x y) => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y))) -(Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y))) +(Less(8|16) x y) => (LessThan (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y))) (Less32 x y) => (LessThan (CMPW x y)) -(Less64 x y) => (LessThan (CMP x y)) +(Less64 x y) => (LessThan (CMP x y)) // Set condition flags for floating-point comparisons "x < y" // and "x <= y". Because if either or both of the operands are @@ -313,16 +285,16 @@ // x < 1 => x == 0 // 1 <= x => x != 0 (Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x) -(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero) -(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0])) -(Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x) +(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero) +(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0])) +(Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x) -(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Less32U x y) => (LessThanU (CMPW x y)) (Less64U x y) => (LessThanU (CMP x y)) -(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) +(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) (Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y))) (Leq32 x y) => (LessEqual (CMPW x y)) (Leq64 x y) => (LessEqual (CMP x y)) @@ -331,7 +303,7 @@ (Leq32F x y) => (LessEqualF (FCMPS x y)) (Leq64F x y) => (LessEqualF (FCMPD x y)) -(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) (Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (LessEqualU (CMPW x y)) (Leq64U x y) => (LessEqualU (CMP x y)) @@ -374,7 +346,7 @@ (Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem) // zeroing -(Zero [0] _ mem) => mem +(Zero [0] _ mem) => mem (Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem) (Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem) (Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem) @@ 
-456,7 +428,7 @@ mem) // moves -(Move [0] _ _ mem) => mem +(Move [0] _ _ mem) => mem (Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem) (Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem) (Move [3] dst src mem) => @@ -544,36 +516,36 @@ mem) // calls -(StaticCall ...) => (CALLstatic ...) +(StaticCall ...) => (CALLstatic ...) (ClosureCall ...) => (CALLclosure ...) -(InterCall ...) => (CALLinter ...) -(TailCall ...) => (CALLtail ...) +(InterCall ...) => (CALLinter ...) +(TailCall ...) => (CALLtail ...) // checks (NilCheck ...) => (LoweredNilCheck ...) (IsNonNil ptr) => (NotEqual (CMPconst [0] ptr)) -(IsInBounds idx len) => (LessThanU (CMP idx len)) +(IsInBounds idx len) => (LessThanU (CMP idx len)) (IsSliceInBounds idx len) => (LessEqualU (CMP idx len)) // pseudo-ops (GetClosurePtr ...) => (LoweredGetClosurePtr ...) -(GetCallerSP ...) => (LoweredGetCallerSP ...) -(GetCallerPC ...) => (LoweredGetCallerPC ...) +(GetCallerSP ...) => (LoweredGetCallerSP ...) +(GetCallerPC ...) => (LoweredGetCallerPC ...) // Absorb pseudo-ops into blocks. -(If (Equal cc) yes no) => (EQ cc yes no) -(If (NotEqual cc) yes no) => (NE cc yes no) -(If (LessThan cc) yes no) => (LT cc yes no) -(If (LessThanU cc) yes no) => (ULT cc yes no) -(If (LessEqual cc) yes no) => (LE cc yes no) -(If (LessEqualU cc) yes no) => (ULE cc yes no) -(If (GreaterThan cc) yes no) => (GT cc yes no) -(If (GreaterThanU cc) yes no) => (UGT cc yes no) -(If (GreaterEqual cc) yes no) => (GE cc yes no) +(If (Equal cc) yes no) => (EQ cc yes no) +(If (NotEqual cc) yes no) => (NE cc yes no) +(If (LessThan cc) yes no) => (LT cc yes no) +(If (LessThanU cc) yes no) => (ULT cc yes no) +(If (LessEqual cc) yes no) => (LE cc yes no) +(If (LessEqualU cc) yes no) => (ULE cc yes no) +(If (GreaterThan cc) yes no) => (GT cc yes no) +(If (GreaterThanU cc) yes no) => (UGT cc yes no) +(If (GreaterEqual cc) yes no) => (GE cc yes no) (If (GreaterEqualU cc) yes no) => (UGE cc yes no) -(If (LessThanF cc) yes no) => (FLT cc yes no) -(If (LessEqualF cc) yes no) => (FLE cc yes no) -(If (GreaterThanF cc) yes no) => (FGT cc yes no) +(If (LessThanF cc) yes no) => (FLT cc yes no) +(If (LessEqualF cc) yes no) => (FLE cc yes no) +(If (GreaterThanF cc) yes no) => (FGT cc yes no) (If (GreaterEqualF cc) yes no) => (FGE cc yes no) (If cond yes no) => (TBNZ [0] cond yes no) @@ -593,23 +565,18 @@ (AtomicStorePtrNoWB ...) => (STLR ...) (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...) -(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) -(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) +(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...) +(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...) (AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...) (AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...) (AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...) // Currently the updated value is not used, but we need a register to temporarily hold it. 
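// In other words, the rules below keep only the memory result of the lowered
// op (Select1 of its <value, memory> tuple); the updated value computed into
// the temporary register is discarded.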
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem)) -(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem)) -(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem)) -(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem)) - -(AtomicAnd8Variant ptr val mem) => (Select1 (LoweredAtomicAnd8Variant ptr val mem)) -(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem)) -(AtomicOr8Variant ptr val mem) => (Select1 (LoweredAtomicOr8Variant ptr val mem)) -(AtomicOr32Variant ptr val mem) => (Select1 (LoweredAtomicOr32Variant ptr val mem)) +(AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem)) +(AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem)) +(AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem)) +(AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem)) // Write barrier. (WB ...) => (LoweredWB ...) @@ -624,214 +591,101 @@ // Optimizations // Absorb boolean tests into block -(NZ (Equal cc) yes no) => (EQ cc yes no) -(NZ (NotEqual cc) yes no) => (NE cc yes no) -(NZ (LessThan cc) yes no) => (LT cc yes no) -(NZ (LessThanU cc) yes no) => (ULT cc yes no) -(NZ (LessEqual cc) yes no) => (LE cc yes no) -(NZ (LessEqualU cc) yes no) => (ULE cc yes no) -(NZ (GreaterThan cc) yes no) => (GT cc yes no) -(NZ (GreaterThanU cc) yes no) => (UGT cc yes no) -(NZ (GreaterEqual cc) yes no) => (GE cc yes no) +(NZ (Equal cc) yes no) => (EQ cc yes no) +(NZ (NotEqual cc) yes no) => (NE cc yes no) +(NZ (LessThan cc) yes no) => (LT cc yes no) +(NZ (LessThanU cc) yes no) => (ULT cc yes no) +(NZ (LessEqual cc) yes no) => (LE cc yes no) +(NZ (LessEqualU cc) yes no) => (ULE cc yes no) +(NZ (GreaterThan cc) yes no) => (GT cc yes no) +(NZ (GreaterThanU cc) yes no) => (UGT cc yes no) +(NZ (GreaterEqual cc) yes no) => (GE cc yes no) (NZ (GreaterEqualU cc) yes no) => (UGE cc yes no) -(NZ (LessThanF cc) yes no) => (FLT cc yes no) -(NZ (LessEqualF cc) yes no) => (FLE cc yes no) -(NZ (GreaterThanF cc) yes no) => (FGT cc yes no) +(NZ (LessThanF cc) yes no) => (FLT cc yes no) +(NZ (LessEqualF cc) yes no) => (FLE cc yes no) +(NZ (GreaterThanF cc) yes no) => (FGT cc yes no) (NZ (GreaterEqualF cc) yes no) => (FGE cc yes no) -(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no) -(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no) -(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no) -(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no) -(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no) -(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no) -(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no) -(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no) -(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no) +(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no) +(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no) +(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no) +(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no) +(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no) +(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no) +(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no) +(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no) +(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no) (TBNZ [0] (GreaterEqualU cc) yes no) => (UGE cc yes no) -(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no) -(TBNZ [0] (LessEqualF cc) yes no) => (FLE cc yes no) -(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) +(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no) +(TBNZ 
[0] (LessEqualF cc) yes no) => (FLE cc yes no) +(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no) (TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no) -(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no) -(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no) -(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no) -(LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TST x y) yes no) -(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no) -(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no) - -(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no) -(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no) -(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no) -(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no) -(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no) -(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no) - -(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no) -(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no) -(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no) -(LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TSTW x y) yes no) -(GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no) -(GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no) - -(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no) -(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no) -(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no) -(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no) -(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no) -(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTWconst [int32(c)] y) yes no) // For conditional instructions such as CSET, CSEL. 
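// These restate the TST absorptions above for the value forms: CSET and CSEL
// consume the condition through Equal/NotEqual/LessThan/... values rather
// than through block controls, so each block rule needs a value-level twin.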
-(Equal (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TST x y)) -(NotEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TST x y)) -(LessThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TST x y)) -(LessEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TST x y)) -(GreaterThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TST x y)) -(GreaterEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TST x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] z:(AND x y))) && z.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TST x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTWconst [int32(c)] y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTW x y)) +((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => + ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTconst [c] y)) -(Equal (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTWconst [int32(c)] y)) -(NotEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTWconst [int32(c)] y)) -(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTWconst [int32(c)] y)) -(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTWconst [int32(c)] y)) -(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTWconst [int32(c)] y)) -(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTWconst [int32(c)] y)) - -(Equal (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TSTW x y)) -(NotEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TSTW x y)) -(LessThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TSTW x y)) -(LessEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TSTW x y)) -(GreaterThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TSTW x y)) -(GreaterEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TSTW x y)) - -(Equal (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTconst [c] y)) -(NotEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTconst [c] y)) -(LessThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y)) -(LessEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y)) -(GreaterThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y)) -(GreaterEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y)) - -(EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no) -(NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no) -(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no) -(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no) -(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no) -(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no) - -(EQ (CMPWconst [0] x:(ADDconst [c] y)) yes 
no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no) -(NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no) -(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no) -(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no) -(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no) -(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no) - -(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no) -(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no) -(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no) -(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no) -(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no) -(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no) - -(EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no) -(NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no) -(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no) -(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no) -(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no) -(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNconst [c] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNWconst [int32(c)] y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN x y) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW x y) yes no) // CMP(x,-y) -> CMN(x,y) is only valid for unordered comparison, if y can be -1<<63 -(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no) -(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no) - -(Equal (CMP x z:(NEG y))) && z.Uses == 1 => (Equal (CMN x y)) -(NotEqual (CMP x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMN x y)) +((EQ|NE) (CMP x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMN x y) yes no) +((Equal|NotEqual) (CMP x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y)) // CMPW(x,-y) -> CMNW(x,y) is only valid for unordered comparison, if y can be -1<<31 -(EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no) -(NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no) - -(Equal (CMPW x z:(NEG y))) && z.Uses == 1 => (Equal (CMNW x y)) -(NotEqual (CMPW x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMNW x y)) +((EQ|NE) (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMNW x y) yes no) +((Equal|NotEqual) (CMPW x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y)) // For conditional instructions such as CSET, CSEL. // TODO: add support for LT, LE, GT, GE, overflow needs to be considered. 
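// The hazard is overflow. For instance, take x = (ADDconst [1] y) with
// y = math.MaxInt64: the Go-level value x wraps to math.MinInt64, so x < 0 is
// true, yet CMNconst [1] y sets V, making the signed less-than condition
// (N != V) false, because the flags describe the unwrapped sum. Only the Z
// flag, and therefore Equal/NotEqual, is immune to the wrap; the branch rules
// above handle LT/LE/GT/GE by switching to the LTnoov/LEnoov/GTnoov/GEnoov
// block forms.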
-(Equal (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNconst [c] y)) -(NotEqual (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNconst [c] y)) +((Equal|NotEqual) (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNconst [c] y)) +((Equal|NotEqual) (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNWconst [int32(c)] y)) +((Equal|NotEqual) (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y)) +((Equal|NotEqual) (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y)) +((Equal|NotEqual) (CMPconst [0] z:(MADD a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN a (MUL x y))) +((Equal|NotEqual) (CMPconst [0] z:(MSUB a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMP a (MUL x y))) +((Equal|NotEqual) (CMPWconst [0] z:(MADDW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW a (MULW x y))) +((Equal|NotEqual) (CMPWconst [0] z:(MSUBW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMPW a (MULW x y))) -(Equal (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNWconst [int32(c)] y)) -(NotEqual (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNWconst [int32(c)] y)) +((CMPconst|CMNconst) [c] y) && c < 0 && c != -1<<63 => ((CMNconst|CMPconst) [-c] y) +((CMPWconst|CMNWconst) [c] y) && c < 0 && c != -1<<31 => ((CMNWconst|CMPWconst) [-c] y) -(Equal (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMN x y)) -(NotEqual (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMN x y)) +((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no) +((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no) -(Equal (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMNW x y)) -(NotEqual (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMNW x y)) - -(Equal (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (Equal (CMN a (MUL x y))) -(NotEqual (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (NotEqual (CMN a (MUL x y))) - -(Equal (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (Equal (CMP a (MUL x y))) -(NotEqual (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (NotEqual (CMP a (MUL x y))) - -(Equal (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (Equal (CMNW a (MULW x y))) -(NotEqual (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (NotEqual (CMNW a (MULW x y))) - -(Equal (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (Equal (CMPW a (MULW x y))) -(NotEqual (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (NotEqual (CMPW a (MULW x y))) - -(CMPconst [c] y) && c < 0 && c != -1<<63 => (CMNconst [-c] y) -(CMPWconst [c] y) && c < 0 && c != -1<<31 => (CMNWconst [-c] y) -(CMNconst [c] y) && c < 0 && c != -1<<63 => (CMPconst [-c] y) -(CMNWconst [c] y) && c < 0 && c != -1<<31 => (CMPWconst [-c] y) - -(EQ (CMPconst [0] x) yes no) => (Z x yes no) -(NE (CMPconst [0] x) yes no) => (NZ x yes no) -(EQ (CMPWconst [0] x) yes no) => (ZW x yes no) -(NE (CMPWconst [0] x) yes no) => (NZW x yes no) - -(EQ (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL x y)) yes no) -(NE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL x y)) yes no) -(LT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL x y)) yes no) -(LE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL x y)) yes no) -(GT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL x y)) yes no) -(GE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL x y)) yes no) - -(EQ (CMPconst [0] z:(MSUB a x y)) yes no) 
&& z.Uses==1 => (EQ (CMP a (MUL x y)) yes no) -(NE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL x y)) yes no) -(LE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL x y)) yes no) -(LT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL x y)) yes no) -(GE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL x y)) yes no) -(GT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL x y)) yes no) - -(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW x y)) yes no) -(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW x y)) yes no) -(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW x y)) yes no) -(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW x y)) yes no) -(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW x y)) yes no) -(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW x y)) yes no) - -(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW x y)) yes no) -(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW x y)) yes no) -(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW x y)) yes no) -(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW x y)) yes no) -(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW x y)) yes no) -(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW x y)) yes no) +((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMPW a (MULW x y)) yes no) // Absorb bit-tests into block -(Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) -(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) -(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) -(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) -(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) -(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) -(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) -(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) +(Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) +(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) +(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) +(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) +(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no) +(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no) +(EQ 
(TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no) +(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no) // Test sign-bit for signed comparisons against zero (GE (CMPWconst [0] x) yes no) => (TBZ [31] x yes no) -(GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no) -(LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no) -(LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no) +(GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no) +(LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no) +(LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no) // fold offset into address (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => @@ -882,6 +736,7 @@ (MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem) (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem) (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem) + (MOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem) (MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem) @@ -972,20 +827,20 @@ (MOVQstorezero [off1+int32(off2)] {sym} ptr mem) // register indexed store -(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) -(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem) -(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem) -(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem) +(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) +(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem) +(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem) +(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem) (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem) (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem) -(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem) -(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem) -(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem) -(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem) -(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem) -(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem) -(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem) -(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem) +(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem) +(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem) +(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem) 
+(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem) +(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem) +(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem) +(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem) +(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem) (FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem) (FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem) (FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem) @@ -995,14 +850,14 @@ (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem) (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem) (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem) -(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem) -(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem) -(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem) -(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem) -(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) -(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem) +(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem) +(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem) +(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem) +(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) +(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem) (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem) (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem) (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem) @@ -1138,11 +993,11 @@ (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem) -(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem) +(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem) (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) -(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem) +(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr 
idx mem) (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem) (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem) (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem) @@ -1152,24 +1007,24 @@ // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code -//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) -//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x) -//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x) -//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x) -//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x) -//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x) -//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x -//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y +//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) +//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x) +//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x) +//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x) +//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x) +//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x) +//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x +//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y -(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) -(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) +(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0]) (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] 

-(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
+(MOVBload  [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
 (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
-(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
+(MOVHload  [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
 (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
-(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
+(MOVWload  [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
 (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
-(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
+(MOVDload  [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])

 (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
@@ -1186,97 +1041,97 @@
 (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _)) && (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
-(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
+(MOVHloadidx2  ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
 (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
-(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
+(MOVWloadidx4  ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
 (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
-(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
+(MOVDloadidx8  ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
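A hedged sketch of the source-level pattern the storezero/load rules above recognize (invented example, not from the patch):

	package example

	// reload writes zero and immediately reads the same location back.
	// The (MOV*load ... (MOV*storezero ...)) rules above prove the reload
	// must produce zero and replace it with (MOVDconst [0]).
	func reload(p *uint32) uint32 {
		*p = 0
		return *p
	}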

 // don't extend after proper load
-(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVBreg  x:(MOVBload  _ _)) => (MOVDreg x)
 (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBload  _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVHload  _ _)) => (MOVDreg x)
 (MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
 (MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBload  _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHload  _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVWload  _ _)) => (MOVDreg x)
 (MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
 (MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
 (MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
-(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
-(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
-(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
-(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
-(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
-(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
-(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
-(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
+(MOVBreg  x:(MOVBloadidx  _ _ _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBloadidx  _ _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVHloadidx  _ _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBloadidx  _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHloadidx  _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVWloadidx  _ _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
+(MOVHreg  x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
 (MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
-(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
+(MOVWreg  x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
 (MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
 (MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)

 // fold double extensions
-(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVBreg  x:(MOVBreg  _)) => (MOVDreg x)
 (MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
-(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
-(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
-(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBreg  _)) => (MOVDreg x)
+(MOVHreg  x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHreg  x:(MOVHreg  _)) => (MOVDreg x)
 (MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
 (MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
-(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
-(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
-(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
-(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBreg  _)) => (MOVDreg x)
+(MOVWreg  x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWreg  x:(MOVHreg  _)) => (MOVDreg x)
+(MOVWreg  x:(MOVWreg  _)) => (MOVDreg x)
 (MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
 (MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
 (MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
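To see why the "don't extend after proper load" rules fire, consider a sketch like the following (invented example):

	package example

	// widen loads a byte and widens it. The arm64 byte load (MOVBload)
	// already sign-extends into the full 64-bit register, so the explicit
	// extension is rewritten to a plain MOVDreg copy by the rules above.
	func widen(p *int8) int64 {
		return int64(*p)
	}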

 // don't extend before store
-(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBreg  x) mem) => (MOVBstore [off] {sym} ptr x mem)
 (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg  x) mem) => (MOVBstore [off] {sym} ptr x mem)
 (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg  x) mem) => (MOVBstore [off] {sym} ptr x mem)
 (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg  x) mem) => (MOVHstore [off] {sym} ptr x mem)
 (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg  x) mem) => (MOVHstore [off] {sym} ptr x mem)
 (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg  x) mem) => (MOVWstore [off] {sym} ptr x mem)
 (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
-(MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem)
-(MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem)
-(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem)
-(MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem)
-(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem)
-(MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem)
-(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem)
-(MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVBreg  x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVHreg  x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVWreg  x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVHreg  x) mem) => (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVWreg  x) mem) => (MOVHstoreidx ptr idx x mem)
+(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem)
+(MOVWstoreidx ptr idx (MOVWreg  x) mem) => (MOVWstoreidx ptr idx x mem)
+(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVHreg  x) mem) => (MOVHstoreidx2 ptr idx x mem)
 (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
-(MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
+(MOVHstoreidx2 ptr idx (MOVWreg  x) mem) => (MOVHstoreidx2 ptr idx x mem)
 (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
-(MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
+(MOVWstoreidx4 ptr idx (MOVWreg  x) mem) => (MOVWstoreidx4 ptr idx x mem)
 (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
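The mirror-image case is a truncating store, sketched below (invented example): any extension feeding a narrow store is dead, because the store keeps only the low bits.

	package example

	// storeLow keeps only the low byte of x. The conversion produces a
	// MOV*reg/MOV*Ureg extension that (MOVBstore ... (MOVWUreg x) ...) and
	// friends above strip before the store.
	func storeLow(p *uint8, x uint32) {
		*p = uint8(x)
	}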

 // if a register move has only 1 use, just use the same register without emitting instruction
@@ -1288,18 +1143,18 @@
 (MOVDnop (MOVDconst [c])) => (MOVDconst [c])

 // fold constant into arithmetic ops
-(ADD x (MOVDconst [c])) && !t.IsPtr() => (ADDconst [c] x)
-(SUB x (MOVDconst [c])) => (SUBconst [c] x)
-(AND x (MOVDconst [c])) => (ANDconst [c] x)
-(OR x (MOVDconst [c])) => (ORconst [c] x)
-(XOR x (MOVDconst [c])) => (XORconst [c] x)
-(TST x (MOVDconst [c])) => (TSTconst [c] x)
+(ADD x (MOVDconst [c])) && !t.IsPtr() => (ADDconst [c] x)
+(SUB x (MOVDconst [c])) => (SUBconst [c] x)
+(AND x (MOVDconst [c])) => (ANDconst [c] x)
+(OR  x (MOVDconst [c])) => (ORconst  [c] x)
+(XOR x (MOVDconst [c])) => (XORconst [c] x)
+(TST x (MOVDconst [c])) => (TSTconst [c] x)
 (TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x)
-(CMN x (MOVDconst [c])) => (CMNconst [c] x)
+(CMN  x (MOVDconst [c])) => (CMNconst  [c] x)
 (CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x)
-(BIC x (MOVDconst [c])) => (ANDconst [^c] x)
-(EON x (MOVDconst [c])) => (XORconst [^c] x)
-(ORN x (MOVDconst [c])) => (ORconst [^c] x)
+(BIC x (MOVDconst [c])) => (ANDconst [^c] x)
+(EON x (MOVDconst [c])) => (XORconst [^c] x)
+(ORN x (MOVDconst [c])) => (ORconst  [^c] x)

 (SLL x (MOVDconst [c])) => (SLLconst x [c&63])
 (SRL x (MOVDconst [c])) => (SRLconst x [c&63])
@@ -1308,12 +1163,12 @@
 (SRL x (ANDconst [63] y)) => (SRL x y)
 (SRA x (ANDconst [63] y)) => (SRA x y)

-(CMP x (MOVDconst [c])) => (CMPconst [c] x)
-(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
+(CMP  x (MOVDconst [c])) => (CMPconst  [c] x)
+(CMP  (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
 (CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x)
 (CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x))

-(ROR x (MOVDconst [c])) => (RORconst x [c&63])
+(ROR  x (MOVDconst [c])) => (RORconst  x [c&63])
 (RORW x (MOVDconst [c])) => (RORWconst x [c&31])

 (ADDSflags x (MOVDconst [c])) => (ADDSconstflags [c] x)
@@ -1324,10 +1179,10 @@
 ((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))

 // mul-neg => mneg
-(NEG (MUL x y)) => (MNEG x y)
-(NEG (MULW x y)) => (MNEGW x y)
-(MUL (NEG x) y) => (MNEG x y)
-(MULW (NEG x) y) => (MNEGW x y)
+(NEG  (MUL  x y)) => (MNEG  x y)
+(NEG  (MULW x y)) => (MNEGW x y)
+(MUL  (NEG  x) y) => (MNEG  x y)
+(MULW (NEG  x) y) => (MNEGW x y)

 // madd/msub
 (ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
@@ -1483,12 +1338,12 @@
 (MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])

 // div by constant
-(UDIV x (MOVDconst [1])) => x
-(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
+(UDIV  x (MOVDconst [1])) => x
+(UDIV  x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
 (UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
 (UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x)
-(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
-(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
+(UMOD  _ (MOVDconst [1])) => (MOVDconst [0])
+(UMOD  x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
 (UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
 (UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
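A quick worked example of the div-by-constant rules (invented function; the rewrites noted in the comments follow from the rules above):

	package example

	// divmod8 needs no divide instruction: x/8 becomes a right shift
	// (SRLconst [3] x) and x%8 becomes a mask (ANDconst [7] x), because 8
	// is a power of two.
	func divmod8(x uint64) (q, r uint64) {
		return x / 8, x % 8
	}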
@@ -1592,12 +1447,12 @@
 (CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)])

 // other known comparisons
-(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
-(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
-(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
-(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
-(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
-(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
+(CMPconst  (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
+(CMPconst  (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
+(CMPconst  (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
+(CMPconst  (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
+(CMPconst  (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
+(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
 (CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])

 // absorb flag constants into branches
@@ -1643,26 +1498,26 @@
 (GEnoov (FlagConstant [fc]) yes no) && fc.geNoov() => (First yes no)
 (GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes)

-(Z (MOVDconst [0]) yes no) => (First yes no)
-(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes)
-(NZ (MOVDconst [0]) yes no) => (First no yes)
-(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no)
-(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
-(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
+(Z   (MOVDconst [0]) yes no) => (First yes no)
+(Z   (MOVDconst [c]) yes no) && c != 0 => (First no yes)
+(NZ  (MOVDconst [0]) yes no) => (First no yes)
+(NZ  (MOVDconst [c]) yes no) && c != 0 => (First yes no)
+(ZW  (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
+(ZW  (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
 (NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes)
 (NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no)

 // absorb InvertFlags into branches
-(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
-(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
-(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
-(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
+(LT  (InvertFlags cmp) yes no) => (GT cmp yes no)
+(GT  (InvertFlags cmp) yes no) => (LT cmp yes no)
+(LE  (InvertFlags cmp) yes no) => (GE cmp yes no)
+(GE  (InvertFlags cmp) yes no) => (LE cmp yes no)
 (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
 (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
 (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
 (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
-(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
-(NE (InvertFlags cmp) yes no) => (NE cmp yes no)
+(EQ  (InvertFlags cmp) yes no) => (EQ cmp yes no)
+(NE  (InvertFlags cmp) yes no) => (NE cmp yes no)
 (FLT (InvertFlags cmp) yes no) => (FGT cmp yes no)
 (FGT (InvertFlags cmp) yes no) => (FLT cmp yes no)
 (FLE (InvertFlags cmp) yes no) => (FGE cmp yes no)
@@ -1673,39 +1528,39 @@
 (GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)

 // absorb InvertFlags into conditional instructions
-(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
-(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
-(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
+(CSEL  [cc] x y (InvertFlags cmp)) => (CSEL  [arm64Invert(cc)] x y cmp)
+(CSEL0 [cc] x (InvertFlags cmp))   => (CSEL0 [arm64Invert(cc)] x cmp)
+(CSETM [cc] (InvertFlags cmp))     => (CSETM [arm64Invert(cc)] cmp)
 (CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
 (CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
 (CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
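A sketch of where CSEL and InvertFlags meet (invented example): a branchless select whose comparison was canonicalized by swapping operands only needs its condition code inverted, not a second compare.

	package example

	// pick typically compiles to CMP + CSEL on arm64. If the CMP operands
	// were swapped by canonLessThan, the flags value is wrapped in
	// InvertFlags, and the rule
	// (CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
	// fixes up the condition instead of redoing the comparison.
	func pick(x, y, a, b int64) int64 {
		if x < y {
			return a
		}
		return b
	}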

 // absorb flag constants into boolean values
-(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
-(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
-(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
-(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
-(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
-(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
-(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
-(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
-(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
+(Equal         (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
+(NotEqual      (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
+(LessThan      (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
+(LessThanU     (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
+(LessEqual     (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
+(LessEqualU    (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
+(GreaterThan   (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
+(GreaterThanU  (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
+(GreaterEqual  (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
 (GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())])

 // absorb InvertFlags into boolean values
-(Equal (InvertFlags x)) => (Equal x)
-(NotEqual (InvertFlags x)) => (NotEqual x)
-(LessThan (InvertFlags x)) => (GreaterThan x)
-(LessThanU (InvertFlags x)) => (GreaterThanU x)
-(GreaterThan (InvertFlags x)) => (LessThan x)
-(GreaterThanU (InvertFlags x)) => (LessThanU x)
-(LessEqual (InvertFlags x)) => (GreaterEqual x)
-(LessEqualU (InvertFlags x)) => (GreaterEqualU x)
-(GreaterEqual (InvertFlags x)) => (LessEqual x)
+(Equal         (InvertFlags x)) => (Equal x)
+(NotEqual      (InvertFlags x)) => (NotEqual x)
+(LessThan      (InvertFlags x)) => (GreaterThan x)
+(LessThanU     (InvertFlags x)) => (GreaterThanU x)
+(GreaterThan   (InvertFlags x)) => (LessThan x)
+(GreaterThanU  (InvertFlags x)) => (LessThanU x)
+(LessEqual     (InvertFlags x)) => (GreaterEqual x)
+(LessEqualU    (InvertFlags x)) => (GreaterEqualU x)
+(GreaterEqual  (InvertFlags x)) => (LessEqual x)
 (GreaterEqualU (InvertFlags x)) => (LessEqualU x)
-(LessThanF (InvertFlags x)) => (GreaterThanF x)
-(LessEqualF (InvertFlags x)) => (GreaterEqualF x)
-(GreaterThanF (InvertFlags x)) => (LessThanF x)
+(LessThanF     (InvertFlags x)) => (GreaterThanF x)
+(LessEqualF    (InvertFlags x)) => (GreaterEqualF x)
+(GreaterThanF  (InvertFlags x)) => (LessThanF x)
 (GreaterEqualF (InvertFlags x)) => (LessEqualF x)
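Similarly for materialized booleans (invented example):

	package example

	// less materializes the comparison result in a register. If the flags
	// were produced in inverted form, rules like
	// (LessThan (InvertFlags x)) => (GreaterThan x) flip the predicate so
	// no extra instruction is needed.
	func less(x, y int64) bool {
		return x < y
	}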

 // Boolean-generating instructions (NOTE: NOT all boolean Values) always
@@ -1713,28 +1568,26 @@
 (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x)

 // omit unsigned extension
-
 (MOVWUreg x) && zeroUpper32Bits(x, 3) => x

 // omit sign extension
-
 (MOVWreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst x [c])
 (MOVHreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst x [c])
 (MOVBreg (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffffff80) == 0 => (ANDconst x [c])

 // absorb flag constants into conditional instructions
-(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
-(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
+(CSEL  [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
+(CSEL  [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
+(CSEL0 [cc] x flag)   && ccARM64Eval(cc, flag) > 0 => x
+(CSEL0 [cc] _ flag)   && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
 (CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
 (CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y)
 (CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
 (CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (Not y)
 (CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
 (CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y)
-(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
-(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
+(CSETM [cc] flag)     && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
+(CSETM [cc] flag)     && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])

 // absorb flags back into boolean CSEL
 (CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
@@ -1960,11 +1813,11 @@
 // int64(x << lc)
 (MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
 (MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
-(MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
+(MOVBreg (SLLconst [lc] x)) && lc < 8  => (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
 // int64(x) << lc
 (SLLconst [lc] (MOVWreg x)) => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
 (SLLconst [lc] (MOVHreg x)) => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
-(SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
+(SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))]  x)

 // sbfx
 // (x << lc) >> rc
@@ -1972,7 +1825,7 @@
 // int64(x) >> rc
 (SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x)
 (SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x)
-(SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x)
+(SRAconst [rc] (MOVBreg x)) && rc < 8  => (SBFX [armBFAuxInt(rc, 8-rc)] x)
 // merge sbfx and sign-extension into sbfx
 (MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x)
 (MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x)
@@ -1991,11 +1844,11 @@
 // uint64(x) << lc
 (SLLconst [lc] (MOVWUreg x)) => (UBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
 (SLLconst [lc] (MOVHUreg x)) => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
-(SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
+(SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))]  x)
 // uint64(x << lc)
 (MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
 (MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
-(MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x)
+(MOVBUreg (SLLconst [lc] x)) && lc < 8  => (UBFIZ [armBFAuxInt(lc, 8-lc)] x)

 // merge ANDconst into ubfiz
 // (x & ac) << sc
@@ -2011,11 +1864,11 @@
 // uint64(x) >> rc
 (SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x)
 (SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x)
-(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x)
+(SRLconst [rc] (MOVBUreg x)) && rc < 8  => (UBFX [armBFAuxInt(rc, 8-rc)] x)
 // uint64(x >> rc)
 (MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x)
 (MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x)
-(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x)
+(MOVBUreg (SRLconst [rc] x)) && rc < 8  => (UBFX [armBFAuxInt(rc, 8)] x)
 // merge ANDconst into ubfx
 // (x >> sc) & ac
 (ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
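Two one-line illustrations of the bitfield rules (invented functions; the mapping in the comments is the intent of the rules above):

	package example

	// extract is a single UBFX: an 8-bit field starting at bit 4,
	// via the "merge ANDconst into ubfx" rule above.
	func extract(x uint64) uint64 {
		return (x >> 4) & 0xff
	}

	// widenShift is a single SBFIZ: sign-extend y and shift it into place,
	// via (SLLconst [lc] (MOVWreg x)) => (SBFIZ ...).
	func widenShift(y int32) int64 {
		return int64(y) << 3
	}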
@@ -2820,7 +2673,7 @@
 	&& clobber(x)
 	=> (MOVDstoreidx ptr1 (SLLconst [2] idx1) w0 mem)
 (MOVBstore [i] {s} ptr w
-	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
+	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
 	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
 	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
 	x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
@@ -2837,7 +2690,7 @@
 	&& clobber(x0, x1, x2, x3, x4, x5, x6)
 	=> (MOVDstore [i-7] {s} ptr (REV w) mem)
 (MOVBstore [7] {s} p w
-	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
+	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
 	x1:(MOVBstore [5] {s} p (SRLconst [16] w)
 	x2:(MOVBstore [4] {s} p (SRLconst [24] w)
 	x3:(MOVBstore [3] {s} p (SRLconst [32] w)
@@ -2857,9 +2710,9 @@
 	&& clobber(x0, x1, x2, x3, x4, x5, x6)
 	=> (MOVDstoreidx ptr0 idx0 (REV w) mem)
 (MOVBstore [i] {s} ptr w
-	x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
+	x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
 	x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
-	x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
+	x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
 	&& x0.Uses == 1
 	&& x1.Uses == 1
 	&& x2.Uses == 1
@@ -2878,7 +2731,7 @@
 	&& clobber(x0, x1, x2)
 	=> (MOVWstoreidx ptr0 idx0 (REVW w) mem)
 (MOVBstoreidx ptr (ADDconst [3] idx) w
-	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
+	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
 	x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
 	x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
 	&& x0.Uses == 1
@@ -2887,16 +2740,16 @@
 	&& clobber(x0, x1, x2)
 	=> (MOVWstoreidx ptr idx (REVW w) mem)
 (MOVBstoreidx ptr idx w
-	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
+	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
 	x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
-	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
+	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
 	&& x0.Uses == 1
 	&& x1.Uses == 1
 	&& x2.Uses == 1
 	&& clobber(x0, x1, x2)
 	=> (MOVWstoreidx ptr idx w mem)
 (MOVBstore [i] {s} ptr w
-	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
+	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
 	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
 	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
 	&& x0.Uses == 1
@@ -2917,7 +2770,7 @@
 	&& clobber(x0, x1, x2)
 	=> (MOVWstoreidx ptr0 idx0 (REVW w) mem)
 (MOVBstore [i] {s} ptr w
-	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
+	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
 	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
 	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
 	&& x0.Uses == 1
@@ -2987,25 +2840,25 @@
 	=> (MOVHstoreidx ptr0 idx0 (REV16W w) mem)
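The byte-store-combining rules above are what make the standard big-endian encoding pattern cheap; a minimal runnable sketch:

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	func main() {
		// PutUint32 is written as four single-byte stores of v>>24, v>>16,
		// v>>8, v. The MOVBstore chains above merge them into one word
		// store of a byte-reversed value (REVW + MOVWstore) on arm64.
		var buf [4]byte
		binary.BigEndian.PutUint32(buf[:], 0xdeadbeef)
		fmt.Printf("% x\n", buf)
	}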

 // FP simplification
-(FNEGS (FMULS x y)) => (FNMULS x y)
-(FNEGD (FMULD x y)) => (FNMULD x y)
-(FMULS (FNEGS x) y) => (FNMULS x y)
-(FMULD (FNEGD x) y) => (FNMULD x y)
-(FNEGS (FNMULS x y)) => (FMULS x y)
-(FNEGD (FNMULD x y)) => (FMULD x y)
-(FNMULS (FNEGS x) y) => (FMULS x y)
-(FNMULD (FNEGD x) y) => (FMULD x y)
+(FNEGS  (FMULS  x y)) => (FNMULS x y)
+(FNEGD  (FMULD  x y)) => (FNMULD x y)
+(FMULS  (FNEGS  x) y) => (FNMULS x y)
+(FMULD  (FNEGD  x) y) => (FNMULD x y)
+(FNEGS  (FNMULS x y)) => (FMULS  x y)
+(FNEGD  (FNMULD x y)) => (FMULD  x y)
+(FNMULS (FNEGS  x) y) => (FMULS  x y)
+(FNMULD (FNEGD  x) y) => (FMULD  x y)

-(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
-(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
-(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
-(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
-(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
-(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
-(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
-(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
-(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
-(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
+(FADDS a (FMULS  x y)) && a.Block.Func.useFMA(v) => (FMADDS  a x y)
+(FADDD a (FMULD  x y)) && a.Block.Func.useFMA(v) => (FMADDD  a x y)
+(FSUBS a (FMULS  x y)) && a.Block.Func.useFMA(v) => (FMSUBS  a x y)
+(FSUBD a (FMULD  x y)) && a.Block.Func.useFMA(v) => (FMSUBD  a x y)
+(FSUBS (FMULS  x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
+(FSUBD (FMULD  x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
+(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS  a x y)
+(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD  a x y)
+(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS  a x y)
+(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD  a x y)
 (FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y)
 (FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y)
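The FMA rules above correspond to this source-level contraction (invented example; math.FMA is the always-fused form):

	package example

	import "math"

	// muladd may contract to a single FMADDD when the function's useFMA
	// heuristic allows it.
	func muladd(a, x, y float64) float64 {
		return a + x*y
	}

	// fused guarantees a fused multiply-add regardless of the heuristic;
	// math.FMA(x, y, a) computes x*y + a with a single rounding.
	func fused(a, x, y float64) float64 {
		return math.FMA(x, y, a)
	}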
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 41d9513160e..b8c3c2c318f 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -4219,7 +4219,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool {
 		return true
 	}
 	// match: (Equal (CMPconst [0] z:(MADD a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (Equal (CMN a (MUL x y)))
 	for {
 		if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -4244,7 +4244,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool {
 		return true
 	}
 	// match: (Equal (CMPconst [0] z:(MSUB a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (Equal (CMP a (MUL x y)))
 	for {
 		if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -4269,7 +4269,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool {
 		return true
 	}
 	// match: (Equal (CMPWconst [0] z:(MADDW a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (Equal (CMNW a (MULW x y)))
 	for {
 		if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
@@ -4294,7 +4294,7 @@ func rewriteValueARM64_OpARM64Equal(v *Value) bool {
 		return true
 	}
 	// match: (Equal (CMPWconst [0] z:(MSUBW a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (Equal (CMPW a (MULW x y)))
 	for {
 		if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
@@ -17021,7 +17021,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
 		return true
 	}
 	// match: (NotEqual (CMPconst [0] z:(MADD a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (NotEqual (CMN a (MUL x y)))
 	for {
 		if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -17046,7 +17046,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
 		return true
 	}
 	// match: (NotEqual (CMPconst [0] z:(MSUB a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (NotEqual (CMP a (MUL x y)))
 	for {
 		if v_0.Op != OpARM64CMPconst || auxIntToInt64(v_0.AuxInt) != 0 {
@@ -17071,7 +17071,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
 		return true
 	}
 	// match: (NotEqual (CMPWconst [0] z:(MADDW a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (NotEqual (CMNW a (MULW x y)))
 	for {
 		if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {
@@ -17096,7 +17096,7 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value) bool {
 		return true
 	}
 	// match: (NotEqual (CMPWconst [0] z:(MSUBW a x y)))
-	// cond: z.Uses==1
+	// cond: z.Uses == 1
 	// result: (NotEqual (CMPW a (MULW x y)))
 	for {
 		if v_0.Op != OpARM64CMPWconst || auxIntToInt32(v_0.AuxInt) != 0 {