diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 200eedf0fbd..e09e41c5363 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -492,6 +492,9 @@
 (CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP(L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(L|W|B) y x))
+
 // strength reduction
 // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
 // 1 - addl, shll, leal, negl, subl
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 491d6795b44..4b48526db6e 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -896,6 +896,9 @@
 (CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP(Q|L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(Q|L|W|B) y x))
+
 // Using MOVZX instead of AND is cheaper.
 (AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
 (AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index d1244f8ceeb..361eb4f4f94 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -522,6 +522,9 @@
 (TST x (MOVWconst [c])) -> (TSTconst [c] x)
 (TEQ x (MOVWconst [c])) -> (TEQconst [c] x)
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+(CMP x y) && x.ID > y.ID -> (InvertFlags (CMP y x))
+
 // don't extend after proper load
 // MOVWreg instruction is not emitted if src and dst registers are same, but it ensures the type.
 (MOVBreg x:(MOVBload _ _)) -> (MOVWreg x)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index b4c0565ca24..498e26a0bc4 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -1152,6 +1152,9 @@
 (CMPW x (MOVDconst [c])) -> (CMPWconst [int64(int32(c))] x)
 (CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst [int64(int32(c))] x))
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW) y x))
+
 // mul-neg -> mneg
 (NEG (MUL x y)) -> (MNEG x y)
 (NEG (MULW x y)) -> (MNEGW x y)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index e03712b1183..1d511a8278f 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -1026,6 +1026,9 @@
 (CMPWU x (MOVDconst [c])) && isU16Bit(c) -> (CMPWUconst x [c])
 (CMPWU (MOVDconst [c]) y) && isU16Bit(c) -> (InvertFlags (CMPWUconst y [c]))
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
+
 // ISEL auxInt values 0=LT 1=GT 2=EQ arg2 ? arg0 : arg1
 // ISEL auxInt values 4=GE 5=LE 6=NE arg2 ? arg1 : arg0
 // ISELB special case where arg0, arg1 values are 0, 1
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 7761ee3b1eb..2813c33fd0d 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -700,6 +700,9 @@
 (CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
 (CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
 
+// Canonicalize the order of arguments to comparisons - helps with CSE.
+((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
+
 // Using MOV{W,H,B}Zreg instead of AND is cheaper.
 (AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
 (AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index fe92db2bf77..8c2d1f8a812 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -2615,6 +2615,22 @@ func rewriteValue386_Op386CMPB(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPB x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPB y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(Op386InvertFlags)
+		v0 := b.NewValue0(v.Pos, Op386CMPB, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPBload {sym} [off] ptr x mem)
@@ -2902,6 +2918,22 @@ func rewriteValue386_Op386CMPL(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPL x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPL y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(Op386InvertFlags)
+		v0 := b.NewValue0(v.Pos, Op386CMPL, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPLload {sym} [off] ptr x mem)
@@ -3204,6 +3236,22 @@ func rewriteValue386_Op386CMPW(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPW x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPW y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(Op386InvertFlags)
+		v0 := b.NewValue0(v.Pos, Op386CMPW, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPWload {sym} [off] ptr x mem)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 40e7091fe18..cc629f199eb 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -7334,6 +7334,22 @@ func rewriteValueAMD64_OpAMD64CMPB(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPB x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPB y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpAMD64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPBload {sym} [off] ptr x mem)
@@ -7704,6 +7720,22 @@ func rewriteValueAMD64_OpAMD64CMPL(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPL x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPL y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpAMD64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPLload {sym} [off] ptr x mem)
@@ -8097,6 +8129,22 @@ func rewriteValueAMD64_OpAMD64CMPQ(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPQ x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPQ y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpAMD64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPQload {sym} [off] ptr x mem)
@@ -8564,6 +8612,22 @@ func rewriteValueAMD64_OpAMD64CMPW(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPW x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPW y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpAMD64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
 	// cond: canMergeLoad(v, l) && clobber(l)
 	// result: (CMPWload {sym} [off] ptr x mem)
diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go
index c74a5602f0b..4b68b4cc4bc 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
@@ -4034,6 +4034,22 @@ func rewriteValueARM_OpARMCMP(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMP x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMP y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpARMInvertFlags)
+		v0 := b.NewValue0(v.Pos, OpARMCMP, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMP x (SLLconst [c] y))
 	// result: (CMPshiftLL x y [c])
 	for {
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6af28192255..10b0e68f58d 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -2957,6 +2957,22 @@ func rewriteValueARM64_OpARM64CMP(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMP x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMP y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMP x0 x1:(SLLconst [c] y))
 	// cond: clobberIfDead(x1)
 	// result: (CMPshiftLL x0 y [c])
@@ -3117,6 +3133,22 @@ func rewriteValueARM64_OpARM64CMPW(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPW x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPW y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64CMPWconst(v *Value) bool {
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 9f62e0d3ba5..d5af441e671 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -4941,6 +4941,22 @@ func rewriteValuePPC64_OpPPC64CMP(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMP x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMP y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpPPC64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpPPC64CMP, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool {
@@ -4983,6 +4999,22 @@ func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPU x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPU y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpPPC64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64CMPUconst(v *Value) bool {
@@ -5100,6 +5132,22 @@ func rewriteValuePPC64_OpPPC64CMPW(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPW x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPW y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpPPC64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpPPC64CMPW, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool {
@@ -5168,6 +5216,22 @@ func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPWU x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPWU y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpPPC64InvertFlags)
+		v0 := b.NewValue0(v.Pos, OpPPC64CMPWU, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64CMPWUconst(v *Value) bool {
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index d9262305e86..fe705fee8e3 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -7234,6 +7234,22 @@ func rewriteValueS390X_OpS390XCMP(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMP x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMP y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpS390XInvertFlags)
+		v0 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValueS390X_OpS390XCMPU(v *Value) bool {
@@ -7276,6 +7292,22 @@ func rewriteValueS390X_OpS390XCMPU(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPU x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPU y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpS390XInvertFlags)
+		v0 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	return false
 }
 func rewriteValueS390X_OpS390XCMPUconst(v *Value) bool {
@@ -7481,6 +7513,22 @@ func rewriteValueS390X_OpS390XCMPW(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPW x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPW y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpS390XInvertFlags)
+		v0 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPW x (MOVWreg y))
 	// result: (CMPW x y)
 	for {
@@ -7567,6 +7615,22 @@ func rewriteValueS390X_OpS390XCMPWU(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
+	// match: (CMPWU x y)
+	// cond: x.ID > y.ID
+	// result: (InvertFlags (CMPWU y x))
+	for {
+		x := v_0
+		y := v_1
+		if !(x.ID > y.ID) {
+			break
+		}
+		v.reset(OpS390XInvertFlags)
+		v0 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
+		v0.AddArg(y)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
 	// match: (CMPWU x (MOVWreg y))
 	// result: (CMPWU x y)
 	for {
diff --git a/test/codegen/compare_and_branch.go b/test/codegen/compare_and_branch.go
index 33d8d7bd524..23e7810b31d 100644
--- a/test/codegen/compare_and_branch.go
+++ b/test/codegen/compare_and_branch.go
@@ -11,7 +11,7 @@ func dummy() {}
 
 // Signed 64-bit compare-and-branch.
 func si64(x, y chan int64) {
-	// s390x:"CGRJ\t[$]4, R[0-9]+, R[0-9]+, "
+	// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 	for <-x < <-y {
 		dummy()
 	}
@@ -47,7 +47,7 @@ func si64x8() {
 
 // Unsigned 64-bit compare-and-branch.
 func ui64(x, y chan uint64) {
-	// s390x:"CLGRJ\t[$]2, R[0-9]+, R[0-9]+, "
+	// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 	for <-x > <-y {
 		dummy()
 	}
@@ -83,7 +83,7 @@ func ui64x8() {
 
 // Signed 32-bit compare-and-branch.
 func si32(x, y chan int32) {
-	// s390x:"CRJ\t[$]4, R[0-9]+, R[0-9]+, "
+	// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 	for <-x < <-y {
 		dummy()
 	}
@@ -119,7 +119,7 @@ func si32x8() {
 
 // Unsigned 32-bit compare-and-branch.
 func ui32(x, y chan uint32) {
-	// s390x:"CLRJ\t[$]2, R[0-9]+, R[0-9]+, "
+	// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
 	for <-x > <-y {
 		dummy()
 	}
diff --git a/test/codegen/condmove.go b/test/codegen/condmove.go
index bd3fe59427e..00118d1b63d 100644
--- a/test/codegen/condmove.go
+++ b/test/codegen/condmove.go
@@ -32,7 +32,7 @@ func cmovuintptr(x, y uintptr) uintptr {
 		x = -y
 	}
 	// amd64:"CMOVQCS"
-	// arm64:"CSEL\tLO"
+	// arm64:"CSEL\t(LO|HI)"
 	// wasm:"Select"
 	return x
 }
@@ -42,7 +42,7 @@ func cmov32bit(x, y uint32) uint32 {
 		x = -y
 	}
 	// amd64:"CMOVLCS"
-	// arm64:"CSEL\tLO"
+	// arm64:"CSEL\t(LO|HI)"
 	// wasm:"Select"
 	return x
 }
@@ -52,7 +52,7 @@ func cmov16bit(x, y uint16) uint16 {
 		x = -y
 	}
 	// amd64:"CMOVWCS"
-	// arm64:"CSEL\tLO"
+	// arm64:"CSEL\t(LO|HI)"
 	// wasm:"Select"
 	return x
 }
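
Illustration (not part of the patch; the function and names below are hypothetical): the new rules rewrite (CMP x y) into (InvertFlags (CMP y x)) whenever the first operand has the larger value ID, so comparisons of the same two operands in either textual order normalize to one canonical CMP value that the CSE pass can merge. A minimal Go sketch of the kind of source that could benefit, assuming both conditions lower to the same comparison op on the target (e.g. CMPQ on amd64):

package example

// max64 compares the same operand pair twice, in opposite orders.
// Without canonicalization the backend can emit two independent
// comparisons; with it, both lower to the same canonical CMPQ (one
// wrapped in InvertFlags), so CSE can reuse a single flags value.
// Whether the duplicate is actually eliminated still depends on the
// surrounding code and the rest of the optimization pipeline.
func max64(a, b int64) int64 {
	if a < b { // lowers to CMP a b
		return b
	}
	if b < a { // CMP b a, canonicalized to InvertFlags(CMP a b)
		return a
	}
	return a // a == b
}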