1
0
mirror of https://github.com/golang/go synced 2024-11-26 17:46:57 -07:00

cmd/compile: canonicalize comparison argument order

Ensure that any comparison between two values has the same argument
order. This helps ensure that they can be eliminated during the
lowered CSE pass which will be particularly important if we eliminate
the Greater and Geq ops (see #37316).

Example:

  CMP R0, R1
  BLT L1
  CMP R1, R0 // different order, cannot eliminate
  BEQ L2

  CMP R0, R1
  BLT L1
  CMP R0, R1 // same order, can eliminate
  BEQ L2

This does have some drawbacks. Notably comparisons might 'flip'
direction in the assembly output after even small changes to the
code or compiler. It should help make optimizations more reliable
however.

compilecmp master -> HEAD
master (218f4572f5): text/template: make reflect.Value indirections more robust
HEAD (f1661fef3e): cmd/compile: canonicalize comparison argument order
platform: linux/amd64

file      before    after     Δ       %
api       6063927   6068023   +4096   +0.068%
asm       5191757   5183565   -8192   -0.158%
cgo       4893518   4901710   +8192   +0.167%
cover     5330345   5326249   -4096   -0.077%
fix       3417778   3421874   +4096   +0.120%
pprof     14889456  14885360  -4096   -0.028%
test2json 2848138   2844042   -4096   -0.144%
trace     11746239  11733951  -12288  -0.105%
total     132739173 132722789 -16384  -0.012%

Change-Id: I11736b3fe2a4553f6fc65018f475e88217fa22f9
Reviewed-on: https://go-review.googlesource.com/c/go/+/220425
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Michael Munday 2020-02-23 22:08:24 +00:00
parent e6d7326fb6
commit 44fe355694
14 changed files with 313 additions and 7 deletions

View File

@ -492,6 +492,9 @@
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
(CMP(L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(L|W|B) y x))
// strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
// 1 - addl, shll, leal, negl, subl

View File

@ -896,6 +896,9 @@
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
(CMP(Q|L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(Q|L|W|B) y x))
// Using MOVZX instead of AND is cheaper.
(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)

View File

@ -522,6 +522,9 @@
(TST x (MOVWconst [c])) -> (TSTconst [c] x)
(TEQ x (MOVWconst [c])) -> (TEQconst [c] x)
// Canonicalize the order of arguments to comparisons - helps with CSE.
(CMP x y) && x.ID > y.ID -> (InvertFlags (CMP y x))
// don't extend after proper load
// MOVWreg instruction is not emitted if src and dst registers are same, but it ensures the type.
(MOVBreg x:(MOVBload _ _)) -> (MOVWreg x)

View File

@ -1152,6 +1152,9 @@
(CMPW x (MOVDconst [c])) -> (CMPWconst [int64(int32(c))] x)
(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst [int64(int32(c))] x))
// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW) y x))
// mul-neg -> mneg
(NEG (MUL x y)) -> (MNEG x y)
(NEG (MULW x y)) -> (MNEGW x y)

View File

@ -1026,6 +1026,9 @@
(CMPWU x (MOVDconst [c])) && isU16Bit(c) -> (CMPWUconst x [c])
(CMPWU (MOVDconst [c]) y) && isU16Bit(c) -> (InvertFlags (CMPWUconst y [c]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
// ISEL auxInt values 0=LT 1=GT 2=EQ arg2 ? arg0 : arg1
// ISEL auxInt values 4=GE 5=LE 6=NE arg2 ? arg1 : arg0
// ISELB special case where arg0, arg1 values are 0, 1

View File

@ -700,6 +700,9 @@
(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID -> (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
// Using MOV{W,H,B}Zreg instead of AND is cheaper.
(AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
(AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)

View File

@ -2615,6 +2615,22 @@ func rewriteValue386_Op386CMPB(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPB x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPB y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(Op386InvertFlags)
v0 := b.NewValue0(v.Pos, Op386CMPB, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPBload {sym} [off] ptr x mem)
@ -2902,6 +2918,22 @@ func rewriteValue386_Op386CMPL(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPL x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPL y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(Op386InvertFlags)
v0 := b.NewValue0(v.Pos, Op386CMPL, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPLload {sym} [off] ptr x mem)
@ -3204,6 +3236,22 @@ func rewriteValue386_Op386CMPW(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPW x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPW y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(Op386InvertFlags)
v0 := b.NewValue0(v.Pos, Op386CMPW, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPWload {sym} [off] ptr x mem)

View File

@ -7334,6 +7334,22 @@ func rewriteValueAMD64_OpAMD64CMPB(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPB x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPB y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpAMD64InvertFlags)
v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPB l:(MOVBload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPBload {sym} [off] ptr x mem)
@ -7704,6 +7720,22 @@ func rewriteValueAMD64_OpAMD64CMPL(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPL x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPL y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpAMD64InvertFlags)
v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPL l:(MOVLload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPLload {sym} [off] ptr x mem)
@ -8097,6 +8129,22 @@ func rewriteValueAMD64_OpAMD64CMPQ(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPQ x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPQ y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpAMD64InvertFlags)
v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPQ l:(MOVQload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPQload {sym} [off] ptr x mem)
@ -8564,6 +8612,22 @@ func rewriteValueAMD64_OpAMD64CMPW(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPW x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPW y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpAMD64InvertFlags)
v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPW l:(MOVWload {sym} [off] ptr mem) x)
// cond: canMergeLoad(v, l) && clobber(l)
// result: (CMPWload {sym} [off] ptr x mem)

View File

@ -4034,6 +4034,22 @@ func rewriteValueARM_OpARMCMP(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMP x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMP y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpARMInvertFlags)
v0 := b.NewValue0(v.Pos, OpARMCMP, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMP x (SLLconst [c] y))
// result: (CMPshiftLL x y [c])
for {

View File

@ -2957,6 +2957,22 @@ func rewriteValueARM64_OpARM64CMP(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMP x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMP y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpARM64InvertFlags)
v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMP x0 x1:(SLLconst [c] y))
// cond: clobberIfDead(x1)
// result: (CMPshiftLL x0 y [c])
@ -3117,6 +3133,22 @@ func rewriteValueARM64_OpARM64CMPW(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPW x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPW y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpARM64InvertFlags)
v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueARM64_OpARM64CMPWconst(v *Value) bool {

View File

@ -4941,6 +4941,22 @@ func rewriteValuePPC64_OpPPC64CMP(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMP x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMP y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpPPC64InvertFlags)
v0 := b.NewValue0(v.Pos, OpPPC64CMP, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool {
@ -4983,6 +4999,22 @@ func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPU x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPU y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpPPC64InvertFlags)
v0 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64CMPUconst(v *Value) bool {
@ -5100,6 +5132,22 @@ func rewriteValuePPC64_OpPPC64CMPW(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPW x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPW y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpPPC64InvertFlags)
v0 := b.NewValue0(v.Pos, OpPPC64CMPW, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool {
@ -5168,6 +5216,22 @@ func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPWU x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPWU y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpPPC64InvertFlags)
v0 := b.NewValue0(v.Pos, OpPPC64CMPWU, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64CMPWUconst(v *Value) bool {

View File

@ -7234,6 +7234,22 @@ func rewriteValueS390X_OpS390XCMP(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMP x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMP y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpS390XInvertFlags)
v0 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueS390X_OpS390XCMPU(v *Value) bool {
@ -7276,6 +7292,22 @@ func rewriteValueS390X_OpS390XCMPU(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPU x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPU y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpS390XInvertFlags)
v0 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueS390X_OpS390XCMPUconst(v *Value) bool {
@ -7481,6 +7513,22 @@ func rewriteValueS390X_OpS390XCMPW(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPW x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPW y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpS390XInvertFlags)
v0 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPW x (MOVWreg y))
// result: (CMPW x y)
for {
@ -7567,6 +7615,22 @@ func rewriteValueS390X_OpS390XCMPWU(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (CMPWU x y)
// cond: x.ID > y.ID
// result: (InvertFlags (CMPWU y x))
for {
x := v_0
y := v_1
if !(x.ID > y.ID) {
break
}
v.reset(OpS390XInvertFlags)
v0 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
v0.AddArg(y)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (CMPWU x (MOVWreg y))
// result: (CMPWU x y)
for {

View File

@ -11,7 +11,7 @@ func dummy() {}
// Signed 64-bit compare-and-branch.
func si64(x, y chan int64) {
// s390x:"CGRJ\t[$]4, R[0-9]+, R[0-9]+, "
// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
@ -47,7 +47,7 @@ func si64x8() {
// Unsigned 64-bit compare-and-branch.
func ui64(x, y chan uint64) {
// s390x:"CLGRJ\t[$]2, R[0-9]+, R[0-9]+, "
// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
@ -83,7 +83,7 @@ func ui64x8() {
// Signed 32-bit compare-and-branch.
func si32(x, y chan int32) {
// s390x:"CRJ\t[$]4, R[0-9]+, R[0-9]+, "
// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
@ -119,7 +119,7 @@ func si32x8() {
// Unsigned 32-bit compare-and-branch.
func ui32(x, y chan uint32) {
// s390x:"CLRJ\t[$]2, R[0-9]+, R[0-9]+, "
// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}

View File

@ -32,7 +32,7 @@ func cmovuintptr(x, y uintptr) uintptr {
x = -y
}
// amd64:"CMOVQCS"
// arm64:"CSEL\tLO"
// arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}
@ -42,7 +42,7 @@ func cmov32bit(x, y uint32) uint32 {
x = -y
}
// amd64:"CMOVLCS"
// arm64:"CSEL\tLO"
// arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}
@ -52,7 +52,7 @@ func cmov16bit(x, y uint16) uint16 {
x = -y
}
// amd64:"CMOVWCS"
// arm64:"CSEL\tLO"
// arm64:"CSEL\t(LO|HI)"
// wasm:"Select"
return x
}