1
0
mirror of https://github.com/golang/go synced 2024-11-23 00:20:12 -07:00

cmd/compile: add rewrite rules for conditional instructions on arm64

This CL adds rewrite rules for CSETM, CSINC, CSINV, and CSNEG. By adding
these rules, we can save one instruction.

For example,

  func test(cond bool, a int) int {
    if cond {
      a++
    }
    return a
  }

Before:

  MOVD "".a+8(RSP), R0
  ADD $1, R0, R1
  MOVBU "".cond(RSP), R2
  CMPW $0, R2
  CSEL NE, R1, R0, R0

After:

  MOVBU "".cond(RSP), R0
  CMPW $0, R0
  MOVD "".a+8(RSP), R0
  CSINC EQ, R0, R0, R0

This patch is a copy of CL 285694. Co-authored-by: JunchenLi
<junchen.li@arm.com>

Change-Id: Ic1a79e8b8ece409b533becfcb7950f11e7b76f24
Reviewed-on: https://go-review.googlesource.com/c/go/+/302231
Trust: fannie zhang <Fannie.Zhang@arm.com>
Run-TryBot: fannie zhang <Fannie.Zhang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
fanzha02 2021-01-18 14:32:49 +08:00 committed by fannie zhang
parent 51e4bb236c
commit f5e6d3e879
6 changed files with 494 additions and 6 deletions

View File

@ -956,6 +956,20 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.SetFrom3Reg(r1)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
p.From.Reg = condBits[ssa.Op(v.AuxInt)]
p.Reg = v.Args[0].Reg()
p.SetFrom3Reg(v.Args[1].Reg())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64CSETM:
p := s.Prog(arm64.ACSETM)
p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
p.From.Reg = condBits[ssa.Op(v.AuxInt)]
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64DUFFZERO:
// runtime.duffzero expects start address in R20
p := s.Prog(obj.ADUFFZERO)

View File

@ -1359,8 +1359,18 @@
(XOR x (MVN y)) => (EON x y)
(OR x (MVN y)) => (ORN x y)
(MVN (XOR x y)) => (EON x y)
(CSEL [cc] (MOVDconst [-1]) (MOVDconst [0]) flag) => (CSETM [cc] flag)
(CSEL [cc] (MOVDconst [0]) (MOVDconst [-1]) flag) => (CSETM [arm64Negate(cc)] flag)
(CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
(CSEL [cc] x (ADDconst [1] a) flag) => (CSINC [cc] x a flag)
(CSEL [cc] (ADDconst [1] a) x flag) => (CSINC [arm64Negate(cc)] x a flag)
(CSEL [cc] x (MVN a) flag) => (CSINV [cc] x a flag)
(CSEL [cc] (MVN a) x flag) => (CSINV [arm64Negate(cc)] x a flag)
(CSEL [cc] x (NEG a) flag) => (CSNEG [cc] x a flag)
(CSEL [cc] (NEG a) x flag) => (CSNEG [arm64Negate(cc)] x a flag)
(SUB x (SUB y z)) => (SUB (ADD <v.Type> x z) y)
(SUB (SUB x y) z) => (SUB x (ADD <y.Type> y z))
@ -1515,9 +1525,13 @@
(LEnoov (InvertFlags cmp) yes no) => (GEnoov cmp yes no)
(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
// absorb InvertFlags into CSEL(0)
// absorb InvertFlags into conditional instructions
(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
(CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
(CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
(CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
// absorb flag constants into boolean values
(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])

View File

@ -472,8 +472,12 @@ func init() {
// conditional instructions; auxint is
// one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.)
{name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : arg1
{name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : 0
{name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : arg1
{name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : 0
{name: "CSINC", argLength: 3, reg: gp2flags1, asm: "CSINC", aux: "CCop"}, // auxint(flags) ? arg0 : arg1 + 1
{name: "CSINV", argLength: 3, reg: gp2flags1, asm: "CSINV", aux: "CCop"}, // auxint(flags) ? arg0 : ^arg1
{name: "CSNEG", argLength: 3, reg: gp2flags1, asm: "CSNEG", aux: "CCop"}, // auxint(flags) ? arg0 : -arg1
{name: "CSETM", argLength: 1, reg: readflags, asm: "CSETM", aux: "CCop"}, // auxint(flags) ? -1 : 0
// function calls
{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem

View File

@ -1556,6 +1556,10 @@ const (
OpARM64FRINTZD
OpARM64CSEL
OpARM64CSEL0
OpARM64CSINC
OpARM64CSINV
OpARM64CSNEG
OpARM64CSETM
OpARM64CALLstatic
OpARM64CALLclosure
OpARM64CALLinter
@ -20774,6 +20778,62 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "CSINC",
auxType: auxCCop,
argLen: 3,
asm: arm64.ACSINC,
reg: regInfo{
inputs: []inputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
{1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "CSINV",
auxType: auxCCop,
argLen: 3,
asm: arm64.ACSINV,
reg: regInfo{
inputs: []inputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
{1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "CSNEG",
auxType: auxCCop,
argLen: 3,
asm: arm64.ACSNEG,
reg: regInfo{
inputs: []inputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
{1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "CSETM",
auxType: auxCCop,
argLen: 1,
asm: arm64.ACSETM,
reg: regInfo{
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "CALLstatic",
auxType: auxCallOff,

View File

@ -69,6 +69,14 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64CSEL(v)
case OpARM64CSEL0:
return rewriteValueARM64_OpARM64CSEL0(v)
case OpARM64CSETM:
return rewriteValueARM64_OpARM64CSETM(v)
case OpARM64CSINC:
return rewriteValueARM64_OpARM64CSINC(v)
case OpARM64CSINV:
return rewriteValueARM64_OpARM64CSINV(v)
case OpARM64CSNEG:
return rewriteValueARM64_OpARM64CSNEG(v)
case OpARM64DIV:
return rewriteValueARM64_OpARM64DIV(v)
case OpARM64DIVW:
@ -3215,6 +3223,32 @@ func rewriteValueARM64_OpARM64CSEL(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (CSEL [cc] (MOVDconst [-1]) (MOVDconst [0]) flag)
// result: (CSETM [cc] flag)
for {
cc := auxIntToOp(v.AuxInt)
if v_0.Op != OpARM64MOVDconst || auxIntToInt64(v_0.AuxInt) != -1 || v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != 0 {
break
}
flag := v_2
v.reset(OpARM64CSETM)
v.AuxInt = opToAuxInt(cc)
v.AddArg(flag)
return true
}
// match: (CSEL [cc] (MOVDconst [0]) (MOVDconst [-1]) flag)
// result: (CSETM [arm64Negate(cc)] flag)
for {
cc := auxIntToOp(v.AuxInt)
if v_0.Op != OpARM64MOVDconst || auxIntToInt64(v_0.AuxInt) != 0 || v_1.Op != OpARM64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 {
break
}
flag := v_2
v.reset(OpARM64CSETM)
v.AuxInt = opToAuxInt(arm64Negate(cc))
v.AddArg(flag)
return true
}
// match: (CSEL [cc] x (MOVDconst [0]) flag)
// result: (CSEL0 [cc] x flag)
for {
@ -3243,6 +3277,96 @@ func rewriteValueARM64_OpARM64CSEL(v *Value) bool {
v.AddArg2(y, flag)
return true
}
// match: (CSEL [cc] x (ADDconst [1] a) flag)
// result: (CSINC [cc] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
x := v_0
if v_1.Op != OpARM64ADDconst || auxIntToInt64(v_1.AuxInt) != 1 {
break
}
a := v_1.Args[0]
flag := v_2
v.reset(OpARM64CSINC)
v.AuxInt = opToAuxInt(cc)
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] (ADDconst [1] a) x flag)
// result: (CSINC [arm64Negate(cc)] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
if v_0.Op != OpARM64ADDconst || auxIntToInt64(v_0.AuxInt) != 1 {
break
}
a := v_0.Args[0]
x := v_1
flag := v_2
v.reset(OpARM64CSINC)
v.AuxInt = opToAuxInt(arm64Negate(cc))
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] x (MVN a) flag)
// result: (CSINV [cc] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
x := v_0
if v_1.Op != OpARM64MVN {
break
}
a := v_1.Args[0]
flag := v_2
v.reset(OpARM64CSINV)
v.AuxInt = opToAuxInt(cc)
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] (MVN a) x flag)
// result: (CSINV [arm64Negate(cc)] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
if v_0.Op != OpARM64MVN {
break
}
a := v_0.Args[0]
x := v_1
flag := v_2
v.reset(OpARM64CSINV)
v.AuxInt = opToAuxInt(arm64Negate(cc))
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] x (NEG a) flag)
// result: (CSNEG [cc] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
x := v_0
if v_1.Op != OpARM64NEG {
break
}
a := v_1.Args[0]
flag := v_2
v.reset(OpARM64CSNEG)
v.AuxInt = opToAuxInt(cc)
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] (NEG a) x flag)
// result: (CSNEG [arm64Negate(cc)] x a flag)
for {
cc := auxIntToOp(v.AuxInt)
if v_0.Op != OpARM64NEG {
break
}
a := v_0.Args[0]
x := v_1
flag := v_2
v.reset(OpARM64CSNEG)
v.AuxInt = opToAuxInt(arm64Negate(cc))
v.AddArg3(x, a, flag)
return true
}
// match: (CSEL [cc] x y (InvertFlags cmp))
// result: (CSEL [arm64Invert(cc)] x y cmp)
for {
@ -3405,6 +3529,86 @@ func rewriteValueARM64_OpARM64CSEL0(v *Value) bool {
}
return false
}
// rewriteValueARM64_OpARM64CSETM applies the rewrite rule
//
//	(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
//
// to v in place and reports whether the rule matched.
func rewriteValueARM64_OpARM64CSETM(v *Value) bool {
	flags := v.Args[0]
	if flags.Op != OpARM64InvertFlags {
		return false
	}
	// Read the condition code and the wrapped comparison before v is reset.
	cc := auxIntToOp(v.AuxInt)
	cmp := flags.Args[0]
	v.reset(OpARM64CSETM)
	v.AuxInt = opToAuxInt(arm64Invert(cc))
	v.AddArg(cmp)
	return true
}
// rewriteValueARM64_OpARM64CSINC applies the rewrite rule
//
//	(CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
//
// to v in place and reports whether the rule matched.
func rewriteValueARM64_OpARM64CSINC(v *Value) bool {
	flags := v.Args[2]
	if flags.Op != OpARM64InvertFlags {
		return false
	}
	// Read the condition code and all operands before v is reset.
	cc := auxIntToOp(v.AuxInt)
	x, y, cmp := v.Args[0], v.Args[1], flags.Args[0]
	v.reset(OpARM64CSINC)
	v.AuxInt = opToAuxInt(arm64Invert(cc))
	v.AddArg3(x, y, cmp)
	return true
}
// rewriteValueARM64_OpARM64CSINV applies the rewrite rule
//
//	(CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
//
// to v in place and reports whether the rule matched.
func rewriteValueARM64_OpARM64CSINV(v *Value) bool {
	flags := v.Args[2]
	if flags.Op != OpARM64InvertFlags {
		return false
	}
	// Read the condition code and all operands before v is reset.
	cc := auxIntToOp(v.AuxInt)
	x, y, cmp := v.Args[0], v.Args[1], flags.Args[0]
	v.reset(OpARM64CSINV)
	v.AuxInt = opToAuxInt(arm64Invert(cc))
	v.AddArg3(x, y, cmp)
	return true
}
// rewriteValueARM64_OpARM64CSNEG applies the rewrite rule
//
//	(CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
//
// to v in place and reports whether the rule matched.
func rewriteValueARM64_OpARM64CSNEG(v *Value) bool {
	flags := v.Args[2]
	if flags.Op != OpARM64InvertFlags {
		return false
	}
	// Read the condition code and all operands before v is reset.
	cc := auxIntToOp(v.AuxInt)
	x, y, cmp := v.Args[0], v.Args[1], flags.Args[0]
	v.reset(OpARM64CSNEG)
	v.AuxInt = opToAuxInt(arm64Invert(cc))
	v.AddArg3(x, y, cmp)
	return true
}
func rewriteValueARM64_OpARM64DIV(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View File

@ -32,7 +32,7 @@ func cmovuintptr(x, y uintptr) uintptr {
x = -y
}
// amd64:"CMOVQ(HI|CS)"
// arm64:"CSEL\t(LO|HI)"
// arm64:"CSNEG\tLS"
// wasm:"Select"
return x
}
@ -42,7 +42,7 @@ func cmov32bit(x, y uint32) uint32 {
x = -y
}
// amd64:"CMOVL(HI|CS)"
// arm64:"CSEL\t(LO|HI)"
// arm64:"CSNEG\t(LS|HS)"
// wasm:"Select"
return x
}
@ -52,7 +52,7 @@ func cmov16bit(x, y uint16) uint16 {
x = -y
}
// amd64:"CMOVW(HI|CS)"
// arm64:"CSEL\t(LO|HI)"
// arm64:"CSNEG\t(LS|HS)"
// wasm:"Select"
return x
}
@ -208,3 +208,195 @@ func cmovstore(a []int, i int, b bool) {
// amd64:"CMOVQNE"
a[i] = 7
}
var r0, r1, r2, r3, r4, r5 int
// cmovinc stores three bool-controlled selections into r0, r1 and r2; in each
// one, one arm is a value plus one.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them (a quoted pattern presumably must
// appear, a `-`-prefixed one must not) — confirm against the test harness and
// keep each directive directly above its store.
func cmovinc(cond bool, a, b, c int) {
var x0, x1 int
// Increment on the false arm: cond ? a : b+1.
if cond {
x0 = a
} else {
x0 = b + 1
}
// arm64:"CSINC\tNE", -"CSEL"
r0 = x0
// Increment on the true arm: cond ? b+1 : a.
if cond {
x1 = b + 1
} else {
x1 = a
}
// arm64:"CSINC\tEQ", -"CSEL"
r1 = x1
// No else arm: c is incremented only when cond is true.
if cond {
c++
}
// arm64:"CSINC\tEQ", -"CSEL"
r2 = c
}
// cmovinv stores two bool-controlled selections into r0 and r1; in each one,
// one arm is the bitwise complement of b.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them — confirm against the test harness
// and keep each directive directly above its store.
func cmovinv(cond bool, a, b int) {
var x0, x1 int
// Complement on the false arm: cond ? a : ^b.
if cond {
x0 = a
} else {
x0 = ^b
}
// arm64:"CSINV\tNE", -"CSEL"
r0 = x0
// Complement on the true arm: cond ? ^b : a.
if cond {
x1 = ^b
} else {
x1 = a
}
// arm64:"CSINV\tEQ", -"CSEL"
r1 = x1
}
// cmovneg stores two bool-controlled selections into r0 and r1; in each one,
// one arm is the negation of b. The parameter c is not used in the body.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them — confirm against the test harness
// and keep each directive directly above its store.
func cmovneg(cond bool, a, b, c int) {
var x0, x1 int
// Negation on the false arm: cond ? a : -b.
if cond {
x0 = a
} else {
x0 = -b
}
// arm64:"CSNEG\tNE", -"CSEL"
r0 = x0
// Negation on the true arm: cond ? -b : a.
if cond {
x1 = -b
} else {
x1 = a
}
// arm64:"CSNEG\tEQ", -"CSEL"
r1 = x1
}
// cmovsetm stores two bool-controlled selections between the constants -1 and
// 0 into r0 and r1. The parameter x is not used in the body.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them — confirm against the test harness
// and keep each directive directly above its store.
func cmovsetm(cond bool, x int) {
var x0, x1 int
// All-ones on the true arm: cond ? -1 : 0.
if cond {
x0 = -1
} else {
x0 = 0
}
// arm64:"CSETM\tNE", -"CSEL"
r0 = x0
// All-ones on the false arm: cond ? 0 : -1.
if cond {
x1 = 0
} else {
x1 = -1
}
// arm64:"CSETM\tEQ", -"CSEL"
r1 = x1
}
// cmovFcmp0 stores six selections into r0..r5, each controlled by a float64
// comparison of s and t. In every case the plain value is on the true arm and
// the derived value (b+1, ^b, -b, or the constant 0) is on the false arm.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them — confirm against the test harness
// and keep each directive directly above its store.
func cmovFcmp0(s, t float64, a, b int) {
var x0, x1, x2, x3, x4, x5 int
// s < t ? a : b+1
if s < t {
x0 = a
} else {
x0 = b + 1
}
// arm64:"CSINC\tMI", -"CSEL"
r0 = x0
// s <= t ? a : ^b
if s <= t {
x1 = a
} else {
x1 = ^b
}
// arm64:"CSINV\tLS", -"CSEL"
r1 = x1
// s > t ? a : -b
if s > t {
x2 = a
} else {
x2 = -b
}
// arm64:"CSNEG\tMI", -"CSEL"
r2 = x2
// s >= t ? -1 : 0
if s >= t {
x3 = -1
} else {
x3 = 0
}
// arm64:"CSETM\tLS", -"CSEL"
r3 = x3
// s == t ? a : b+1
if s == t {
x4 = a
} else {
x4 = b + 1
}
// arm64:"CSINC\tEQ", -"CSEL"
r4 = x4
// s != t ? a : b+1
if s != t {
x5 = a
} else {
x5 = b + 1
}
// arm64:"CSINC\tNE", -"CSEL"
r5 = x5
}
// cmovFcmp1 stores six selections into r0..r5, each controlled by a float64
// comparison of s and t. In every case the derived value (b+1, ^b, -b, or the
// constant 0) is on the true arm and the plain value is on the false arm.
// NOTE(review): the `// arm64:` comments look like assembly-check directives
// bound to the statement that follows them — confirm against the test harness
// and keep each directive directly above its store.
func cmovFcmp1(s, t float64, a, b int) {
var x0, x1, x2, x3, x4, x5 int
// s < t ? b+1 : a
if s < t {
x0 = b + 1
} else {
x0 = a
}
// arm64:"CSINC\tPL", -"CSEL"
r0 = x0
// s <= t ? ^b : a
if s <= t {
x1 = ^b
} else {
x1 = a
}
// arm64:"CSINV\tHI", -"CSEL"
r1 = x1
// s > t ? -b : a
if s > t {
x2 = -b
} else {
x2 = a
}
// arm64:"CSNEG\tPL", -"CSEL"
r2 = x2
// s >= t ? 0 : -1
if s >= t {
x3 = 0
} else {
x3 = -1
}
// arm64:"CSETM\tHI", -"CSEL"
r3 = x3
// s == t ? b+1 : a
if s == t {
x4 = b + 1
} else {
x4 = a
}
// arm64:"CSINC\tNE", -"CSEL"
r4 = x4
// s != t ? b+1 : a
if s != t {
x5 = b + 1
} else {
x5 = a
}
// arm64:"CSINC\tEQ", -"CSEL"
r5 = x5
}