1
0
mirror of https://github.com/golang/go synced 2024-11-17 06:04:47 -07:00

cmd/compile: teach regalloc about temporary registers

Temporary registers are sometimes needed for an architecture backend
which needs to use several machine instructions to implement a single
SSA instruction.

Mark such instructions so that regalloc can reserve the temporary register
for it. That way we don't have to reserve a fixed register like we do now.

Convert the temp-register-using instructions on amd64 to use this
new mechanism. Other archs can follow as needed.

Change-Id: I1d0c8588afdad5cd18b4398eb5a0f755be5dead7
Reviewed-on: https://go-review.googlesource.com/c/go/+/398556
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Keith Randall 2022-04-05 15:07:29 -07:00
parent 249e51e5d9
commit 5f7abeca5a
9 changed files with 95 additions and 44 deletions

View File

@ -600,23 +600,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
// Flag condition: ZERO && !PARITY
// Generate:
// MOV SRC,AX
// CMOV*NE DST,AX
// CMOV*PC AX,DST
// MOV SRC,TMP
// CMOV*NE DST,TMP
// CMOV*PC TMP,DST
//
// TODO(rasky): we could generate:
// CMOV*NE DST,SRC
// CMOV*PC SRC,DST
// But this requires a way for regalloc to know that SRC might be
// clobbered by this instruction.
if v.Args[1].Reg() != x86.REG_AX {
opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
}
t := v.RegTmp()
opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX
p.To.Reg = t
var q *obj.Prog
if v.Op == ssa.OpAMD64CMOVQEQF {
q = s.Prog(x86.ACMOVQPC)
@ -626,7 +626,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
q = s.Prog(x86.ACMOVWPC)
}
q.From.Type = obj.TYPE_REG
q.From.Reg = x86.REG_AX
q.From.Reg = t
q.To.Type = obj.TYPE_REG
q.To.Reg = v.Reg()
@ -1194,24 +1194,26 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssagen.AddAux(&p.To, v)
case ssa.OpAMD64SETNEF:
t := v.RegTmp()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
q := s.Prog(x86.ASETPS)
q.To.Type = obj.TYPE_REG
q.To.Reg = x86.REG_AX
q.To.Reg = t
// ORL avoids partial register write and is smaller than ORQ, used by old compiler
opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
opregreg(s, x86.AORL, v.Reg(), t)
case ssa.OpAMD64SETEQF:
t := v.RegTmp()
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
q := s.Prog(x86.ASETPC)
q.To.Type = obj.TYPE_REG
q.To.Reg = x86.REG_AX
q.To.Reg = t
// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
opregreg(s, x86.AANDL, v.Reg(), t)
case ssa.OpAMD64InvertFlags:
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())

View File

@ -136,13 +136,11 @@ func init() {
gp1flags1flags = regInfo{inputs: []regMask{gp, 0}, outputs: []regMask{gp, 0}}
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
gp21shxload = regInfo{inputs: []regMask{gpspsbg, gp, 0}, outputs: gponly}
gp21shxloadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gp, 0}, outputs: gponly}
@ -563,15 +561,15 @@ func init() {
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
// code generation in amd64/ssa.go.
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQEQF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
{name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
{name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true, needIntTemp: true},
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
@ -624,10 +622,10 @@ func init() {
// Need different opcodes for floating point conditions because
// any comparison involving a NaN is always FALSE and thus
// the patterns for inverting conditions cannot be used.
{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
{name: "SETEQF", argLength: 1, reg: flagsgp, asm: "SETEQ", clobberFlags: true, needIntTemp: true}, // extract == condition from arg0
{name: "SETNEF", argLength: 1, reg: flagsgp, asm: "SETNE", clobberFlags: true, needIntTemp: true}, // extract != condition from arg0
{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0
{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0

View File

@ -59,6 +59,7 @@ type opData struct {
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
clobberFlags bool // this op clobbers flags register
needIntTemp bool // need a temporary free integer register
call bool // is a function call
tailCall bool // is a tail call
nilCheck bool // this op is a nil check on arg0
@ -304,6 +305,9 @@ func genOp() {
if v.clobberFlags {
fmt.Fprintln(w, "clobberFlags: true,")
}
if v.needIntTemp {
fmt.Fprintln(w, "needIntTemp: true,")
}
if v.call {
fmt.Fprintln(w, "call: true,")
}

View File

@ -46,6 +46,9 @@ type Func struct {
// when register allocation is done, maps value ids to locations
RegAlloc []Location
// temporary registers allocated to rare instructions
tempRegs map[ID]*Register
// map from LocalSlot to set of Values that we want to store in that slot.
NamedValues map[LocalSlot][]*Value
// Names is a copy of NamedValues.Keys. We keep a separate list

View File

@ -994,6 +994,9 @@ func (v *Value) LongHTML() string {
if int(v.ID) < len(r) && r[v.ID] != nil {
s += " : " + html.EscapeString(r[v.ID].String())
}
if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
s += " tmp=" + reg.String()
}
var names []string
for name, values := range v.Block.Func.NamedValues {
for _, value := range values {

View File

@ -33,6 +33,7 @@ type opInfo struct {
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
clobberFlags bool // this op clobbers flags register
needIntTemp bool // need a temporary free integer register
call bool // is a function call
tailCall bool // is a tail call
nilCheck bool // this op is a nil check on arg0

View File

@ -11565,15 +11565,15 @@ var opcodeTable = [...]opInfo{
name: "CMOVQEQF",
argLen: 3,
resultInArg0: true,
needIntTemp: true,
asm: x86.ACMOVQNE,
reg: regInfo{
inputs: []inputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
@ -11626,15 +11626,15 @@ var opcodeTable = [...]opInfo{
name: "CMOVLEQF",
argLen: 3,
resultInArg0: true,
needIntTemp: true,
asm: x86.ACMOVLNE,
reg: regInfo{
inputs: []inputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
@ -11687,15 +11687,15 @@ var opcodeTable = [...]opInfo{
name: "CMOVWEQF",
argLen: 3,
resultInArg0: true,
needIntTemp: true,
asm: x86.ACMOVWNE,
reg: regInfo{
inputs: []inputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 1, // AX
outputs: []outputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
@ -12120,11 +12120,11 @@ var opcodeTable = [...]opInfo{
name: "SETEQF",
argLen: 1,
clobberFlags: true,
needIntTemp: true,
asm: x86.ASETEQ,
reg: regInfo{
clobbers: 1, // AX
outputs: []outputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
@ -12132,11 +12132,11 @@ var opcodeTable = [...]opInfo{
name: "SETNEF",
argLen: 1,
clobberFlags: true,
needIntTemp: true,
asm: x86.ASETNE,
reg: regInfo{
clobbers: 1, // AX
outputs: []outputInfo{
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},

View File

@ -852,6 +852,9 @@ func (s *regAllocState) isGReg(r register) bool {
return s.f.Config.hasGReg && s.GReg == r
}
// Dummy value used to represent the value being held in a temporary register.
var tmpVal Value
func (s *regAllocState) regalloc(f *Func) {
regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
defer f.retSparseSet(regValLiveSet)
@ -1266,6 +1269,7 @@ func (s *regAllocState) regalloc(f *Func) {
// Process all the non-phi values.
for idx, v := range oldSched {
tmpReg := noRegister
if s.f.pass.debug > regDebug {
fmt.Printf(" processing %s\n", v.LongString())
}
@ -1550,6 +1554,20 @@ func (s *regAllocState) regalloc(f *Func) {
}
ok:
// Pick a temporary register if needed.
// It should be distinct from all the input registers, so we
// allocate it after all the input registers, but before
// the input registers are freed via advanceUses below.
// (Not all instructions need that distinct part, but it is conservative.)
if opcodeTable[v.Op].needIntTemp {
m := s.allocatable & s.f.Config.gpRegMask
if m&^desired.avoid != 0 {
m &^= desired.avoid
}
tmpReg = s.allocReg(m, &tmpVal)
s.nospill |= regMask(1) << tmpReg
}
// Now that all args are in regs, we're ready to issue the value itself.
// Before we pick a register for the output value, allow input registers
// to be deallocated. We do this here so that the output can use the
@ -1574,6 +1592,11 @@ func (s *regAllocState) regalloc(f *Func) {
outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
maxOutIdx := -1
var used regMask
if tmpReg != noRegister {
// Ensure output registers are distinct from the temporary register.
// (Not all instructions need that distinct part, but it is conservative.)
used |= regMask(1) << tmpReg
}
for _, out := range regspec.outputs {
mask := out.regs & s.allocatable &^ used
if mask == 0 {
@ -1655,6 +1678,13 @@ func (s *regAllocState) regalloc(f *Func) {
s.assignReg(r, v, v)
}
}
if tmpReg != noRegister {
// Remember the temp register allocation, if any.
if s.f.tempRegs == nil {
s.f.tempRegs = map[ID]*Register{}
}
s.f.tempRegs[v.ID] = &s.registers[tmpReg]
}
}
// deallocate dead args, if we have not done so

View File

@ -148,21 +148,22 @@ func (v *Value) LongString() string {
for _, a := range v.Args {
s += fmt.Sprintf(" %v", a)
}
var r []Location
if v.Block != nil {
r = v.Block.Func.RegAlloc
if v.Block == nil {
return s
}
r := v.Block.Func.RegAlloc
if int(v.ID) < len(r) && r[v.ID] != nil {
s += " : " + r[v.ID].String()
}
if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
s += " tmp=" + reg.String()
}
var names []string
if v.Block != nil {
for name, values := range v.Block.Func.NamedValues {
for _, value := range values {
if value == v {
names = append(names, name.String())
break // drop duplicates.
}
for name, values := range v.Block.Func.NamedValues {
for _, value := range values {
if value == v {
names = append(names, name.String())
break // drop duplicates.
}
}
}
@ -488,6 +489,15 @@ func (v *Value) Reg1() int16 {
return reg.(*Register).objNum
}
// RegTmp returns the temporary register assigned to v, in cmd/internal/obj/$ARCH numbering.
func (v *Value) RegTmp() int16 {
reg := v.Block.Func.tempRegs[v.ID]
if reg == nil {
v.Fatalf("nil tmp register for value: %s\n%s\n", v.LongString(), v.Block.Func)
}
return reg.objNum
}
func (v *Value) RegName() string {
reg := v.Block.Func.RegAlloc[v.ID]
if reg == nil {