mirror of
https://github.com/golang/go
synced 2024-11-17 06:04:47 -07:00
cmd/compile: teach regalloc about temporary registers
Temporary registers are sometimes needed by an architecture backend that has to use several machine instructions to implement a single SSA instruction. Mark such instructions so that regalloc can reserve a temporary register for each of them. That way we don't have to reserve a fixed register like we do now.

Convert the temp-register-using instructions on amd64 to use this new mechanism. Other archs can follow as needed.

Change-Id: I1d0c8588afdad5cd18b4398eb5a0f755be5dead7
Reviewed-on: https://go-review.googlesource.com/c/go/+/398556
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
249e51e5d9
commit
5f7abeca5a
@ -600,23 +600,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
|
||||
// Flag condition: ZERO && !PARITY
|
||||
// Generate:
|
||||
// MOV SRC,AX
|
||||
// CMOV*NE DST,AX
|
||||
// CMOV*PC AX,DST
|
||||
// MOV SRC,TMP
|
||||
// CMOV*NE DST,TMP
|
||||
// CMOV*PC TMP,DST
|
||||
//
|
||||
// TODO(rasky): we could generate:
|
||||
// CMOV*NE DST,SRC
|
||||
// CMOV*PC SRC,DST
|
||||
// But this requires a way for regalloc to know that SRC might be
|
||||
// clobbered by this instruction.
|
||||
if v.Args[1].Reg() != x86.REG_AX {
|
||||
opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
|
||||
}
|
||||
t := v.RegTmp()
|
||||
opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
|
||||
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = x86.REG_AX
|
||||
p.To.Reg = t
|
||||
var q *obj.Prog
|
||||
if v.Op == ssa.OpAMD64CMOVQEQF {
|
||||
q = s.Prog(x86.ACMOVQPC)
|
||||
@ -626,7 +626,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
q = s.Prog(x86.ACMOVWPC)
|
||||
}
|
||||
q.From.Type = obj.TYPE_REG
|
||||
q.From.Reg = x86.REG_AX
|
||||
q.From.Reg = t
|
||||
q.To.Type = obj.TYPE_REG
|
||||
q.To.Reg = v.Reg()
|
||||
|
||||
@ -1194,24 +1194,26 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||
ssagen.AddAux(&p.To, v)
|
||||
|
||||
case ssa.OpAMD64SETNEF:
|
||||
t := v.RegTmp()
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
q := s.Prog(x86.ASETPS)
|
||||
q.To.Type = obj.TYPE_REG
|
||||
q.To.Reg = x86.REG_AX
|
||||
q.To.Reg = t
|
||||
// ORL avoids partial register write and is smaller than ORQ, used by old compiler
|
||||
opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
|
||||
opregreg(s, x86.AORL, v.Reg(), t)
|
||||
|
||||
case ssa.OpAMD64SETEQF:
|
||||
t := v.RegTmp()
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
q := s.Prog(x86.ASETPC)
|
||||
q.To.Type = obj.TYPE_REG
|
||||
q.To.Reg = x86.REG_AX
|
||||
q.To.Reg = t
|
||||
// ANDL avoids partial register write and is smaller than ANDQ, used by old compiler
|
||||
opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
|
||||
opregreg(s, x86.AANDL, v.Reg(), t)
|
||||
|
||||
case ssa.OpAMD64InvertFlags:
|
||||
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
|
||||
|
@ -136,13 +136,11 @@ func init() {
|
||||
gp1flags1flags = regInfo{inputs: []regMask{gp, 0}, outputs: []regMask{gp, 0}}
|
||||
|
||||
readflags = regInfo{inputs: nil, outputs: gponly}
|
||||
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
|
||||
|
||||
gpload = regInfo{inputs: []regMask{gpspsbg, 0}, outputs: gponly}
|
||||
gp21load = regInfo{inputs: []regMask{gp, gpspsbg, 0}, outputs: gponly}
|
||||
gploadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsbg, gpsp, 0}, outputs: gponly}
|
||||
gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}
|
||||
gp21shxload = regInfo{inputs: []regMask{gpspsbg, gp, 0}, outputs: gponly}
|
||||
gp21shxloadidx = regInfo{inputs: []regMask{gpspsbg, gpsp, gp, 0}, outputs: gponly}
|
||||
|
||||
@ -563,15 +561,15 @@ func init() {
|
||||
// InvertFlags correctly, and to generate special code that handles NaN (unordered flag).
|
||||
// NOTE: the fact that CMOV*EQF here is marked to generate CMOV*NE is not a bug. See
|
||||
// code generation in amd64/ssa.go.
|
||||
{name: "CMOVQEQF", argLength: 3, reg: gp21pax, asm: "CMOVQNE", resultInArg0: true},
|
||||
{name: "CMOVQEQF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true, needIntTemp: true},
|
||||
{name: "CMOVQNEF", argLength: 3, reg: gp21, asm: "CMOVQNE", resultInArg0: true},
|
||||
{name: "CMOVQGTF", argLength: 3, reg: gp21, asm: "CMOVQHI", resultInArg0: true},
|
||||
{name: "CMOVQGEF", argLength: 3, reg: gp21, asm: "CMOVQCC", resultInArg0: true},
|
||||
{name: "CMOVLEQF", argLength: 3, reg: gp21pax, asm: "CMOVLNE", resultInArg0: true},
|
||||
{name: "CMOVLEQF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true, needIntTemp: true},
|
||||
{name: "CMOVLNEF", argLength: 3, reg: gp21, asm: "CMOVLNE", resultInArg0: true},
|
||||
{name: "CMOVLGTF", argLength: 3, reg: gp21, asm: "CMOVLHI", resultInArg0: true},
|
||||
{name: "CMOVLGEF", argLength: 3, reg: gp21, asm: "CMOVLCC", resultInArg0: true},
|
||||
{name: "CMOVWEQF", argLength: 3, reg: gp21pax, asm: "CMOVWNE", resultInArg0: true},
|
||||
{name: "CMOVWEQF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true, needIntTemp: true},
|
||||
{name: "CMOVWNEF", argLength: 3, reg: gp21, asm: "CMOVWNE", resultInArg0: true},
|
||||
{name: "CMOVWGTF", argLength: 3, reg: gp21, asm: "CMOVWHI", resultInArg0: true},
|
||||
{name: "CMOVWGEF", argLength: 3, reg: gp21, asm: "CMOVWCC", resultInArg0: true},
|
||||
@ -624,10 +622,10 @@ func init() {
|
||||
// Need different opcodes for floating point conditions because
|
||||
// any comparison involving a NaN is always FALSE and thus
|
||||
// the patterns for inverting conditions cannot be used.
|
||||
{name: "SETEQF", argLength: 1, reg: flagsgpax, asm: "SETEQ", clobberFlags: true}, // extract == condition from arg0
|
||||
{name: "SETNEF", argLength: 1, reg: flagsgpax, asm: "SETNE", clobberFlags: true}, // extract != condition from arg0
|
||||
{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
|
||||
{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
|
||||
{name: "SETEQF", argLength: 1, reg: flagsgp, asm: "SETEQ", clobberFlags: true, needIntTemp: true}, // extract == condition from arg0
|
||||
{name: "SETNEF", argLength: 1, reg: flagsgp, asm: "SETNE", clobberFlags: true, needIntTemp: true}, // extract != condition from arg0
|
||||
{name: "SETORD", argLength: 1, reg: flagsgp, asm: "SETPC"}, // extract "ordered" (No Nan present) condition from arg0
|
||||
{name: "SETNAN", argLength: 1, reg: flagsgp, asm: "SETPS"}, // extract "unordered" (Nan present) condition from arg0
|
||||
|
||||
{name: "SETGF", argLength: 1, reg: flagsgp, asm: "SETHI"}, // extract floating > condition from arg0
|
||||
{name: "SETGEF", argLength: 1, reg: flagsgp, asm: "SETCC"}, // extract floating >= condition from arg0
|
||||
|
@ -59,6 +59,7 @@ type opData struct {
|
||||
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
|
||||
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
|
||||
clobberFlags bool // this op clobbers flags register
|
||||
needIntTemp bool // need a temporary free integer register
|
||||
call bool // is a function call
|
||||
tailCall bool // is a tail call
|
||||
nilCheck bool // this op is a nil check on arg0
|
||||
@ -304,6 +305,9 @@ func genOp() {
|
||||
if v.clobberFlags {
|
||||
fmt.Fprintln(w, "clobberFlags: true,")
|
||||
}
|
||||
if v.needIntTemp {
|
||||
fmt.Fprintln(w, "needIntTemp: true,")
|
||||
}
|
||||
if v.call {
|
||||
fmt.Fprintln(w, "call: true,")
|
||||
}
|
||||
|
@ -46,6 +46,9 @@ type Func struct {
|
||||
// when register allocation is done, maps value ids to locations
|
||||
RegAlloc []Location
|
||||
|
||||
// temporary registers allocated to rare instructions
|
||||
tempRegs map[ID]*Register
|
||||
|
||||
// map from LocalSlot to set of Values that we want to store in that slot.
|
||||
NamedValues map[LocalSlot][]*Value
|
||||
// Names is a copy of NamedValues.Keys. We keep a separate list
|
||||
|
@ -994,6 +994,9 @@ func (v *Value) LongHTML() string {
|
||||
if int(v.ID) < len(r) && r[v.ID] != nil {
|
||||
s += " : " + html.EscapeString(r[v.ID].String())
|
||||
}
|
||||
if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
|
||||
s += " tmp=" + reg.String()
|
||||
}
|
||||
var names []string
|
||||
for name, values := range v.Block.Func.NamedValues {
|
||||
for _, value := range values {
|
||||
|
@ -33,6 +33,7 @@ type opInfo struct {
|
||||
resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
|
||||
resultNotInArgs bool // outputs must not be allocated to the same registers as inputs
|
||||
clobberFlags bool // this op clobbers flags register
|
||||
needIntTemp bool // need a temporary free integer register
|
||||
call bool // is a function call
|
||||
tailCall bool // is a tail call
|
||||
nilCheck bool // this op is a nil check on arg0
|
||||
|
@ -11565,15 +11565,15 @@ var opcodeTable = [...]opInfo{
|
||||
name: "CMOVQEQF",
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
needIntTemp: true,
|
||||
asm: x86.ACMOVQNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
clobbers: 1, // AX
|
||||
outputs: []outputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -11626,15 +11626,15 @@ var opcodeTable = [...]opInfo{
|
||||
name: "CMOVLEQF",
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
needIntTemp: true,
|
||||
asm: x86.ACMOVLNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
clobbers: 1, // AX
|
||||
outputs: []outputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -11687,15 +11687,15 @@ var opcodeTable = [...]opInfo{
|
||||
name: "CMOVWEQF",
|
||||
argLen: 3,
|
||||
resultInArg0: true,
|
||||
needIntTemp: true,
|
||||
asm: x86.ACMOVWNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
clobbers: 1, // AX
|
||||
outputs: []outputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -12120,11 +12120,11 @@ var opcodeTable = [...]opInfo{
|
||||
name: "SETEQF",
|
||||
argLen: 1,
|
||||
clobberFlags: true,
|
||||
needIntTemp: true,
|
||||
asm: x86.ASETEQ,
|
||||
reg: regInfo{
|
||||
clobbers: 1, // AX
|
||||
outputs: []outputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -12132,11 +12132,11 @@ var opcodeTable = [...]opInfo{
|
||||
name: "SETNEF",
|
||||
argLen: 1,
|
||||
clobberFlags: true,
|
||||
needIntTemp: true,
|
||||
asm: x86.ASETNE,
|
||||
reg: regInfo{
|
||||
clobbers: 1, // AX
|
||||
outputs: []outputInfo{
|
||||
{0, 49134}, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -852,6 +852,9 @@ func (s *regAllocState) isGReg(r register) bool {
|
||||
return s.f.Config.hasGReg && s.GReg == r
|
||||
}
|
||||
|
||||
// tmpVal is a dummy value used to represent the value being held in a
// temporary register. Its address is passed to allocReg when a register is
// reserved for an op marked needIntTemp.
|
||||
var tmpVal Value
|
||||
|
||||
func (s *regAllocState) regalloc(f *Func) {
|
||||
regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
|
||||
defer f.retSparseSet(regValLiveSet)
|
||||
@ -1266,6 +1269,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
|
||||
// Process all the non-phi values.
|
||||
for idx, v := range oldSched {
|
||||
tmpReg := noRegister
|
||||
if s.f.pass.debug > regDebug {
|
||||
fmt.Printf(" processing %s\n", v.LongString())
|
||||
}
|
||||
@ -1550,6 +1554,20 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
}
|
||||
|
||||
ok:
|
||||
// Pick a temporary register if needed.
|
||||
// It should be distinct from all the input registers, so we
|
||||
// allocate it after all the input registers, but before
|
||||
// the input registers are freed via advanceUses below.
|
||||
// (Not all instructions need that distinct part, but it is conservative.)
|
||||
if opcodeTable[v.Op].needIntTemp {
|
||||
m := s.allocatable & s.f.Config.gpRegMask
|
||||
if m&^desired.avoid != 0 {
|
||||
m &^= desired.avoid
|
||||
}
|
||||
tmpReg = s.allocReg(m, &tmpVal)
|
||||
s.nospill |= regMask(1) << tmpReg
|
||||
}
|
||||
|
||||
// Now that all args are in regs, we're ready to issue the value itself.
|
||||
// Before we pick a register for the output value, allow input registers
|
||||
// to be deallocated. We do this here so that the output can use the
|
||||
@ -1574,6 +1592,11 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
|
||||
maxOutIdx := -1
|
||||
var used regMask
|
||||
if tmpReg != noRegister {
|
||||
// Ensure output registers are distinct from the temporary register.
|
||||
// (Not all instructions need that distinct part, but it is conservative.)
|
||||
used |= regMask(1) << tmpReg
|
||||
}
|
||||
for _, out := range regspec.outputs {
|
||||
mask := out.regs & s.allocatable &^ used
|
||||
if mask == 0 {
|
||||
@ -1655,6 +1678,13 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
s.assignReg(r, v, v)
|
||||
}
|
||||
}
|
||||
if tmpReg != noRegister {
|
||||
// Remember the temp register allocation, if any.
|
||||
if s.f.tempRegs == nil {
|
||||
s.f.tempRegs = map[ID]*Register{}
|
||||
}
|
||||
s.f.tempRegs[v.ID] = &s.registers[tmpReg]
|
||||
}
|
||||
}
|
||||
|
||||
// deallocate dead args, if we have not done so
|
||||
|
@ -148,21 +148,22 @@ func (v *Value) LongString() string {
|
||||
for _, a := range v.Args {
|
||||
s += fmt.Sprintf(" %v", a)
|
||||
}
|
||||
var r []Location
|
||||
if v.Block != nil {
|
||||
r = v.Block.Func.RegAlloc
|
||||
if v.Block == nil {
|
||||
return s
|
||||
}
|
||||
r := v.Block.Func.RegAlloc
|
||||
if int(v.ID) < len(r) && r[v.ID] != nil {
|
||||
s += " : " + r[v.ID].String()
|
||||
}
|
||||
if reg := v.Block.Func.tempRegs[v.ID]; reg != nil {
|
||||
s += " tmp=" + reg.String()
|
||||
}
|
||||
var names []string
|
||||
if v.Block != nil {
|
||||
for name, values := range v.Block.Func.NamedValues {
|
||||
for _, value := range values {
|
||||
if value == v {
|
||||
names = append(names, name.String())
|
||||
break // drop duplicates.
|
||||
}
|
||||
for name, values := range v.Block.Func.NamedValues {
|
||||
for _, value := range values {
|
||||
if value == v {
|
||||
names = append(names, name.String())
|
||||
break // drop duplicates.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -488,6 +489,15 @@ func (v *Value) Reg1() int16 {
|
||||
return reg.(*Register).objNum
|
||||
}
|
||||
|
||||
// RegTmp returns the temporary register assigned to v, in cmd/internal/obj/$ARCH numbering.
// The register is looked up by v.ID in the tempRegs map of v's function.
// If no entry is present, RegTmp reports the problem through v.Fatalf.
|
||||
func (v *Value) RegTmp() int16 {
|
||||
reg := v.Block.Func.tempRegs[v.ID]
|
||||
// No temporary register was recorded for v: report it as a fatal error.
if reg == nil {
|
||||
v.Fatalf("nil tmp register for value: %s\n%s\n", v.LongString(), v.Block.Func)
|
||||
}
|
||||
return reg.objNum
|
||||
}
|
||||
|
||||
func (v *Value) RegName() string {
|
||||
reg := v.Block.Func.RegAlloc[v.ID]
|
||||
if reg == nil {
|
||||
|
Loading…
Reference in New Issue
Block a user