mirror of
https://github.com/golang/go
synced 2024-11-18 07:14:44 -07:00
cmd/compile: move value around before kick it out of register
When allocating registers, before kicking out the existing value, copy it to a spare register if there is one. So later use of this value can be found in register instead of reload from spill. This is very helpful for instructions of which the input and/or output can only be in specific registers, e.g. DIV on x86, MUL/DIV on MIPS. May also be helpful in general. For "go build -a cmd/go" on AMD64, reduce "spilled value remains" by 1% (not including args, which almost certainly remain). For the code in issue #16061 on AMD64: MaxRem-12 111µs ± 1% 94µs ± 0% -15.38% (p=0.008 n=5+5) Go1 benchmark on AMD64: BinaryTree17-12 2.32s ± 2% 2.30s ± 1% ~ (p=0.421 n=5+5) Fannkuch11-12 2.52s ± 0% 2.44s ± 0% -3.44% (p=0.008 n=5+5) FmtFprintfEmpty-12 39.9ns ± 3% 39.8ns ± 0% ~ (p=0.635 n=5+4) FmtFprintfString-12 114ns ± 1% 113ns ± 1% ~ (p=0.905 n=5+5) FmtFprintfInt-12 102ns ± 6% 98ns ± 1% ~ (p=0.087 n=5+5) FmtFprintfIntInt-12 146ns ± 5% 147ns ± 1% ~ (p=0.238 n=5+5) FmtFprintfPrefixedInt-12 155ns ± 2% 151ns ± 1% -2.58% (p=0.008 n=5+5) FmtFprintfFloat-12 231ns ± 1% 232ns ± 1% ~ (p=0.286 n=5+5) FmtManyArgs-12 657ns ± 1% 649ns ± 0% -1.31% (p=0.008 n=5+5) GobDecode-12 6.35ms ± 0% 6.29ms ± 1% ~ (p=0.056 n=5+5) GobEncode-12 5.38ms ± 1% 5.45ms ± 1% ~ (p=0.056 n=5+5) Gzip-12 209ms ± 0% 209ms ± 1% ~ (p=0.690 n=5+5) Gunzip-12 31.2ms ± 1% 31.1ms ± 1% ~ (p=0.548 n=5+5) HTTPClientServer-12 123µs ± 4% 130µs ± 8% ~ (p=0.151 n=5+5) JSONEncode-12 14.0ms ± 1% 14.0ms ± 1% ~ (p=0.421 n=5+5) JSONDecode-12 41.2ms ± 1% 41.1ms ± 2% ~ (p=0.421 n=5+5) Mandelbrot200-12 3.96ms ± 1% 3.98ms ± 0% ~ (p=0.421 n=5+5) GoParse-12 2.88ms ± 1% 2.88ms ± 1% ~ (p=0.841 n=5+5) RegexpMatchEasy0_32-12 68.0ns ± 3% 66.6ns ± 1% -2.00% (p=0.024 n=5+5) RegexpMatchEasy0_1K-12 728ns ± 8% 682ns ± 1% -6.26% (p=0.008 n=5+5) RegexpMatchEasy1_32-12 66.8ns ± 2% 66.0ns ± 1% ~ (p=0.302 n=5+5) RegexpMatchEasy1_1K-12 291ns ± 2% 288ns ± 1% ~ (p=0.111 n=5+5) RegexpMatchMedium_32-12 103ns ± 2% 100ns ± 0% -2.53% (p=0.016 n=5+4) RegexpMatchMedium_1K-12 31.9µs ± 1% 31.3µs ± 0% -1.75% (p=0.008 n=5+5) RegexpMatchHard_32-12 1.59µs ± 2% 1.59µs ± 1% ~ (p=0.548 n=5+5) RegexpMatchHard_1K-12 48.3µs ± 2% 47.7µs ± 1% ~ (p=0.222 n=5+5) Revcomp-12 340ms ± 1% 338ms ± 1% ~ (p=0.421 n=5+5) Template-12 46.3ms ± 1% 46.5ms ± 1% ~ (p=0.690 n=5+5) TimeParse-12 252ns ± 1% 247ns ± 0% -1.91% (p=0.000 n=5+4) TimeFormat-12 277ns ± 1% 267ns ± 0% -3.82% (p=0.008 n=5+5) [Geo mean] 48.8µs 48.3µs -0.93% It has very little effect on binary size and compiler speed. compilebench: Template 230ms ±10% 231ms ± 8% ~ (p=0.546 n=9+9) Unicode 123ms ± 6% 124ms ± 9% ~ (p=0.481 n=10+10) GoTypes 742ms ± 6% 755ms ± 3% ~ (p=0.123 n=10+10) Compiler 3.10s ± 3% 3.08s ± 1% ~ (p=0.631 n=10+10) Fixes #16061. Change-Id: Id99cdc7a182ee10a704fa0f04e8e0d0809b2ac56 Reviewed-on: https://go-review.googlesource.com/29732 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
9d4b40f55d
commit
f876fb9bae
@ -242,6 +242,9 @@ type regAllocState struct {
|
||||
// mask of registers currently in use
|
||||
used regMask
|
||||
|
||||
// mask of registers used in the current instruction
|
||||
tmpused regMask
|
||||
|
||||
// current block we're working on
|
||||
curBlock *Block
|
||||
|
||||
@ -259,6 +262,10 @@ type regAllocState struct {
|
||||
// spillLive[blockid] is the set of live spills at the end of each block
|
||||
spillLive [][]ID
|
||||
|
||||
// a set of copies we generated to move things around, and
|
||||
// whether it is used in shuffle. Unused copies will be deleted.
|
||||
copies map[*Value]bool
|
||||
|
||||
loopnest *loopnest
|
||||
}
|
||||
|
||||
@ -377,6 +384,21 @@ func (s *regAllocState) allocReg(mask regMask, v *Value) register {
|
||||
if maxuse == -1 {
|
||||
s.f.Fatalf("couldn't find register to spill")
|
||||
}
|
||||
|
||||
// Try to move it around before kicking out, if there is a free register.
|
||||
// We generate a Copy and record it. It will be deleted if never used.
|
||||
v2 := s.regs[r].v
|
||||
m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
|
||||
if countRegs(s.values[v2.ID].regs) == 1 && m != 0 {
|
||||
r2 := pickReg(m)
|
||||
c := s.curBlock.NewValue1(v2.Line, OpCopy, v2.Type, s.regs[r].c)
|
||||
s.copies[c] = false
|
||||
if s.f.pass.debug > regDebug {
|
||||
fmt.Printf("copy %s to %s : %s\n", v2, c, s.registers[r2].Name())
|
||||
}
|
||||
s.setOrig(c, v2)
|
||||
s.assignReg(r2, v2, c)
|
||||
}
|
||||
s.freeReg(r)
|
||||
return r
|
||||
}
|
||||
@ -526,6 +548,7 @@ func (s *regAllocState) init(f *Func) {
|
||||
s.regs = make([]regState, s.numRegs)
|
||||
s.values = make([]valState, f.NumValues())
|
||||
s.orig = make([]*Value, f.NumValues())
|
||||
s.copies = make(map[*Value]bool)
|
||||
for _, b := range f.Blocks {
|
||||
for _, v := range b.Values {
|
||||
if !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple() {
|
||||
@ -627,6 +650,9 @@ func (s *regAllocState) setState(regs []endReg) {
|
||||
// compatRegs returns the set of registers which can store a type t.
|
||||
func (s *regAllocState) compatRegs(t Type) regMask {
|
||||
var m regMask
|
||||
if t.IsTuple() || t.IsFlags() {
|
||||
return 0
|
||||
}
|
||||
if t.IsFloat() || t == TypeInt128 {
|
||||
m = s.f.Config.fpRegMask
|
||||
} else {
|
||||
@ -1163,7 +1189,8 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
for _, r := range dinfo[idx].in[0] {
|
||||
if r != noRegister && m>>r&1 != 0 {
|
||||
m = regMask(1) << r
|
||||
s.allocValToReg(v.Args[0], m, true, v.Line)
|
||||
c := s.allocValToReg(v.Args[0], m, true, v.Line)
|
||||
s.copies[c] = false
|
||||
// Note: no update to args[0] so the instruction will
|
||||
// use the original copy.
|
||||
goto ok
|
||||
@ -1173,7 +1200,8 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
for _, r := range dinfo[idx].in[1] {
|
||||
if r != noRegister && m>>r&1 != 0 {
|
||||
m = regMask(1) << r
|
||||
s.allocValToReg(v.Args[1], m, true, v.Line)
|
||||
c := s.allocValToReg(v.Args[1], m, true, v.Line)
|
||||
s.copies[c] = false
|
||||
args[0], args[1] = args[1], args[0]
|
||||
goto ok
|
||||
}
|
||||
@ -1184,21 +1212,24 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
m &^= desired.avoid
|
||||
}
|
||||
// Save input 0 to a new register so we can clobber it.
|
||||
s.allocValToReg(v.Args[0], m, true, v.Line)
|
||||
ok:
|
||||
c := s.allocValToReg(v.Args[0], m, true, v.Line)
|
||||
s.copies[c] = false
|
||||
}
|
||||
|
||||
ok:
|
||||
// Now that all args are in regs, we're ready to issue the value itself.
|
||||
// Before we pick a register for the output value, allow input registers
|
||||
// to be deallocated. We do this here so that the output can use the
|
||||
// same register as a dying input.
|
||||
if !opcodeTable[v.Op].resultNotInArgs {
|
||||
s.tmpused = s.nospill
|
||||
s.nospill = 0
|
||||
s.advanceUses(v) // frees any registers holding args that are no longer live
|
||||
}
|
||||
|
||||
// Dump any registers which will be clobbered
|
||||
s.freeRegs(regspec.clobbers)
|
||||
s.tmpused |= regspec.clobbers
|
||||
|
||||
// Pick registers for outputs.
|
||||
{
|
||||
@ -1250,6 +1281,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
r := s.allocReg(mask, v)
|
||||
outRegs[out.idx] = r
|
||||
used |= regMask(1) << r
|
||||
s.tmpused |= regMask(1) << r
|
||||
}
|
||||
// Record register choices
|
||||
if v.Type.IsTuple() {
|
||||
@ -1274,6 +1306,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
s.nospill = 0
|
||||
s.advanceUses(v) // frees any registers holding args that are no longer live
|
||||
}
|
||||
s.tmpused = 0
|
||||
|
||||
// Issue the Value itself.
|
||||
for i, a := range args {
|
||||
@ -1314,6 +1347,10 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
// type-compatible register. If this turns out not to be true,
|
||||
// we'll need to introduce a regspec for a block's control value.
|
||||
b.Control = s.allocValToReg(v, s.compatRegs(v.Type), false, b.Line)
|
||||
if b.Control != v {
|
||||
v.Uses--
|
||||
b.Control.Uses++
|
||||
}
|
||||
// Remove this use from the uses list.
|
||||
vi := &s.values[v.ID]
|
||||
u := vi.uses
|
||||
@ -1512,7 +1549,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
for i := range s.values {
|
||||
vi := s.values[i]
|
||||
if vi.spillUsed {
|
||||
if s.f.pass.debug > logSpills {
|
||||
if s.f.pass.debug > logSpills && vi.spill.Op != OpArg {
|
||||
s.f.Config.Warnl(vi.spill.Line, "spilled value at %v remains", vi.spill)
|
||||
}
|
||||
continue
|
||||
@ -1670,6 +1707,29 @@ sinking:
|
||||
}
|
||||
}
|
||||
|
||||
// Erase any copies we never used
|
||||
for c, used := range s.copies {
|
||||
if !used && c.Uses == 0 {
|
||||
if s.f.pass.debug > regDebug {
|
||||
fmt.Printf("delete copied value %s\n", c.LongString())
|
||||
}
|
||||
c.Args[0].Uses--
|
||||
f.freeValue(c)
|
||||
}
|
||||
}
|
||||
|
||||
for _, b := range f.Blocks {
|
||||
i := 0
|
||||
for _, v := range b.Values {
|
||||
if v.Op == OpInvalid {
|
||||
continue
|
||||
}
|
||||
b.Values[i] = v
|
||||
i++
|
||||
}
|
||||
b.Values = b.Values[:i]
|
||||
}
|
||||
|
||||
if f.pass.stats > 0 {
|
||||
f.LogStat("spills_info",
|
||||
nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed")
|
||||
@ -1904,6 +1964,10 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, line int32
|
||||
// Note: if splice==nil then c will appear dead. This is
|
||||
// non-SSA formed code, so be careful after this pass not to run
|
||||
// deadcode elimination.
|
||||
if _, ok := e.s.copies[occupant.c]; ok {
|
||||
// The copy at occupant.c was used to avoid spill.
|
||||
e.s.copies[occupant.c] = true
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user