mirror of
https://github.com/golang/go
synced 2024-11-24 01:40:12 -07:00
cmd/compile: improve startRegs calculation
In register allocation, we calculate what values are used in and after the current block. If a value is used only after a function call, since registers are clobbered in call, we don't need to mark the value live at the entrance of the block. Before this CL it is considered live, and unnecessary copy or load may be generated when resolving merge edge. Fixes #14761. On AMD64: name old time/op new time/op delta BinaryTree17-12 2.84s ± 1% 2.81s ± 1% -1.06% (p=0.000 n=10+9) Fannkuch11-12 3.61s ± 0% 3.55s ± 1% -1.77% (p=0.000 n=10+9) FmtFprintfEmpty-12 50.4ns ± 4% 50.0ns ± 1% ~ (p=0.785 n=9+8) FmtFprintfString-12 80.0ns ± 3% 78.2ns ± 3% -2.35% (p=0.004 n=10+9) FmtFprintfInt-12 81.3ns ± 4% 81.8ns ± 2% ~ (p=0.159 n=10+10) FmtFprintfIntInt-12 120ns ± 4% 118ns ± 2% ~ (p=0.218 n=10+10) FmtFprintfPrefixedInt-12 152ns ± 3% 155ns ± 2% +2.11% (p=0.026 n=10+10) FmtFprintfFloat-12 240ns ± 1% 238ns ± 1% -0.79% (p=0.005 n=9+9) FmtManyArgs-12 504ns ± 1% 510ns ± 1% +1.14% (p=0.000 n=8+9) GobDecode-12 7.00ms ± 1% 6.99ms ± 0% ~ (p=0.497 n=9+10) GobEncode-12 5.47ms ± 1% 5.48ms ± 1% ~ (p=0.218 n=10+10) Gzip-12 258ms ± 2% 256ms ± 1% -0.96% (p=0.043 n=10+9) Gunzip-12 38.6ms ± 0% 38.3ms ± 0% -0.64% (p=0.000 n=9+8) HTTPClientServer-12 90.4µs ± 3% 87.2µs ±11% ~ (p=0.053 n=9+10) JSONEncode-12 15.6ms ± 0% 15.6ms ± 1% ~ (p=0.077 n=9+9) JSONDecode-12 55.1ms ± 1% 54.6ms ± 1% -0.85% (p=0.010 n=10+9) Mandelbrot200-12 4.49ms ± 0% 4.47ms ± 0% -0.25% (p=0.000 n=10+8) GoParse-12 3.38ms ± 0% 3.37ms ± 1% ~ (p=0.315 n=8+10) RegexpMatchEasy0_32-12 82.5ns ± 4% 82.0ns ± 0% ~ (p=0.164 n=10+8) RegexpMatchEasy0_1K-12 203ns ± 1% 202ns ± 1% -0.85% (p=0.000 n=9+10) RegexpMatchEasy1_32-12 82.3ns ± 1% 81.1ns ± 0% -1.39% (p=0.000 n=10+8) RegexpMatchEasy1_1K-12 357ns ± 1% 357ns ± 1% ~ (p=0.697 n=8+9) RegexpMatchMedium_32-12 125ns ± 2% 126ns ± 2% ~ (p=0.197 n=10+10) RegexpMatchMedium_1K-12 39.6µs ± 3% 39.6µs ± 1% ~ (p=0.971 n=10+10) RegexpMatchHard_32-12 1.99µs ± 2% 1.99µs ± 4% ~ (p=0.891 n=10+9) RegexpMatchHard_1K-12 60.1µs ± 3% 60.4µs ± 3% ~ (p=0.684 n=10+10) Revcomp-12 531ms ± 6% 441ms ± 0% -16.94% (p=0.000 n=10+9) Template-12 58.9ms ± 1% 58.7ms ± 1% ~ (p=0.315 n=10+10) TimeParse-12 319ns ± 1% 320ns ± 4% ~ (p=0.215 n=9+9) TimeFormat-12 345ns ± 0% 333ns ± 1% -3.36% (p=0.000 n=9+10) [Geo mean] 52.2µs 51.6µs -1.13% On ARM64: name old time/op new time/op delta BinaryTree17-8 8.53s ± 0% 8.36s ± 0% -1.89% (p=0.000 n=10+10) Fannkuch11-8 6.15s ± 0% 6.10s ± 0% -0.67% (p=0.000 n=10+10) FmtFprintfEmpty-8 117ns ± 0% 117ns ± 0% ~ (all equal) FmtFprintfString-8 192ns ± 0% 192ns ± 0% ~ (all equal) FmtFprintfInt-8 198ns ± 0% 198ns ± 0% ~ (p=0.211 n=10+10) FmtFprintfIntInt-8 289ns ± 0% 291ns ± 0% +0.59% (p=0.000 n=7+10) FmtFprintfPrefixedInt-8 320ns ± 2% 317ns ± 0% ~ (p=0.431 n=10+8) FmtFprintfFloat-8 538ns ± 0% 538ns ± 0% ~ (all equal) FmtManyArgs-8 1.17µs ± 1% 1.18µs ± 1% ~ (p=0.063 n=10+10) GobDecode-8 17.0ms ± 1% 17.2ms ± 1% +0.83% (p=0.000 n=10+10) GobEncode-8 14.2ms ± 0% 14.1ms ± 1% -0.78% (p=0.001 n=9+10) Gzip-8 806ms ± 0% 797ms ± 0% -1.12% (p=0.000 n=6+9) Gunzip-8 131ms ± 0% 130ms ± 0% -0.51% (p=0.000 n=10+9) HTTPClientServer-8 206µs ± 9% 212µs ± 2% ~ (p=0.829 n=10+8) JSONEncode-8 40.1ms ± 0% 40.1ms ± 0% ~ (p=0.136 n=9+9) JSONDecode-8 157ms ± 0% 151ms ± 0% -3.32% (p=0.000 n=9+9) Mandelbrot200-8 10.1ms ± 0% 10.1ms ± 0% -0.05% (p=0.000 n=9+8) GoParse-8 8.43ms ± 0% 8.43ms ± 0% ~ (p=0.912 n=10+10) RegexpMatchEasy0_32-8 228ns ± 1% 227ns ± 0% -0.26% (p=0.026 n=10+9) RegexpMatchEasy0_1K-8 1.92µs ± 0% 1.63µs ± 0% -15.18% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 258ns ± 1% 250ns ± 0% -2.83% (p=0.000 n=10+10) RegexpMatchEasy1_1K-8 2.39µs ± 0% 2.13µs ± 0% -10.94% (p=0.000 n=9+9) RegexpMatchMedium_32-8 352ns ± 0% 351ns ± 0% -0.29% (p=0.004 n=9+10) RegexpMatchMedium_1K-8 104µs ± 0% 105µs ± 0% +0.58% (p=0.000 n=8+9) RegexpMatchHard_32-8 5.84µs ± 0% 5.82µs ± 0% -0.27% (p=0.000 n=9+10) RegexpMatchHard_1K-8 177µs ± 0% 177µs ± 0% -0.07% (p=0.000 n=9+9) Revcomp-8 1.57s ± 1% 1.50s ± 1% -4.60% (p=0.000 n=9+10) Template-8 157ms ± 1% 153ms ± 1% -2.28% (p=0.000 n=10+9) TimeParse-8 779ns ± 1% 770ns ± 1% -1.18% (p=0.013 n=10+10) TimeFormat-8 823ns ± 2% 826ns ± 1% ~ (p=0.324 n=10+9) [Geo mean] 144µs 142µs -1.45% Reduce cmd/go text size by 0.5%. Change-Id: I9288ff983c4a7cf03fc0cb35b9b1750828013117 Reviewed-on: https://go-review.googlesource.com/38457 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
fa1d54c2ed
commit
8a5175df35
@ -241,6 +241,9 @@ type regAllocState struct {
|
||||
// current state of each (preregalloc) Value
|
||||
values []valState
|
||||
|
||||
// ID of SP, SB values
|
||||
sp, sb ID
|
||||
|
||||
// For each Value, map from its value ID back to the
|
||||
// preregalloc Value it was derived from.
|
||||
orig []*Value
|
||||
@ -709,8 +712,8 @@ func (s *regAllocState) compatRegs(t Type) regMask {
|
||||
}
|
||||
|
||||
func (s *regAllocState) regalloc(f *Func) {
|
||||
liveSet := f.newSparseSet(f.NumValues())
|
||||
defer f.retSparseSet(liveSet)
|
||||
regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
|
||||
defer f.retSparseSet(regValLiveSet)
|
||||
var oldSched []*Value
|
||||
var phis []*Value
|
||||
var phiRegs []register
|
||||
@ -733,32 +736,42 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
for _, b := range f.Blocks {
|
||||
s.curBlock = b
|
||||
|
||||
// Initialize liveSet and uses fields for this block.
|
||||
// Initialize regValLiveSet and uses fields for this block.
|
||||
// Walk backwards through the block doing liveness analysis.
|
||||
liveSet.clear()
|
||||
regValLiveSet.clear()
|
||||
for _, e := range s.live[b.ID] {
|
||||
s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
|
||||
liveSet.add(e.ID)
|
||||
regValLiveSet.add(e.ID)
|
||||
}
|
||||
if v := b.Control; v != nil && s.values[v.ID].needReg {
|
||||
s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
|
||||
liveSet.add(v.ID)
|
||||
regValLiveSet.add(v.ID)
|
||||
}
|
||||
for i := len(b.Values) - 1; i >= 0; i-- {
|
||||
v := b.Values[i]
|
||||
liveSet.remove(v.ID)
|
||||
regValLiveSet.remove(v.ID)
|
||||
if v.Op == OpPhi {
|
||||
// Remove v from the live set, but don't add
|
||||
// any inputs. This is the state the len(b.Preds)>1
|
||||
// case below desires; it wants to process phis specially.
|
||||
continue
|
||||
}
|
||||
if opcodeTable[v.Op].call {
|
||||
// Function call clobbers all the registers but SP and SB.
|
||||
regValLiveSet.clear()
|
||||
if s.sp != 0 && s.values[s.sp].uses != nil {
|
||||
regValLiveSet.add(s.sp)
|
||||
}
|
||||
if s.sb != 0 && s.values[s.sb].uses != nil {
|
||||
regValLiveSet.add(s.sb)
|
||||
}
|
||||
}
|
||||
for _, a := range v.Args {
|
||||
if !s.values[a.ID].needReg {
|
||||
continue
|
||||
}
|
||||
s.addUse(a.ID, int32(i), v.Pos)
|
||||
liveSet.add(a.ID)
|
||||
regValLiveSet.add(a.ID)
|
||||
}
|
||||
}
|
||||
if s.f.pass.debug > regDebug {
|
||||
@ -808,7 +821,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
// live but only used by some other successor of p.
|
||||
for r := register(0); r < s.numRegs; r++ {
|
||||
v := s.regs[r].v
|
||||
if v != nil && !liveSet.contains(v.ID) {
|
||||
if v != nil && !regValLiveSet.contains(v.ID) {
|
||||
s.freeReg(r)
|
||||
}
|
||||
}
|
||||
@ -864,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
continue
|
||||
}
|
||||
a := v.Args[idx]
|
||||
if !liveSet.contains(a.ID) {
|
||||
if !regValLiveSet.contains(a.ID) {
|
||||
// Input is dead beyond the phi, deallocate
|
||||
// anywhere else it might live.
|
||||
s.freeRegs(s.values[a.ID].regs)
|
||||
@ -932,6 +945,17 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
s.assignReg(r, v, v)
|
||||
}
|
||||
|
||||
// Deallocate any values which are no longer live. Phis are excluded.
|
||||
for r := register(0); r < s.numRegs; r++ {
|
||||
if phiUsed>>r&1 != 0 {
|
||||
continue
|
||||
}
|
||||
v := s.regs[r].v
|
||||
if v != nil && !regValLiveSet.contains(v.ID) {
|
||||
s.freeReg(r)
|
||||
}
|
||||
}
|
||||
|
||||
// Save the starting state for use by merge edges.
|
||||
var regList []startReg
|
||||
for r := register(0); r < s.numRegs; r++ {
|
||||
@ -1034,12 +1058,14 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
s.assignReg(s.SPReg, v, v)
|
||||
b.Values = append(b.Values, v)
|
||||
s.advanceUses(v)
|
||||
s.sp = v.ID
|
||||
continue
|
||||
}
|
||||
if v.Op == OpSB {
|
||||
s.assignReg(s.SBReg, v, v)
|
||||
b.Values = append(b.Values, v)
|
||||
s.advanceUses(v)
|
||||
s.sb = v.ID
|
||||
continue
|
||||
}
|
||||
if v.Op == OpSelect0 || v.Op == OpSelect1 {
|
||||
@ -1435,16 +1461,16 @@ func (s *regAllocState) regalloc(f *Func) {
|
||||
s.endRegs[b.ID] = regList
|
||||
|
||||
if checkEnabled {
|
||||
liveSet.clear()
|
||||
regValLiveSet.clear()
|
||||
for _, x := range s.live[b.ID] {
|
||||
liveSet.add(x.ID)
|
||||
regValLiveSet.add(x.ID)
|
||||
}
|
||||
for r := register(0); r < s.numRegs; r++ {
|
||||
v := s.regs[r].v
|
||||
if v == nil {
|
||||
continue
|
||||
}
|
||||
if !liveSet.contains(v.ID) {
|
||||
if !regValLiveSet.contains(v.ID) {
|
||||
s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user