mirror of
https://github.com/golang/go
synced 2024-11-11 22:50:22 -07:00
cmd/compile: use depth first topological sort algorithm for layout
The current layout algorithm tries to put consecutive blocks together, so the priority of the successor block is higher than the priority of the zero indegree block. This algorithm is beneficial for subsequent register allocation, but will result in more branch instructions. The depth-first topological sorting algorithm is a well-known layout algorithm, which has applications in many languages, and it helps to reduce branch instructions. This CL applies it to the layout pass. The test results show that it helps to reduce the code size. This CL also includes the following changes: 1, Removed the primary predecessor mechanism. The new layout algorithm is not very friendly to register allocator in some cases, in order to adapt to the new layout algorithm, a new primary predecessor selection strategy is introduced. 2, Since the new layout implementation may place non-loop blocks between loop blocks, some adaptive modifications have also been made to looprotate pass. 3, The layout also affects the results of codegen, so this CL also adjusted several codegen tests accordingly. It is inevitable that this CL will cause the code size or performance of a few functions to decrease, but the number of cases it improves is much larger than the number of cases it drops. Statistical data from compilecmp on linux/amd64 is as follow: name old time/op new time/op delta Template 382ms ± 4% 382ms ± 4% ~ (p=0.497 n=49+50) Unicode 170ms ± 9% 169ms ± 8% ~ (p=0.344 n=48+50) GoTypes 2.01s ± 4% 2.01s ± 4% ~ (p=0.628 n=50+48) Compiler 190ms ±10% 189ms ± 9% ~ (p=0.734 n=50+50) SSA 11.8s ± 2% 11.8s ± 3% ~ (p=0.877 n=50+50) Flate 241ms ± 9% 241ms ± 8% ~ (p=0.897 n=50+49) GoParser 366ms ± 3% 361ms ± 4% -1.21% (p=0.004 n=47+50) Reflect 835ms ± 3% 838ms ± 3% ~ (p=0.275 n=50+49) Tar 336ms ± 4% 335ms ± 3% ~ (p=0.454 n=48+48) XML 433ms ± 4% 431ms ± 3% ~ (p=0.071 n=49+48) LinkCompiler 706ms ± 4% 705ms ± 4% ~ (p=0.608 n=50+49) ExternalLinkCompiler 1.85s ± 3% 1.83s ± 2% -1.47% (p=0.000 n=49+48) LinkWithoutDebugCompiler 437ms ± 5% 437ms ± 6% ~ (p=0.953 n=49+50) [Geo mean] 615ms 613ms -0.37% name old alloc/op new alloc/op delta Template 38.7MB ± 1% 38.7MB ± 1% ~ (p=0.834 n=50+50) Unicode 28.1MB ± 0% 28.1MB ± 0% -0.22% (p=0.000 n=49+50) GoTypes 168MB ± 1% 168MB ± 1% ~ (p=0.054 n=47+47) Compiler 23.0MB ± 1% 23.0MB ± 1% ~ (p=0.432 n=50+50) SSA 1.54GB ± 0% 1.54GB ± 0% +0.21% (p=0.000 n=50+50) Flate 23.6MB ± 1% 23.6MB ± 1% ~ (p=0.153 n=43+46) GoParser 35.1MB ± 1% 35.1MB ± 2% ~ (p=0.202 n=50+50) Reflect 84.7MB ± 1% 84.7MB ± 1% ~ (p=0.333 n=48+49) Tar 34.5MB ± 1% 34.5MB ± 1% ~ (p=0.406 n=46+49) XML 44.3MB ± 2% 44.2MB ± 3% ~ (p=0.981 n=50+50) LinkCompiler 131MB ± 0% 128MB ± 0% -2.74% (p=0.000 n=50+50) ExternalLinkCompiler 120MB ± 0% 120MB ± 0% +0.01% (p=0.007 n=50+50) LinkWithoutDebugCompiler 77.3MB ± 0% 77.3MB ± 0% -0.02% (p=0.000 n=50+50) [Geo mean] 69.3MB 69.1MB -0.22% file before after Δ % addr2line 4104220 4043684 -60536 -1.475% api 5342502 5249678 -92824 -1.737% asm 4973785 4858257 -115528 -2.323% buildid 2667844 2625660 -42184 -1.581% cgo 4686849 4616313 -70536 -1.505% compile 23667431 23268406 -399025 -1.686% cover 4959676 4874108 -85568 -1.725% dist 3515934 3450422 -65512 -1.863% doc 3995581 3925469 -70112 -1.755% fix 3379202 3318522 -60680 -1.796% link 6743249 6629913 -113336 -1.681% nm 4047529 3991777 -55752 -1.377% objdump 4456151 4388151 -68000 -1.526% pack 2435040 2398072 -36968 -1.518% pprof 13804080 13565808 -238272 -1.726% test2json 2690043 2645987 -44056 -1.638% trace 10418492 10232716 -185776 -1.783% vet 7258259 7121259 -137000 -1.888% total 113145867 111204202 -1941665 -1.716% The situation on linux/arm64 is as follow: name old time/op new time/op delta Template 280ms ± 1% 282ms ± 1% +0.75% (p=0.000 n=46+48) Unicode 124ms ± 2% 124ms ± 2% +0.37% (p=0.045 n=50+50) GoTypes 1.69s ± 1% 1.70s ± 1% +0.56% (p=0.000 n=49+50) Compiler 122ms ± 1% 123ms ± 1% +0.93% (p=0.000 n=50+50) SSA 12.6s ± 1% 12.7s ± 0% +0.72% (p=0.000 n=50+50) Flate 170ms ± 1% 172ms ± 1% +0.97% (p=0.000 n=49+49) GoParser 262ms ± 1% 263ms ± 1% +0.39% (p=0.000 n=49+48) Reflect 639ms ± 1% 650ms ± 1% +1.63% (p=0.000 n=49+49) Tar 243ms ± 1% 245ms ± 1% +0.82% (p=0.000 n=50+50) XML 324ms ± 1% 327ms ± 1% +0.72% (p=0.000 n=50+49) LinkCompiler 597ms ± 1% 596ms ± 1% -0.27% (p=0.001 n=48+47) ExternalLinkCompiler 1.90s ± 1% 1.88s ± 1% -1.00% (p=0.000 n=50+50) LinkWithoutDebugCompiler 364ms ± 1% 363ms ± 1% ~ (p=0.220 n=49+50) [Geo mean] 485ms 488ms +0.49% name old alloc/op new alloc/op delta Template 38.7MB ± 0% 38.8MB ± 1% ~ (p=0.093 n=43+49) Unicode 28.4MB ± 0% 28.4MB ± 0% +0.03% (p=0.000 n=49+45) GoTypes 169MB ± 1% 169MB ± 1% +0.23% (p=0.010 n=50+50) Compiler 23.2MB ± 1% 23.2MB ± 1% +0.11% (p=0.000 n=40+44) SSA 1.54GB ± 0% 1.55GB ± 0% +0.45% (p=0.000 n=47+49) Flate 23.8MB ± 2% 23.8MB ± 1% ~ (p=0.543 n=50+50) GoParser 35.3MB ± 1% 35.4MB ± 1% ~ (p=0.792 n=50+50) Reflect 85.2MB ± 1% 85.2MB ± 0% ~ (p=0.055 n=50+47) Tar 34.5MB ± 1% 34.5MB ± 1% +0.06% (p=0.015 n=50+50) XML 43.8MB ± 2% 43.9MB ± 2% +0.19% (p=0.000 n=48+48) LinkCompiler 137MB ± 0% 136MB ± 0% -0.92% (p=0.000 n=50+50) ExternalLinkCompiler 127MB ± 0% 127MB ± 0% ~ (p=0.516 n=50+50) LinkWithoutDebugCompiler 84.0MB ± 0% 84.0MB ± 0% ~ (p=0.057 n=50+50) [Geo mean] 70.4MB 70.4MB +0.01% file before after Δ % addr2line 4021557 4002933 -18624 -0.463% api 5127847 5028503 -99344 -1.937% asm 5034716 4936836 -97880 -1.944% buildid 2608118 2594094 -14024 -0.538% cgo 4488592 4398320 -90272 -2.011% compile 22501129 22213592 -287537 -1.278% cover 4742301 4713573 -28728 -0.606% dist 3388071 3365311 -22760 -0.672% doc 3802250 3776082 -26168 -0.688% fix 3306147 3216939 -89208 -2.698% link 6404483 6363699 -40784 -0.637% nm 3941026 3921930 -19096 -0.485% objdump 4383330 4295122 -88208 -2.012% pack 2404547 2389515 -15032 -0.625% pprof 12996234 12856818 -139416 -1.073% test2json 2668500 2586788 -81712 -3.062% trace 9816276 9609580 -206696 -2.106% vet 6900682 6787338 -113344 -1.643% total 108535806 107056973 -1478833 -1.363% Change-Id: Iaec1cdcaacca8025e9babb0fb8a532fddb70c87d Reviewed-on: https://go-review.googlesource.com/c/go/+/255239 Reviewed-by: eric fang <eric.fang@arm.com> Reviewed-by: Keith Randall <khr@golang.org> Trust: eric fang <eric.fang@arm.com>
This commit is contained in:
parent
051bf37833
commit
600259b099
@ -358,6 +358,22 @@ func (b *Block) AuxIntString() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// likelyBranch reports whether block b is the likely branch of all of its predecessors.
|
||||||
|
func (b *Block) likelyBranch() bool {
|
||||||
|
if len(b.Preds) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, e := range b.Preds {
|
||||||
|
p := e.b
|
||||||
|
if len(p.Succs) == 1 || len(p.Succs) == 2 && (p.Likely == BranchLikely && p.Succs[0].b == b ||
|
||||||
|
p.Likely == BranchUnlikely && p.Succs[1].b == b) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (b *Block) Logf(msg string, args ...interface{}) { b.Func.Logf(msg, args...) }
|
func (b *Block) Logf(msg string, args ...interface{}) { b.Func.Logf(msg, args...) }
|
||||||
func (b *Block) Log() bool { return b.Func.Log() }
|
func (b *Block) Log() bool { return b.Func.Log() }
|
||||||
func (b *Block) Fatalf(msg string, args ...interface{}) { b.Func.Fatalf(msg, args...) }
|
func (b *Block) Fatalf(msg string, args ...interface{}) { b.Func.Fatalf(msg, args...) }
|
||||||
|
@ -41,8 +41,13 @@ func layoutOrder(f *Func) []*Block {
|
|||||||
indegree := make([]int, f.NumBlocks())
|
indegree := make([]int, f.NumBlocks())
|
||||||
posdegree := f.newSparseSet(f.NumBlocks()) // blocks with positive remaining degree
|
posdegree := f.newSparseSet(f.NumBlocks()) // blocks with positive remaining degree
|
||||||
defer f.retSparseSet(posdegree)
|
defer f.retSparseSet(posdegree)
|
||||||
zerodegree := f.newSparseSet(f.NumBlocks()) // blocks with zero remaining degree
|
// blocks with zero remaining degree. Use slice to simulate a LIFO queue to implement
|
||||||
defer f.retSparseSet(zerodegree)
|
// the depth-first topology sorting algorithm.
|
||||||
|
var zerodegree []ID
|
||||||
|
// LIFO queue. Track the successor blocks of the scheduled block so that when we
|
||||||
|
// encounter loops, we choose to schedule the successor block of the most recently
|
||||||
|
// scheduled block.
|
||||||
|
var succs []ID
|
||||||
exit := f.newSparseSet(f.NumBlocks()) // exit blocks
|
exit := f.newSparseSet(f.NumBlocks()) // exit blocks
|
||||||
defer f.retSparseSet(exit)
|
defer f.retSparseSet(exit)
|
||||||
|
|
||||||
@ -88,7 +93,8 @@ func layoutOrder(f *Func) []*Block {
|
|||||||
}
|
}
|
||||||
indegree[b.ID] = len(b.Preds)
|
indegree[b.ID] = len(b.Preds)
|
||||||
if len(b.Preds) == 0 {
|
if len(b.Preds) == 0 {
|
||||||
zerodegree.add(b.ID)
|
// Push an element to the tail of the queue.
|
||||||
|
zerodegree = append(zerodegree, b.ID)
|
||||||
} else {
|
} else {
|
||||||
posdegree.add(b.ID)
|
posdegree.add(b.ID)
|
||||||
}
|
}
|
||||||
@ -105,12 +111,24 @@ blockloop:
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, e := range b.Succs {
|
// Here, the order of traversing the b.Succs affects the direction in which the topological
|
||||||
c := e.b
|
// sort advances in depth. Take the following cfg as an example, regardless of other factors.
|
||||||
|
// b1
|
||||||
|
// 0/ \1
|
||||||
|
// b2 b3
|
||||||
|
// Traverse b.Succs in order, the right child node b3 will be scheduled immediately after
|
||||||
|
// b1, traverse b.Succs in reverse order, the left child node b2 will be scheduled
|
||||||
|
// immediately after b1. The test results show that reverse traversal performs a little
|
||||||
|
// better.
|
||||||
|
// Note: You need to consider both layout and register allocation when testing performance.
|
||||||
|
for i := len(b.Succs) - 1; i >= 0; i-- {
|
||||||
|
c := b.Succs[i].b
|
||||||
indegree[c.ID]--
|
indegree[c.ID]--
|
||||||
if indegree[c.ID] == 0 {
|
if indegree[c.ID] == 0 {
|
||||||
posdegree.remove(c.ID)
|
posdegree.remove(c.ID)
|
||||||
zerodegree.add(c.ID)
|
zerodegree = append(zerodegree, c.ID)
|
||||||
|
} else {
|
||||||
|
succs = append(succs, c.ID)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,30 +150,30 @@ blockloop:
|
|||||||
|
|
||||||
// Use degree for now.
|
// Use degree for now.
|
||||||
bid = 0
|
bid = 0
|
||||||
mindegree := f.NumBlocks()
|
|
||||||
for _, e := range order[len(order)-1].Succs {
|
|
||||||
c := e.b
|
|
||||||
if scheduled[c.ID] || c.Kind == BlockExit {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if indegree[c.ID] < mindegree {
|
|
||||||
mindegree = indegree[c.ID]
|
|
||||||
bid = c.ID
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if bid != 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// TODO: improve this part
|
// TODO: improve this part
|
||||||
// No successor of the previously scheduled block works.
|
// No successor of the previously scheduled block works.
|
||||||
// Pick a zero-degree block if we can.
|
// Pick a zero-degree block if we can.
|
||||||
for zerodegree.size() > 0 {
|
for len(zerodegree) > 0 {
|
||||||
cid := zerodegree.pop()
|
// Pop an element from the tail of the queue.
|
||||||
|
cid := zerodegree[len(zerodegree)-1]
|
||||||
|
zerodegree = zerodegree[:len(zerodegree)-1]
|
||||||
if !scheduled[cid] {
|
if !scheduled[cid] {
|
||||||
bid = cid
|
bid = cid
|
||||||
continue blockloop
|
continue blockloop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Still nothing, pick the unscheduled successor block encountered most recently.
|
||||||
|
for len(succs) > 0 {
|
||||||
|
// Pop an element from the tail of the queue.
|
||||||
|
cid := succs[len(succs)-1]
|
||||||
|
succs = succs[:len(succs)-1]
|
||||||
|
if !scheduled[cid] {
|
||||||
|
bid = cid
|
||||||
|
continue blockloop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Still nothing, pick any non-exit block.
|
// Still nothing, pick any non-exit block.
|
||||||
for posdegree.size() > 0 {
|
for posdegree.size() > 0 {
|
||||||
cid := posdegree.pop()
|
cid := posdegree.pop()
|
||||||
|
@ -68,12 +68,15 @@ func loopRotate(f *Func) {
|
|||||||
if nextb == p { // original loop predecessor is next
|
if nextb == p { // original loop predecessor is next
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if loopnest.b2l[nextb.ID] != loop { // about to leave loop
|
if loopnest.b2l[nextb.ID] == loop {
|
||||||
break
|
after[p.ID] = append(after[p.ID], nextb)
|
||||||
}
|
}
|
||||||
after[p.ID] = append(after[p.ID], nextb)
|
|
||||||
b = nextb
|
b = nextb
|
||||||
}
|
}
|
||||||
|
// Swap b and p so that we'll handle p before b when moving blocks.
|
||||||
|
f.Blocks[idToIdx[loop.header.ID]] = p
|
||||||
|
f.Blocks[idToIdx[p.ID]] = loop.header
|
||||||
|
idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID]
|
||||||
|
|
||||||
// Place b after p.
|
// Place b after p.
|
||||||
for _, b := range after[p.ID] {
|
for _, b := range after[p.ID] {
|
||||||
@ -86,21 +89,23 @@ func loopRotate(f *Func) {
|
|||||||
// before the rest of the loop. And that relies on the
|
// before the rest of the loop. And that relies on the
|
||||||
// fact that we only identify reducible loops.
|
// fact that we only identify reducible loops.
|
||||||
j := 0
|
j := 0
|
||||||
for i, b := range f.Blocks {
|
// Some blocks that are not part of a loop may be placed
|
||||||
|
// between loop blocks. In order to avoid these blocks from
|
||||||
|
// being overwritten, use a temporary slice.
|
||||||
|
newOrder := make([]*Block, 0, f.NumBlocks())
|
||||||
|
for _, b := range f.Blocks {
|
||||||
if _, ok := move[b.ID]; ok {
|
if _, ok := move[b.ID]; ok {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
f.Blocks[j] = b
|
newOrder = append(newOrder, b)
|
||||||
j++
|
j++
|
||||||
for _, a := range after[b.ID] {
|
for _, a := range after[b.ID] {
|
||||||
if j > i {
|
newOrder = append(newOrder, a)
|
||||||
f.Fatalf("head before tail in loop %s", b)
|
|
||||||
}
|
|
||||||
f.Blocks[j] = a
|
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if j != len(f.Blocks) {
|
if j != len(f.Blocks) {
|
||||||
f.Fatalf("bad reordering in looprotate")
|
f.Fatalf("bad reordering in looprotate")
|
||||||
}
|
}
|
||||||
|
f.Blocks = newOrder
|
||||||
}
|
}
|
||||||
|
@ -241,12 +241,6 @@ type regAllocState struct {
|
|||||||
GReg register
|
GReg register
|
||||||
allocatable regMask
|
allocatable regMask
|
||||||
|
|
||||||
// for each block, its primary predecessor.
|
|
||||||
// A predecessor of b is primary if it is the closest
|
|
||||||
// predecessor that appears before b in the layout order.
|
|
||||||
// We record the index in the Preds list where the primary predecessor sits.
|
|
||||||
primary []int32
|
|
||||||
|
|
||||||
// live values at the end of each block. live[b.ID] is a list of value IDs
|
// live values at the end of each block. live[b.ID] is a list of value IDs
|
||||||
// which are live at the end of b, together with a count of how many instructions
|
// which are live at the end of b, together with a count of how many instructions
|
||||||
// forward to the next use.
|
// forward to the next use.
|
||||||
@ -304,6 +298,9 @@ type regAllocState struct {
|
|||||||
|
|
||||||
// choose a good order in which to visit blocks for allocation purposes.
|
// choose a good order in which to visit blocks for allocation purposes.
|
||||||
visitOrder []*Block
|
visitOrder []*Block
|
||||||
|
|
||||||
|
// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
|
||||||
|
blockOrder []int32
|
||||||
}
|
}
|
||||||
|
|
||||||
type endReg struct {
|
type endReg struct {
|
||||||
@ -636,9 +633,9 @@ func (s *regAllocState) init(f *Func) {
|
|||||||
|
|
||||||
// Compute block order. This array allows us to distinguish forward edges
|
// Compute block order. This array allows us to distinguish forward edges
|
||||||
// from backward edges and compute how far they go.
|
// from backward edges and compute how far they go.
|
||||||
blockOrder := make([]int32, f.NumBlocks())
|
s.blockOrder = make([]int32, f.NumBlocks())
|
||||||
for i, b := range s.visitOrder {
|
for i, b := range s.visitOrder {
|
||||||
blockOrder[b.ID] = int32(i)
|
s.blockOrder[b.ID] = int32(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
s.regs = make([]regState, s.numRegs)
|
s.regs = make([]regState, s.numRegs)
|
||||||
@ -664,22 +661,6 @@ func (s *regAllocState) init(f *Func) {
|
|||||||
}
|
}
|
||||||
s.computeLive()
|
s.computeLive()
|
||||||
|
|
||||||
// Compute primary predecessors.
|
|
||||||
s.primary = make([]int32, f.NumBlocks())
|
|
||||||
for _, b := range s.visitOrder {
|
|
||||||
best := -1
|
|
||||||
for i, e := range b.Preds {
|
|
||||||
p := e.b
|
|
||||||
if blockOrder[p.ID] >= blockOrder[b.ID] {
|
|
||||||
continue // backward edge
|
|
||||||
}
|
|
||||||
if best == -1 || blockOrder[p.ID] > blockOrder[b.Preds[best].b.ID] {
|
|
||||||
best = i
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.primary[b.ID] = int32(best)
|
|
||||||
}
|
|
||||||
|
|
||||||
s.endRegs = make([][]endReg, f.NumBlocks())
|
s.endRegs = make([][]endReg, f.NumBlocks())
|
||||||
s.startRegs = make([][]startReg, f.NumBlocks())
|
s.startRegs = make([][]startReg, f.NumBlocks())
|
||||||
s.spillLive = make([][]ID, f.NumBlocks())
|
s.spillLive = make([][]ID, f.NumBlocks())
|
||||||
@ -957,10 +938,49 @@ func (s *regAllocState) regalloc(f *Func) {
|
|||||||
// This is the complicated case. We have more than one predecessor,
|
// This is the complicated case. We have more than one predecessor,
|
||||||
// which means we may have Phi ops.
|
// which means we may have Phi ops.
|
||||||
|
|
||||||
// Start with the final register state of the primary predecessor
|
// Start with the final register state of the predecessor with least spill values.
|
||||||
idx := s.primary[b.ID]
|
// This is based on the following points:
|
||||||
|
// 1, The less spill value indicates that the register pressure of this path is smaller,
|
||||||
|
// so the values of this block are more likely to be allocated to registers.
|
||||||
|
// 2, Avoid the predecessor that contains the function call, because the predecessor that
|
||||||
|
// contains the function call usually generates a lot of spills and lose the previous
|
||||||
|
// allocation state.
|
||||||
|
// TODO: Improve this part. At least the size of endRegs of the predecessor also has
|
||||||
|
// an impact on the code size and compiler speed. But it is not easy to find a simple
|
||||||
|
// and efficient method that combines multiple factors.
|
||||||
|
idx := -1
|
||||||
|
for i, p := range b.Preds {
|
||||||
|
// If the predecessor has not been visited yet, skip it because its end state
|
||||||
|
// (redRegs and spillLive) has not been computed yet.
|
||||||
|
pb := p.b
|
||||||
|
if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if idx == -1 {
|
||||||
|
idx = i
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pSel := b.Preds[idx].b
|
||||||
|
if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
|
||||||
|
idx = i
|
||||||
|
} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
|
||||||
|
// Use a bit of likely information. After critical pass, pb and pSel must
|
||||||
|
// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
|
||||||
|
// TODO: improve the prediction of the likely predecessor. The following
|
||||||
|
// method is only suitable for the simplest cases. For complex cases,
|
||||||
|
// the prediction may be inaccurate, but this does not affect the
|
||||||
|
// correctness of the program.
|
||||||
|
// According to the layout algorithm, the predecessor with the
|
||||||
|
// smaller blockOrder is the true branch, and the test results show
|
||||||
|
// that it is better to choose the predecessor with a smaller
|
||||||
|
// blockOrder than no choice.
|
||||||
|
if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
|
||||||
|
idx = i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if idx < 0 {
|
if idx < 0 {
|
||||||
f.Fatalf("block with no primary predecessor %s", b)
|
f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
|
||||||
}
|
}
|
||||||
p := b.Preds[idx].b
|
p := b.Preds[idx].b
|
||||||
s.setState(s.endRegs[p.ID])
|
s.setState(s.endRegs[p.ID])
|
||||||
@ -1048,7 +1068,7 @@ func (s *regAllocState) regalloc(f *Func) {
|
|||||||
// If one of the other inputs of v is in a register, and the register is available,
|
// If one of the other inputs of v is in a register, and the register is available,
|
||||||
// select this register, which can save some unnecessary copies.
|
// select this register, which can save some unnecessary copies.
|
||||||
for i, pe := range b.Preds {
|
for i, pe := range b.Preds {
|
||||||
if int32(i) == idx {
|
if i == idx {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
ri := noRegister
|
ri := noRegister
|
||||||
|
@ -322,6 +322,9 @@ func NoFix64A(divr int64) (int64, int64) {
|
|||||||
if divr > 5 {
|
if divr > 5 {
|
||||||
d /= divr // amd64:-"JMP"
|
d /= divr // amd64:-"JMP"
|
||||||
e %= divr // amd64:-"JMP"
|
e %= divr // amd64:-"JMP"
|
||||||
|
// The following statement is to avoid conflict between the above check
|
||||||
|
// and the normal JMP generated at the end of the block.
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
@ -333,6 +336,7 @@ func NoFix64B(divd int64) (int64, int64) {
|
|||||||
if divd > -9223372036854775808 {
|
if divd > -9223372036854775808 {
|
||||||
d = divd / divr // amd64:-"JMP"
|
d = divd / divr // amd64:-"JMP"
|
||||||
e = divd % divr // amd64:-"JMP"
|
e = divd % divr // amd64:-"JMP"
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
@ -347,6 +351,7 @@ func NoFix32A(divr int32) (int32, int32) {
|
|||||||
// amd64:-"JMP"
|
// amd64:-"JMP"
|
||||||
// 386:-"JMP"
|
// 386:-"JMP"
|
||||||
e %= divr
|
e %= divr
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
@ -362,6 +367,7 @@ func NoFix32B(divd int32) (int32, int32) {
|
|||||||
// amd64:-"JMP"
|
// amd64:-"JMP"
|
||||||
// 386:-"JMP"
|
// 386:-"JMP"
|
||||||
e = divd % divr
|
e = divd % divr
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
@ -376,6 +382,7 @@ func NoFix16A(divr int16) (int16, int16) {
|
|||||||
// amd64:-"JMP"
|
// amd64:-"JMP"
|
||||||
// 386:-"JMP"
|
// 386:-"JMP"
|
||||||
e %= divr
|
e %= divr
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
@ -391,6 +398,7 @@ func NoFix16B(divd int16) (int16, int16) {
|
|||||||
// amd64:-"JMP"
|
// amd64:-"JMP"
|
||||||
// 386:-"JMP"
|
// 386:-"JMP"
|
||||||
e = divd % divr
|
e = divd % divr
|
||||||
|
d += e
|
||||||
}
|
}
|
||||||
return d, e
|
return d, e
|
||||||
}
|
}
|
||||||
|
@ -426,7 +426,7 @@ func UintGeqZero(a uint8, b uint16, c uint32, d uint64) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
|
func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
|
||||||
// arm64: `CBZW`, `CBNZW`, `CBNZ`, -`(CMPW|CMP|BLS|BHI)`
|
// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLS|BHI)`
|
||||||
if a > 0 || b > 0 || c > 0 || d > 0 {
|
if a > 0 || b > 0 || c > 0 || d > 0 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
@ -434,7 +434,7 @@ func UintGtZero(a uint8, b uint16, c uint32, d uint64) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
|
func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
|
||||||
// arm64: `CBNZW`, `CBZW`, `CBZ`, -`(CMPW|CMP|BHI|BLS)`
|
// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHI|BLS)`
|
||||||
if a <= 0 || b <= 0 || c <= 0 || d <= 0 {
|
if a <= 0 || b <= 0 || c <= 0 || d <= 0 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
@ -442,7 +442,7 @@ func UintLeqZero(a uint8, b uint16, c uint32, d uint64) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
|
func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
|
||||||
// arm64: `CBNZW`, `CBZW`, `CBZW`, `CBZ`, -`(CMPW|CMP|BHS|BLO)`
|
// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BHS|BLO)`
|
||||||
if a < 1 || b < 1 || c < 1 || d < 1 {
|
if a < 1 || b < 1 || c < 1 || d < 1 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
@ -450,7 +450,7 @@ func UintLtOne(a uint8, b uint16, c uint32, d uint64) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int {
|
func UintGeqOne(a uint8, b uint16, c uint32, d uint64) int {
|
||||||
// arm64: `CBZW`, `CBNZW`, `CBNZ`, -`(CMPW|CMP|BLO|BHS)`
|
// arm64: `(CBN?ZW)`, `(CBN?Z[^W])`, -`(CMPW|CMP|BLO|BHS)`
|
||||||
if a >= 1 || b >= 1 || c >= 1 || d >= 1 {
|
if a >= 1 || b >= 1 || c >= 1 || d >= 1 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user