mirror of
https://github.com/golang/go
synced 2024-11-05 15:16:11 -07:00
cmd/compile: Use Sreedhar+Gao phi building algorithm
Should be more asymptotically happy. We process each variable in turn to find all the locations where it needs a phi (the dominance frontier of all of its definitions). Then we add all those phis. This takes O(n * #variables), although hopefully much less. Then we do a single tree walk to match all the FwdRefs with the nearest definition or phi. This takes O(n) time. The one remaining inefficiency is that we might end up introducing a bunch of dead phis in the first step. A TODO is to introduce phis only where they might be used by a read. The old algorithm is still faster on small functions, so there's a cutover size (currently 500 blocks). This algorithm supercedes the David's sparse phi placement algorithm for large functions. Lowers compile time of example from #14934 from ~10 sec to ~4 sec. Lowers compile time of example from #16361 from ~4.5 sec to ~3 sec. Lowers #16407 from ~20 min to ~30 sec. Update #14934 Update #16361 Fixes #16407 Change-Id: I1cff6364e1623c143190b6a924d7599e309db58f Reviewed-on: https://go-review.googlesource.com/30163 Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
parent
d0e92f61e5
commit
5a6e511c61
521
src/cmd/compile/internal/gc/phi.go
Normal file
521
src/cmd/compile/internal/gc/phi.go
Normal file
@ -0,0 +1,521 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gc
|
||||
|
||||
import (
|
||||
"cmd/compile/internal/ssa"
|
||||
"container/heap"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// This file contains the algorithm to place phi nodes in a function.
|
||||
// For small functions, we use Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau.
|
||||
// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
|
||||
// For large functions, we use Sreedhar & Gao: A Linear Time Algorithm for Placing Φ-Nodes.
|
||||
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.8.1979&rep=rep1&type=pdf
|
||||
|
||||
const smallBlocks = 500
|
||||
|
||||
const debugPhi = false
|
||||
|
||||
// insertPhis finds all the places in the function where a phi is
|
||||
// necessary and inserts them.
|
||||
// Uses FwdRef ops to find all uses of variables, and s.defvars to find
|
||||
// all definitions.
|
||||
// Phi values are inserted, and all FwdRefs are changed to a Copy
|
||||
// of the appropriate phi or definition.
|
||||
// TODO: make this part of cmd/compile/internal/ssa somehow?
|
||||
func (s *state) insertPhis() {
|
||||
if len(s.f.Blocks) <= smallBlocks && false {
|
||||
sps := simplePhiState{s: s, f: s.f, defvars: s.defvars}
|
||||
sps.insertPhis()
|
||||
return
|
||||
}
|
||||
ps := phiState{s: s, f: s.f, defvars: s.defvars}
|
||||
ps.insertPhis()
|
||||
}
|
||||
|
||||
type phiState struct {
|
||||
s *state // SSA state
|
||||
f *ssa.Func // function to work on
|
||||
defvars []map[*Node]*ssa.Value // defined variables at end of each block
|
||||
|
||||
varnum map[*Node]int32 // variable numbering
|
||||
|
||||
// properties of the dominator tree
|
||||
idom []*ssa.Block // dominator parents
|
||||
tree []domBlock // dominator child+sibling
|
||||
level []int32 // level in dominator tree (0 = root or unreachable, 1 = children of root, ...)
|
||||
|
||||
// scratch locations
|
||||
priq blockHeap // priority queue of blocks, higher level (toward leaves) = higher priority
|
||||
q []*ssa.Block // inner loop queue
|
||||
queued *sparseSet // has been put in q
|
||||
hasPhi *sparseSet // has a phi
|
||||
hasDef *sparseSet // has a write of the variable we're processing
|
||||
|
||||
// miscellaneous
|
||||
placeholder *ssa.Value // dummy value to use as a "not set yet" placeholder.
|
||||
}
|
||||
|
||||
func (s *phiState) insertPhis() {
|
||||
if debugPhi {
|
||||
fmt.Println(s.f.String())
|
||||
}
|
||||
|
||||
// Find all the variables for which we need to match up reads & writes.
|
||||
// This step prunes any basic-block-only variables from consideration.
|
||||
// Generate a numbering for these variables.
|
||||
s.varnum = map[*Node]int32{}
|
||||
var vars []*Node
|
||||
var vartypes []ssa.Type
|
||||
for _, b := range s.f.Blocks {
|
||||
for _, v := range b.Values {
|
||||
if v.Op != ssa.OpFwdRef {
|
||||
continue
|
||||
}
|
||||
var_ := v.Aux.(*Node)
|
||||
|
||||
// Optimization: look back 1 block for the definition.
|
||||
if len(b.Preds) == 1 {
|
||||
c := b.Preds[0].Block()
|
||||
if w := s.defvars[c.ID][var_]; w != nil {
|
||||
v.Op = ssa.OpCopy
|
||||
v.Aux = nil
|
||||
v.AddArg(w)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := s.varnum[var_]; ok {
|
||||
continue
|
||||
}
|
||||
s.varnum[var_] = int32(len(vartypes))
|
||||
if debugPhi {
|
||||
fmt.Printf("var%d = %v\n", len(vartypes), var_)
|
||||
}
|
||||
vars = append(vars, var_)
|
||||
vartypes = append(vartypes, v.Type)
|
||||
}
|
||||
}
|
||||
|
||||
if len(vartypes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Find all definitions of the variables we need to process.
|
||||
// defs[n] contains all the blocks in which variable number n is assigned.
|
||||
defs := make([][]*ssa.Block, len(vartypes))
|
||||
for _, b := range s.f.Blocks {
|
||||
for var_ := range s.defvars[b.ID] { // TODO: encode defvars some other way (explicit ops)? make defvars[n] a slice instead of a map.
|
||||
if n, ok := s.varnum[var_]; ok {
|
||||
defs[n] = append(defs[n], b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make dominator tree.
|
||||
s.idom = s.f.Idom()
|
||||
s.tree = make([]domBlock, s.f.NumBlocks())
|
||||
for _, b := range s.f.Blocks {
|
||||
p := s.idom[b.ID]
|
||||
if p != nil {
|
||||
s.tree[b.ID].sibling = s.tree[p.ID].firstChild
|
||||
s.tree[p.ID].firstChild = b
|
||||
}
|
||||
}
|
||||
// Compute levels in dominator tree.
|
||||
// With parent pointers we can do a depth-first walk without
|
||||
// any auxiliary storage.
|
||||
s.level = make([]int32, s.f.NumBlocks())
|
||||
b := s.f.Entry
|
||||
levels:
|
||||
for {
|
||||
if p := s.idom[b.ID]; p != nil {
|
||||
s.level[b.ID] = s.level[p.ID] + 1
|
||||
if debugPhi {
|
||||
fmt.Printf("level %s = %d\n", b, s.level[b.ID])
|
||||
}
|
||||
}
|
||||
if c := s.tree[b.ID].firstChild; c != nil {
|
||||
b = c
|
||||
continue
|
||||
}
|
||||
for {
|
||||
if c := s.tree[b.ID].sibling; c != nil {
|
||||
b = c
|
||||
continue levels
|
||||
}
|
||||
b = s.idom[b.ID]
|
||||
if b == nil {
|
||||
break levels
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate scratch locations.
|
||||
s.priq.level = s.level
|
||||
s.q = make([]*ssa.Block, 0, s.f.NumBlocks())
|
||||
s.queued = newSparseSet(s.f.NumBlocks())
|
||||
s.hasPhi = newSparseSet(s.f.NumBlocks())
|
||||
s.hasDef = newSparseSet(s.f.NumBlocks())
|
||||
s.placeholder = s.s.entryNewValue0(ssa.OpUnknown, ssa.TypeInvalid)
|
||||
|
||||
// Generate phi ops for each variable.
|
||||
for n := range vartypes {
|
||||
s.insertVarPhis(n, vars[n], defs[n], vartypes[n])
|
||||
}
|
||||
|
||||
// Resolve FwdRefs to the correct write or phi.
|
||||
s.resolveFwdRefs()
|
||||
|
||||
// Erase variable numbers stored in AuxInt fields of phi ops. They are no longer needed.
|
||||
for _, b := range s.f.Blocks {
|
||||
for _, v := range b.Values {
|
||||
if v.Op == ssa.OpPhi {
|
||||
v.AuxInt = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *phiState) insertVarPhis(n int, var_ *Node, defs []*ssa.Block, typ ssa.Type) {
|
||||
priq := &s.priq
|
||||
q := s.q
|
||||
queued := s.queued
|
||||
queued.clear()
|
||||
hasPhi := s.hasPhi
|
||||
hasPhi.clear()
|
||||
hasDef := s.hasDef
|
||||
hasDef.clear()
|
||||
|
||||
// Add defining blocks to priority queue.
|
||||
for _, b := range defs {
|
||||
priq.a = append(priq.a, b)
|
||||
hasDef.add(b.ID)
|
||||
if debugPhi {
|
||||
fmt.Printf("def of var%d in %s\n", n, b)
|
||||
}
|
||||
}
|
||||
heap.Init(priq)
|
||||
|
||||
// Visit blocks defining variable n, from deepest to shallowest.
|
||||
for len(priq.a) > 0 {
|
||||
currentRoot := heap.Pop(priq).(*ssa.Block)
|
||||
if debugPhi {
|
||||
fmt.Printf("currentRoot %s\n", currentRoot)
|
||||
}
|
||||
// Walk subtree below definition.
|
||||
// Skip subtrees we've done in previous iterations.
|
||||
// Find edges exiting tree dominated by definition (the dominance frontier).
|
||||
// Insert phis at target blocks.
|
||||
if queued.contains(currentRoot.ID) {
|
||||
s.s.Fatalf("root already in queue")
|
||||
}
|
||||
q = append(q, currentRoot)
|
||||
queued.add(currentRoot.ID)
|
||||
for len(q) > 0 {
|
||||
b := q[len(q)-1]
|
||||
q = q[:len(q)-1]
|
||||
if debugPhi {
|
||||
fmt.Printf(" processing %s\n", b)
|
||||
}
|
||||
|
||||
for _, e := range b.Succs {
|
||||
c := e.Block()
|
||||
// TODO: if the variable is dead at c, skip it.
|
||||
if s.level[c.ID] > s.level[currentRoot.ID] {
|
||||
// a D-edge, or an edge whose target is in currentRoot's subtree.
|
||||
continue
|
||||
}
|
||||
if !hasPhi.contains(c.ID) {
|
||||
// Add a phi to block c for variable n.
|
||||
hasPhi.add(c.ID)
|
||||
v := c.NewValue0I(currentRoot.Line, ssa.OpPhi, typ, int64(n)) // TODO: line number right?
|
||||
// Note: we store the variable number in the phi's AuxInt field. Used temporarily by phi building.
|
||||
s.s.addNamedValue(var_, v)
|
||||
for i := 0; i < len(c.Preds); i++ {
|
||||
v.AddArg(s.placeholder) // Actual args will be filled in by resolveFwdRefs.
|
||||
}
|
||||
if debugPhi {
|
||||
fmt.Printf("new phi for var%d in %s: %s\n", n, c, v)
|
||||
}
|
||||
if !hasDef.contains(c.ID) {
|
||||
// There's now a new definition of this variable in block c.
|
||||
// Add it to the priority queue to explore.
|
||||
heap.Push(priq, c)
|
||||
hasDef.add(c.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Visit children if they have not been visited yet.
|
||||
for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling {
|
||||
if !queued.contains(c.ID) {
|
||||
q = append(q, c)
|
||||
queued.add(c.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// resolveFwdRefs links all FwdRef uses up to their nearest dominating definition.
|
||||
func (s *phiState) resolveFwdRefs() {
|
||||
// Do a depth-first walk of the dominator tree, keeping track
|
||||
// of the most-recently-seen value for each variable.
|
||||
|
||||
// Map from variable ID to SSA value at the current point of the walk.
|
||||
values := make([]*ssa.Value, len(s.varnum))
|
||||
for i := range values {
|
||||
values[i] = s.placeholder
|
||||
}
|
||||
|
||||
// Stack of work to do.
|
||||
type stackEntry struct {
|
||||
b *ssa.Block // block to explore
|
||||
|
||||
// variable/value pair to reinstate on exit
|
||||
n int32 // variable ID
|
||||
v *ssa.Value
|
||||
|
||||
// Note: only one of b or n,v will be set.
|
||||
}
|
||||
var stk []stackEntry
|
||||
|
||||
stk = append(stk, stackEntry{b: s.f.Entry})
|
||||
for len(stk) > 0 {
|
||||
work := stk[len(stk)-1]
|
||||
stk = stk[:len(stk)-1]
|
||||
|
||||
b := work.b
|
||||
if b == nil {
|
||||
// On exit from a block, this case will undo any assignments done below.
|
||||
values[work.n] = work.v
|
||||
continue
|
||||
}
|
||||
|
||||
// Process phis as new defs. They come before FwdRefs in this block.
|
||||
for _, v := range b.Values {
|
||||
if v.Op != ssa.OpPhi {
|
||||
continue
|
||||
}
|
||||
n := int32(v.AuxInt)
|
||||
// Remember the old assignment so we can undo it when we exit b.
|
||||
stk = append(stk, stackEntry{n: n, v: values[n]})
|
||||
// Record the new assignment.
|
||||
values[n] = v
|
||||
}
|
||||
|
||||
// Replace a FwdRef op with the current incoming value for its variable.
|
||||
for _, v := range b.Values {
|
||||
if v.Op != ssa.OpFwdRef {
|
||||
continue
|
||||
}
|
||||
n := s.varnum[v.Aux.(*Node)]
|
||||
v.Op = ssa.OpCopy
|
||||
v.Aux = nil
|
||||
v.AddArg(values[n])
|
||||
}
|
||||
|
||||
// Establish values for variables defined in b.
|
||||
for var_, v := range s.defvars[b.ID] {
|
||||
n, ok := s.varnum[var_]
|
||||
if !ok {
|
||||
// some variable not live across a basic block boundary.
|
||||
continue
|
||||
}
|
||||
// Remember the old assignment so we can undo it when we exit b.
|
||||
stk = append(stk, stackEntry{n: n, v: values[n]})
|
||||
// Record the new assignment.
|
||||
values[n] = v
|
||||
}
|
||||
|
||||
// Replace phi args in successors with the current incoming value.
|
||||
for _, e := range b.Succs {
|
||||
c, i := e.Block(), e.Index()
|
||||
for j := len(c.Values) - 1; j >= 0; j-- {
|
||||
v := c.Values[j]
|
||||
if v.Op != ssa.OpPhi {
|
||||
break // All phis will be at the end of the block during phi building.
|
||||
}
|
||||
v.SetArg(i, values[v.AuxInt])
|
||||
}
|
||||
}
|
||||
|
||||
// Walk children in dominator tree.
|
||||
for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling {
|
||||
stk = append(stk, stackEntry{b: c})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// domBlock contains extra per-block information to record the dominator tree.
|
||||
type domBlock struct {
|
||||
firstChild *ssa.Block // first child of block in dominator tree
|
||||
sibling *ssa.Block // next child of parent in dominator tree
|
||||
}
|
||||
|
||||
// A block heap is used as a priority queue to implement the PiggyBank
|
||||
// from Sreedhar and Gao. That paper uses an array which is better
|
||||
// asymptotically but worse in the common case when the PiggyBank
|
||||
// holds a sparse set of blocks.
|
||||
type blockHeap struct {
|
||||
a []*ssa.Block // block IDs in heap
|
||||
level []int32 // depth in dominator tree (static, used for determining priority)
|
||||
}
|
||||
|
||||
func (h *blockHeap) Len() int { return len(h.a) }
|
||||
func (h *blockHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] }
|
||||
|
||||
func (h *blockHeap) Push(x interface{}) {
|
||||
v := x.(*ssa.Block)
|
||||
h.a = append(h.a, v)
|
||||
}
|
||||
func (h *blockHeap) Pop() interface{} {
|
||||
old := h.a
|
||||
n := len(old)
|
||||
x := old[n-1]
|
||||
h.a = old[:n-1]
|
||||
return x
|
||||
}
|
||||
func (h *blockHeap) Less(i, j int) bool {
|
||||
return h.level[h.a[i].ID] > h.level[h.a[j].ID]
|
||||
}
|
||||
|
||||
// TODO: stop walking the iterated domininance frontier when
|
||||
// the variable is dead. Maybe detect that by checking if the
|
||||
// node we're on is reverse dominated by all the reads?
|
||||
// Reverse dominated by the highest common successor of all the reads?
|
||||
|
||||
// copy of ../ssa/sparseset.go
|
||||
// TODO: move this file to ../ssa, then use sparseSet there.
|
||||
type sparseSet struct {
|
||||
dense []ssa.ID
|
||||
sparse []int32
|
||||
}
|
||||
|
||||
// newSparseSet returns a sparseSet that can represent
|
||||
// integers between 0 and n-1
|
||||
func newSparseSet(n int) *sparseSet {
|
||||
return &sparseSet{dense: nil, sparse: make([]int32, n)}
|
||||
}
|
||||
|
||||
func (s *sparseSet) contains(x ssa.ID) bool {
|
||||
i := s.sparse[x]
|
||||
return i < int32(len(s.dense)) && s.dense[i] == x
|
||||
}
|
||||
|
||||
func (s *sparseSet) add(x ssa.ID) {
|
||||
i := s.sparse[x]
|
||||
if i < int32(len(s.dense)) && s.dense[i] == x {
|
||||
return
|
||||
}
|
||||
s.dense = append(s.dense, x)
|
||||
s.sparse[x] = int32(len(s.dense)) - 1
|
||||
}
|
||||
|
||||
func (s *sparseSet) clear() {
|
||||
s.dense = s.dense[:0]
|
||||
}
|
||||
|
||||
// Variant to use for small functions.
|
||||
type simplePhiState struct {
|
||||
s *state // SSA state
|
||||
f *ssa.Func // function to work on
|
||||
fwdrefs []*ssa.Value // list of FwdRefs to be processed
|
||||
defvars []map[*Node]*ssa.Value // defined variables at end of each block
|
||||
}
|
||||
|
||||
func (s *simplePhiState) insertPhis() {
|
||||
// Find FwdRef ops.
|
||||
for _, b := range s.f.Blocks {
|
||||
for _, v := range b.Values {
|
||||
if v.Op != ssa.OpFwdRef {
|
||||
continue
|
||||
}
|
||||
s.fwdrefs = append(s.fwdrefs, v)
|
||||
var_ := v.Aux.(*Node)
|
||||
if _, ok := s.defvars[b.ID][var_]; !ok {
|
||||
s.defvars[b.ID][var_] = v // treat FwdDefs as definitions.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var args []*ssa.Value
|
||||
|
||||
loop:
|
||||
for len(s.fwdrefs) > 0 {
|
||||
v := s.fwdrefs[len(s.fwdrefs)-1]
|
||||
s.fwdrefs = s.fwdrefs[:len(s.fwdrefs)-1]
|
||||
b := v.Block
|
||||
var_ := v.Aux.(*Node)
|
||||
if len(b.Preds) == 0 {
|
||||
if b == s.f.Entry {
|
||||
// No variable should be live at entry.
|
||||
s.s.Fatalf("Value live at entry. It shouldn't be. func %s, node %v, value %v", s.f.Name, var_, v)
|
||||
}
|
||||
// This block is dead; it has no predecessors and it is not the entry block.
|
||||
// It doesn't matter what we use here as long as it is well-formed.
|
||||
v.Op = ssa.OpUnknown
|
||||
v.Aux = nil
|
||||
continue
|
||||
}
|
||||
// Find variable value on each predecessor.
|
||||
args = args[:0]
|
||||
for _, e := range b.Preds {
|
||||
args = append(args, s.lookupVarOutgoing(e.Block(), v.Type, var_, v.Line))
|
||||
}
|
||||
|
||||
// Decide if we need a phi or not. We need a phi if there
|
||||
// are two different args (which are both not v).
|
||||
var w *ssa.Value
|
||||
for _, a := range args {
|
||||
if a == v {
|
||||
continue // self-reference
|
||||
}
|
||||
if a == w {
|
||||
continue // already have this witness
|
||||
}
|
||||
if w != nil {
|
||||
// two witnesses, need a phi value
|
||||
v.Op = ssa.OpPhi
|
||||
v.AddArgs(args...)
|
||||
v.Aux = nil
|
||||
continue loop
|
||||
}
|
||||
w = a // save witness
|
||||
}
|
||||
if w == nil {
|
||||
s.s.Fatalf("no witness for reachable phi %s", v)
|
||||
}
|
||||
// One witness. Make v a copy of w.
|
||||
v.Op = ssa.OpCopy
|
||||
v.Aux = nil
|
||||
v.AddArg(w)
|
||||
}
|
||||
}
|
||||
|
||||
// lookupVarOutgoing finds the variable's value at the end of block b.
|
||||
func (s *simplePhiState) lookupVarOutgoing(b *ssa.Block, t ssa.Type, var_ *Node, line int32) *ssa.Value {
|
||||
for {
|
||||
if v := s.defvars[b.ID][var_]; v != nil {
|
||||
return v
|
||||
}
|
||||
// The variable is not defined by b and we haven't looked it up yet.
|
||||
// If b has exactly one predecessor, loop to look it up there.
|
||||
// Otherwise, give up and insert a new FwdRef and resolve it later.
|
||||
if len(b.Preds) != 1 {
|
||||
break
|
||||
}
|
||||
b = b.Preds[0].Block()
|
||||
}
|
||||
// Generate a FwdRef for the variable and return that.
|
||||
v := b.NewValue0A(line, ssa.OpFwdRef, t, var_)
|
||||
s.defvars[b.ID][var_] = v
|
||||
s.s.addNamedValue(var_, v)
|
||||
s.fwdrefs = append(s.fwdrefs, v)
|
||||
return v
|
||||
}
|
@ -72,6 +72,7 @@ func instrument(fn *Node) {
|
||||
fn.Func.Enter.Prepend(nd)
|
||||
nd = mkcall("racefuncexit", nil, nil)
|
||||
fn.Func.Exit.Append(nd)
|
||||
fn.Func.Dcl = append(fn.Func.Dcl, &nodpc)
|
||||
}
|
||||
|
||||
if Debug['W'] != 0 {
|
||||
|
@ -1,202 +0,0 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package gc
|
||||
|
||||
import (
|
||||
"cmd/compile/internal/ssa"
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
// sparseDefState contains a Go map from ONAMEs (*Node) to sparse definition trees, and
|
||||
// a search helper for the CFG's dominator tree in which those definitions are embedded.
|
||||
// Once initialized, given a use of an ONAME within a block, the ssa definition for
|
||||
// that ONAME can be discovered in time roughly proportional to the log of the number
|
||||
// of SSA definitions of that ONAME (thus avoiding pathological quadratic behavior for
|
||||
// very large programs). The helper contains state (a dominator tree numbering) common
|
||||
// to all the sparse definition trees, as well as some necessary data obtained from
|
||||
// the ssa package.
|
||||
//
|
||||
// This algorithm has improved asymptotic complexity, but the constant factor is
|
||||
// rather large and thus it is only preferred for very large inputs containing
|
||||
// 1000s of blocks and variables.
|
||||
type sparseDefState struct {
|
||||
helper *ssa.SparseTreeHelper // contains one copy of information needed to do sparse mapping
|
||||
defmapForOname map[*Node]*onameDefs // for each ONAME, its definition set (normal and phi)
|
||||
}
|
||||
|
||||
// onameDefs contains a record of definitions (ordinary and implied phi function) for a single OName.
|
||||
// stm is the set of definitions for the OName.
|
||||
// firstdef and lastuse are postorder block numberings that
|
||||
// conservatively bracket the entire lifetime of the OName.
|
||||
type onameDefs struct {
|
||||
stm *ssa.SparseTreeMap
|
||||
// firstdef and lastuse define an interval in the postorder numbering
|
||||
// that is guaranteed to include the entire lifetime of an ONAME.
|
||||
// In the postorder numbering, math.MaxInt32 is before anything,
|
||||
// and 0 is after-or-equal all exit nodes and infinite loops.
|
||||
firstdef int32 // the first definition of this ONAME *in the postorder numbering*
|
||||
lastuse int32 // the last use of this ONAME *in the postorder numbering*
|
||||
}
|
||||
|
||||
// defsFor finds or creates-and-inserts-in-map the definition information
|
||||
// (sparse tree and live range) for a given OName.
|
||||
func (m *sparseDefState) defsFor(n *Node) *onameDefs {
|
||||
d := m.defmapForOname[n]
|
||||
if d != nil {
|
||||
return d
|
||||
}
|
||||
// Reminder: firstdef/lastuse are postorder indices, not block indices,
|
||||
// so these default values define an empty interval, not the entire one.
|
||||
d = &onameDefs{stm: m.helper.NewTree(), firstdef: 0, lastuse: math.MaxInt32}
|
||||
m.defmapForOname[n] = d
|
||||
return d
|
||||
}
|
||||
|
||||
// Insert adds a definition at b (with specified before/within/after adjustment)
|
||||
// to sparse tree onameDefs. The lifetime is extended as necessary.
|
||||
func (m *sparseDefState) Insert(tree *onameDefs, b *ssa.Block, adjust int32) {
|
||||
bponum := m.helper.Ponums[b.ID]
|
||||
if bponum > tree.firstdef {
|
||||
tree.firstdef = bponum
|
||||
}
|
||||
tree.stm.Insert(b, adjust, b, m.helper)
|
||||
}
|
||||
|
||||
// Use updates tree to record a use within b, extending the lifetime as necessary.
|
||||
func (m *sparseDefState) Use(tree *onameDefs, b *ssa.Block) {
|
||||
bponum := m.helper.Ponums[b.ID]
|
||||
if bponum < tree.lastuse {
|
||||
tree.lastuse = bponum
|
||||
}
|
||||
}
|
||||
|
||||
// locatePotentialPhiFunctions finds all the places where phi functions
|
||||
// will be inserted into a program and records those and ordinary definitions
|
||||
// in a "map" (not a Go map) that given an OName and use site, returns the
|
||||
// SSA definition for that OName that will reach the use site (that is,
|
||||
// the use site's nearest def/phi site in the dominator tree.)
|
||||
func (s *state) locatePotentialPhiFunctions(fn *Node) *sparseDefState {
|
||||
// s.config.SparsePhiCutoff() is compared with product of numblocks and numvalues,
|
||||
// if product is smaller than cutoff, use old non-sparse method.
|
||||
// cutoff == 0 implies all sparse
|
||||
// cutoff == uint(-1) implies all non-sparse
|
||||
if uint64(s.f.NumValues())*uint64(s.f.NumBlocks()) < s.config.SparsePhiCutoff() {
|
||||
return nil
|
||||
}
|
||||
|
||||
helper := ssa.NewSparseTreeHelper(s.f)
|
||||
po := helper.Po // index by block.ID to obtain postorder # of block.
|
||||
trees := make(map[*Node]*onameDefs)
|
||||
dm := &sparseDefState{defmapForOname: trees, helper: helper}
|
||||
|
||||
// Process params, taking note of their special lifetimes
|
||||
b := s.f.Entry
|
||||
for _, n := range fn.Func.Dcl {
|
||||
switch n.Class {
|
||||
case PPARAM, PPARAMOUT:
|
||||
t := dm.defsFor(n)
|
||||
dm.Insert(t, b, ssa.AdjustBefore) // define param at entry block
|
||||
if n.Class == PPARAMOUT {
|
||||
dm.Use(t, po[0]) // Explicitly use PPARAMOUT at very last block
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Process memory variable.
|
||||
t := dm.defsFor(&memVar)
|
||||
dm.Insert(t, b, ssa.AdjustBefore) // define memory at entry block
|
||||
dm.Use(t, po[0]) // Explicitly use memory at last block
|
||||
|
||||
// Next load the map w/ basic definitions for ONames recorded per-block
|
||||
// Iterate over po to avoid unreachable blocks.
|
||||
for i := len(po) - 1; i >= 0; i-- {
|
||||
b := po[i]
|
||||
m := s.defvars[b.ID]
|
||||
for n := range m { // no specified order, but per-node trees are independent.
|
||||
t := dm.defsFor(n)
|
||||
dm.Insert(t, b, ssa.AdjustWithin)
|
||||
}
|
||||
}
|
||||
|
||||
// Find last use of each variable
|
||||
for _, v := range s.fwdRefs {
|
||||
b := v.Block
|
||||
name := v.Aux.(*Node)
|
||||
t := dm.defsFor(name)
|
||||
dm.Use(t, b)
|
||||
}
|
||||
|
||||
for _, t := range trees {
|
||||
// iterating over names in the outer loop
|
||||
for change := true; change; {
|
||||
change = false
|
||||
for i := t.firstdef; i >= t.lastuse; i-- {
|
||||
// Iterating in reverse of post-order reduces number of 'change' iterations;
|
||||
// all possible forward flow goes through each time.
|
||||
b := po[i]
|
||||
// Within tree t, would a use at b require a phi function to ensure a single definition?
|
||||
// TODO: perhaps more efficient to record specific use sites instead of range?
|
||||
if len(b.Preds) < 2 {
|
||||
continue // no phi possible
|
||||
}
|
||||
phi := t.stm.Find(b, ssa.AdjustWithin, helper) // Look for defs in earlier block or AdjustBefore in this one.
|
||||
if phi != nil && phi.(*ssa.Block) == b {
|
||||
continue // has a phi already in this block.
|
||||
}
|
||||
var defseen interface{}
|
||||
// Do preds see different definitions? if so, need a phi function.
|
||||
for _, e := range b.Preds {
|
||||
p := e.Block()
|
||||
dm.Use(t, p) // always count phi pred as "use"; no-op except for loop edges, which matter.
|
||||
x := t.stm.Find(p, ssa.AdjustAfter, helper) // Look for defs reaching or within predecessors.
|
||||
if x == nil { // nil def from a predecessor means a backedge that will be visited soon.
|
||||
continue
|
||||
}
|
||||
if defseen == nil {
|
||||
defseen = x
|
||||
}
|
||||
if defseen != x {
|
||||
// Need to insert a phi function here because predecessors's definitions differ.
|
||||
change = true
|
||||
// Phi insertion is at AdjustBefore, visible with find in same block at AdjustWithin or AdjustAfter.
|
||||
dm.Insert(t, b, ssa.AdjustBefore)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return dm
|
||||
}
|
||||
|
||||
// FindBetterDefiningBlock tries to find a better block for a definition of OName name
|
||||
// reaching (or within) p than p itself. If it cannot, it returns p instead.
|
||||
// This aids in more efficient location of phi functions, since it can skip over
|
||||
// branch code that might contain a definition of name if it actually does not.
|
||||
func (m *sparseDefState) FindBetterDefiningBlock(name *Node, p *ssa.Block) *ssa.Block {
|
||||
if m == nil {
|
||||
return p
|
||||
}
|
||||
t := m.defmapForOname[name]
|
||||
// For now this is fail-soft, since the old algorithm still works using the unimproved block.
|
||||
if t == nil {
|
||||
return p
|
||||
}
|
||||
x := t.stm.Find(p, ssa.AdjustAfter, m.helper)
|
||||
if x == nil {
|
||||
return p
|
||||
}
|
||||
b := x.(*ssa.Block)
|
||||
if b == nil {
|
||||
return p
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func (d *onameDefs) String() string {
|
||||
return fmt.Sprintf("onameDefs:first=%d,last=%d,tree=%s", d.firstdef, d.lastuse, d.stm.String())
|
||||
}
|
@ -80,6 +80,7 @@ func buildssa(fn *Node) *ssa.Func {
|
||||
// Allocate starting values
|
||||
s.labels = map[string]*ssaLabel{}
|
||||
s.labeledNodes = map[*Node]*ssaLabel{}
|
||||
s.fwdVars = map[*Node]*ssa.Value{}
|
||||
s.startmem = s.entryNewValue0(ssa.OpInitMem, ssa.TypeMem)
|
||||
s.sp = s.entryNewValue0(ssa.OpSP, Types[TUINTPTR]) // TODO: use generic pointer type (unsafe.Pointer?) instead
|
||||
s.sb = s.entryNewValue0(ssa.OpSB, Types[TUINTPTR])
|
||||
@ -114,6 +115,21 @@ func buildssa(fn *Node) *ssa.Func {
|
||||
}
|
||||
}
|
||||
|
||||
// Populate arguments.
|
||||
for _, n := range fn.Func.Dcl {
|
||||
if n.Class != PPARAM {
|
||||
continue
|
||||
}
|
||||
var v *ssa.Value
|
||||
if s.canSSA(n) {
|
||||
v = s.newValue0A(ssa.OpArg, n.Type, n)
|
||||
} else {
|
||||
// Not SSAable. Load it.
|
||||
v = s.newValue2(ssa.OpLoad, n.Type, s.decladdrs[n], s.startmem)
|
||||
}
|
||||
s.vars[n] = v
|
||||
}
|
||||
|
||||
// Convert the AST-based IR to the SSA-based IR
|
||||
s.stmts(fn.Func.Enter)
|
||||
s.stmts(fn.Nbody)
|
||||
@ -151,16 +167,7 @@ func buildssa(fn *Node) *ssa.Func {
|
||||
return nil
|
||||
}
|
||||
|
||||
prelinkNumvars := s.f.NumValues()
|
||||
sparseDefState := s.locatePotentialPhiFunctions(fn)
|
||||
|
||||
// Link up variable uses to variable definitions
|
||||
s.linkForwardReferences(sparseDefState)
|
||||
|
||||
if ssa.BuildStats > 0 {
|
||||
s.f.LogStat("build", s.f.NumBlocks(), "blocks", prelinkNumvars, "vars_before",
|
||||
s.f.NumValues(), "vars_after", prelinkNumvars*s.f.NumBlocks(), "ssa_phi_loc_cutoff_score")
|
||||
}
|
||||
s.insertPhis()
|
||||
|
||||
// Don't carry reference this around longer than necessary
|
||||
s.exitCode = Nodes{}
|
||||
@ -197,8 +204,14 @@ type state struct {
|
||||
|
||||
// variable assignments in the current block (map from variable symbol to ssa value)
|
||||
// *Node is the unique identifier (an ONAME Node) for the variable.
|
||||
// TODO: keep a single varnum map, then make all of these maps slices instead?
|
||||
vars map[*Node]*ssa.Value
|
||||
|
||||
// fwdVars are variables that are used before they are defined in the current block.
|
||||
// This map exists just to coalesce multiple references into a single FwdRef op.
|
||||
// *Node is the unique identifier (an ONAME Node) for the variable.
|
||||
fwdVars map[*Node]*ssa.Value
|
||||
|
||||
// all defined variables at the end of each block. Indexed by block ID.
|
||||
defvars []map[*Node]*ssa.Value
|
||||
|
||||
@ -220,12 +233,12 @@ type state struct {
|
||||
// Used to deduplicate panic calls.
|
||||
panics map[funcLine]*ssa.Block
|
||||
|
||||
// list of FwdRef values.
|
||||
fwdRefs []*ssa.Value
|
||||
|
||||
// list of PPARAMOUT (return) variables.
|
||||
returns []*Node
|
||||
|
||||
// A dummy value used during phi construction.
|
||||
placeholder *ssa.Value
|
||||
|
||||
cgoUnsafeArgs bool
|
||||
noWB bool
|
||||
WBLineno int32 // line number of first write barrier. 0=no write barriers
|
||||
@ -292,6 +305,9 @@ func (s *state) startBlock(b *ssa.Block) {
|
||||
}
|
||||
s.curBlock = b
|
||||
s.vars = map[*Node]*ssa.Value{}
|
||||
for n := range s.fwdVars {
|
||||
delete(s.fwdVars, n)
|
||||
}
|
||||
}
|
||||
|
||||
// endBlock marks the end of generating code for the current block.
|
||||
@ -2951,9 +2967,8 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) {
|
||||
if v != nil {
|
||||
return v, false
|
||||
}
|
||||
if n.String() == ".fp" {
|
||||
// Special arg that points to the frame pointer.
|
||||
// (Used by the race detector, others?)
|
||||
if n == nodfp {
|
||||
// Special arg that points to the frame pointer (Used by ORECOVER).
|
||||
aux := s.lookupSymbol(n, &ssa.ArgSymbol{Typ: n.Type, Node: n})
|
||||
return s.entryNewValue1A(ssa.OpAddr, t, aux, s.sp), false
|
||||
}
|
||||
@ -3971,12 +3986,23 @@ func (s *state) checkgoto(from *Node, to *Node) {
|
||||
// variable returns the value of a variable at the current location.
|
||||
func (s *state) variable(name *Node, t ssa.Type) *ssa.Value {
|
||||
v := s.vars[name]
|
||||
if v == nil {
|
||||
v = s.newValue0A(ssa.OpFwdRef, t, name)
|
||||
s.fwdRefs = append(s.fwdRefs, v)
|
||||
s.vars[name] = v
|
||||
s.addNamedValue(name, v)
|
||||
if v != nil {
|
||||
return v
|
||||
}
|
||||
v = s.fwdVars[name]
|
||||
if v != nil {
|
||||
return v
|
||||
}
|
||||
|
||||
if s.curBlock == s.f.Entry {
|
||||
// No variable should be live at entry.
|
||||
s.Fatalf("Value live at entry. It shouldn't be. func %s, node %v, value %v", s.f.Name, name, v)
|
||||
}
|
||||
// Make a FwdRef, which records a value that's live on block input.
|
||||
// We'll find the matching definition as part of insertPhis.
|
||||
v = s.newValue0A(ssa.OpFwdRef, t, name)
|
||||
s.fwdVars[name] = v
|
||||
s.addNamedValue(name, v)
|
||||
return v
|
||||
}
|
||||
|
||||
@ -3984,119 +4010,6 @@ func (s *state) mem() *ssa.Value {
|
||||
return s.variable(&memVar, ssa.TypeMem)
|
||||
}
|
||||
|
||||
func (s *state) linkForwardReferences(dm *sparseDefState) {
|
||||
|
||||
// Build SSA graph. Each variable on its first use in a basic block
|
||||
// leaves a FwdRef in that block representing the incoming value
|
||||
// of that variable. This function links that ref up with possible definitions,
|
||||
// inserting Phi values as needed. This is essentially the algorithm
|
||||
// described by Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau:
|
||||
// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf
|
||||
// Differences:
|
||||
// - We use FwdRef nodes to postpone phi building until the CFG is
|
||||
// completely built. That way we can avoid the notion of "sealed"
|
||||
// blocks.
|
||||
// - Phi optimization is a separate pass (in ../ssa/phielim.go).
|
||||
for len(s.fwdRefs) > 0 {
|
||||
v := s.fwdRefs[len(s.fwdRefs)-1]
|
||||
s.fwdRefs = s.fwdRefs[:len(s.fwdRefs)-1]
|
||||
s.resolveFwdRef(v, dm)
|
||||
}
|
||||
}
|
||||
|
||||
// resolveFwdRef modifies v to be the variable's value at the start of its block.
|
||||
// v must be a FwdRef op.
|
||||
func (s *state) resolveFwdRef(v *ssa.Value, dm *sparseDefState) {
|
||||
b := v.Block
|
||||
name := v.Aux.(*Node)
|
||||
v.Aux = nil
|
||||
if b == s.f.Entry {
|
||||
// Live variable at start of function.
|
||||
if s.canSSA(name) {
|
||||
if strings.HasPrefix(name.Sym.Name, "autotmp_") {
|
||||
// It's likely that this is an uninitialized variable in the entry block.
|
||||
s.Fatalf("Treating auto as if it were arg, func %s, node %v, value %v", b.Func.Name, name, v)
|
||||
}
|
||||
v.Op = ssa.OpArg
|
||||
v.Aux = name
|
||||
return
|
||||
}
|
||||
// Not SSAable. Load it.
|
||||
addr := s.decladdrs[name]
|
||||
if addr == nil {
|
||||
// TODO: closure args reach here.
|
||||
s.Fatalf("unhandled closure arg %v at entry to function %s", name, b.Func.Name)
|
||||
}
|
||||
if _, ok := addr.Aux.(*ssa.ArgSymbol); !ok {
|
||||
s.Fatalf("variable live at start of function %s is not an argument %v", b.Func.Name, name)
|
||||
}
|
||||
v.Op = ssa.OpLoad
|
||||
v.AddArgs(addr, s.startmem)
|
||||
return
|
||||
}
|
||||
if len(b.Preds) == 0 {
|
||||
// This block is dead; we have no predecessors and we're not the entry block.
|
||||
// It doesn't matter what we use here as long as it is well-formed.
|
||||
v.Op = ssa.OpUnknown
|
||||
return
|
||||
}
|
||||
// Find variable value on each predecessor.
|
||||
var argstore [4]*ssa.Value
|
||||
args := argstore[:0]
|
||||
for _, e := range b.Preds {
|
||||
p := e.Block()
|
||||
p = dm.FindBetterDefiningBlock(name, p) // try sparse improvement on p
|
||||
args = append(args, s.lookupVarOutgoing(p, v.Type, name, v.Line))
|
||||
}
|
||||
|
||||
// Decide if we need a phi or not. We need a phi if there
|
||||
// are two different args (which are both not v).
|
||||
var w *ssa.Value
|
||||
for _, a := range args {
|
||||
if a == v {
|
||||
continue // self-reference
|
||||
}
|
||||
if a == w {
|
||||
continue // already have this witness
|
||||
}
|
||||
if w != nil {
|
||||
// two witnesses, need a phi value
|
||||
v.Op = ssa.OpPhi
|
||||
v.AddArgs(args...)
|
||||
return
|
||||
}
|
||||
w = a // save witness
|
||||
}
|
||||
if w == nil {
|
||||
s.Fatalf("no witness for reachable phi %s", v)
|
||||
}
|
||||
// One witness. Make v a copy of w.
|
||||
v.Op = ssa.OpCopy
|
||||
v.AddArg(w)
|
||||
}
|
||||
|
||||
// lookupVarOutgoing finds the variable's value at the end of block b.
|
||||
func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name *Node, line int32) *ssa.Value {
|
||||
for {
|
||||
if v, ok := s.defvars[b.ID][name]; ok {
|
||||
return v
|
||||
}
|
||||
// The variable is not defined by b and we haven't looked it up yet.
|
||||
// If b has exactly one predecessor, loop to look it up there.
|
||||
// Otherwise, give up and insert a new FwdRef and resolve it later.
|
||||
if len(b.Preds) != 1 {
|
||||
break
|
||||
}
|
||||
b = b.Preds[0].Block()
|
||||
}
|
||||
// Generate a FwdRef for the variable and return that.
|
||||
v := b.NewValue0A(line, ssa.OpFwdRef, t, name)
|
||||
s.fwdRefs = append(s.fwdRefs, v)
|
||||
s.defvars[b.ID][name] = v
|
||||
s.addNamedValue(name, v)
|
||||
return v
|
||||
}
|
||||
|
||||
func (s *state) addNamedValue(n *Node, v *ssa.Value) {
|
||||
if n.Class == Pxxx {
|
||||
// Don't track our dummy nodes (&memVar etc.).
|
||||
|
@ -89,6 +89,9 @@ type Edge struct {
|
||||
func (e Edge) Block() *Block {
|
||||
return e.b
|
||||
}
|
||||
func (e Edge) Index() int {
|
||||
return e.i
|
||||
}
|
||||
|
||||
// kind control successors
|
||||
// ------------------------------------------
|
||||
|
@ -459,6 +459,9 @@ func (f *Func) idom() []*Block {
|
||||
}
|
||||
return f.cachedIdom
|
||||
}
|
||||
func (f *Func) Idom() []*Block {
|
||||
return f.idom()
|
||||
}
|
||||
|
||||
// sdom returns a sparse tree representing the dominator relationships
|
||||
// among the blocks of f.
|
||||
|
Loading…
Reference in New Issue
Block a user