From 5a6e511c614a158cb58150fb62bfbc207a33922d Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Fri, 30 Sep 2016 10:12:32 -0700 Subject: [PATCH] cmd/compile: Use Sreedhar+Gao phi building algorithm Should be more asymptotically happy. We process each variable in turn to find all the locations where it needs a phi (the dominance frontier of all of its definitions). Then we add all those phis. This takes O(n * #variables), although hopefully much less. Then we do a single tree walk to match all the FwdRefs with the nearest definition or phi. This takes O(n) time. The one remaining inefficiency is that we might end up introducing a bunch of dead phis in the first step. A TODO is to introduce phis only where they might be used by a read. The old algorithm is still faster on small functions, so there's a cutover size (currently 500 blocks). This algorithm supercedes the David's sparse phi placement algorithm for large functions. Lowers compile time of example from #14934 from ~10 sec to ~4 sec. Lowers compile time of example from #16361 from ~4.5 sec to ~3 sec. Lowers #16407 from ~20 min to ~30 sec. Update #14934 Update #16361 Fixes #16407 Change-Id: I1cff6364e1623c143190b6a924d7599e309db58f Reviewed-on: https://go-review.googlesource.com/30163 Reviewed-by: David Chase --- src/cmd/compile/internal/gc/phi.go | 521 ++++++++++++++++++ src/cmd/compile/internal/gc/racewalk.go | 1 + .../internal/gc/sparselocatephifunctions.go | 202 ------- src/cmd/compile/internal/gc/ssa.go | 181 ++---- src/cmd/compile/internal/ssa/block.go | 3 + src/cmd/compile/internal/ssa/func.go | 3 + 6 files changed, 575 insertions(+), 336 deletions(-) create mode 100644 src/cmd/compile/internal/gc/phi.go delete mode 100644 src/cmd/compile/internal/gc/sparselocatephifunctions.go diff --git a/src/cmd/compile/internal/gc/phi.go b/src/cmd/compile/internal/gc/phi.go new file mode 100644 index 0000000000..ea9e5b10aa --- /dev/null +++ b/src/cmd/compile/internal/gc/phi.go @@ -0,0 +1,521 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gc + +import ( + "cmd/compile/internal/ssa" + "container/heap" + "fmt" +) + +// This file contains the algorithm to place phi nodes in a function. +// For small functions, we use Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau. +// http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf +// For large functions, we use Sreedhar & Gao: A Linear Time Algorithm for Placing Φ-Nodes. +// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.8.1979&rep=rep1&type=pdf + +const smallBlocks = 500 + +const debugPhi = false + +// insertPhis finds all the places in the function where a phi is +// necessary and inserts them. +// Uses FwdRef ops to find all uses of variables, and s.defvars to find +// all definitions. +// Phi values are inserted, and all FwdRefs are changed to a Copy +// of the appropriate phi or definition. +// TODO: make this part of cmd/compile/internal/ssa somehow? +func (s *state) insertPhis() { + if len(s.f.Blocks) <= smallBlocks && false { + sps := simplePhiState{s: s, f: s.f, defvars: s.defvars} + sps.insertPhis() + return + } + ps := phiState{s: s, f: s.f, defvars: s.defvars} + ps.insertPhis() +} + +type phiState struct { + s *state // SSA state + f *ssa.Func // function to work on + defvars []map[*Node]*ssa.Value // defined variables at end of each block + + varnum map[*Node]int32 // variable numbering + + // properties of the dominator tree + idom []*ssa.Block // dominator parents + tree []domBlock // dominator child+sibling + level []int32 // level in dominator tree (0 = root or unreachable, 1 = children of root, ...) + + // scratch locations + priq blockHeap // priority queue of blocks, higher level (toward leaves) = higher priority + q []*ssa.Block // inner loop queue + queued *sparseSet // has been put in q + hasPhi *sparseSet // has a phi + hasDef *sparseSet // has a write of the variable we're processing + + // miscellaneous + placeholder *ssa.Value // dummy value to use as a "not set yet" placeholder. +} + +func (s *phiState) insertPhis() { + if debugPhi { + fmt.Println(s.f.String()) + } + + // Find all the variables for which we need to match up reads & writes. + // This step prunes any basic-block-only variables from consideration. + // Generate a numbering for these variables. + s.varnum = map[*Node]int32{} + var vars []*Node + var vartypes []ssa.Type + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + var_ := v.Aux.(*Node) + + // Optimization: look back 1 block for the definition. + if len(b.Preds) == 1 { + c := b.Preds[0].Block() + if w := s.defvars[c.ID][var_]; w != nil { + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(w) + continue + } + } + + if _, ok := s.varnum[var_]; ok { + continue + } + s.varnum[var_] = int32(len(vartypes)) + if debugPhi { + fmt.Printf("var%d = %v\n", len(vartypes), var_) + } + vars = append(vars, var_) + vartypes = append(vartypes, v.Type) + } + } + + if len(vartypes) == 0 { + return + } + + // Find all definitions of the variables we need to process. + // defs[n] contains all the blocks in which variable number n is assigned. + defs := make([][]*ssa.Block, len(vartypes)) + for _, b := range s.f.Blocks { + for var_ := range s.defvars[b.ID] { // TODO: encode defvars some other way (explicit ops)? make defvars[n] a slice instead of a map. + if n, ok := s.varnum[var_]; ok { + defs[n] = append(defs[n], b) + } + } + } + + // Make dominator tree. + s.idom = s.f.Idom() + s.tree = make([]domBlock, s.f.NumBlocks()) + for _, b := range s.f.Blocks { + p := s.idom[b.ID] + if p != nil { + s.tree[b.ID].sibling = s.tree[p.ID].firstChild + s.tree[p.ID].firstChild = b + } + } + // Compute levels in dominator tree. + // With parent pointers we can do a depth-first walk without + // any auxiliary storage. + s.level = make([]int32, s.f.NumBlocks()) + b := s.f.Entry +levels: + for { + if p := s.idom[b.ID]; p != nil { + s.level[b.ID] = s.level[p.ID] + 1 + if debugPhi { + fmt.Printf("level %s = %d\n", b, s.level[b.ID]) + } + } + if c := s.tree[b.ID].firstChild; c != nil { + b = c + continue + } + for { + if c := s.tree[b.ID].sibling; c != nil { + b = c + continue levels + } + b = s.idom[b.ID] + if b == nil { + break levels + } + } + } + + // Allocate scratch locations. + s.priq.level = s.level + s.q = make([]*ssa.Block, 0, s.f.NumBlocks()) + s.queued = newSparseSet(s.f.NumBlocks()) + s.hasPhi = newSparseSet(s.f.NumBlocks()) + s.hasDef = newSparseSet(s.f.NumBlocks()) + s.placeholder = s.s.entryNewValue0(ssa.OpUnknown, ssa.TypeInvalid) + + // Generate phi ops for each variable. + for n := range vartypes { + s.insertVarPhis(n, vars[n], defs[n], vartypes[n]) + } + + // Resolve FwdRefs to the correct write or phi. + s.resolveFwdRefs() + + // Erase variable numbers stored in AuxInt fields of phi ops. They are no longer needed. + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op == ssa.OpPhi { + v.AuxInt = 0 + } + } + } +} + +func (s *phiState) insertVarPhis(n int, var_ *Node, defs []*ssa.Block, typ ssa.Type) { + priq := &s.priq + q := s.q + queued := s.queued + queued.clear() + hasPhi := s.hasPhi + hasPhi.clear() + hasDef := s.hasDef + hasDef.clear() + + // Add defining blocks to priority queue. + for _, b := range defs { + priq.a = append(priq.a, b) + hasDef.add(b.ID) + if debugPhi { + fmt.Printf("def of var%d in %s\n", n, b) + } + } + heap.Init(priq) + + // Visit blocks defining variable n, from deepest to shallowest. + for len(priq.a) > 0 { + currentRoot := heap.Pop(priq).(*ssa.Block) + if debugPhi { + fmt.Printf("currentRoot %s\n", currentRoot) + } + // Walk subtree below definition. + // Skip subtrees we've done in previous iterations. + // Find edges exiting tree dominated by definition (the dominance frontier). + // Insert phis at target blocks. + if queued.contains(currentRoot.ID) { + s.s.Fatalf("root already in queue") + } + q = append(q, currentRoot) + queued.add(currentRoot.ID) + for len(q) > 0 { + b := q[len(q)-1] + q = q[:len(q)-1] + if debugPhi { + fmt.Printf(" processing %s\n", b) + } + + for _, e := range b.Succs { + c := e.Block() + // TODO: if the variable is dead at c, skip it. + if s.level[c.ID] > s.level[currentRoot.ID] { + // a D-edge, or an edge whose target is in currentRoot's subtree. + continue + } + if !hasPhi.contains(c.ID) { + // Add a phi to block c for variable n. + hasPhi.add(c.ID) + v := c.NewValue0I(currentRoot.Line, ssa.OpPhi, typ, int64(n)) // TODO: line number right? + // Note: we store the variable number in the phi's AuxInt field. Used temporarily by phi building. + s.s.addNamedValue(var_, v) + for i := 0; i < len(c.Preds); i++ { + v.AddArg(s.placeholder) // Actual args will be filled in by resolveFwdRefs. + } + if debugPhi { + fmt.Printf("new phi for var%d in %s: %s\n", n, c, v) + } + if !hasDef.contains(c.ID) { + // There's now a new definition of this variable in block c. + // Add it to the priority queue to explore. + heap.Push(priq, c) + hasDef.add(c.ID) + } + } + } + + // Visit children if they have not been visited yet. + for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling { + if !queued.contains(c.ID) { + q = append(q, c) + queued.add(c.ID) + } + } + } + } +} + +// resolveFwdRefs links all FwdRef uses up to their nearest dominating definition. +func (s *phiState) resolveFwdRefs() { + // Do a depth-first walk of the dominator tree, keeping track + // of the most-recently-seen value for each variable. + + // Map from variable ID to SSA value at the current point of the walk. + values := make([]*ssa.Value, len(s.varnum)) + for i := range values { + values[i] = s.placeholder + } + + // Stack of work to do. + type stackEntry struct { + b *ssa.Block // block to explore + + // variable/value pair to reinstate on exit + n int32 // variable ID + v *ssa.Value + + // Note: only one of b or n,v will be set. + } + var stk []stackEntry + + stk = append(stk, stackEntry{b: s.f.Entry}) + for len(stk) > 0 { + work := stk[len(stk)-1] + stk = stk[:len(stk)-1] + + b := work.b + if b == nil { + // On exit from a block, this case will undo any assignments done below. + values[work.n] = work.v + continue + } + + // Process phis as new defs. They come before FwdRefs in this block. + for _, v := range b.Values { + if v.Op != ssa.OpPhi { + continue + } + n := int32(v.AuxInt) + // Remember the old assignment so we can undo it when we exit b. + stk = append(stk, stackEntry{n: n, v: values[n]}) + // Record the new assignment. + values[n] = v + } + + // Replace a FwdRef op with the current incoming value for its variable. + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + n := s.varnum[v.Aux.(*Node)] + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(values[n]) + } + + // Establish values for variables defined in b. + for var_, v := range s.defvars[b.ID] { + n, ok := s.varnum[var_] + if !ok { + // some variable not live across a basic block boundary. + continue + } + // Remember the old assignment so we can undo it when we exit b. + stk = append(stk, stackEntry{n: n, v: values[n]}) + // Record the new assignment. + values[n] = v + } + + // Replace phi args in successors with the current incoming value. + for _, e := range b.Succs { + c, i := e.Block(), e.Index() + for j := len(c.Values) - 1; j >= 0; j-- { + v := c.Values[j] + if v.Op != ssa.OpPhi { + break // All phis will be at the end of the block during phi building. + } + v.SetArg(i, values[v.AuxInt]) + } + } + + // Walk children in dominator tree. + for c := s.tree[b.ID].firstChild; c != nil; c = s.tree[c.ID].sibling { + stk = append(stk, stackEntry{b: c}) + } + } +} + +// domBlock contains extra per-block information to record the dominator tree. +type domBlock struct { + firstChild *ssa.Block // first child of block in dominator tree + sibling *ssa.Block // next child of parent in dominator tree +} + +// A block heap is used as a priority queue to implement the PiggyBank +// from Sreedhar and Gao. That paper uses an array which is better +// asymptotically but worse in the common case when the PiggyBank +// holds a sparse set of blocks. +type blockHeap struct { + a []*ssa.Block // block IDs in heap + level []int32 // depth in dominator tree (static, used for determining priority) +} + +func (h *blockHeap) Len() int { return len(h.a) } +func (h *blockHeap) Swap(i, j int) { a := h.a; a[i], a[j] = a[j], a[i] } + +func (h *blockHeap) Push(x interface{}) { + v := x.(*ssa.Block) + h.a = append(h.a, v) +} +func (h *blockHeap) Pop() interface{} { + old := h.a + n := len(old) + x := old[n-1] + h.a = old[:n-1] + return x +} +func (h *blockHeap) Less(i, j int) bool { + return h.level[h.a[i].ID] > h.level[h.a[j].ID] +} + +// TODO: stop walking the iterated domininance frontier when +// the variable is dead. Maybe detect that by checking if the +// node we're on is reverse dominated by all the reads? +// Reverse dominated by the highest common successor of all the reads? + +// copy of ../ssa/sparseset.go +// TODO: move this file to ../ssa, then use sparseSet there. +type sparseSet struct { + dense []ssa.ID + sparse []int32 +} + +// newSparseSet returns a sparseSet that can represent +// integers between 0 and n-1 +func newSparseSet(n int) *sparseSet { + return &sparseSet{dense: nil, sparse: make([]int32, n)} +} + +func (s *sparseSet) contains(x ssa.ID) bool { + i := s.sparse[x] + return i < int32(len(s.dense)) && s.dense[i] == x +} + +func (s *sparseSet) add(x ssa.ID) { + i := s.sparse[x] + if i < int32(len(s.dense)) && s.dense[i] == x { + return + } + s.dense = append(s.dense, x) + s.sparse[x] = int32(len(s.dense)) - 1 +} + +func (s *sparseSet) clear() { + s.dense = s.dense[:0] +} + +// Variant to use for small functions. +type simplePhiState struct { + s *state // SSA state + f *ssa.Func // function to work on + fwdrefs []*ssa.Value // list of FwdRefs to be processed + defvars []map[*Node]*ssa.Value // defined variables at end of each block +} + +func (s *simplePhiState) insertPhis() { + // Find FwdRef ops. + for _, b := range s.f.Blocks { + for _, v := range b.Values { + if v.Op != ssa.OpFwdRef { + continue + } + s.fwdrefs = append(s.fwdrefs, v) + var_ := v.Aux.(*Node) + if _, ok := s.defvars[b.ID][var_]; !ok { + s.defvars[b.ID][var_] = v // treat FwdDefs as definitions. + } + } + } + + var args []*ssa.Value + +loop: + for len(s.fwdrefs) > 0 { + v := s.fwdrefs[len(s.fwdrefs)-1] + s.fwdrefs = s.fwdrefs[:len(s.fwdrefs)-1] + b := v.Block + var_ := v.Aux.(*Node) + if len(b.Preds) == 0 { + if b == s.f.Entry { + // No variable should be live at entry. + s.s.Fatalf("Value live at entry. It shouldn't be. func %s, node %v, value %v", s.f.Name, var_, v) + } + // This block is dead; it has no predecessors and it is not the entry block. + // It doesn't matter what we use here as long as it is well-formed. + v.Op = ssa.OpUnknown + v.Aux = nil + continue + } + // Find variable value on each predecessor. + args = args[:0] + for _, e := range b.Preds { + args = append(args, s.lookupVarOutgoing(e.Block(), v.Type, var_, v.Line)) + } + + // Decide if we need a phi or not. We need a phi if there + // are two different args (which are both not v). + var w *ssa.Value + for _, a := range args { + if a == v { + continue // self-reference + } + if a == w { + continue // already have this witness + } + if w != nil { + // two witnesses, need a phi value + v.Op = ssa.OpPhi + v.AddArgs(args...) + v.Aux = nil + continue loop + } + w = a // save witness + } + if w == nil { + s.s.Fatalf("no witness for reachable phi %s", v) + } + // One witness. Make v a copy of w. + v.Op = ssa.OpCopy + v.Aux = nil + v.AddArg(w) + } +} + +// lookupVarOutgoing finds the variable's value at the end of block b. +func (s *simplePhiState) lookupVarOutgoing(b *ssa.Block, t ssa.Type, var_ *Node, line int32) *ssa.Value { + for { + if v := s.defvars[b.ID][var_]; v != nil { + return v + } + // The variable is not defined by b and we haven't looked it up yet. + // If b has exactly one predecessor, loop to look it up there. + // Otherwise, give up and insert a new FwdRef and resolve it later. + if len(b.Preds) != 1 { + break + } + b = b.Preds[0].Block() + } + // Generate a FwdRef for the variable and return that. + v := b.NewValue0A(line, ssa.OpFwdRef, t, var_) + s.defvars[b.ID][var_] = v + s.s.addNamedValue(var_, v) + s.fwdrefs = append(s.fwdrefs, v) + return v +} diff --git a/src/cmd/compile/internal/gc/racewalk.go b/src/cmd/compile/internal/gc/racewalk.go index b47ac9d1e0..effdcf9ea4 100644 --- a/src/cmd/compile/internal/gc/racewalk.go +++ b/src/cmd/compile/internal/gc/racewalk.go @@ -72,6 +72,7 @@ func instrument(fn *Node) { fn.Func.Enter.Prepend(nd) nd = mkcall("racefuncexit", nil, nil) fn.Func.Exit.Append(nd) + fn.Func.Dcl = append(fn.Func.Dcl, &nodpc) } if Debug['W'] != 0 { diff --git a/src/cmd/compile/internal/gc/sparselocatephifunctions.go b/src/cmd/compile/internal/gc/sparselocatephifunctions.go deleted file mode 100644 index 43cc50bd92..0000000000 --- a/src/cmd/compile/internal/gc/sparselocatephifunctions.go +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package gc - -import ( - "cmd/compile/internal/ssa" - "fmt" - "math" -) - -// sparseDefState contains a Go map from ONAMEs (*Node) to sparse definition trees, and -// a search helper for the CFG's dominator tree in which those definitions are embedded. -// Once initialized, given a use of an ONAME within a block, the ssa definition for -// that ONAME can be discovered in time roughly proportional to the log of the number -// of SSA definitions of that ONAME (thus avoiding pathological quadratic behavior for -// very large programs). The helper contains state (a dominator tree numbering) common -// to all the sparse definition trees, as well as some necessary data obtained from -// the ssa package. -// -// This algorithm has improved asymptotic complexity, but the constant factor is -// rather large and thus it is only preferred for very large inputs containing -// 1000s of blocks and variables. -type sparseDefState struct { - helper *ssa.SparseTreeHelper // contains one copy of information needed to do sparse mapping - defmapForOname map[*Node]*onameDefs // for each ONAME, its definition set (normal and phi) -} - -// onameDefs contains a record of definitions (ordinary and implied phi function) for a single OName. -// stm is the set of definitions for the OName. -// firstdef and lastuse are postorder block numberings that -// conservatively bracket the entire lifetime of the OName. -type onameDefs struct { - stm *ssa.SparseTreeMap - // firstdef and lastuse define an interval in the postorder numbering - // that is guaranteed to include the entire lifetime of an ONAME. - // In the postorder numbering, math.MaxInt32 is before anything, - // and 0 is after-or-equal all exit nodes and infinite loops. - firstdef int32 // the first definition of this ONAME *in the postorder numbering* - lastuse int32 // the last use of this ONAME *in the postorder numbering* -} - -// defsFor finds or creates-and-inserts-in-map the definition information -// (sparse tree and live range) for a given OName. -func (m *sparseDefState) defsFor(n *Node) *onameDefs { - d := m.defmapForOname[n] - if d != nil { - return d - } - // Reminder: firstdef/lastuse are postorder indices, not block indices, - // so these default values define an empty interval, not the entire one. - d = &onameDefs{stm: m.helper.NewTree(), firstdef: 0, lastuse: math.MaxInt32} - m.defmapForOname[n] = d - return d -} - -// Insert adds a definition at b (with specified before/within/after adjustment) -// to sparse tree onameDefs. The lifetime is extended as necessary. -func (m *sparseDefState) Insert(tree *onameDefs, b *ssa.Block, adjust int32) { - bponum := m.helper.Ponums[b.ID] - if bponum > tree.firstdef { - tree.firstdef = bponum - } - tree.stm.Insert(b, adjust, b, m.helper) -} - -// Use updates tree to record a use within b, extending the lifetime as necessary. -func (m *sparseDefState) Use(tree *onameDefs, b *ssa.Block) { - bponum := m.helper.Ponums[b.ID] - if bponum < tree.lastuse { - tree.lastuse = bponum - } -} - -// locatePotentialPhiFunctions finds all the places where phi functions -// will be inserted into a program and records those and ordinary definitions -// in a "map" (not a Go map) that given an OName and use site, returns the -// SSA definition for that OName that will reach the use site (that is, -// the use site's nearest def/phi site in the dominator tree.) -func (s *state) locatePotentialPhiFunctions(fn *Node) *sparseDefState { - // s.config.SparsePhiCutoff() is compared with product of numblocks and numvalues, - // if product is smaller than cutoff, use old non-sparse method. - // cutoff == 0 implies all sparse - // cutoff == uint(-1) implies all non-sparse - if uint64(s.f.NumValues())*uint64(s.f.NumBlocks()) < s.config.SparsePhiCutoff() { - return nil - } - - helper := ssa.NewSparseTreeHelper(s.f) - po := helper.Po // index by block.ID to obtain postorder # of block. - trees := make(map[*Node]*onameDefs) - dm := &sparseDefState{defmapForOname: trees, helper: helper} - - // Process params, taking note of their special lifetimes - b := s.f.Entry - for _, n := range fn.Func.Dcl { - switch n.Class { - case PPARAM, PPARAMOUT: - t := dm.defsFor(n) - dm.Insert(t, b, ssa.AdjustBefore) // define param at entry block - if n.Class == PPARAMOUT { - dm.Use(t, po[0]) // Explicitly use PPARAMOUT at very last block - } - default: - } - } - - // Process memory variable. - t := dm.defsFor(&memVar) - dm.Insert(t, b, ssa.AdjustBefore) // define memory at entry block - dm.Use(t, po[0]) // Explicitly use memory at last block - - // Next load the map w/ basic definitions for ONames recorded per-block - // Iterate over po to avoid unreachable blocks. - for i := len(po) - 1; i >= 0; i-- { - b := po[i] - m := s.defvars[b.ID] - for n := range m { // no specified order, but per-node trees are independent. - t := dm.defsFor(n) - dm.Insert(t, b, ssa.AdjustWithin) - } - } - - // Find last use of each variable - for _, v := range s.fwdRefs { - b := v.Block - name := v.Aux.(*Node) - t := dm.defsFor(name) - dm.Use(t, b) - } - - for _, t := range trees { - // iterating over names in the outer loop - for change := true; change; { - change = false - for i := t.firstdef; i >= t.lastuse; i-- { - // Iterating in reverse of post-order reduces number of 'change' iterations; - // all possible forward flow goes through each time. - b := po[i] - // Within tree t, would a use at b require a phi function to ensure a single definition? - // TODO: perhaps more efficient to record specific use sites instead of range? - if len(b.Preds) < 2 { - continue // no phi possible - } - phi := t.stm.Find(b, ssa.AdjustWithin, helper) // Look for defs in earlier block or AdjustBefore in this one. - if phi != nil && phi.(*ssa.Block) == b { - continue // has a phi already in this block. - } - var defseen interface{} - // Do preds see different definitions? if so, need a phi function. - for _, e := range b.Preds { - p := e.Block() - dm.Use(t, p) // always count phi pred as "use"; no-op except for loop edges, which matter. - x := t.stm.Find(p, ssa.AdjustAfter, helper) // Look for defs reaching or within predecessors. - if x == nil { // nil def from a predecessor means a backedge that will be visited soon. - continue - } - if defseen == nil { - defseen = x - } - if defseen != x { - // Need to insert a phi function here because predecessors's definitions differ. - change = true - // Phi insertion is at AdjustBefore, visible with find in same block at AdjustWithin or AdjustAfter. - dm.Insert(t, b, ssa.AdjustBefore) - break - } - } - } - } - } - return dm -} - -// FindBetterDefiningBlock tries to find a better block for a definition of OName name -// reaching (or within) p than p itself. If it cannot, it returns p instead. -// This aids in more efficient location of phi functions, since it can skip over -// branch code that might contain a definition of name if it actually does not. -func (m *sparseDefState) FindBetterDefiningBlock(name *Node, p *ssa.Block) *ssa.Block { - if m == nil { - return p - } - t := m.defmapForOname[name] - // For now this is fail-soft, since the old algorithm still works using the unimproved block. - if t == nil { - return p - } - x := t.stm.Find(p, ssa.AdjustAfter, m.helper) - if x == nil { - return p - } - b := x.(*ssa.Block) - if b == nil { - return p - } - return b -} - -func (d *onameDefs) String() string { - return fmt.Sprintf("onameDefs:first=%d,last=%d,tree=%s", d.firstdef, d.lastuse, d.stm.String()) -} diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 419240eab7..741a9d4e86 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -80,6 +80,7 @@ func buildssa(fn *Node) *ssa.Func { // Allocate starting values s.labels = map[string]*ssaLabel{} s.labeledNodes = map[*Node]*ssaLabel{} + s.fwdVars = map[*Node]*ssa.Value{} s.startmem = s.entryNewValue0(ssa.OpInitMem, ssa.TypeMem) s.sp = s.entryNewValue0(ssa.OpSP, Types[TUINTPTR]) // TODO: use generic pointer type (unsafe.Pointer?) instead s.sb = s.entryNewValue0(ssa.OpSB, Types[TUINTPTR]) @@ -114,6 +115,21 @@ func buildssa(fn *Node) *ssa.Func { } } + // Populate arguments. + for _, n := range fn.Func.Dcl { + if n.Class != PPARAM { + continue + } + var v *ssa.Value + if s.canSSA(n) { + v = s.newValue0A(ssa.OpArg, n.Type, n) + } else { + // Not SSAable. Load it. + v = s.newValue2(ssa.OpLoad, n.Type, s.decladdrs[n], s.startmem) + } + s.vars[n] = v + } + // Convert the AST-based IR to the SSA-based IR s.stmts(fn.Func.Enter) s.stmts(fn.Nbody) @@ -151,16 +167,7 @@ func buildssa(fn *Node) *ssa.Func { return nil } - prelinkNumvars := s.f.NumValues() - sparseDefState := s.locatePotentialPhiFunctions(fn) - - // Link up variable uses to variable definitions - s.linkForwardReferences(sparseDefState) - - if ssa.BuildStats > 0 { - s.f.LogStat("build", s.f.NumBlocks(), "blocks", prelinkNumvars, "vars_before", - s.f.NumValues(), "vars_after", prelinkNumvars*s.f.NumBlocks(), "ssa_phi_loc_cutoff_score") - } + s.insertPhis() // Don't carry reference this around longer than necessary s.exitCode = Nodes{} @@ -197,8 +204,14 @@ type state struct { // variable assignments in the current block (map from variable symbol to ssa value) // *Node is the unique identifier (an ONAME Node) for the variable. + // TODO: keep a single varnum map, then make all of these maps slices instead? vars map[*Node]*ssa.Value + // fwdVars are variables that are used before they are defined in the current block. + // This map exists just to coalesce multiple references into a single FwdRef op. + // *Node is the unique identifier (an ONAME Node) for the variable. + fwdVars map[*Node]*ssa.Value + // all defined variables at the end of each block. Indexed by block ID. defvars []map[*Node]*ssa.Value @@ -220,12 +233,12 @@ type state struct { // Used to deduplicate panic calls. panics map[funcLine]*ssa.Block - // list of FwdRef values. - fwdRefs []*ssa.Value - // list of PPARAMOUT (return) variables. returns []*Node + // A dummy value used during phi construction. + placeholder *ssa.Value + cgoUnsafeArgs bool noWB bool WBLineno int32 // line number of first write barrier. 0=no write barriers @@ -292,6 +305,9 @@ func (s *state) startBlock(b *ssa.Block) { } s.curBlock = b s.vars = map[*Node]*ssa.Value{} + for n := range s.fwdVars { + delete(s.fwdVars, n) + } } // endBlock marks the end of generating code for the current block. @@ -2951,9 +2967,8 @@ func (s *state) addr(n *Node, bounded bool) (*ssa.Value, bool) { if v != nil { return v, false } - if n.String() == ".fp" { - // Special arg that points to the frame pointer. - // (Used by the race detector, others?) + if n == nodfp { + // Special arg that points to the frame pointer (Used by ORECOVER). aux := s.lookupSymbol(n, &ssa.ArgSymbol{Typ: n.Type, Node: n}) return s.entryNewValue1A(ssa.OpAddr, t, aux, s.sp), false } @@ -3971,12 +3986,23 @@ func (s *state) checkgoto(from *Node, to *Node) { // variable returns the value of a variable at the current location. func (s *state) variable(name *Node, t ssa.Type) *ssa.Value { v := s.vars[name] - if v == nil { - v = s.newValue0A(ssa.OpFwdRef, t, name) - s.fwdRefs = append(s.fwdRefs, v) - s.vars[name] = v - s.addNamedValue(name, v) + if v != nil { + return v } + v = s.fwdVars[name] + if v != nil { + return v + } + + if s.curBlock == s.f.Entry { + // No variable should be live at entry. + s.Fatalf("Value live at entry. It shouldn't be. func %s, node %v, value %v", s.f.Name, name, v) + } + // Make a FwdRef, which records a value that's live on block input. + // We'll find the matching definition as part of insertPhis. + v = s.newValue0A(ssa.OpFwdRef, t, name) + s.fwdVars[name] = v + s.addNamedValue(name, v) return v } @@ -3984,119 +4010,6 @@ func (s *state) mem() *ssa.Value { return s.variable(&memVar, ssa.TypeMem) } -func (s *state) linkForwardReferences(dm *sparseDefState) { - - // Build SSA graph. Each variable on its first use in a basic block - // leaves a FwdRef in that block representing the incoming value - // of that variable. This function links that ref up with possible definitions, - // inserting Phi values as needed. This is essentially the algorithm - // described by Braun, Buchwald, Hack, Leißa, Mallon, and Zwinkau: - // http://pp.info.uni-karlsruhe.de/uploads/publikationen/braun13cc.pdf - // Differences: - // - We use FwdRef nodes to postpone phi building until the CFG is - // completely built. That way we can avoid the notion of "sealed" - // blocks. - // - Phi optimization is a separate pass (in ../ssa/phielim.go). - for len(s.fwdRefs) > 0 { - v := s.fwdRefs[len(s.fwdRefs)-1] - s.fwdRefs = s.fwdRefs[:len(s.fwdRefs)-1] - s.resolveFwdRef(v, dm) - } -} - -// resolveFwdRef modifies v to be the variable's value at the start of its block. -// v must be a FwdRef op. -func (s *state) resolveFwdRef(v *ssa.Value, dm *sparseDefState) { - b := v.Block - name := v.Aux.(*Node) - v.Aux = nil - if b == s.f.Entry { - // Live variable at start of function. - if s.canSSA(name) { - if strings.HasPrefix(name.Sym.Name, "autotmp_") { - // It's likely that this is an uninitialized variable in the entry block. - s.Fatalf("Treating auto as if it were arg, func %s, node %v, value %v", b.Func.Name, name, v) - } - v.Op = ssa.OpArg - v.Aux = name - return - } - // Not SSAable. Load it. - addr := s.decladdrs[name] - if addr == nil { - // TODO: closure args reach here. - s.Fatalf("unhandled closure arg %v at entry to function %s", name, b.Func.Name) - } - if _, ok := addr.Aux.(*ssa.ArgSymbol); !ok { - s.Fatalf("variable live at start of function %s is not an argument %v", b.Func.Name, name) - } - v.Op = ssa.OpLoad - v.AddArgs(addr, s.startmem) - return - } - if len(b.Preds) == 0 { - // This block is dead; we have no predecessors and we're not the entry block. - // It doesn't matter what we use here as long as it is well-formed. - v.Op = ssa.OpUnknown - return - } - // Find variable value on each predecessor. - var argstore [4]*ssa.Value - args := argstore[:0] - for _, e := range b.Preds { - p := e.Block() - p = dm.FindBetterDefiningBlock(name, p) // try sparse improvement on p - args = append(args, s.lookupVarOutgoing(p, v.Type, name, v.Line)) - } - - // Decide if we need a phi or not. We need a phi if there - // are two different args (which are both not v). - var w *ssa.Value - for _, a := range args { - if a == v { - continue // self-reference - } - if a == w { - continue // already have this witness - } - if w != nil { - // two witnesses, need a phi value - v.Op = ssa.OpPhi - v.AddArgs(args...) - return - } - w = a // save witness - } - if w == nil { - s.Fatalf("no witness for reachable phi %s", v) - } - // One witness. Make v a copy of w. - v.Op = ssa.OpCopy - v.AddArg(w) -} - -// lookupVarOutgoing finds the variable's value at the end of block b. -func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name *Node, line int32) *ssa.Value { - for { - if v, ok := s.defvars[b.ID][name]; ok { - return v - } - // The variable is not defined by b and we haven't looked it up yet. - // If b has exactly one predecessor, loop to look it up there. - // Otherwise, give up and insert a new FwdRef and resolve it later. - if len(b.Preds) != 1 { - break - } - b = b.Preds[0].Block() - } - // Generate a FwdRef for the variable and return that. - v := b.NewValue0A(line, ssa.OpFwdRef, t, name) - s.fwdRefs = append(s.fwdRefs, v) - s.defvars[b.ID][name] = v - s.addNamedValue(name, v) - return v -} - func (s *state) addNamedValue(n *Node, v *ssa.Value) { if n.Class == Pxxx { // Don't track our dummy nodes (&memVar etc.). diff --git a/src/cmd/compile/internal/ssa/block.go b/src/cmd/compile/internal/ssa/block.go index b5bedd3912..3ee27df5e7 100644 --- a/src/cmd/compile/internal/ssa/block.go +++ b/src/cmd/compile/internal/ssa/block.go @@ -89,6 +89,9 @@ type Edge struct { func (e Edge) Block() *Block { return e.b } +func (e Edge) Index() int { + return e.i +} // kind control successors // ------------------------------------------ diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index 759e19d8e6..6ba5448998 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -459,6 +459,9 @@ func (f *Func) idom() []*Block { } return f.cachedIdom } +func (f *Func) Idom() []*Block { + return f.idom() +} // sdom returns a sparse tree representing the dominator relationships // among the blocks of f.