From 149671dfc30889b72254a7a43ba515783b4c5bf7 Mon Sep 17 00:00:00 2001
From: Keith Randall
Date: Fri, 27 Mar 2015 13:41:30 -0700
Subject: [PATCH] [dev.ssa] cmd/internal/ssa: add CSE pass

Implement a simple common-subexpression elimination.
It uses value numbering & a dominator tree to
detect redundant computation.

Change-Id: Id0ff775e439c22f4d41bdd5976176017dd2a2086
Reviewed-on: https://go-review.googlesource.com/8172
Reviewed-by: Alan Donovan
---
 src/cmd/internal/ssa/compile.go |   7 +-
 src/cmd/internal/ssa/cse.go     | 169 ++++++++++++++++++++++++++++++++
 src/cmd/internal/ssa/dom.go     | 121 ++++++++++++++++++++++++
 src/cmd/internal/ssa/lower.go   |   1 -
 4 files changed, 294 insertions(+), 4 deletions(-)
 create mode 100644 src/cmd/internal/ssa/cse.go
 create mode 100644 src/cmd/internal/ssa/dom.go

diff --git a/src/cmd/internal/ssa/compile.go b/src/cmd/internal/ssa/compile.go
index 6103cc9557..08477d470c 100644
--- a/src/cmd/internal/ssa/compile.go
+++ b/src/cmd/internal/ssa/compile.go
@@ -54,11 +54,12 @@ var passes = [...]pass{
 	{"phielim", phielim},
 	{"copyelim", copyelim},
 	{"opt", opt},
-	// cse
-	{"deadcode", deadcode},
+	{"generic cse", cse},
+	{"generic deadcode", deadcode},
 	{"fuse", fuse},
 	{"lower", lower},
-	// cse
+	{"lowered cse", cse},
+	{"lowered deadcode", deadcode},
 	{"critical", critical}, // remove critical edges
 	{"layout", layout},     // schedule blocks
 	{"schedule", schedule}, // schedule values
diff --git a/src/cmd/internal/ssa/cse.go b/src/cmd/internal/ssa/cse.go
new file mode 100644
index 0000000000..71f23013cf
--- /dev/null
+++ b/src/cmd/internal/ssa/cse.go
@@ -0,0 +1,169 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"sort"
+)
+
+// cse does common-subexpression elimination on the Function.
+// Values are just relinked, nothing is deleted. A subsequent deadcode
+// pass is required to actually remove duplicate expressions.
+func cse(f *Func) {
+	// Two values are equivalent if they satisfy the following definition:
+	// equivalent(v, w):
+	//   v.op == w.op
+	//   v.type == w.type
+	//   v.aux == w.aux
+	//   len(v.args) == len(w.args)
+	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
+
+	// The algorithm searches for a partition of f's values into
+	// equivalence classes using the above definition.
+	// It starts with a coarse partition and iteratively refines it
+	// until it reaches a fixed point.
+
+	// Make initial partition based on opcode/type/aux/nargs
+	// TODO(khr): types are not canonical, so we may split unnecessarily. Fix that.
+	type key struct {
+		op    Op
+		typ   Type
+		aux   interface{}
+		nargs int
+	}
+	m := map[key]eqclass{}
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			k := key{v.Op, v.Type, v.Aux, len(v.Args)}
+			m[k] = append(m[k], v)
+		}
+	}
+
+	// A partition is a set of disjoint eqclasses.
+	var partition []eqclass
+	for _, v := range m {
+		partition = append(partition, v)
+	}
+
+	// map from value id back to eqclass id
+	valueEqClass := make([]int, f.NumValues())
+	for i, e := range partition {
+		for _, v := range e {
+			valueEqClass[v.ID] = i
+		}
+	}
+
+	// Find an equivalence class where some members of the class have
+	// non-equivalent arguments. Split the equivalence class appropriately.
+	// Repeat until we can't find any more splits.
+	for {
+		changed := false
+
+		for i, e := range partition {
+			v := e[0]
+			// all values in this equiv class that are not equivalent to v get moved
+			// into another equiv class q.
+			var q eqclass
+		eqloop:
+			for j := 1; j < len(e); {
+				w := e[j]
+				for k := 0; k < len(v.Args); k++ {
+					if valueEqClass[v.Args[k].ID] != valueEqClass[w.Args[k].ID] {
+						// w is not equivalent to v.
+						// remove w from e
+						e, e[j] = e[:len(e)-1], e[len(e)-1]
+						// add w to q, which becomes partition[len(partition)] below
+						q = append(q, w)
+						valueEqClass[w.ID] = len(partition)
+						changed = true
+						continue eqloop
+					}
+				}
+				// v and w are equivalent. Keep w in e.
+				j++
+			}
+			partition[i] = e
+			if q != nil {
+				partition = append(partition, q)
+			}
+		}
+
+		if !changed {
+			break
+		}
+	}
+
+	// Compute dominator tree
+	idom := dominators(f)
+
+	// Compute substitutions we would like to do. We substitute v for w
+	// if v and w are in the same equivalence class and v dominates w.
+	rewrite := make([]*Value, f.NumValues())
+	for _, e := range partition {
+		sort.Sort(e) // ensure deterministic ordering
+		for len(e) > 1 {
+			// Find a maximal dominant element in e
+			v := e[0]
+			for _, w := range e[1:] {
+				if dom(w.Block, v.Block, idom) {
+					v = w
+				}
+			}
+
+			// Replace all elements of e which v dominates, and drop v
+			// itself from e. v must leave e even when it dominates no
+			// other member: otherwise e would never shrink and this
+			// loop would not terminate.
+			for i := 0; i < len(e); {
+				w := e[i]
+				if w == v {
+					e, e[i] = e[:len(e)-1], e[len(e)-1]
+				} else if dom(v.Block, w.Block, idom) {
+					rewrite[w.ID] = v
+					e, e[i] = e[:len(e)-1], e[len(e)-1]
+				} else {
+					i++
+				}
+			}
+			// TODO(khr): if value is a control value, do we need to keep it block-local?
+		}
+	}
+
+	// Apply substitutions
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, w := range v.Args {
+				if x := rewrite[w.ID]; x != nil {
+					v.SetArg(i, x)
+				}
+			}
+		}
+	}
+}
+
+// dom returns true if b dominates c. A block is considered to
+// dominate itself. idom maps block ID to immediate dominator.
+// TODO(khr): faster
+func dom(b, c *Block, idom []*Block) bool {
+	// Walk up from c in the dominator tree looking for b.
+	for c != nil {
+		if c == b {
+			return true
+		}
+		c = idom[c.ID]
+	}
+	// Ran off the top of the dominator tree, never saw b.
+	return false
+}
+
+// An eqclass approximates an equivalence class. During the
+// algorithm it may represent the union of several of the
+// final equivalence classes.
+type eqclass []*Value
+
+// Sort an equivalence class by value ID.
+func (e eqclass) Len() int           { return len(e) }
+func (e eqclass) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
+func (e eqclass) Less(i, j int) bool { return e[i].ID < e[j].ID }
diff --git a/src/cmd/internal/ssa/dom.go b/src/cmd/internal/ssa/dom.go
new file mode 100644
index 0000000000..aaf3ab3da1
--- /dev/null
+++ b/src/cmd/internal/ssa/dom.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// This file contains code to compute the dominator tree
+// of a control-flow graph.
+
+import "log"
+
+// postorder computes a postorder traversal ordering for the basic
+// blocks in f, starting at f.Entry. Unreachable blocks will not appear.
+func postorder(f *Func) []*Block {
+	// Per-block traversal state, indexed by block ID.
+	const (
+		notFound    = iota // block has not been discovered yet
+		notExplored        // discovered and on the stack, outedges not processed yet
+		explored           // discovered and on the stack, outedges processed
+		done               // popped off the stack and appended to the result
+	)
+	state := make([]byte, f.NumBlocks())
+
+	// Blocks in output order.
+	var result []*Block
+
+	// Explicit stack of blocks, seeded with the entry block.
+	stack := []*Block{f.Entry}
+	state[f.Entry.ID] = notExplored
+	for len(stack) > 0 {
+		top := stack[len(stack)-1]
+		switch state[top.ID] {
+		case notExplored:
+			// First time on top of the stack. Mark as explored and
+			// push every successor that has not been discovered yet.
+			state[top.ID] = explored
+			for _, succ := range top.Succs {
+				if state[succ.ID] != notFound {
+					continue
+				}
+				state[succ.ID] = notExplored
+				stack = append(stack, succ)
+			}
+		case explored:
+			// Everything pushed above the block is gone. Pop & output block.
+			stack = stack[:len(stack)-1]
+			state[top.ID] = done
+			result = append(result, top)
+		default:
+			log.Fatalf("bad stack state %v %d", top, state[top.ID])
+		}
+	}
+	return result
+}
+
+// dominators computes the dominator tree for f. The result is a slice
+// indexed by block ID that holds the immediate dominator of that block.
+// Unreachable blocks map to nil. The entry block maps to nil.
+func dominators(f *Func) []*Block {
+	// Iterative algorithm after Cooper, Harvey, Kennedy.
+	// Simple; good enough for now.
+	idom := make([]*Block, f.NumBlocks())
+
+	// Compute postorder walk
+	order := postorder(f)
+
+	// rank maps block ID to index in order (for intersect call)
+	rank := make([]int, f.NumBlocks())
+	for n, blk := range order {
+		rank[blk.ID] = n
+	}
+
+	// Temporarily make the entry block its own dominator; it must
+	// also be the last block of the postorder.
+	idom[f.Entry.ID] = f.Entry
+	if rank[f.Entry.ID] != len(order)-1 {
+		log.Fatalf("entry block %v not last in postorder", f.Entry)
+	}
+
+	// Relax idom entries until a full sweep changes nothing.
+	for changed := true; changed; {
+		changed = false
+
+		// Sweep in reverse postorder, skipping the entry block.
+		for n := len(order) - 2; n >= 0; n-- {
+			blk := order[n]
+			var cand *Block
+			for _, pred := range blk.Preds {
+				switch {
+				case idom[pred.ID] == nil:
+					// pred has no dominator assigned; ignore it.
+				case cand == nil:
+					cand = pred
+				default:
+					cand = intersect(cand, pred, rank, idom)
+				}
+			}
+			if cand != idom[blk.ID] {
+				idom[blk.ID] = cand
+				changed = true
+			}
+		}
+	}
+
+	// Set idom of entry block to nil instead of itself.
+	idom[f.Entry.ID] = nil
+	return idom
+}
+
+// intersect finds the closest dominator of both b and c by walking the
+// idom links upward. It requires a postorder numbering of all the blocks.
+func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
+	for b != c {
+		if postnum[c.ID] > postnum[b.ID] {
+			b = idom[b.ID]
+			continue
+		}
+		c = idom[c.ID]
+	}
+	return b
+}
diff --git a/src/cmd/internal/ssa/lower.go b/src/cmd/internal/ssa/lower.go
index 7d97b0b466..18fe9861a6 100644
--- a/src/cmd/internal/ssa/lower.go
+++ b/src/cmd/internal/ssa/lower.go
@@ -39,5 +39,4 @@ func lower(f *Func) {
 			// TODO: others
 		}
 	}
-	deadcode(f) // TODO: separate pass?
 }