From 149671dfc30889b72254a7a43ba515783b4c5bf7 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Fri, 27 Mar 2015 13:41:30 -0700
Subject: [PATCH] [dev.ssa] cmd/internal/ssa: add CSE pass

Implement a simple common-subexpression elimination.
It uses value numbering & a dominator tree to detect redundant computation.

Change-Id: Id0ff775e439c22f4d41bdd5976176017dd2a2086
Reviewed-on: https://go-review.googlesource.com/8172
Reviewed-by: Alan Donovan <adonovan@google.com>
---
 src/cmd/internal/ssa/compile.go |   7 +-
 src/cmd/internal/ssa/cse.go     | 163 ++++++++++++++++++++++++++++++++
 src/cmd/internal/ssa/dom.go     | 121 ++++++++++++++++++++++++
 src/cmd/internal/ssa/lower.go   |   1 -
 4 files changed, 288 insertions(+), 4 deletions(-)
 create mode 100644 src/cmd/internal/ssa/cse.go
 create mode 100644 src/cmd/internal/ssa/dom.go

diff --git a/src/cmd/internal/ssa/compile.go b/src/cmd/internal/ssa/compile.go
index 6103cc9557..08477d470c 100644
--- a/src/cmd/internal/ssa/compile.go
+++ b/src/cmd/internal/ssa/compile.go
@@ -54,11 +54,12 @@ var passes = [...]pass{
 	{"phielim", phielim},
 	{"copyelim", copyelim},
 	{"opt", opt},
-	// cse
-	{"deadcode", deadcode},
+	{"generic cse", cse},
+	{"generic deadcode", deadcode},
 	{"fuse", fuse},
 	{"lower", lower},
-	// cse
+	{"lowered cse", cse},
+	{"lowered deadcode", deadcode},
 	{"critical", critical}, // remove critical edges
 	{"layout", layout},     // schedule blocks
 	{"schedule", schedule}, // schedule values
diff --git a/src/cmd/internal/ssa/cse.go b/src/cmd/internal/ssa/cse.go
new file mode 100644
index 0000000000..71f23013cf
--- /dev/null
+++ b/src/cmd/internal/ssa/cse.go
@@ -0,0 +1,163 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+	"sort"
+)
+
+// cse does common-subexpression elimination on the Function.
+// Values are just relinked, nothing is deleted.  A subsequent deadcode
+// pass is required to actually remove duplicate expressions.
+func cse(f *Func) {
+	// Two values are equivalent if they satisfy the following definition:
+	// equivalent(v, w):
+	//   v.op == w.op
+	//   v.type == w.type
+	//   v.aux == w.aux
+	//   len(v.args) == len(w.args)
+	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
+
+	// The algorithm searches for a partition of f's values into
+	// equivalence classes using the above definition.
+	// It starts with a coarse partition and iteratively refines it
+	// until it reaches a fixed point.
+
+	// Make initial partition based on opcode/type/aux/nargs
+	// TODO(khr): types are not canonical, so we may split unnecessarily.  Fix that.
+	type key struct {
+		op    Op
+		typ   Type
+		aux   interface{}
+		nargs int
+	}
+	m := map[key]eqclass{}
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			k := key{v.Op, v.Type, v.Aux, len(v.Args)}
+			m[k] = append(m[k], v)
+		}
+	}
+
+	// A partition is a set of disjoint eqclasses.
+	var partition []eqclass
+	for _, v := range m {
+		partition = append(partition, v)
+	}
+
+	// map from value id back to eqclass id
+	valueEqClass := make([]int, f.NumValues())
+	for i, e := range partition {
+		for _, v := range e {
+			valueEqClass[v.ID] = i
+		}
+	}
+
+	// Find an equivalence class where some members of the class have
+	// non-equvalent arguments.  Split the equivalence class appropriately.
+	// Repeat until we can't find any more splits.
+	for {
+		changed := false
+
+		for i, e := range partition {
+			v := e[0]
+			// all values in this equiv class that are not equivalent to v get moved
+			// into another equiv class q.
+			var q eqclass
+		eqloop:
+			for j := 1; j < len(e); {
+				w := e[j]
+				for i := 0; i < len(v.Args); i++ {
+					if valueEqClass[v.Args[i].ID] != valueEqClass[w.Args[i].ID] {
+						// w is not equivalent to v.
+						// remove w from e
+						e, e[j] = e[:len(e)-1], e[len(e)-1]
+						// add w to q
+						q = append(q, w)
+						valueEqClass[w.ID] = len(partition)
+						changed = true
+						continue eqloop
+					}
+				}
+				// v and w are equivalent.  Keep w in e.
+				j++
+			}
+			partition[i] = e
+			if q != nil {
+				partition = append(partition, q)
+			}
+		}
+
+		if !changed {
+			break
+		}
+	}
+
+	// Compute dominator tree
+	idom := dominators(f)
+
+	// Compute substitutions we would like to do.  We substitute v for w
+	// if v and w are in the same equivalence class and v dominates w.
+	rewrite := make([]*Value, f.NumValues())
+	for _, e := range partition {
+		sort.Sort(e) // ensure deterministic ordering
+		for len(e) > 1 {
+			// Find a maximal dominant element in e
+			v := e[0]
+			for _, w := range e[1:] {
+				if dom(w.Block, v.Block, idom) {
+					v = w
+				}
+			}
+
+			// Replace all elements of e which v dominates
+			for i := 0; i < len(e); {
+				w := e[i]
+				if w != v && dom(v.Block, w.Block, idom) {
+					rewrite[w.ID] = v
+					e, e[i] = e[:len(e)-1], e[len(e)-1]
+				} else {
+					i++
+				}
+			}
+			// TODO(khr): if value is a control value, do we need to keep it block-local?
+		}
+	}
+
+	// Apply substitutions
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, w := range v.Args {
+				if x := rewrite[w.ID]; x != nil {
+					v.SetArg(i, x)
+				}
+			}
+		}
+	}
+}
+
+// returns true if b dominates c.
+// TODO(khr): faster
+func dom(b, c *Block, idom []*Block) bool {
+	// Walk up from c in the dominator tree looking for b.
+	for c != nil {
+		if c == b {
+			return true
+		}
+		c = idom[c.ID]
+	}
+	// Reached the entry block, never saw b.
+	return false
+}
+
+// An eqclass approximates an equivalence class.  During the
+// algorithm it may represent the union of several of the
+// final equivalence classes.
+type eqclass []*Value
+
+// Sort an equivalence class by value ID.
+func (e eqclass) Len() int           { return len(e) }
+func (e eqclass) Swap(i, j int)      { e[i], e[j] = e[j], e[i] }
+func (e eqclass) Less(i, j int) bool { return e[i].ID < e[j].ID }
diff --git a/src/cmd/internal/ssa/dom.go b/src/cmd/internal/ssa/dom.go
new file mode 100644
index 0000000000..aaf3ab3da1
--- /dev/null
+++ b/src/cmd/internal/ssa/dom.go
@@ -0,0 +1,121 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// This file contains code to compute the dominator tree
+// of a control-flow graph.
+
+import "log"
+
+// postorder computes a postorder traversal ordering for the
+// basic blocks in f.  Unreachable blocks will not appear.
+func postorder(f *Func) []*Block {
+	mark := make([]byte, f.NumBlocks())
+	// mark values
+	const (
+		notFound    = 0 // block has not been discovered yet
+		notExplored = 1 // discovered and in queue, outedges not processed yet
+		explored    = 2 // discovered and in queue, outedges processed
+		done        = 3 // all done, in output ordering
+	)
+
+	// result ordering
+	var order []*Block
+
+	// stack of blocks
+	var s []*Block
+	s = append(s, f.Entry)
+	mark[f.Entry.ID] = notExplored
+	for len(s) > 0 {
+		b := s[len(s)-1]
+		switch mark[b.ID] {
+		case explored:
+			// Children have all been visited.  Pop & output block.
+			s = s[:len(s)-1]
+			mark[b.ID] = done
+			order = append(order, b)
+		case notExplored:
+			// Children have not been visited yet.  Mark as explored
+			// and queue any children we haven't seen yet.
+			mark[b.ID] = explored
+			for _, c := range b.Succs {
+				if mark[c.ID] == notFound {
+					mark[c.ID] = notExplored
+					s = append(s, c)
+				}
+			}
+		default:
+			log.Fatalf("bad stack state %v %d", b, mark[b.ID])
+		}
+	}
+	return order
+}
+
+// dominators computes the dominator tree for f.  It returns a slice
+// which maps block ID to the immediate dominator of that block.
+// Unreachable blocks map to nil.  The entry block maps to nil.
+func dominators(f *Func) []*Block {
+	// A simple algorithm for now
+	// Cooper, Harvey, Kennedy
+	idom := make([]*Block, f.NumBlocks())
+
+	// Compute postorder walk
+	post := postorder(f)
+
+	// Make map from block id to order index (for intersect call)
+	postnum := make([]int, f.NumBlocks())
+	for i, b := range post {
+		postnum[b.ID] = i
+	}
+
+	// Make the entry block a self-loop
+	idom[f.Entry.ID] = f.Entry
+	if postnum[f.Entry.ID] != len(post)-1 {
+		log.Fatalf("entry block %v not last in postorder", f.Entry)
+	}
+
+	// Compute relaxation of idom entries
+	for {
+		changed := false
+
+		for i := len(post) - 2; i >= 0; i-- {
+			b := post[i]
+			var d *Block
+			for _, p := range b.Preds {
+				if idom[p.ID] == nil {
+					continue
+				}
+				if d == nil {
+					d = p
+					continue
+				}
+				d = intersect(d, p, postnum, idom)
+			}
+			if d != idom[b.ID] {
+				idom[b.ID] = d
+				changed = true
+			}
+		}
+		if !changed {
+			break
+		}
+	}
+	// Set idom of entry block to nil instead of itself.
+	idom[f.Entry.ID] = nil
+	return idom
+}
+
+// intersect finds the closest dominator of both b and c.
+// It requires a postorder numbering of all the blocks.
+func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
+	for b != c {
+		if postnum[b.ID] < postnum[c.ID] {
+			b = idom[b.ID]
+		} else {
+			c = idom[c.ID]
+		}
+	}
+	return b
+}
diff --git a/src/cmd/internal/ssa/lower.go b/src/cmd/internal/ssa/lower.go
index 7d97b0b466..18fe9861a6 100644
--- a/src/cmd/internal/ssa/lower.go
+++ b/src/cmd/internal/ssa/lower.go
@@ -39,5 +39,4 @@ func lower(f *Func) {
 			// TODO: others
 		}
 	}
-	deadcode(f) // TODO: separate pass?
 }