go/types: sort to reduce computational complexity of initOrder

Our calculation of initOrder builds the dependency graph and then removes function nodes approximately at random. While profiling, I noticed that this latter step introduces a superlinear algorithm into our type checking pass, which can dominate type checking for large packages such as runtime. It is hard to analyze this rigorously, but to give an idea of how such a non-linearity could arise, suppose the following assumptions hold: - Every function makes D calls at random to other functions in the package, for some fixed constant D. - The number of functions is proportional to N, the size of the package. Under these simplified assumptions, the cost of removing an arbitrary function F is P*D, where P is the expected number of functions calling F. P has a Poisson distribution with mean D. Now consider the fact that when removing a function F in position i, we recursively pay the cost of copying F's predecessors and successors for each node in the remaining unremoved subgraph of functions containing F. With our assumptions, the size of this subgraph is proportional to (N-i), the number of remaining functions to remove. Therefore, the total cost of removing functions is proportional to P*D*Σᴺ(N-i) which is proportional to N². However, if we remove functions in ascending order of cost, we can partition by the number of predecessors, and the total cost of removing functions is proportional to N*D*Σ(PMF(X)) where PMF is the probability mass function of P. In other words cost is proportional to N. Assuming the above analysis is correct, it is still the case that the initial assumptions are naive. Many large packages are more accurately characterized as combinations of many smaller packages. Nevertheless, it is intuitively clear that removing expensive nodes last should be cheaper. Therefore, we sort by cost first before removing nodes in dependencyGraph. We also move deletes to the outer loop, to avoid redundant deletes. By inspection, this avoids a bug where n may not have been removed from its successors if n had no predecessors. name old time/op new time/op delta Check/runtime/funcbodies/noinfo-8 568ms ±25% 82ms ± 1% -85.53% (p=0.000 n=8+10) name old lines/s new lines/s delta Check/runtime/funcbodies/noinfo-8 93.1k ±56% 705.1k ± 1% +657.63% (p=0.000 n=10+10) Updates #49856 Change-Id: Id2e70d67401af19205e1e0b9947baa16dd6506f0 Reviewed-on: https://go-review.googlesource.com/c/go/+/369434 Trust: Robert Findley <rfindley@google.com> Run-TryBot: Robert Findley <rfindley@google.com> Reviewed-by: Robert Griesemer <gri@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org>
2024-11-17 04:04:46 -07:00 · 2021-12-04 09:35:34 -05:00 · 2021-12-04 09:35:34 -05:00 · ac7e950d38
commit ac7e950d38
parent c759ec2284
2 changed files with 42 additions and 21 deletions
--- a/src/go/types/initorder.go
+++ b/src/go/types/initorder.go
@ -7,6 +7,7 @@ package types
 import (
 	"container/heap"
 	"fmt"
+	"sort"
 )

 // initOrder computes the Info.InitOrder for package variables.
@ -184,6 +185,12 @@ type graphNode struct {
 	ndeps      int        // number of outstanding dependencies before this object can be initialized
 }

+// cost returns the cost of removing this node, which involves copying each
+// predecessor to each successor (and vice-versa).
+func (n *graphNode) cost() int {
+	return len(n.pred) * len(n.succ)
+}
+
 type nodeSet map[*graphNode]bool

 func (s *nodeSet) add(p *graphNode) {
@ -221,35 +228,48 @@ func dependencyGraph(objMap map[Object]*declInfo) []*graphNode {
 		}
 	}

+	var G, funcG []*graphNode // separate non-functions and functions
+	for _, n := range M {
+		if _, ok := n.obj.(*Func); ok {
+			funcG = append(funcG, n)
+		} else {
+			G = append(G, n)
+		}
+	}
+
 	// remove function nodes and collect remaining graph nodes in G
 	// (Mutually recursive functions may introduce cycles among themselves
 	// which are permitted. Yet such cycles may incorrectly inflate the dependency
 	// count for variables which in turn may not get scheduled for initialization
 	// in correct order.)
-	var G []*graphNode
-	for obj, n := range M {
-		if _, ok := obj.(*Func); ok {
-			// connect each predecessor p of n with each successor s
-			// and drop the function node (don't collect it in G)
-			for p := range n.pred {
-				// ignore self-cycles
-				if p != n {
-					// Each successor s of n becomes a successor of p, and
-					// each predecessor p of n becomes a predecessor of s.
-					for s := range n.succ {
-						// ignore self-cycles
-						if s != n {
-							p.succ.add(s)
-							s.pred.add(p)
-							delete(s.pred, n) // remove edge to n
-						}
+	//
+	// Note that because we recursively copy predecessors and successors
+	// throughout the function graph, the cost of removing a function at
+	// position X is proportional to cost * (len(funcG)-X). Therefore, we should
+	// remove high-cost functions last.
+	sort.Slice(funcG, func(i, j int) bool {
+		return funcG[i].cost() < funcG[j].cost()
+	})
+	for _, n := range funcG {
+		// connect each predecessor p of n with each successor s
+		// and drop the function node (don't collect it in G)
+		for p := range n.pred {
+			// ignore self-cycles
+			if p != n {
+				// Each successor s of n becomes a successor of p, and
+				// each predecessor p of n becomes a predecessor of s.
+				for s := range n.succ {
+					// ignore self-cycles
+					if s != n {
+						p.succ.add(s)
+						s.pred.add(p)
 					}
-					delete(p.succ, n) // remove edge to n
 				}
+				delete(p.succ, n) // remove edge to n
 			}
-		} else {
-			// collect non-function nodes
-			G = append(G, n)
+		}
+		for s := range n.succ {
+			delete(s.pred, n) // remove edge to n
 		}
 	}

--- a/src/go/types/self_test.go
+++ b/src/go/types/self_test.go
@ -36,6 +36,7 @@ func BenchmarkCheck(b *testing.B) {
 		"net/http",
 		"go/parser",
 		"go/constant",
+		"runtime",
 		filepath.Join("go", "internal", "gcimporter"),
 	} {
 		b.Run(path.Base(p), func(b *testing.B) {