go/ast/inspector: faster (amortized) AST traversals

This new package provides helper functions for traversal over the syntax trees of a package, including node filtering by type, and materialization of the traversal stack. During construction, the inspector does a complete traversal and builds a list of push/pop events and their node type. Subsequent method calls that request a traversal scan this list, rather than walk the AST, and perform type filtering using efficient bit sets. Experiments suggest the inspector's traversals are about 2.5x faster than ast.Inspect, but it may take around 5 traversals for this benefit to amortize the inspector's construction cost. This design is well-suited to the ongoing reworking of cmd/vet (see docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk), which historically made a single pass over the ASTs but is being replaced by a design that requires a separate pass for each analysis. Change-Id: I9a67aed6a3bf948076641d96447860d97ede67b4 Reviewed-on: https://go-review.googlesource.com/135655 Reviewed-by: Robert Griesemer <gri@golang.org>
2024-11-18 17:54:57 -07:00 · 2018-09-17 09:36:13 -04:00 · 2018-09-17 09:36:13 -04:00 · b3c0be4c97
commit b3c0be4c97
parent 16720d5f2d
3 changed files with 618 additions and 0 deletions
--- a/go/ast/inspector/inspector.go
+++ b/go/ast/inspector/inspector.go
@ -0,0 +1,182 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package inspector provides helper functions for traversal over the
+// syntax trees of a package, including node filtering by type, and
+// materialization of the traversal stack.
+//
+// During construction, the inspector does a complete traversal and
+// builds a list of push/pop events and their node type. Subsequent
+// method calls that request a traversal scan this list, rather than walk
+// the AST, and perform type filtering using efficient bit sets.
+//
+// Experiments suggest the inspector's traversals are about 2.5x faster
+// than ast.Inspect, but it may take around 5 traversals for this
+// benefit to amortize the inspector's construction cost.
+// If efficiency is the primary concern, do not use Inspector for
+// one-off traversals.
+package inspector
+
+// There are four orthogonal features in a traversal:
+//  1 type filtering
+//  2 pruning
+//  3 postorder calls to f
+//  4 stack
+// Rather than offer all of them in the API,
+// only a few combinations are exposed:
+// - Preorder is the fastest and has fewest features,
+//   but is the most commonly needed traversal.
+// - Nodes and WithStack both provide pruning and postorder calls,
+//   even though few clients need it, because supporting two versions
+//   is not justified.
+// More combinations could be supported by expressing them as
+// wrappers around a more generic traversal, but this was measured
+// and found to degrade performance significantly (30%).
+
+import (
+	"go/ast"
+)
+
+// An Inspector provides methods for inspecting
+// (traversing) the syntax trees of a package.
+type Inspector struct {
+	events []event // push/pop events recorded once by traverse, in traversal order
+}
+
+// New returns an Inspector for the specified syntax trees.
+// The trees are traversed once, up front, to record the list of
+// events that every later traversal method scans.
+func New(files []*ast.File) *Inspector {
+	return &Inspector{events: traverse(files)}
+}
+
+// An event represents a push (entering a node) or a pop
+// (leaving a node) of an ast.Node during a traversal.
+type event struct {
+	node  ast.Node
+	typ   uint64 // typeOf(node)
+	index int    // 1 + index of corresponding pop event, or 0 if this is a pop
+}
+
+// Preorder visits all the nodes of the files supplied to New in
+// depth-first order. It calls f(n) for each node n before it visits
+// n's children.
+//
+// The types argument, if non-empty, enables type-based filtering of
+// events. The function f is called only for nodes whose type
+// matches an element of the types slice.
+func (in *Inspector) Preorder(types []ast.Node, f func(ast.Node)) {
+	// Because it avoids postorder calls to f, and the pruning
+	// check, Preorder is almost twice as fast as Nodes. The two
+	// features seem to contribute similar slowdowns (~1.4x each).
+
+	mask := maskOf(types)
+	for _, ev := range in.events {
+		// Report push events only (index > 0); pop events are
+		// skipped because Preorder makes no postorder calls.
+		if ev.index > 0 && ev.typ&mask != 0 {
+			f(ev.node)
+		}
+	}
+}
+
+// Nodes visits the nodes of the files supplied to New in depth-first
+// order. It calls f(n, true) for each node n before it visits n's
+// children. If f returns true, Nodes invokes f recursively for each
+// of the non-nil children of the node, followed by a call of
+// f(n, false). If f returns false, the children of n are skipped and
+// no f(n, false) call is made; note that, despite the name of the
+// result, it is a false value that prunes the traversal.
+//
+// The types argument, if non-empty, enables type-based filtering of
+// events. The function f is called only for nodes whose type
+// matches an element of the types slice.
+func (in *Inspector) Nodes(types []ast.Node, f func(n ast.Node, push bool) (prune bool)) {
+	mask := maskOf(types)
+	next := 0 // index of the next event to examine
+	for next < len(in.events) {
+		ev := in.events[next]
+		next++
+		switch {
+		case ev.typ&mask == 0:
+			// Filtered out: f is not called for this event.
+		case ev.index > 0:
+			// push
+			if !f(ev.node, true) {
+				next = ev.index // jump to corresponding pop + 1
+			}
+		default:
+			// pop; the result of f is ignored
+			f(ev.node, false)
+		}
+	}
+}
+
+// WithStack visits nodes in a similar manner to Nodes, but it
+// supplies each call to f an additional argument, the current
+// traversal stack. The stack's first element is the outermost node,
+// an *ast.File; its last is the innermost, n.
+//
+// The stack records every enclosing node, whether or not its type is
+// selected by types. Its backing array is reused from one call of f
+// to the next, so f must copy the slice if it needs to retain it.
+func (in *Inspector) WithStack(types []ast.Node, f func(n ast.Node, push bool, stack []ast.Node) (prune bool)) {
+	mask := maskOf(types)
+	var path []ast.Node
+	next := 0 // index of the next event to examine
+	for next < len(in.events) {
+		ev := in.events[next]
+		next++
+		selected := ev.typ&mask != 0
+
+		if ev.index > 0 {
+			// push
+			path = append(path, ev.node)
+			if selected && !f(ev.node, true, path) {
+				// Skip the subtree and the matching pop event,
+				// so the node must be popped here instead.
+				next = ev.index
+				path = path[:len(path)-1]
+			}
+			continue
+		}
+
+		// pop
+		if selected {
+			f(ev.node, false, path)
+		}
+		path = path[:len(path)-1]
+	}
+}
+
+// traverse builds the table of events representing a traversal.
+// Each node contributes a push event followed, after the events of
+// its descendants, by a pop event. On return, the index field of a
+// push is 1 + the position of its matching pop, and that of a pop is 0.
+func traverse(files []*ast.File) []event {
+	// Preallocate approximate number of events
+	// based on source file extent.
+	// This makes traverse faster by 4x (!).
+	var extent int
+	for _, f := range files {
+		extent += int(f.End() - f.Pos())
+	}
+	// This estimate is based on the net/http package.
+	events := make([]event, 0, extent*33/100)
+
+	// open holds the push events whose pops have not yet been seen.
+	var open []event
+
+	// visit is called by ast.Inspect with a node on entry
+	// and with nil once that node's children are done.
+	visit := func(n ast.Node) bool {
+		if n == nil {
+			// pop
+			top := len(open) - 1
+			ev := open[top]
+			open = open[:top]
+
+			events[ev.index].index = len(events) + 1 // make push refer to pop
+
+			ev.index = 0 // turn ev into a pop event
+			events = append(events, ev)
+			return true
+		}
+
+		// push
+		ev := event{
+			node:  n,
+			typ:   typeOf(n),
+			index: len(events), // push event temporarily holds own index
+		}
+		open = append(open, ev)
+		events = append(events, ev)
+		return true
+	}
+
+	for _, f := range files {
+		ast.Inspect(f, visit)
+	}
+
+	return events
+}
--- a/go/ast/inspector/inspector_test.go
+++ b/go/ast/inspector/inspector_test.go
@ -0,0 +1,220 @@
+package inspector_test
+
+import (
+	"go/ast"
+	"go/build"
+	"go/parser"
+	"go/token"
+	"log"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"testing"
+
+	"golang.org/x/tools/go/ast/inspector"
+)
+
+// netFiles holds the syntax trees returned by parseNetFiles,
+// shared by the tests and benchmarks in this file.
+var netFiles []*ast.File
+
+// init populates netFiles, aborting the test binary on failure.
+func init() {
+	var err error
+	if netFiles, err = parseNetFiles(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// parseNetFiles locates package "net" using the default build
+// context and parses each of its GoFiles into a single FileSet.
+// It returns the first import or parse error encountered.
+func parseNetFiles() ([]*ast.File, error) {
+	pkg, err := build.Default.Import("net", "", 0)
+	if err != nil {
+		return nil, err
+	}
+
+	var (
+		fset   = token.NewFileSet()
+		parsed []*ast.File
+	)
+	for _, name := range pkg.GoFiles {
+		path := filepath.Join(pkg.Dir, name)
+		file, err := parser.ParseFile(fset, path, nil, 0)
+		if err != nil {
+			return nil, err
+		}
+		parsed = append(parsed, file)
+	}
+	return parsed, nil
+}
+
+// TestInspectAllNodes compares Inspector against ast.Inspect:
+// an unfiltered, unpruned traversal by each must report the
+// same nodes in the same order.
+func TestInspectAllNodes(t *testing.T) {
+	var got []ast.Node
+	inspector.New(netFiles).Nodes(nil, func(n ast.Node, push bool) bool {
+		if push {
+			got = append(got, n)
+		}
+		return true
+	})
+
+	var want []ast.Node
+	for _, file := range netFiles {
+		ast.Inspect(file, func(n ast.Node) bool {
+			if n != nil {
+				want = append(want, n)
+			}
+			return true
+		})
+	}
+
+	compare(t, got, want)
+}
+
+// TestInspectPruning compares Inspector against ast.Inspect,
+// pruning descent within ast.CallExpr nodes.
+func TestInspectPruning(t *testing.T) {
+	// descend reports whether the traversal should enter n's children.
+	descend := func(n ast.Node) bool {
+		_, isCall := n.(*ast.CallExpr)
+		return !isCall // don't descend into function calls
+	}
+
+	var got []ast.Node
+	inspector.New(netFiles).Nodes(nil, func(n ast.Node, push bool) bool {
+		if !push {
+			return false
+		}
+		got = append(got, n)
+		return descend(n)
+	})
+
+	var want []ast.Node
+	for _, file := range netFiles {
+		ast.Inspect(file, func(n ast.Node) bool {
+			if n == nil {
+				return false
+			}
+			want = append(want, n)
+			return descend(n)
+		})
+	}
+
+	compare(t, got, want)
+}
+
+// compare reports a test error if the two node lists differ in
+// length or, when the lengths agree, at each position where the
+// corresponding nodes are not identical.
+func compare(t *testing.T, nodesA, nodesB []ast.Node) {
+	if len(nodesA) != len(nodesB) {
+		t.Errorf("inconsistent node lists: %d vs %d", len(nodesA), len(nodesB))
+		return
+	}
+	for i, a := range nodesA {
+		if b := nodesB[i]; a != b {
+			t.Errorf("node %d is inconsistent: %T, %T", i, a, b)
+		}
+	}
+}
+
+// TestTypeFiltering checks, on a small fixed source file, the node
+// types reported by Nodes without a filter, by Nodes with a type
+// filter, and the stacks reported by WithStack with the same filter.
+func TestTypeFiltering(t *testing.T) {
+	const src = `package a
+func f() {
+	print("hi")
+	panic("oops")
+}
+`
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, "a.go", src, 0)
+	if err != nil {
+		// Fail here rather than traverse a nil or partial file.
+		t.Fatal(err)
+	}
+	inspect := inspector.New([]*ast.File{f})
+
+	// fn records the type name of each node on its push event.
+	var got []string
+	fn := func(n ast.Node, push bool) bool {
+		if push {
+			got = append(got, typeOf(n))
+		}
+		return true
+	}
+
+	// no type filtering
+	inspect.Nodes(nil, fn)
+	if want := strings.Fields("File Ident FuncDecl Ident FuncType FieldList BlockStmt ExprStmt CallExpr Ident BasicLit ExprStmt CallExpr Ident BasicLit"); !reflect.DeepEqual(got, want) {
+		t.Errorf("inspect: got %s, want %s", got, want)
+	}
+
+	// type filtering
+	nodeTypes := []ast.Node{
+		(*ast.BasicLit)(nil),
+		(*ast.CallExpr)(nil),
+	}
+	got = nil
+	inspect.Nodes(nodeTypes, fn)
+	if want := strings.Fields("CallExpr BasicLit CallExpr BasicLit"); !reflect.DeepEqual(got, want) {
+		t.Errorf("inspect: got %s, want %s", got, want)
+	}
+
+	// inspect with stack: each entry is the space-separated
+	// type names of the stack, outermost first
+	got = nil
+	inspect.WithStack(nodeTypes, func(n ast.Node, push bool, stack []ast.Node) bool {
+		if push {
+			var line []string
+			for _, n := range stack {
+				line = append(line, typeOf(n))
+			}
+			got = append(got, strings.Join(line, " "))
+		}
+		return true
+	})
+	want := []string{
+		"File FuncDecl BlockStmt ExprStmt CallExpr",
+		"File FuncDecl BlockStmt ExprStmt CallExpr BasicLit",
+		"File FuncDecl BlockStmt ExprStmt CallExpr",
+		"File FuncDecl BlockStmt ExprStmt CallExpr BasicLit",
+	}
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("inspect: got %s, want %s", got, want)
+	}
+}
+
+// typeOf returns the name of n's dynamic type with any leading
+// "*ast." removed, e.g. "CallExpr" for an *ast.CallExpr.
+func typeOf(n ast.Node) string {
+	name := reflect.TypeOf(n).String()
+	return strings.TrimPrefix(name, "*ast.")
+}
+
+// The numbers show a marginal improvement (ASTInspect/Inspect) of 3.5x,
+// but a break-even point (NewInspector/(ASTInspect-Inspect)) of about 5
+// traversals.
+//
+// BenchmarkNewInspector   4.5 ms
+// BenchmarkInspect	   0.33ms
+// BenchmarkASTInspect    1.2  ms
+
+// BenchmarkNewInspector measures the one-time construction
+// overhead of an Inspector over netFiles.
+func BenchmarkNewInspector(b *testing.B) {
+	for n := b.N; n > 0; n-- {
+		inspector.New(netFiles)
+	}
+}
+
+// BenchmarkInspect measures the marginal cost of one unfiltered
+// Preorder traversal by an already constructed Inspector.
+func BenchmarkInspect(b *testing.B) {
+	// Keep construction out of the timed region.
+	b.StopTimer()
+	inspect := inspector.New(netFiles)
+	b.StartTimer()
+
+	var ndecls, nlits int
+	count := func(n ast.Node) {
+		switch n.(type) {
+		case *ast.FuncDecl:
+			ndecls++
+		case *ast.FuncLit:
+			nlits++
+		}
+	}
+	for i := 0; i < b.N; i++ {
+		inspect.Preorder(nil, count)
+	}
+}
+
+// BenchmarkASTInspect measures a complete ast.Inspect walk over
+// netFiles doing the same per-node work as BenchmarkInspect.
+func BenchmarkASTInspect(b *testing.B) {
+	var ndecls, nlits int
+	count := func(n ast.Node) bool {
+		switch n.(type) {
+		case *ast.FuncDecl:
+			ndecls++
+		case *ast.FuncLit:
+			nlits++
+		}
+		return true
+	}
+	for i := 0; i < b.N; i++ {
+		for _, file := range netFiles {
+			ast.Inspect(file, count)
+		}
+	}
+}
--- a/go/ast/inspector/typeof.go
+++ b/go/ast/inspector/typeof.go
@ -0,0 +1,216 @@
+package inspector
+
+// This file defines func typeOf(ast.Node) uint64.
+//
+// The initial map-based implementation was too slow;
+// see https://go-review.googlesource.com/c/tools/+/135655/1/go/ast/inspector/inspector.go#196
+
+import "go/ast"
+
+const (
+	nArrayType = iota // bit index of each node type; typeOf returns 1 << index
+	nAssignStmt
+	nBadDecl
+	nBadExpr
+	nBadStmt
+	nBasicLit
+	nBinaryExpr
+	nBlockStmt
+	nBranchStmt
+	nCallExpr
+	nCaseClause
+	nChanType
+	nCommClause
+	nComment
+	nCommentGroup
+	nCompositeLit
+	nDeclStmt
+	nDeferStmt
+	nEllipsis
+	nEmptyStmt
+	nExprStmt
+	nField
+	nFieldList
+	nFile
+	nForStmt
+	nFuncDecl
+	nFuncLit
+	nFuncType
+	nGenDecl
+	nGoStmt
+	nIdent
+	nIfStmt
+	nImportSpec
+	nIncDecStmt
+	nIndexExpr
+	nInterfaceType
+	nKeyValueExpr
+	nLabeledStmt
+	nMapType
+	nPackage
+	nParenExpr
+	nRangeStmt
+	nReturnStmt
+	nSelectStmt
+	nSelectorExpr
+	nSendStmt
+	nSliceExpr
+	nStarExpr
+	nStructType
+	nSwitchStmt
+	nTypeAssertExpr
+	nTypeSpec
+	nTypeSwitchStmt
+	nUnaryExpr
+	nValueSpec
+)
+
+// typeOf returns a distinct single-bit value that represents the type of n.
+//
+// Various implementations were benchmarked with BenchmarkNewInspector:
+//								GOGC=off
+// - type switch				4.9-5.5ms	2.1ms
+// - binary search over a sorted list of types  5.5-5.9ms	2.5ms
+// - linear scan, frequency-ordered list 	5.9-6.1ms	2.7ms
+// - linear scan, unordered list		6.4ms		2.7ms
+// - hash table					6.5ms		3.1ms
+// A perfect hash seemed like overkill.
+//
+// The compiler's switch statement is the clear winner
+// as it produces a binary tree in code,
+// with constant conditions and good branch prediction.
+// (Sadly it is the most verbose in source code.)
+// Binary search suffered from poor branch prediction.
+//
+func typeOf(n ast.Node) uint64 {
+	// Fast path: nearly half of all nodes are identifiers.
+	if _, ok := n.(*ast.Ident); ok {
+		return 1 << nIdent
+	}
+
+	// These cases include all nodes encountered by ast.Inspect.
+	switch n.(type) {
+	case *ast.ArrayType:
+		return 1 << nArrayType
+	case *ast.AssignStmt:
+		return 1 << nAssignStmt
+	case *ast.BadDecl:
+		return 1 << nBadDecl
+	case *ast.BadExpr:
+		return 1 << nBadExpr
+	case *ast.BadStmt:
+		return 1 << nBadStmt
+	case *ast.BasicLit:
+		return 1 << nBasicLit
+	case *ast.BinaryExpr:
+		return 1 << nBinaryExpr
+	case *ast.BlockStmt:
+		return 1 << nBlockStmt
+	case *ast.BranchStmt:
+		return 1 << nBranchStmt
+	case *ast.CallExpr:
+		return 1 << nCallExpr
+	case *ast.CaseClause:
+		return 1 << nCaseClause
+	case *ast.ChanType:
+		return 1 << nChanType
+	case *ast.CommClause:
+		return 1 << nCommClause
+	case *ast.Comment:
+		return 1 << nComment
+	case *ast.CommentGroup:
+		return 1 << nCommentGroup
+	case *ast.CompositeLit:
+		return 1 << nCompositeLit
+	case *ast.DeclStmt:
+		return 1 << nDeclStmt
+	case *ast.DeferStmt:
+		return 1 << nDeferStmt
+	case *ast.Ellipsis:
+		return 1 << nEllipsis
+	case *ast.EmptyStmt:
+		return 1 << nEmptyStmt
+	case *ast.ExprStmt:
+		return 1 << nExprStmt
+	case *ast.Field:
+		return 1 << nField
+	case *ast.FieldList:
+		return 1 << nFieldList
+	case *ast.File:
+		return 1 << nFile
+	case *ast.ForStmt:
+		return 1 << nForStmt
+	case *ast.FuncDecl:
+		return 1 << nFuncDecl
+	case *ast.FuncLit:
+		return 1 << nFuncLit
+	case *ast.FuncType:
+		return 1 << nFuncType
+	case *ast.GenDecl:
+		return 1 << nGenDecl
+	case *ast.GoStmt:
+		return 1 << nGoStmt
+	case *ast.Ident: // never reached: identifiers are handled by the fast path above
+		return 1 << nIdent
+	case *ast.IfStmt:
+		return 1 << nIfStmt
+	case *ast.ImportSpec:
+		return 1 << nImportSpec
+	case *ast.IncDecStmt:
+		return 1 << nIncDecStmt
+	case *ast.IndexExpr:
+		return 1 << nIndexExpr
+	case *ast.InterfaceType:
+		return 1 << nInterfaceType
+	case *ast.KeyValueExpr:
+		return 1 << nKeyValueExpr
+	case *ast.LabeledStmt:
+		return 1 << nLabeledStmt
+	case *ast.MapType:
+		return 1 << nMapType
+	case *ast.Package:
+		return 1 << nPackage
+	case *ast.ParenExpr:
+		return 1 << nParenExpr
+	case *ast.RangeStmt:
+		return 1 << nRangeStmt
+	case *ast.ReturnStmt:
+		return 1 << nReturnStmt
+	case *ast.SelectStmt:
+		return 1 << nSelectStmt
+	case *ast.SelectorExpr:
+		return 1 << nSelectorExpr
+	case *ast.SendStmt:
+		return 1 << nSendStmt
+	case *ast.SliceExpr:
+		return 1 << nSliceExpr
+	case *ast.StarExpr:
+		return 1 << nStarExpr
+	case *ast.StructType:
+		return 1 << nStructType
+	case *ast.SwitchStmt:
+		return 1 << nSwitchStmt
+	case *ast.TypeAssertExpr:
+		return 1 << nTypeAssertExpr
+	case *ast.TypeSpec:
+		return 1 << nTypeSpec
+	case *ast.TypeSwitchStmt:
+		return 1 << nTypeSwitchStmt
+	case *ast.UnaryExpr:
+		return 1 << nUnaryExpr
+	case *ast.ValueSpec:
+		return 1 << nValueSpec
+	}
+	return 0 // type not listed above: no bit is set, so it matches no mask
+}
+
+// maskOf returns the union of the type bits (as computed by typeOf)
+// of the given nodes, for use as a filter on event types. Only the
+// dynamic type of each element matters, so typed nil pointers such
+// as (*ast.CallExpr)(nil) are acceptable elements.
+//
+// An empty list, whether nil or merely of length zero, selects all
+// node types. This matches the traversal methods' documentation,
+// which enables filtering only when the types argument is non-empty;
+// testing for nil alone would turn an empty non-nil slice into a
+// zero mask that silently matches nothing.
+func maskOf(nodes []ast.Node) uint64 {
+	if len(nodes) == 0 {
+		return 1<<64 - 1 // match all node types
+	}
+	var mask uint64
+	for _, n := range nodes {
+		mask |= typeOf(n)
+	}
+	return mask
+}