1
0
mirror of https://github.com/golang/go synced 2024-11-18 17:54:57 -07:00

go/ast/inspector: faster (amortized) AST traversals

This new package provides helper functions for traversal over the
syntax trees of a package, including node filtering by type, and
materialization of the traversal stack.

During construction, the inspector does a complete traversal and
builds a list of push/pop events and their node type. Subsequent
method calls that request a traversal scan this list, rather than walk
the AST, and perform type filtering using efficient bit sets.

Experiments suggest the inspector's traversals are about 2.5x faster
than ast.Inspect, but it may take around 5 traversals for this benefit
to amortize the inspector's construction cost.

This design is well-suited to the ongoing reworking of cmd/vet (see
docs.google.com/document/d/1-azPLXaLgTCKeKDNg0HVMq2ovMlD-e7n1ZHzZVzOlJk),
which historically made a single pass over the ASTs but is being
replaced by a design that requires a separate pass for each analysis.

Change-Id: I9a67aed6a3bf948076641d96447860d97ede67b4
Reviewed-on: https://go-review.googlesource.com/135655
Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
Alan Donovan 2018-09-17 09:36:13 -04:00
parent 16720d5f2d
commit b3c0be4c97
3 changed files with 618 additions and 0 deletions

View File

@ -0,0 +1,182 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package inspector provides helper functions for traversal over the
// syntax trees of a package, including node filtering by type, and
// materialization of the traversal stack.
//
// During construction, the inspector does a complete traversal and
// builds a list of push/pop events and their node type. Subsequent
// method calls that request a traversal scan this list, rather than walk
// the AST, and perform type filtering using efficient bit sets.
//
// Experiments suggest the inspector's traversals are about 2.5x faster
// than ast.Inspect, but it may take around 5 traversals for this
// benefit to amortize the inspector's construction cost.
// If efficiency is the primary concern, do not use use Inspector for
// one-off traversals.
package inspector
// There are four orthogonal features in a traversal:
// 1 type filtering
// 2 pruning
// 3 postorder calls to f
// 4 stack
// Rather than offer all of them in the API,
// only a few combinations are exposed:
// - Preorder is the fastest and has fewest features,
// but is the most commonly needed traversal.
// - Nodes and WithStack both provide pruning and postorder calls,
// even though few clients need it, because supporting two versions
// is not justified.
// More combinations could be supported by expressing them as
// wrappers around a more generic traversal, but this was measured
// and found to degrade performance significantly (30%).
import (
"go/ast"
)
// An Inspector provides methods for inspecting
// (traversing) the syntax trees of a package.
type Inspector struct {
events []event
}
// New returns an Inspector for the specified syntax trees.
func New(files []*ast.File) *Inspector {
return &Inspector{traverse(files)}
}
// An event represents a push or a pop
// of an ast.Node during a traversal.
type event struct {
node ast.Node
typ uint64 // typeOf(node)
index int // 1 + index of corresponding pop event, or 0 if this is a pop
}
// Preorder visits all the nodes of the files supplied to New in
// depth-first order. It calls f(n) for each node n before it visits
// n's children.
//
// The types argument, if non-empty, enables type-based filtering of
// events. The function f if is called only for nodes whose type
// matches an element of the types slice.
func (in *Inspector) Preorder(types []ast.Node, f func(ast.Node)) {
// Because it avoids postorder calls to f, and the pruning
// check, Preorder is almost twice as fast as Nodes. The two
// features seem to contribute similar slowdowns (~1.4x each).
mask := maskOf(types)
for i := 0; i < len(in.events); {
ev := in.events[i]
if ev.typ&mask != 0 {
if ev.index > 0 {
f(ev.node)
}
}
i++
}
}
// Nodes visits the nodes of the files supplied to New in depth-first
// order. It calls f(n, true) for each node n before it visits n's
// children. If f returns true, Nodes invokes f recursively for each
// of the non-nil children of the node, followed by a call of
// f(n, false).
//
// The types argument, if non-empty, enables type-based filtering of
// events. The function f if is called only for nodes whose type
// matches an element of the types slice.
func (in *Inspector) Nodes(types []ast.Node, f func(n ast.Node, push bool) (prune bool)) {
mask := maskOf(types)
for i := 0; i < len(in.events); {
ev := in.events[i]
if ev.typ&mask != 0 {
if ev.index > 0 {
// push
if !f(ev.node, true) {
i = ev.index // jump to corresponding pop + 1
continue
}
} else {
// pop
f(ev.node, false)
}
}
i++
}
}
// WithStack visits nodes in a similar manner to Nodes, but it
// supplies each call to f an additional argument, the current
// traversal stack. The stack's first element is the outermost node,
// an *ast.File; its last is the innermost, n.
func (in *Inspector) WithStack(types []ast.Node, f func(n ast.Node, push bool, stack []ast.Node) (prune bool)) {
mask := maskOf(types)
var stack []ast.Node
for i := 0; i < len(in.events); {
ev := in.events[i]
if ev.index > 0 {
// push
stack = append(stack, ev.node)
if ev.typ&mask != 0 {
if !f(ev.node, true, stack) {
i = ev.index
stack = stack[:len(stack)-1]
continue
}
}
} else {
// pop
if ev.typ&mask != 0 {
f(ev.node, false, stack)
}
stack = stack[:len(stack)-1]
}
i++
}
}
// traverse builds the table of events representing a traversal.
func traverse(files []*ast.File) []event {
// Preallocate approximate number of events
// based on source file extent.
// This makes traverse faster by 4x (!).
var extent int
for _, f := range files {
extent += int(f.End() - f.Pos())
}
// This estimate is based on the net/http package.
events := make([]event, 0, extent*33/100)
var stack []event
for _, f := range files {
ast.Inspect(f, func(n ast.Node) bool {
if n != nil {
// push
ev := event{
node: n,
typ: typeOf(n),
index: len(events), // push event temporarily holds own index
}
stack = append(stack, ev)
events = append(events, ev)
} else {
// pop
ev := stack[len(stack)-1]
stack = stack[:len(stack)-1]
events[ev.index].index = len(events) + 1 // make push refer to pop
ev.index = 0 // turn ev into a pop event
events = append(events, ev)
}
return true
})
}
return events
}

View File

@ -0,0 +1,220 @@
package inspector_test
import (
"go/ast"
"go/build"
"go/parser"
"go/token"
"log"
"path/filepath"
"reflect"
"strings"
"testing"
"golang.org/x/tools/go/ast/inspector"
)
var netFiles []*ast.File
func init() {
files, err := parseNetFiles()
if err != nil {
log.Fatal(err)
}
netFiles = files
}
func parseNetFiles() ([]*ast.File, error) {
pkg, err := build.Default.Import("net", "", 0)
if err != nil {
return nil, err
}
fset := token.NewFileSet()
var files []*ast.File
for _, filename := range pkg.GoFiles {
filename = filepath.Join(pkg.Dir, filename)
f, err := parser.ParseFile(fset, filename, nil, 0)
if err != nil {
return nil, err
}
files = append(files, f)
}
return files, nil
}
// TestAllNodes compares Inspector against ast.Inspect.
func TestInspectAllNodes(t *testing.T) {
inspect := inspector.New(netFiles)
var nodesA []ast.Node
inspect.Nodes(nil, func(n ast.Node, push bool) bool {
if push {
nodesA = append(nodesA, n)
}
return true
})
var nodesB []ast.Node
for _, f := range netFiles {
ast.Inspect(f, func(n ast.Node) bool {
if n != nil {
nodesB = append(nodesB, n)
}
return true
})
}
compare(t, nodesA, nodesB)
}
// TestPruning compares Inspector against ast.Inspect,
// pruning descent within ast.CallExpr nodes.
func TestInspectPruning(t *testing.T) {
inspect := inspector.New(netFiles)
var nodesA []ast.Node
inspect.Nodes(nil, func(n ast.Node, push bool) bool {
if push {
nodesA = append(nodesA, n)
_, isCall := n.(*ast.CallExpr)
return !isCall // don't descend into function calls
}
return false
})
var nodesB []ast.Node
for _, f := range netFiles {
ast.Inspect(f, func(n ast.Node) bool {
if n != nil {
nodesB = append(nodesB, n)
_, isCall := n.(*ast.CallExpr)
return !isCall // don't descend into function calls
}
return false
})
}
compare(t, nodesA, nodesB)
}
func compare(t *testing.T, nodesA, nodesB []ast.Node) {
if len(nodesA) != len(nodesB) {
t.Errorf("inconsistent node lists: %d vs %d", len(nodesA), len(nodesB))
} else {
for i := range nodesA {
if a, b := nodesA[i], nodesB[i]; a != b {
t.Errorf("node %d is inconsistent: %T, %T", i, a, b)
}
}
}
}
func TestTypeFiltering(t *testing.T) {
const src = `package a
func f() {
print("hi")
panic("oops")
}
`
fset := token.NewFileSet()
f, _ := parser.ParseFile(fset, "a.go", src, 0)
inspect := inspector.New([]*ast.File{f})
var got []string
fn := func(n ast.Node, push bool) bool {
if push {
got = append(got, typeOf(n))
}
return true
}
// no type filtering
inspect.Nodes(nil, fn)
if want := strings.Fields("File Ident FuncDecl Ident FuncType FieldList BlockStmt ExprStmt CallExpr Ident BasicLit ExprStmt CallExpr Ident BasicLit"); !reflect.DeepEqual(got, want) {
t.Errorf("inspect: got %s, want %s", got, want)
}
// type filtering
nodeTypes := []ast.Node{
(*ast.BasicLit)(nil),
(*ast.CallExpr)(nil),
}
got = nil
inspect.Nodes(nodeTypes, fn)
if want := strings.Fields("CallExpr BasicLit CallExpr BasicLit"); !reflect.DeepEqual(got, want) {
t.Errorf("inspect: got %s, want %s", got, want)
}
// inspect with stack
got = nil
inspect.WithStack(nodeTypes, func(n ast.Node, push bool, stack []ast.Node) bool {
if push {
var line []string
for _, n := range stack {
line = append(line, typeOf(n))
}
got = append(got, strings.Join(line, " "))
}
return true
})
want := []string{
"File FuncDecl BlockStmt ExprStmt CallExpr",
"File FuncDecl BlockStmt ExprStmt CallExpr BasicLit",
"File FuncDecl BlockStmt ExprStmt CallExpr",
"File FuncDecl BlockStmt ExprStmt CallExpr BasicLit",
}
if !reflect.DeepEqual(got, want) {
t.Errorf("inspect: got %s, want %s", got, want)
}
}
func typeOf(n ast.Node) string {
return strings.TrimPrefix(reflect.TypeOf(n).String(), "*ast.")
}
// The numbers show a marginal improvement (ASTInspect/Inspect) of 3.5x,
// but a break-even point (NewInspector/(ASTInspect-Inspect)) of about 5
// traversals.
//
// BenchmarkNewInspector 4.5 ms
// BenchmarkNewInspect 0.33ms
// BenchmarkASTInspect 1.2 ms
func BenchmarkNewInspector(b *testing.B) {
// Measure one-time construction overhead.
for i := 0; i < b.N; i++ {
inspector.New(netFiles)
}
}
func BenchmarkInspect(b *testing.B) {
b.StopTimer()
inspect := inspector.New(netFiles)
b.StartTimer()
// Measure marginal cost of traversal.
var ndecls, nlits int
for i := 0; i < b.N; i++ {
inspect.Preorder(nil, func(n ast.Node) {
switch n.(type) {
case *ast.FuncDecl:
ndecls++
case *ast.FuncLit:
nlits++
}
})
}
}
func BenchmarkASTInspect(b *testing.B) {
var ndecls, nlits int
for i := 0; i < b.N; i++ {
for _, f := range netFiles {
ast.Inspect(f, func(n ast.Node) bool {
switch n.(type) {
case *ast.FuncDecl:
ndecls++
case *ast.FuncLit:
nlits++
}
return true
})
}
}
}

216
go/ast/inspector/typeof.go Normal file
View File

@ -0,0 +1,216 @@
package inspector
// This file defines func typeOf(ast.Node) uint64.
//
// The initial map-based implementation was too slow;
// see https://go-review.googlesource.com/c/tools/+/135655/1/go/ast/inspector/inspector.go#196
import "go/ast"
const (
nArrayType = iota
nAssignStmt
nBadDecl
nBadExpr
nBadStmt
nBasicLit
nBinaryExpr
nBlockStmt
nBranchStmt
nCallExpr
nCaseClause
nChanType
nCommClause
nComment
nCommentGroup
nCompositeLit
nDeclStmt
nDeferStmt
nEllipsis
nEmptyStmt
nExprStmt
nField
nFieldList
nFile
nForStmt
nFuncDecl
nFuncLit
nFuncType
nGenDecl
nGoStmt
nIdent
nIfStmt
nImportSpec
nIncDecStmt
nIndexExpr
nInterfaceType
nKeyValueExpr
nLabeledStmt
nMapType
nPackage
nParenExpr
nRangeStmt
nReturnStmt
nSelectStmt
nSelectorExpr
nSendStmt
nSliceExpr
nStarExpr
nStructType
nSwitchStmt
nTypeAssertExpr
nTypeSpec
nTypeSwitchStmt
nUnaryExpr
nValueSpec
)
// typeOf returns a distinct single-bit value that represents the type of n.
//
// Various implementations were benchmarked with BenchmarkNewInspector:
// GOGC=off
// - type switch 4.9-5.5ms 2.1ms
// - binary search over a sorted list of types 5.5-5.9ms 2.5ms
// - linear scan, frequency-ordered list 5.9-6.1ms 2.7ms
// - linear scan, unordered list 6.4ms 2.7ms
// - hash table 6.5ms 3.1ms
// A perfect hash seemed like overkill.
//
// The compiler's switch statement is the clear winner
// as it produces a binary tree in code,
// with constant conditions and good branch prediction.
// (Sadly it is the most verbose in source code.)
// Binary search suffered from poor branch prediction.
//
func typeOf(n ast.Node) uint64 {
// Fast path: nearly half of all nodes are identifiers.
if _, ok := n.(*ast.Ident); ok {
return 1 << nIdent
}
// These cases include all nodes encountered by ast.Inspect.
switch n.(type) {
case *ast.ArrayType:
return 1 << nArrayType
case *ast.AssignStmt:
return 1 << nAssignStmt
case *ast.BadDecl:
return 1 << nBadDecl
case *ast.BadExpr:
return 1 << nBadExpr
case *ast.BadStmt:
return 1 << nBadStmt
case *ast.BasicLit:
return 1 << nBasicLit
case *ast.BinaryExpr:
return 1 << nBinaryExpr
case *ast.BlockStmt:
return 1 << nBlockStmt
case *ast.BranchStmt:
return 1 << nBranchStmt
case *ast.CallExpr:
return 1 << nCallExpr
case *ast.CaseClause:
return 1 << nCaseClause
case *ast.ChanType:
return 1 << nChanType
case *ast.CommClause:
return 1 << nCommClause
case *ast.Comment:
return 1 << nComment
case *ast.CommentGroup:
return 1 << nCommentGroup
case *ast.CompositeLit:
return 1 << nCompositeLit
case *ast.DeclStmt:
return 1 << nDeclStmt
case *ast.DeferStmt:
return 1 << nDeferStmt
case *ast.Ellipsis:
return 1 << nEllipsis
case *ast.EmptyStmt:
return 1 << nEmptyStmt
case *ast.ExprStmt:
return 1 << nExprStmt
case *ast.Field:
return 1 << nField
case *ast.FieldList:
return 1 << nFieldList
case *ast.File:
return 1 << nFile
case *ast.ForStmt:
return 1 << nForStmt
case *ast.FuncDecl:
return 1 << nFuncDecl
case *ast.FuncLit:
return 1 << nFuncLit
case *ast.FuncType:
return 1 << nFuncType
case *ast.GenDecl:
return 1 << nGenDecl
case *ast.GoStmt:
return 1 << nGoStmt
case *ast.Ident:
return 1 << nIdent
case *ast.IfStmt:
return 1 << nIfStmt
case *ast.ImportSpec:
return 1 << nImportSpec
case *ast.IncDecStmt:
return 1 << nIncDecStmt
case *ast.IndexExpr:
return 1 << nIndexExpr
case *ast.InterfaceType:
return 1 << nInterfaceType
case *ast.KeyValueExpr:
return 1 << nKeyValueExpr
case *ast.LabeledStmt:
return 1 << nLabeledStmt
case *ast.MapType:
return 1 << nMapType
case *ast.Package:
return 1 << nPackage
case *ast.ParenExpr:
return 1 << nParenExpr
case *ast.RangeStmt:
return 1 << nRangeStmt
case *ast.ReturnStmt:
return 1 << nReturnStmt
case *ast.SelectStmt:
return 1 << nSelectStmt
case *ast.SelectorExpr:
return 1 << nSelectorExpr
case *ast.SendStmt:
return 1 << nSendStmt
case *ast.SliceExpr:
return 1 << nSliceExpr
case *ast.StarExpr:
return 1 << nStarExpr
case *ast.StructType:
return 1 << nStructType
case *ast.SwitchStmt:
return 1 << nSwitchStmt
case *ast.TypeAssertExpr:
return 1 << nTypeAssertExpr
case *ast.TypeSpec:
return 1 << nTypeSpec
case *ast.TypeSwitchStmt:
return 1 << nTypeSwitchStmt
case *ast.UnaryExpr:
return 1 << nUnaryExpr
case *ast.ValueSpec:
return 1 << nValueSpec
}
return 0
}
func maskOf(nodes []ast.Node) uint64 {
if nodes == nil {
return 1<<64 - 1 // match all node types
}
var mask uint64
for _, n := range nodes {
mask |= typeOf(n)
}
return mask
}