go/oracle/oracle.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package oracle contains the implementation of the oracle tool whose
// command-line is provided by code.google.com/p/go.tools/cmd/oracle.
//
// http://golang.org/s/oracle-design
// http://golang.org/s/oracle-user-manual
//
package oracle

// This file defines oracle.Query, the entry point for the oracle tool.
// The actual executable is defined in cmd/oracle.

// TODO(adonovan): new queries
// - show all statements that may update the selected lvalue
//   (local, global, field, etc).
// - show all places where an object of type T is created
//   (&T{}, var t T, new(T), new(struct{array [3]T}), etc.

// ORACLE CONTROL FLOW
//
// The Oracle is somewhat convoluted due to the need to support two
// very different use-cases, "one-shot" and "long running", and to do
// so quickly.
//
// The cmd/oracle tool issues "one-shot" queries via the exported
// Query function, which creates an Oracle to answer a single query.
// newOracle consults the 'needs' flags of the query mode and the
// package containing the query to avoid doing more work than it needs
// (loading, parsing, type checking, SSA construction).
//
// The Pythia tool (github.com/fzipp/pythia) is an example of a "long
// running" tool.  It calls New() and then loops, calling
// ParseQueryPos and (*Oracle).Query to handle each incoming HTTP
// query.  Since New cannot see which queries will follow, it must
// load, parse, type-check and SSA-build the entire transitive closure
// of the analysis scope, retaining full debug information and all
// typed ASTs.
//
// TODO(adonovan): experiment with inverting the control flow by
// making each mode consist of two functions: a "one-shot setup"
// function and the existing "impl" function.  The one-shot setup
// function would do all of the work of Query and newOracle,
// specialized to each mode, calling library utilities for the common
// things.  This would give it more control over "scope reduction".
// Long running tools would not call the one-shot setup function but
// would have their own setup function equivalent to the existing
// 'needsAll' flow path.

import (
	"fmt"
	"go/ast"
	"go/build"
	"go/token"
	"io"

	"code.google.com/p/go.tools/astutil"
	"code.google.com/p/go.tools/go/loader"
	"code.google.com/p/go.tools/go/pointer"
	"code.google.com/p/go.tools/go/ssa"
	"code.google.com/p/go.tools/go/types"
	"code.google.com/p/go.tools/oracle/serial"
)

// An Oracle holds the program state required for one or more queries.
type Oracle struct {
	fset      *token.FileSet                         // file set [all queries]
	prog      *ssa.Program                           // the SSA program [needSSA]
	ptaConfig pointer.Config                         // pointer analysis configuration [needPTA]
	typeInfo  map[*types.Package]*loader.PackageInfo // type info for all ASTs in the program [needRetainTypeInfo]
}

// A set of bits indicating the analytical requirements of each mode.
//
// Typed ASTs for the whole program are always constructed
// transiently; they are retained only for the queried package unless
// needRetainTypeInfo is set.
const (
	needPos            = 1 << iota // needs a position
	needExactPos                   // needs an exact AST selection; implies needPos
	needRetainTypeInfo             // needs to retain type info for all ASTs in the program
	needSSA                        // needs ssa.Packages for whole program
	needSSADebug                   // needs debug info for ssa.Packages
	needPTA            = needSSA   // needs pointer analysis
	needAll            = -1        // needs everything (e.g. a sequence of queries)
)

type modeInfo struct {
	name  string
	needs int
	impl  func(*Oracle, *QueryPos) (queryResult, error)
}

var modes = []*modeInfo{
	// Pointer analyses, whole program:
	{"callees", needPTA | needExactPos, callees},
	{"callers", needPTA | needPos, callers},
	{"callgraph", needPTA, doCallgraph},
	{"callstack", needPTA | needPos, callstack},
	{"peers", needPTA | needSSADebug | needPos, peers},
	{"pointsto", needPTA | needSSADebug | needExactPos, pointsto},

	// Type-based, modular analyses:
	{"definition", needPos, definition},
	{"describe", needExactPos, describe},
	{"freevars", needPos, freevars},

	// Type-based, whole-program analyses:
	{"implements", needRetainTypeInfo | needPos, implements},
	{"referrers", needRetainTypeInfo | needPos, referrers},
}

func findMode(mode string) *modeInfo {
	for _, m := range modes {
		if m.name == mode {
			return m
		}
	}
	return nil
}

type printfFunc func(pos interface{}, format string, args ...interface{})

// queryResult is the interface of each query-specific result type.
type queryResult interface {
	toSerial(res *serial.Result, fset *token.FileSet)
	display(printf printfFunc)
}

// A QueryPos represents the position provided as input to a query:
// a textual extent in the program's source code, the AST node it
// corresponds to, and the package to which it belongs.
// Instances are created by ParseQueryPos.
//
type QueryPos struct {
	fset       *token.FileSet
	start, end token.Pos           // source extent of query
	path       []ast.Node          // AST path from query node to root of ast.File
	exact      bool                // 2nd result of PathEnclosingInterval
	info       *loader.PackageInfo // type info for the queried package (nil for fastQueryPos)
}

// TypeString prints type T relative to the query position.
func (qpos *QueryPos) TypeString(T types.Type) string {
	return types.TypeString(qpos.info.Pkg, T)
}

// ObjectString prints object obj relative to the query position.
func (qpos *QueryPos) ObjectString(obj types.Object) string {
	return types.ObjectString(qpos.info.Pkg, obj)
}

// SelectionString prints selection sel relative to the query position.
func (qpos *QueryPos) SelectionString(sel *types.Selection) string {
	return types.SelectionString(qpos.info.Pkg, sel)
}

// A Result encapsulates the result of an oracle.Query.
type Result struct {
	fset     *token.FileSet
	q        queryResult       // the query-specific result
	mode     string            // query mode
	warnings []pointer.Warning // pointer analysis warnings (TODO(adonovan): fix: never populated!)
}

// Serial returns an instance of serial.Result, which implements the
// {xml,json}.Marshaler interfaces so that query results can be
// serialized as JSON or XML.
//
func (res *Result) Serial() *serial.Result {
	resj := &serial.Result{Mode: res.mode}
	res.q.toSerial(resj, res.fset)
	for _, w := range res.warnings {
		resj.Warnings = append(resj.Warnings, serial.PTAWarning{
			Pos:     res.fset.Position(w.Pos).String(),
			Message: w.Message,
		})
	}
	return resj
}

// Query runs a single oracle query.
//
// args specify the main package in (*loader.Config).FromArgs syntax.
// mode is the query mode ("callers", etc).
// ptalog is the (optional) pointer-analysis log file.
// buildContext is the go/build configuration for locating packages.
// reflection determines whether to model reflection soundly (currently slow).
//
// Clients that intend to perform multiple queries against the same
// analysis scope should use this pattern instead:
//
//	conf := loader.Config{Build: buildContext, SourceImports: true}
//	... populate config, e.g. conf.FromArgs(args) ...
//	iprog, err := conf.Load()
//	if err != nil { ... }
// 	o, err := oracle.New(iprog, nil, false)
//	if err != nil { ... }
//	for ... {
//		qpos, err := oracle.ParseQueryPos(imp, pos, needExact)
//		if err != nil { ... }
//
//		res, err := o.Query(mode, qpos)
//		if err != nil { ... }
//
//		// use res
//	}
//
// TODO(adonovan): the ideal 'needsExact' parameter for ParseQueryPos
// depends on the query mode; how should we expose this?
//
func Query(args []string, mode, pos string, ptalog io.Writer, buildContext *build.Context, reflection bool) (*Result, error) {
	if mode == "what" {
		// Bypass package loading, type checking, SSA construction.
		return what(pos, buildContext)
	}

	minfo := findMode(mode)
	if minfo == nil {
		return nil, fmt.Errorf("invalid mode type: %q", mode)
	}

	conf := loader.Config{Build: buildContext, SourceImports: true}

	// Determine initial packages.
	args, err := conf.FromArgs(args, true)
	if err != nil {
		return nil, err
	}
	if len(args) > 0 {
		return nil, fmt.Errorf("surplus arguments: %q", args)
	}

	// For queries needing only a single typed package,
	// reduce the analysis scope to that package.
	if minfo.needs&(needSSA|needRetainTypeInfo) == 0 {
		reduceScope(pos, &conf)
	}

	// TODO(adonovan): report type errors to the user via Serial
	// types, not stderr?
	// conf.TypeChecker.Error = func(err error) {
	// 	E := err.(types.Error)
	// 	fmt.Fprintf(os.Stderr, "%s: %s\n", E.Fset.Position(E.Pos), E.Msg)
	// }

	// Load/parse/type-check the program.
	iprog, err := conf.Load()
	if err != nil {
		return nil, err
	}

	o, err := newOracle(iprog, ptalog, minfo.needs, reflection)
	if err != nil {
		return nil, err
	}

	qpos, err := ParseQueryPos(iprog, pos, minfo.needs&needExactPos != 0)
	if err != nil && minfo.needs&(needPos|needExactPos) != 0 {
		return nil, err
	}

	// SSA is built and we have the QueryPos.
	// Release the other ASTs and type info to the GC.
	iprog = nil

	return o.query(minfo, qpos)
}

// reduceScope is called for one-shot queries that need only a single
// typed package.  It attempts to guess the query package from pos and
// reduce the analysis scope (set of loaded packages) to just that one
// plus (the exported parts of) its dependencies.  It leaves its
// arguments unchanged on failure.
//
// TODO(adonovan): this is a real mess... but it's fast.
//
func reduceScope(pos string, conf *loader.Config) {
	fqpos, err := fastQueryPos(pos)
	if err != nil {
		return // bad query
	}

	// TODO(adonovan): fix: this gives the wrong results for files
	// in non-importable packages such as tests and ad-hoc packages
	// specified as a list of files (incl. the oracle's tests).
	_, importPath, err := guessImportPath(fqpos.fset.File(fqpos.start).Name(), conf.Build)
	if err != nil {
		return // can't find GOPATH dir
	}
	if importPath == "" {
		return
	}

	// Check that it's possible to load the queried package.
	// (e.g. oracle tests contain different 'package' decls in same dir.)
	// Keep consistent with logic in loader/util.go!
	cfg2 := *conf.Build
	cfg2.CgoEnabled = false
	bp, err := cfg2.Import(importPath, "", 0)
	if err != nil {
		return // no files for package
	}

	// Check that the queried file appears in the package:
	// it might be a '// +build ignore' from an ad-hoc main
	// package, e.g. $GOROOT/src/net/http/triv.go.
	if !pkgContainsFile(bp, fqpos.fset.File(fqpos.start).Name()) {
		return // not found
	}

	conf.TypeCheckFuncBodies = func(p string) bool { return p == importPath }

	// Ignore packages specified on command line.
	conf.CreatePkgs = nil
	conf.ImportPkgs = nil

	// Instead load just the one containing the query position
	// (and possibly its corresponding tests/production code).
	// TODO(adonovan): set 'augment' based on which file list
	// contains
	_ = conf.ImportWithTests(importPath) // ignore error
}

func pkgContainsFile(bp *build.Package, filename string) bool {
	for _, files := range [][]string{bp.GoFiles, bp.TestGoFiles, bp.XTestGoFiles} {
		for _, file := range files {
			if sameFile(file, filename) {
				return true
			}
		}
	}
	return false
}

// New constructs a new Oracle that can be used for a sequence of queries.
//
// iprog specifies the program to analyze.
// ptalog is the (optional) pointer-analysis log file.
// reflection determines whether to model reflection soundly (currently slow).
//
func New(iprog *loader.Program, ptalog io.Writer, reflection bool) (*Oracle, error) {
	return newOracle(iprog, ptalog, needAll, reflection)
}

func newOracle(iprog *loader.Program, ptalog io.Writer, needs int, reflection bool) (*Oracle, error) {
	o := &Oracle{fset: iprog.Fset}

	// Retain type info for all ASTs in the program.
	if needs&needRetainTypeInfo != 0 {
		o.typeInfo = iprog.AllPackages
	}

	// Create SSA package for the initial packages and their dependencies.
	if needs&needSSA != 0 {
		var mode ssa.BuilderMode
		if needs&needSSADebug != 0 {
			mode |= ssa.GlobalDebug
		}
		prog := ssa.Create(iprog, mode)

		// For each initial package (specified on the command line),
		// if it has a main function, analyze that,
		// otherwise analyze its tests, if any.
		var testPkgs, mains []*ssa.Package
		for _, info := range iprog.InitialPackages() {
			initialPkg := prog.Package(info.Pkg)

			// Add package to the pointer analysis scope.
			if initialPkg.Func("main") != nil {
				mains = append(mains, initialPkg)
			} else {
				testPkgs = append(testPkgs, initialPkg)
			}
		}
		if testPkgs != nil {
			if p := prog.CreateTestMainPackage(testPkgs...); p != nil {
				mains = append(mains, p)
			}
		}
		if mains == nil {
			return nil, fmt.Errorf("analysis scope has no main and no tests")
		}
		o.ptaConfig.Log = ptalog
		o.ptaConfig.Reflection = reflection
		o.ptaConfig.Mains = mains

		o.prog = prog
	}

	return o, nil
}

// Query runs the query of the specified mode and selection.
//
// TODO(adonovan): fix: this function does not currently support the
// "what" query, which needs to access the go/build.Context.
//
func (o *Oracle) Query(mode string, qpos *QueryPos) (*Result, error) {
	minfo := findMode(mode)
	if minfo == nil {
		return nil, fmt.Errorf("invalid mode type: %q", mode)
	}
	return o.query(minfo, qpos)
}

func (o *Oracle) query(minfo *modeInfo, qpos *QueryPos) (*Result, error) {
	// Clear out residue of previous query (for long-running clients).
	o.ptaConfig.Queries = nil
	o.ptaConfig.IndirectQueries = nil

	res := &Result{
		mode: minfo.name,
		fset: o.fset,
	}
	var err error
	res.q, err = minfo.impl(o, qpos)
	if err != nil {
		return nil, err
	}
	return res, nil
}

// ParseQueryPos parses the source query position pos.
// If needExact, it must identify a single AST subtree;
// this is appropriate for queries that allow fairly arbitrary syntax,
// e.g. "describe".
//
func ParseQueryPos(iprog *loader.Program, posFlag string, needExact bool) (*QueryPos, error) {
	filename, startOffset, endOffset, err := parsePosFlag(posFlag)
	if err != nil {
		return nil, err
	}
	start, end, err := findQueryPos(iprog.Fset, filename, startOffset, endOffset)
	if err != nil {
		return nil, err
	}
	info, path, exact := iprog.PathEnclosingInterval(start, end)
	if path == nil {
		return nil, fmt.Errorf("no syntax here")
	}
	if needExact && !exact {
		return nil, fmt.Errorf("ambiguous selection within %s", astutil.NodeDescription(path[0]))
	}
	return &QueryPos{iprog.Fset, start, end, path, exact, info}, nil
}

// WriteTo writes the oracle query result res to out in a compiler diagnostic format.
func (res *Result) WriteTo(out io.Writer) {
	printf := func(pos interface{}, format string, args ...interface{}) {
		fprintf(out, res.fset, pos, format, args...)
	}
	res.q.display(printf)

	// Print warnings after the main output.
	if res.warnings != nil {
		fmt.Fprintln(out, "\nPointer analysis warnings:")
		for _, w := range res.warnings {
			printf(w.Pos, "warning: "+w.Message)
		}
	}
}

// ---------- Utilities ----------

// buildSSA constructs the SSA representation of Go-source function bodies.
// Not needed in simpler modes, e.g. freevars.
//
func buildSSA(o *Oracle) {
	o.prog.BuildAll()
}

// ptrAnalysis runs the pointer analysis and returns its result.
func ptrAnalysis(o *Oracle) *pointer.Result {
	result, err := pointer.Analyze(&o.ptaConfig)
	if err != nil {
		panic(err) // pointer analysis internal error
	}
	return result
}

// unparen returns e with any enclosing parentheses stripped.
func unparen(e ast.Expr) ast.Expr {
	for {
		p, ok := e.(*ast.ParenExpr)
		if !ok {
			break
		}
		e = p.X
	}
	return e
}

// deref returns a pointer's element type; otherwise it returns typ.
func deref(typ types.Type) types.Type {
	if p, ok := typ.Underlying().(*types.Pointer); ok {
		return p.Elem()
	}
	return typ
}

// fprintf prints to w a message of the form "location: message\n"
// where location is derived from pos.
//
// pos must be one of:
//    - a token.Pos, denoting a position
//    - an ast.Node, denoting an interval
//    - anything with a Pos() method:
//         ssa.Member, ssa.Value, ssa.Instruction, types.Object, pointer.Label, etc.
//    - a QueryPos, denoting the extent of the user's query.
//    - nil, meaning no position at all.
//
// The output format is is compatible with the 'gnu'
// compilation-error-regexp in Emacs' compilation mode.
// TODO(adonovan): support other editors.
//
func fprintf(w io.Writer, fset *token.FileSet, pos interface{}, format string, args ...interface{}) {
	var start, end token.Pos
	switch pos := pos.(type) {
	case ast.Node:
		start = pos.Pos()
		end = pos.End()
	case token.Pos:
		start = pos
		end = start
	case interface {
		Pos() token.Pos
	}:
		start = pos.Pos()
		end = start
	case *QueryPos:
		start = pos.start
		end = pos.end
	case nil:
		// no-op
	default:
		panic(fmt.Sprintf("invalid pos: %T", pos))
	}

	if sp := fset.Position(start); start == end {
		// (prints "-: " for token.NoPos)
		fmt.Fprintf(w, "%s: ", sp)
	} else {
		ep := fset.Position(end)
		// The -1 below is a concession to Emacs's broken use of
		// inclusive (not half-open) intervals.
		// Other editors may not want it.
		// TODO(adonovan): add an -editor=vim|emacs|acme|auto
		// flag; auto uses EMACS=t / VIM=... / etc env vars.
		fmt.Fprintf(w, "%s:%d.%d-%d.%d: ",
			sp.Filename, sp.Line, sp.Column, ep.Line, ep.Column-1)
	}
	fmt.Fprintf(w, format, args...)
	io.WriteString(w, "\n")
}