diff --git a/go/loader/doc.go b/go/loader/doc.go new file mode 100644 index 0000000000..c6dcac2461 --- /dev/null +++ b/go/loader/doc.go @@ -0,0 +1,188 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package loader loads, parses and type-checks packages of Go code +// plus their transitive closure, and retains both the ASTs and the +// derived facts. +// +// THIS INTERFACE IS EXPERIMENTAL AND IS LIKELY TO CHANGE. +// +// The package defines two primary types: Config, which specifies a +// set of initial packages to load and various other options; and +// Program, which is the result of successfully loading the packages +// specified by a configuration. +// +// The configuration can be set directly, but *Config provides various +// convenience methods to simplify the common cases, each of which can +// be called any number of times. Finally, these are followed by a +// call to Load() to actually load and type-check the program. +// +// var conf loader.Config +// +// // Use the command-line arguments to specify +// // a set of initial packages to load from source. +// // See FromArgsUsage for help. +// rest, err := conf.FromArgs(os.Args[1:], wantTests) +// +// // Parse the specified files and create an ad-hoc package with path "foo". +// // All files must have the same 'package' declaration. +// conf.CreateFromFilenames("foo", "foo.go", "bar.go") +// +// // Create an ad-hoc package with path "foo" from +// // the specified already-parsed files. +// // All ASTs must have the same 'package' declaration. +// conf.CreateFromFiles("foo", parsedFiles) +// +// // Add "runtime" to the set of packages to be loaded. +// conf.Import("runtime") +// +// // Adds "fmt" and "fmt_test" to the set of packages +// // to be loaded. "fmt" will include *_test.go files. 
+// conf.ImportWithTests("fmt") +// +// // Finally, load all the packages specified by the configuration. +// prog, err := conf.Load() +// +// See example_test.go for examples of API usage. +// +// +// CONCEPTS AND TERMINOLOGY +// +// An AD-HOC package is one specified as a set of source files on the +// command line. In the simplest case, it may consist of a single file +// such as $GOROOT/src/net/http/triv.go. +// +// EXTERNAL TEST packages are those comprised of a set of *_test.go +// files all with the same 'package foo_test' declaration, all in the +// same directory. (go/build.Package calls these files XTestFiles.) +// +// An IMPORTABLE package is one that can be referred to by some import +// spec. The Path() of each importable package is unique within a +// Program. +// +// Ad-hoc packages and external test packages are NON-IMPORTABLE. The +// Path() of an ad-hoc package is inferred from the package +// declarations of its files and is therefore not a unique package key. +// For example, Config.CreatePkgs may specify two initial ad-hoc +// packages both called "main". +// +// An AUGMENTED package is an importable package P plus all the +// *_test.go files with the same 'package foo' declaration as P. +// (go/build.Package calls these files TestFiles.) +// +// The INITIAL packages are those specified in the configuration. A +// DEPENDENCY is a package loaded to satisfy an import in an initial +// package or another dependency. +// +package loader + +// IMPLEMENTATION NOTES +// +// 'go test', in-package test files, and import cycles +// --------------------------------------------------- +// +// An external test package may depend upon members of the augmented +// package that are not in the unaugmented package, such as functions +// that expose internals. (See bufio/export_test.go for an example.) +// So, the loader must ensure that for each external test package +// it loads, it also augments the corresponding non-test package. 
+// +// The import graph over n unaugmented packages must be acyclic; the +// import graph over n-1 unaugmented packages plus one augmented +// package must also be acyclic. ('go test' relies on this.) But the +// import graph over n augmented packages may contain cycles. +// +// First, all the (unaugmented) non-test packages and their +// dependencies are imported in the usual way; the loader reports an +// error if it detects an import cycle. +// +// Then, each package P for which testing is desired is augmented by +// the list P' of its in-package test files, by calling +// (*types.Checker).Files. This arrangement ensures that P' may +// reference definitions within P, but P may not reference definitions +// within P'. Furthermore, P' may import any other package, including +// ones that depend upon P, without an import cycle error. +// +// Consider two packages A and B, both of which have lists of +// in-package test files we'll call A' and B', and which have the +// following import graph edges: +// B imports A +// B' imports A +// A' imports B +// This last edge would be expected to create an error were it not +// for the special type-checking discipline above. +// Cycles of size greater than two are possible. For example: +// compress/bzip2/bzip2_test.go (package bzip2) imports "io/ioutil" +// io/ioutil/tempfile_test.go (package ioutil) imports "regexp" +// regexp/exec_test.go (package regexp) imports "compress/bzip2" +// +// +// Concurrency +// ----------- +// +// Let us define the import dependency graph as follows. Each node is a +// list of files passed to (Checker).Files at once. Many of these lists +// are the production code of an importable Go package, so those nodes +// are labelled by the package's import path. The remaining nodes are +// ad-hoc packages and lists of in-package *_test.go files that augment +// an importable package; those nodes have no label. +// +// The edges of the graph represent import statements appearing within a +// file. 
An edge connects a node (a list of files) to the node it +// imports, which is importable and thus always labelled. +// +// Loading is controlled by this dependency graph. +// +// To reduce I/O latency, we start loading a package's dependencies +// asynchronously as soon as we've parsed its files and enumerated its +// imports (scanImports). This performs a preorder traversal of the +// import dependency graph. +// +// To exploit hardware parallelism, we type-check unrelated packages in +// parallel, where "unrelated" means not ordered by the partial order of +// the import dependency graph. +// +// We use a concurrency-safe blocking cache (importer.imported) to +// record the results of type-checking, whether success or failure. An +// entry is created in this cache by startLoad the first time the +// package is imported. The first goroutine to request an entry becomes +// responsible for completing the task and broadcasting completion to +// subsequent requestors, which block until then. +// +// Type checking occurs in (parallel) postorder: we cannot type-check a +// set of files until we have loaded and type-checked all of their +// immediate dependencies (and thus all of their transitive +// dependencies). If the input were guaranteed free of import cycles, +// this would be trivial: we could simply wait for completion of the +// dependencies and then invoke the typechecker. +// +// But as we saw in the 'go test' section above, some cycles in the +// import graph over packages are actually legal, so long as the +// cycle-forming edge originates in the in-package test files that +// augment the package. This explains why the nodes of the import +// dependency graph are not packages, but lists of files: the unlabelled +// nodes avoid the cycles. Consider packages A and B where B imports A +// and A's in-package tests AT import B. 
The naively constructed import +// graph over packages would contain a cycle (A+AT) --> B --> (A+AT) but +// the graph over lists of files is AT --> B --> A, where AT is an +// unlabelled node. +// +// Awaiting completion of the dependencies in a cyclic graph would +// deadlock, so we must materialize the import dependency graph (as +// importer.graph) and check whether each import edge forms a cycle. If +// x imports y, and the graph already contains a path from y to x, then +// there is an import cycle, in which case the processing of x must not +// wait for the completion of processing of y. +// +// When the type-checker makes a callback (doImport) to the loader for a +// given import edge, there are two possible cases. In the normal case, +// the dependency has already been completely type-checked; doImport +// does a cache lookup and returns it. In the cyclic case, the entry in +// the cache is still necessarily incomplete, indicating a cycle. We +// perform the cycle check again to obtain the error message, and return +// the error. +// +// The result of using concurrency is about a 2.5x speedup for stdlib_test. + +// TODO(adonovan): overhaul the package documentation. diff --git a/go/loader/example_test.go b/go/loader/example_test.go new file mode 100644 index 0000000000..45614e5408 --- /dev/null +++ b/go/loader/example_test.go @@ -0,0 +1,173 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package loader_test + +import ( + "fmt" + "go/token" + "log" + "path/filepath" + "runtime" + "sort" + + "golang.org/x/tools/go/loader" +) + +func printProg(prog *loader.Program) { + // Created packages are the initial packages specified by a call + // to CreateFromFilenames or CreateFromFiles. 
+ var names []string + for _, info := range prog.Created { + names = append(names, info.Pkg.Path()) + } + fmt.Printf("created: %s\n", names) + + // Imported packages are the initial packages specified by a + // call to Import or ImportWithTests. + names = nil + for _, info := range prog.Imported { + names = append(names, info.Pkg.Path()) + } + sort.Strings(names) + fmt.Printf("imported: %s\n", names) + + // InitialPackages contains the union of created and imported. + names = nil + for _, info := range prog.InitialPackages() { + names = append(names, info.Pkg.Path()) + } + sort.Strings(names) + fmt.Printf("initial: %s\n", names) + + // AllPackages contains all initial packages and their dependencies. + names = nil + for pkg := range prog.AllPackages { + names = append(names, pkg.Path()) + } + sort.Strings(names) + fmt.Printf("all: %s\n", names) +} + +func printFilenames(fset *token.FileSet, info *loader.PackageInfo) { + var names []string + for _, f := range info.Files { + names = append(names, filepath.Base(fset.File(f.Pos()).Name())) + } + fmt.Printf("%s.Files: %s\n", info.Pkg.Path(), names) +} + +// ExampleFromArgs loads a set of packages and all their dependencies +// from a typical command-line. FromArgs parses a command line and +// makes calls to the other methods of Config shown in the examples that +// follow. +func ExampleFromArgs() { + args := []string{"mytool", "unicode/utf8", "errors", "runtime", "--", "foo", "bar"} + const wantTests = false + + var conf loader.Config + rest, err := conf.FromArgs(args[1:], wantTests) + // Check the FromArgs error before it is overwritten by Load's. + if err != nil { + log.Fatal(err) + } + prog, err := conf.Load() + if err != nil { + log.Fatal(err) + } + + fmt.Printf("rest: %s\n", rest) + printProg(prog) + // Output: + // rest: [foo bar] + // created: [] + // imported: [errors runtime unicode/utf8] + // initial: [errors runtime unicode/utf8] + // all: [errors runtime unicode/utf8] +} + +// ExampleCreateFromFilenames loads a single package (without tests) and +// all its dependencies from a list of filenames. 
+func ExampleCreateFromFilenames() { + var conf loader.Config + filename := filepath.Join(runtime.GOROOT(), "src/container/heap/heap.go") + conf.CreateFromFilenames("container/heap", filename) + prog, err := conf.Load() + if err != nil { + log.Fatal(err) + } + + printProg(prog) + // Output: + // created: [container/heap] + // imported: [] + // initial: [container/heap] + // all: [container/heap sort] +} + +// In the examples below, for stability, the chosen packages are +// relatively small, platform-independent, and low-level (and thus +// infrequently changing). +// The strconv package has internal and external tests. + +const hello = `package main + +import "fmt" + +func main() { + fmt.Println("Hello, world.") +} +` + +// ExampleCreateFromFiles loads a package and all its dependencies from +// a list of already-parsed files. +func ExampleCreateFromFiles() { + var conf loader.Config + f, err := conf.ParseFile("hello.go", hello) + if err != nil { + log.Fatal(err) + } + conf.CreateFromFiles("hello", f) + prog, err := conf.Load() + if err != nil { + log.Fatal(err) + } + + printProg(prog) + printFilenames(prog.Fset, prog.Package("strconv")) + // Output: + // created: [hello] + // imported: [] + // initial: [hello] + // all: [errors fmt hello io math os reflect runtime strconv sync sync/atomic syscall time unicode/utf8] + // strconv.Files: [atob.go atof.go atoi.go decimal.go extfloat.go ftoa.go isprint.go itoa.go quote.go] +} + +// ExampleImport loads three packages and all their dependencies. +func ExampleImport() { + // ImportWithTests("strconv") causes strconv to include + // internal_test.go, and creates an external test package, + // strconv_test. + // (Compare with output of ExampleCreateFromFiles.) 
+ + var conf loader.Config + conf.Import("unicode/utf8") + conf.Import("errors") + conf.ImportWithTests("strconv") + prog, err := conf.Load() + if err != nil { + log.Fatal(err) + } + + printProg(prog) + printFilenames(prog.Fset, prog.Package("strconv")) + printFilenames(prog.Fset, prog.Package("strconv_test")) + // Output: + // created: [strconv_test] + // imported: [errors strconv unicode/utf8] + // initial: [errors strconv strconv_test unicode/utf8] + // all: [bufio bytes errors flag fmt io math math/rand os reflect runtime runtime/pprof sort strconv strconv_test strings sync sync/atomic syscall testing text/tabwriter time unicode unicode/utf8] + // strconv.Files: [atob.go atof.go atoi.go decimal.go extfloat.go ftoa.go isprint.go itoa.go quote.go internal_test.go] + // strconv_test.Files: [atob_test.go atof_test.go atoi_test.go decimal_test.go fp_test.go ftoa_test.go itoa_test.go quote_example_test.go quote_test.go strconv_test.go] +} diff --git a/go/loader/loader.go b/go/loader/loader.go index 4dd8485246..41df1512c3 100644 --- a/go/loader/loader.go +++ b/go/loader/loader.go @@ -2,189 +2,9 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package loader loads, parses and type-checks packages of Go code -// plus their transitive closure, and retains both the ASTs and the -// derived facts. -// -// THIS INTERFACE IS EXPERIMENTAL AND IS LIKELY TO CHANGE. -// -// The package defines two primary types: Config, which specifies a -// set of initial packages to load and various other options; and -// Program, which is the result of successfully loading the packages -// specified by a configuration. -// -// The configuration can be set directly, but *Config provides various -// convenience methods to simplify the common cases, each of which can -// be called any number of times. Finally, these are followed by a -// call to Load() to actually load and type-check the program. 
-// -// var conf loader.Config -// -// // Use the command-line arguments to specify -// // a set of initial packages to load from source. -// // See FromArgsUsage for help. -// rest, err := conf.FromArgs(os.Args[1:], wantTests) -// -// // Parse the specified files and create an ad-hoc package with path "foo". -// // All files must have the same 'package' declaration. -// conf.CreateFromFilenames("foo", "foo.go", "bar.go") -// -// // Create an ad-hoc package with path "foo" from -// // the specified already-parsed files. -// // All ASTs must have the same 'package' declaration. -// conf.CreateFromFiles("foo", parsedFiles) -// -// // Add "runtime" to the set of packages to be loaded. -// conf.Import("runtime") -// -// // Adds "fmt" and "fmt_test" to the set of packages -// // to be loaded. "fmt" will include *_test.go files. -// conf.ImportWithTests("fmt") -// -// // Finally, load all the packages specified by the configuration. -// prog, err := conf.Load() -// -// -// CONCEPTS AND TERMINOLOGY -// -// An AD-HOC package is one specified as a set of source files on the -// command line. In the simplest case, it may consist of a single file -// such as $GOROOT/src/net/http/triv.go. -// -// EXTERNAL TEST packages are those comprised of a set of *_test.go -// files all with the same 'package foo_test' declaration, all in the -// same directory. (go/build.Package calls these files XTestFiles.) -// -// An IMPORTABLE package is one that can be referred to by some import -// spec. The Path() of each importable package is unique within a -// Program. -// -// Ad-hoc packages and external test packages are NON-IMPORTABLE. The -// Path() of an ad-hoc package is inferred from the package -// declarations of its files and is therefore not a unique package key. -// For example, Config.CreatePkgs may specify two initial ad-hoc -// packages both called "main". 
-// -// An AUGMENTED package is an importable package P plus all the -// *_test.go files with same 'package foo' declaration as P. -// (go/build.Package calls these files TestFiles.) -// -// The INITIAL packages are those specified in the configuration. A -// DEPENDENCY is a package loaded to satisfy an import in an initial -// package or another dependency. -// package loader -// 'go test', in-package test files, and import cycles -// --------------------------------------------------- -// -// An external test package may depend upon members of the augmented -// package that are not in the unaugmented package, such as functions -// that expose internals. (See bufio/export_test.go for an example.) -// So, the loader must ensure that for each external test package -// it loads, it also augments the corresponding non-test package. -// -// The import graph over n unaugmented packages must be acyclic; the -// import graph over n-1 unaugmented packages plus one augmented -// package must also be acyclic. ('go test' relies on this.) But the -// import graph over n augmented packages may contain cycles. -// -// First, all the (unaugmented) non-test packages and their -// dependencies are imported in the usual way; the loader reports an -// error if it detects an import cycle. -// -// Then, each package P for which testing is desired is augmented by -// the list P' of its in-package test files, by calling -// (*types.Checker).Files. This arrangement ensures that P' may -// reference definitions within P, but P may not reference definitions -// within P'. Furthermore, P' may import any other package, including -// ones that depend upon P, without an import cycle error. 
-// -// Consider two packages A and B, both of which have lists of -// in-package test files we'll call A' and B', and which have the -// following import graph edges: -// B imports A -// B' imports A -// A' imports B -// This last edge would be expected to create an error were it not -// for the special type-checking discipline above. -// Cycles of size greater than two are possible. For example: -// compress/bzip2/bzip2_test.go (package bzip2) imports "io/ioutil" -// io/ioutil/tempfile_test.go (package ioutil) imports "regexp" -// regexp/exec_test.go (package regexp) imports "compress/bzip2" -// -// -// Concurrency -// ----------- -// -// Let us define the import dependency graph as follows. Each node is a -// list of files passed to (Checker).Files at once. Many of these lists -// are the production code of an importable Go package, so those nodes -// are labelled by the package's import path. The remaining nodes are -// ad-hoc packages and lists of in-package *_test.go files that augment -// an importable package; those nodes have no label. -// -// The edges of the graph represent import statements appearing within a -// file. An edge connects a node (a list of files) to the node it -// imports, which is importable and thus always labelled. -// -// Loading is controlled by this dependency graph. -// -// To reduce I/O latency, we start loading a package's dependencies -// asynchronously as soon as we've parsed its files and enumerated its -// imports (scanImports). This performs a preorder traversal of the -// import dependency graph. -// -// To exploit hardware parallelism, we type-check unrelated packages in -// parallel, where "unrelated" means not ordered by the partial order of -// the import dependency graph. -// -// We use a concurrency-safe blocking cache (importer.imported) to -// record the results of type-checking, whether success or failure. An -// entry is created in this cache by startLoad the first time the -// package is imported. 
The first goroutine to request an entry becomes -// responsible for completing the task and broadcasting completion to -// subsequent requestors, which block until then. -// -// Type checking occurs in (parallel) postorder: we cannot type-check a -// set of files until we have loaded and type-checked all of their -// immediate dependencies (and thus all of their transitive -// dependencies). If the input were guaranteed free of import cycles, -// this would be trivial: we could simply wait for completion of the -// dependencies and then invoke the typechecker. -// -// But as we saw in the 'go test' section above, some cycles in the -// import graph over packages are actually legal, so long as the -// cycle-forming edge originates in the in-package test files that -// augment the package. This explains why the nodes of the import -// dependency graph are not packages, but lists of files: the unlabelled -// nodes avoid the cycles. Consider packages A and B where B imports A -// and A's in-package tests AT import B. The naively constructed import -// graph over packages would contain a cycle (A+AT) --> B --> (A+AT) but -// the graph over lists of files is AT --> B --> A, where AT is an -// unlabelled node. -// -// Awaiting completion of the dependencies in a cyclic graph would -// deadlock, so we must materialize the import dependency graph (as -// importer.graph) and check whether each import edge forms a cycle. If -// x imports y, and the graph already contains a path from y to x, then -// there is an import cycle, in which case the processing of x must not -// wait for the completion of processing of y. -// -// When the type-checker makes a callback (doImport) to the loader for a -// given import edge, there are two possible cases. In the normal case, -// the dependency has already been completely type-checked; doImport -// does a cache lookup and returns it. In the cyclic case, the entry in -// the cache is still necessarily incomplete, indicating a cycle. 
We -// perform the cycle check again to obtain the error message, and return -// the error. -// -// The result of using concurrency is about a 2.5x speedup for stdlib_test. - -// TODO(adonovan): -// - cache the calls to build.Import so we don't do it three times per -// test package. -// - Thorough overhaul of package documentation. +// See doc.go for package documentation and implementation notes. import ( "errors" @@ -353,16 +173,16 @@ type Program struct { // as specified by Config.ImportPkgs. Imported map[string]*PackageInfo - // ImportMap is the canonical mapping of import paths to - // packages used by the type-checker (Config.TypeChecker.Packages). - // It contains all Imported initial packages, but not Created - // ones, and all imported dependencies. - ImportMap map[string]*types.Package - // AllPackages contains the PackageInfo of every package // encountered by Load: all initial packages and all // dependencies, including incomplete ones. AllPackages map[*types.Package]*PackageInfo + + // importMap is the canonical mapping of import paths to + // packages used by the type-checker. + // It contains all Imported initial packages, but not Created + // ones, and all imported dependencies. + importMap map[string]*types.Package } // PackageInfo holds the ASTs and facts derived by the type-checker @@ -561,6 +381,20 @@ func (prog *Program) InitialPackages() []*PackageInfo { return infos } +// Package returns the ASTs and results of type checking for the +// specified package. +func (prog *Program) Package(path string) *PackageInfo { + if info, ok := prog.AllPackages[prog.importMap[path]]; ok { + return info + } + for _, info := range prog.Created { + if path == info.Pkg.Path() { + return info + } + } + return nil +} + // ---------- Implementation ---------- // importer holds the working state of the algorithm. @@ -661,6 +495,8 @@ func (conf *Config) Load() (*Program, error) { // Install default FindPackage hook using go/build logic. 
if conf.FindPackage == nil { conf.FindPackage = func(ctxt *build.Context, path string) (*build.Package, error) { + // TODO(adonovan): cache calls to build.Import + // so we don't do it three times per test package. bp, err := ctxt.Import(path, conf.Cwd, 0) if _, ok := err.(*build.NoGoError); ok { return bp, nil // empty directory is not an error @@ -672,7 +508,7 @@ func (conf *Config) Load() (*Program, error) { prog := &Program{ Fset: conf.fset(), Imported: make(map[string]*PackageInfo), - ImportMap: conf.TypeChecker.Packages, + importMap: conf.TypeChecker.Packages, AllPackages: make(map[*types.Package]*PackageInfo), } @@ -779,7 +615,7 @@ func (conf *Config) Load() (*Program, error) { // Create infos for indirectly imported packages. // e.g. incomplete packages without syntax, loaded from export data. - for _, obj := range prog.ImportMap { + for _, obj := range prog.importMap { info := prog.AllPackages[obj] if info == nil { prog.AllPackages[obj] = &PackageInfo{Pkg: obj, Importable: true}