1
0
mirror of https://github.com/golang/go synced 2024-11-05 15:26:15 -07:00

cmd/go: add index creation methods

This change functions to scan modules and packages into an intermediate
RawPackage struct and also functions to write them out to and index.

Change-Id: Ia1a3b58b992e9be52c5d1397e85c642f902011cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/398415
Run-TryBot: Michael Matloob <matloob@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
This commit is contained in:
Michael Matloob 2022-04-05 18:47:23 -04:00
parent 8a56c7742d
commit 293d43e17e
4 changed files with 452 additions and 4 deletions

View File

@ -9,6 +9,7 @@ package modindex
import (
"bytes"
"cmd/go/internal/fsys"
"errors"
"fmt"
"go/ast"
@ -16,7 +17,6 @@ import (
"go/token"
"io"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
@ -130,9 +130,9 @@ func (ctxt *Context) isAbsPath(path string) bool {
return filepath.IsAbs(path)
}
// isDir calls ctxt.IsDir (if not nil) or else uses os.Stat.
// isDir calls ctxt.IsDir (if not nil) or else uses fsys.Stat.
func isDir(path string) bool {
fi, err := os.Stat(path)
fi, err := fsys.Stat(path)
return err == nil && fi.IsDir()
}
@ -476,7 +476,7 @@ func getFileInfo(dir, name string, fset *token.FileSet) (*fileInfo, error) {
return info, nil
}
f, err := os.Open(info.name)
f, err := fsys.Open(info.name)
if err != nil {
return nil, err
}

View File

@ -0,0 +1,47 @@
This file documents the index format that is read and written by this package.
The index format is an encoding of a series of RawPackage structs
Field names refer to fields on RawPackage and rawFile.
The file uses little endian encoding for the uint32s.
Strings are written into the string table at the end of the file. Each string
is null-terminated. String offsets are relative to the start of the string table.
Bools are written as uint32s: 0 for false and 1 for true.
“go index v0\n”
str uint32 - offset of string table
n uint32 - number of packages
dirnames [n]uint32 - offsets to package names in string table; names sorted by raw string
packages [n]uint32 - offset where package begins
for each RawPackage:
error uint32 - string offset // error is produced by fsys.ReadDir or fmt.Errorf
path uint32 - string offset
dir uint32 - string offset (directory path relative to module root)
len(sourceFiles) uint32
sourceFiles [n]uint32 - offset to source file (relative to start of index file)
for each sourceFile:
error - string offset // error is either produced by fmt.Errorf,errors.New or is io.EOF
parseError - string offset // if non-empty, a json-encoded parseError struct (see below). Is either produced by io.ReadAll,os.ReadFile,errors.New or is scanner.Error,scanner.ErrorList
name - string offset
synopsis - string offset
pkgName - string offset
ignoreFile - int32 bool // report the file in Ignored(Go|Other)Files because there was an error reading it or parsing its build constraints.
binaryOnly uint32 bool
cgoDirectives string offset // the #cgo directive lines in the comment on import "C"
goBuildConstraint - string offset
len(plusBuildConstraints) - uint32
plusBuildConstraints - [n]uint32 (string offsets)
len(imports) uint32
for each rawImport:
path - string offset
position - file, offset, line, column - uint32
len(embeds) numEmbeds uint32
for each embed:
pattern - string offset
position - file, offset, line, column - uint32
[string table]
// parseError struct
type parseError struct {
ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise
ErrorString string // non-empty for all other cases
}

View File

@ -0,0 +1,246 @@
package modindex
import (
"cmd/go/internal/base"
"cmd/go/internal/fsys"
"cmd/go/internal/par"
"encoding/json"
"errors"
"fmt"
"go/doc"
"go/scanner"
"go/token"
"io/fs"
"path/filepath"
"strings"
)
// indexModule indexes the module at the given directory and returns its
// encoded representation.
func indexModule(modroot string) ([]byte, error) {
var packages []*rawPackage
err := fsys.Walk(modroot, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return err
}
if !info.IsDir() {
return nil
}
// stop at module boundaries
if modroot != path {
if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() {
return filepath.SkipDir
}
}
// TODO(matloob): what do we do about symlinks
rel, err := filepath.Rel(modroot, path)
if err != nil {
panic(err)
}
packages = append(packages, importRaw(modroot, rel))
return nil
})
if err != nil {
return nil, err
}
return encodeModule(packages)
}
// rawPackage holds the information from each package that's needed to
// fill a build.Package once the context is available.
type rawPackage struct {
error string
dir string // directory containing package sources, relative to the module root
// Source files
sourceFiles []*rawFile
}
type parseError struct {
ErrorList *scanner.ErrorList
ErrorString string
}
// parseErrorToString converts the error from parsing the file into a string
// representation. A nil error is converted to an empty string, and all other
// errors are converted to a JSON-marshalled parseError struct, with ErrorList
// set for errors of type scanner.ErrorList, and ErrorString set to the error's
// string representation for all other errors.
func parseErrorToString(err error) string {
if err == nil {
return ""
}
var p parseError
if e, ok := err.(scanner.ErrorList); ok {
p.ErrorList = &e
} else {
p.ErrorString = e.Error()
}
s, err := json.Marshal(p)
if err != nil {
panic(err) // This should be impossible because scanner.Error contains only strings and ints.
}
return string(s)
}
// parseErrorFrom string converts a string produced by parseErrorToString back
// to an error. An empty string is converted to a nil error, and all
// other strings are expected to be JSON-marshalled parseError structs.
// The two functions are meant to preserve the structure of an
// error of type scanner.ErrorList in a round trip, but may not preserve the
// structure of other errors.
func parseErrorFromString(s string) error {
if s == "" {
return nil
}
var p parseError
if err := json.Unmarshal([]byte(s), &p); err != nil {
base.Fatalf(`go: invalid parse error value in index: %q. This indicates a corrupted index. Run "go clean -cache" to reset the module cache.`, s)
}
if p.ErrorList != nil {
return *p.ErrorList
}
return errors.New(p.ErrorString)
}
// rawFile is the struct representation of the file holding all
// information in its fields.
type rawFile struct {
error string
parseError string
name string
synopsis string // doc.Synopsis of package comment... Compute synopsis on all of these?
pkgName string
ignoreFile bool // starts with _ or . or should otherwise always be ignored
binaryOnly bool // cannot be rebuilt from source (has //go:binary-only-package comment)
cgoDirectives string // the #cgo directive lines in the comment on import "C"
goBuildConstraint string
plusBuildConstraints []string
imports []rawImport
embeds []embed
}
type rawImport struct {
path string
position token.Position
}
type embed struct {
pattern string
position token.Position
}
var pkgcache par.Cache // for packages not in modcache
// importRaw fills the rawPackage from the package files in srcDir.
// dir is the package's path relative to the modroot.
func importRaw(modroot, reldir string) *rawPackage {
p := &rawPackage{
dir: reldir,
}
absdir := filepath.Join(modroot, reldir)
// We still haven't checked
// that p.dir directory exists. This is the right time to do that check.
// We can't do it earlier, because we want to gather partial information for the
// non-nil *Package returned when an error occurs.
// We need to do this before we return early on FindOnly flag.
if !isDir(absdir) {
// package was not found
p.error = fmt.Errorf("cannot find package in:\n\t%s", absdir).Error()
return p
}
entries, err := fsys.ReadDir(absdir)
if err != nil {
p.error = err.Error()
return p
}
fset := token.NewFileSet()
for _, d := range entries {
if d.IsDir() {
continue
}
if d.Mode()&fs.ModeSymlink != 0 {
if isDir(filepath.Join(absdir, d.Name())) {
// Symlinks to directories are not source files.
continue
}
}
name := d.Name()
ext := nameExt(name)
if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") {
continue
}
info, err := getFileInfo(absdir, name, fset)
if err != nil {
p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, error: err.Error()})
continue
} else if info == nil {
p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, ignoreFile: true})
continue
}
rf := &rawFile{
name: name,
goBuildConstraint: info.goBuildConstraint,
plusBuildConstraints: info.plusBuildConstraints,
binaryOnly: info.binaryOnly,
}
if info.parsed != nil {
rf.pkgName = info.parsed.Name.Name
}
// Going to save the file. For non-Go files, can stop here.
p.sourceFiles = append(p.sourceFiles, rf)
if ext != ".go" {
continue
}
if info.parseErr != nil {
rf.parseError = parseErrorToString(info.parseErr)
// Fall through: we might still have a partial AST in info.Parsed,
// and we want to list files with parse errors anyway.
}
if info.parsed != nil && info.parsed.Doc != nil {
rf.synopsis = doc.Synopsis(info.parsed.Doc.Text())
}
var cgoDirectives []string
for _, imp := range info.imports {
if imp.path == "C" {
cgoDirectives = append(cgoDirectives, extractCgoDirectives(imp.doc.Text())...)
}
rf.imports = append(rf.imports, rawImport{path: imp.path, position: fset.Position(imp.pos)})
}
rf.cgoDirectives = strings.Join(cgoDirectives, "\n")
for _, emb := range info.embeds {
rf.embeds = append(rf.embeds, embed{emb.pattern, emb.pos})
}
}
return p
}
// extractCgoDirectives filters only the lines containing #cgo directives from the input,
// which is the comment on import "C".
func extractCgoDirectives(doc string) []string {
var out []string
for _, line := range strings.Split(doc, "\n") {
// Line is
// #cgo [GOOS/GOARCH...] LDFLAGS: stuff
//
line = strings.TrimSpace(line)
if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') {
continue
}
out = append(out, line)
}
return out
}

View File

@ -0,0 +1,155 @@
package modindex
import (
"cmd/go/internal/base"
"encoding/binary"
"go/token"
"math"
"sort"
"strings"
)
const indexVersion = "go index v0\n"
// encodeModule produces the encoded representation of the module index.
// encodeModule may modify the packages slice.
func encodeModule(packages []*rawPackage) ([]byte, error) {
e := newEncoder()
e.Bytes([]byte(indexVersion))
stringTableOffsetPos := e.Pos() // fill this at the end
e.Uint32(0) // string table offset
e.Int(len(packages))
sort.Slice(packages, func(i, j int) bool {
return packages[i].dir < packages[j].dir
})
for _, p := range packages {
e.String(p.dir)
}
packagesOffsetPos := e.Pos()
for range packages {
e.Int(0)
}
for i, p := range packages {
e.IntAt(e.Pos(), packagesOffsetPos+4*i)
encodePackage(e, p)
}
e.IntAt(e.Pos(), stringTableOffsetPos)
e.Bytes(e.stringTable)
return e.b, nil
}
func encodePackage(e *encoder, p *rawPackage) {
e.String(p.error)
e.String(p.dir)
e.Int(len(p.sourceFiles)) // number of source files
sourceFileOffsetPos := e.Pos() // the pos of the start of the source file offsets
for range p.sourceFiles {
e.Int(0)
}
for i, f := range p.sourceFiles {
e.IntAt(e.Pos(), sourceFileOffsetPos+4*i)
encodeFile(e, f)
}
}
func encodeFile(e *encoder, f *rawFile) {
e.String(f.error)
e.String(f.parseError)
e.String(f.synopsis)
e.String(f.name)
e.String(f.pkgName)
e.Bool(f.ignoreFile)
e.Bool(f.binaryOnly)
e.String(f.cgoDirectives)
e.String(f.goBuildConstraint)
e.Int(len(f.plusBuildConstraints))
for _, s := range f.plusBuildConstraints {
e.String(s)
}
e.Int(len(f.imports))
for _, m := range f.imports {
e.String(m.path)
e.Position(m.position)
}
e.Int(len(f.embeds))
for _, embed := range f.embeds {
e.String(embed.pattern)
e.Position(embed.position)
}
}
func newEncoder() *encoder {
e := &encoder{strings: make(map[string]int)}
// place the empty string at position 0 in the string table
e.stringTable = append(e.stringTable, 0)
e.strings[""] = 0
return e
}
func (e *encoder) Position(position token.Position) {
e.String(position.Filename)
e.Int(position.Offset)
e.Int(position.Line)
e.Int(position.Column)
}
type encoder struct {
b []byte
stringTable []byte
strings map[string]int
}
func (e *encoder) Pos() int {
return len(e.b)
}
func (e *encoder) Bytes(b []byte) {
e.b = append(e.b, b...)
}
func (e *encoder) String(s string) {
if strings.IndexByte(s, 0) >= 0 {
base.Fatalf("go: attempting to encode a string containing a null byte")
}
if n, ok := e.strings[s]; ok {
e.Int(n)
return
}
pos := len(e.stringTable)
e.strings[s] = pos
e.Int(pos)
e.stringTable = append(e.stringTable, []byte(s)...)
e.stringTable = append(e.stringTable, 0)
}
func (e *encoder) Bool(b bool) {
if b {
e.Uint32(1)
} else {
e.Uint32(0)
}
}
func (e *encoder) Uint32(n uint32) {
e.b = binary.LittleEndian.AppendUint32(e.b, n)
}
// Int encodes n. Note that all ints are written to the index as uint32s.
func (e *encoder) Int(n int) {
if n < 0 || int64(n) > math.MaxUint32 {
base.Fatalf("go: attempting to write an int to the index that overflows uint32")
}
e.Uint32(uint32(n))
}
func (e *encoder) IntAt(n int, at int) {
if n < 0 || int64(n) > math.MaxUint32 {
base.Fatalf("go: attempting to write an int to the index that overflows uint32")
}
binary.LittleEndian.PutUint32(e.b[at:], uint32(n))
}