From 293d43e17eaae8ccb83364e401538d51c035b8a4 Mon Sep 17 00:00:00 2001 From: Michael Matloob Date: Tue, 5 Apr 2022 18:47:23 -0400 Subject: [PATCH] cmd/go: add index creation methods This change functions to scan modules and packages into an intermediate RawPackage struct and also functions to write them out to and index. Change-Id: Ia1a3b58b992e9be52c5d1397e85c642f902011cb Reviewed-on: https://go-review.googlesource.com/c/go/+/398415 Run-TryBot: Michael Matloob TryBot-Result: Gopher Robot Reviewed-by: Michael Matloob Reviewed-by: Bryan Mills --- src/cmd/go/internal/modindex/build.go | 8 +- src/cmd/go/internal/modindex/index_format.txt | 47 ++++ src/cmd/go/internal/modindex/scan.go | 246 ++++++++++++++++++ src/cmd/go/internal/modindex/write.go | 155 +++++++++++ 4 files changed, 452 insertions(+), 4 deletions(-) create mode 100644 src/cmd/go/internal/modindex/index_format.txt create mode 100644 src/cmd/go/internal/modindex/scan.go create mode 100644 src/cmd/go/internal/modindex/write.go diff --git a/src/cmd/go/internal/modindex/build.go b/src/cmd/go/internal/modindex/build.go index 2f62e2868f..78bd12636d 100644 --- a/src/cmd/go/internal/modindex/build.go +++ b/src/cmd/go/internal/modindex/build.go @@ -9,6 +9,7 @@ package modindex import ( "bytes" + "cmd/go/internal/fsys" "errors" "fmt" "go/ast" @@ -16,7 +17,6 @@ import ( "go/token" "io" "io/fs" - "os" "path/filepath" "sort" "strings" @@ -130,9 +130,9 @@ func (ctxt *Context) isAbsPath(path string) bool { return filepath.IsAbs(path) } -// isDir calls ctxt.IsDir (if not nil) or else uses os.Stat. +// isDir calls ctxt.IsDir (if not nil) or else uses fsys.Stat. func isDir(path string) bool { - fi, err := os.Stat(path) + fi, err := fsys.Stat(path) return err == nil && fi.IsDir() } @@ -476,7 +476,7 @@ func getFileInfo(dir, name string, fset *token.FileSet) (*fileInfo, error) { return info, nil } - f, err := os.Open(info.name) + f, err := fsys.Open(info.name) if err != nil { return nil, err } diff --git a/src/cmd/go/internal/modindex/index_format.txt b/src/cmd/go/internal/modindex/index_format.txt new file mode 100644 index 0000000000..3768eea6c7 --- /dev/null +++ b/src/cmd/go/internal/modindex/index_format.txt @@ -0,0 +1,47 @@ +This file documents the index format that is read and written by this package. +The index format is an encoding of a series of RawPackage structs + +Field names refer to fields on RawPackage and rawFile. +The file uses little endian encoding for the uint32s. +Strings are written into the string table at the end of the file. Each string +is null-terminated. String offsets are relative to the start of the string table. +Bools are written as uint32s: 0 for false and 1 for true. + +“go index v0\n” +str uint32 - offset of string table +n uint32 - number of packages +dirnames [n]uint32 - offsets to package names in string table; names sorted by raw string +packages [n]uint32 - offset where package begins +for each RawPackage: + error uint32 - string offset // error is produced by fsys.ReadDir or fmt.Errorf + path uint32 - string offset + dir uint32 - string offset (directory path relative to module root) + len(sourceFiles) uint32 + sourceFiles [n]uint32 - offset to source file (relative to start of index file) + for each sourceFile: + error - string offset // error is either produced by fmt.Errorf,errors.New or is io.EOF + parseError - string offset // if non-empty, a json-encoded parseError struct (see below). Is either produced by io.ReadAll,os.ReadFile,errors.New or is scanner.Error,scanner.ErrorList + name - string offset + synopsis - string offset + pkgName - string offset + ignoreFile - int32 bool // report the file in Ignored(Go|Other)Files because there was an error reading it or parsing its build constraints. + binaryOnly uint32 bool + cgoDirectives string offset // the #cgo directive lines in the comment on import "C" + goBuildConstraint - string offset + len(plusBuildConstraints) - uint32 + plusBuildConstraints - [n]uint32 (string offsets) + len(imports) uint32 + for each rawImport: + path - string offset + position - file, offset, line, column - uint32 + len(embeds) numEmbeds uint32 + for each embed: + pattern - string offset + position - file, offset, line, column - uint32 +[string table] + +// parseError struct +type parseError struct { + ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise + ErrorString string // non-empty for all other cases +} \ No newline at end of file diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go new file mode 100644 index 0000000000..0904278691 --- /dev/null +++ b/src/cmd/go/internal/modindex/scan.go @@ -0,0 +1,246 @@ +package modindex + +import ( + "cmd/go/internal/base" + "cmd/go/internal/fsys" + "cmd/go/internal/par" + "encoding/json" + "errors" + "fmt" + "go/doc" + "go/scanner" + "go/token" + "io/fs" + "path/filepath" + "strings" +) + +// indexModule indexes the module at the given directory and returns its +// encoded representation. +func indexModule(modroot string) ([]byte, error) { + var packages []*rawPackage + err := fsys.Walk(modroot, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + return nil + } + // stop at module boundaries + if modroot != path { + if fi, err := fsys.Stat(filepath.Join(path, "go.mod")); err == nil && !fi.IsDir() { + return filepath.SkipDir + } + } + // TODO(matloob): what do we do about symlinks + rel, err := filepath.Rel(modroot, path) + if err != nil { + panic(err) + } + packages = append(packages, importRaw(modroot, rel)) + return nil + }) + if err != nil { + return nil, err + } + return encodeModule(packages) +} + +// rawPackage holds the information from each package that's needed to +// fill a build.Package once the context is available. +type rawPackage struct { + error string + dir string // directory containing package sources, relative to the module root + + // Source files + sourceFiles []*rawFile +} + +type parseError struct { + ErrorList *scanner.ErrorList + ErrorString string +} + +// parseErrorToString converts the error from parsing the file into a string +// representation. A nil error is converted to an empty string, and all other +// errors are converted to a JSON-marshalled parseError struct, with ErrorList +// set for errors of type scanner.ErrorList, and ErrorString set to the error's +// string representation for all other errors. +func parseErrorToString(err error) string { + if err == nil { + return "" + } + var p parseError + if e, ok := err.(scanner.ErrorList); ok { + p.ErrorList = &e + } else { + p.ErrorString = e.Error() + } + s, err := json.Marshal(p) + if err != nil { + panic(err) // This should be impossible because scanner.Error contains only strings and ints. + } + return string(s) +} + +// parseErrorFrom string converts a string produced by parseErrorToString back +// to an error. An empty string is converted to a nil error, and all +// other strings are expected to be JSON-marshalled parseError structs. +// The two functions are meant to preserve the structure of an +// error of type scanner.ErrorList in a round trip, but may not preserve the +// structure of other errors. +func parseErrorFromString(s string) error { + if s == "" { + return nil + } + var p parseError + if err := json.Unmarshal([]byte(s), &p); err != nil { + base.Fatalf(`go: invalid parse error value in index: %q. This indicates a corrupted index. Run "go clean -cache" to reset the module cache.`, s) + } + if p.ErrorList != nil { + return *p.ErrorList + } + return errors.New(p.ErrorString) +} + +// rawFile is the struct representation of the file holding all +// information in its fields. +type rawFile struct { + error string + parseError string + + name string + synopsis string // doc.Synopsis of package comment... Compute synopsis on all of these? + pkgName string + ignoreFile bool // starts with _ or . or should otherwise always be ignored + binaryOnly bool // cannot be rebuilt from source (has //go:binary-only-package comment) + cgoDirectives string // the #cgo directive lines in the comment on import "C" + goBuildConstraint string + plusBuildConstraints []string + imports []rawImport + embeds []embed +} + +type rawImport struct { + path string + position token.Position +} + +type embed struct { + pattern string + position token.Position +} + +var pkgcache par.Cache // for packages not in modcache + +// importRaw fills the rawPackage from the package files in srcDir. +// dir is the package's path relative to the modroot. +func importRaw(modroot, reldir string) *rawPackage { + p := &rawPackage{ + dir: reldir, + } + + absdir := filepath.Join(modroot, reldir) + + // We still haven't checked + // that p.dir directory exists. This is the right time to do that check. + // We can't do it earlier, because we want to gather partial information for the + // non-nil *Package returned when an error occurs. + // We need to do this before we return early on FindOnly flag. + if !isDir(absdir) { + // package was not found + p.error = fmt.Errorf("cannot find package in:\n\t%s", absdir).Error() + return p + } + + entries, err := fsys.ReadDir(absdir) + if err != nil { + p.error = err.Error() + return p + } + + fset := token.NewFileSet() + for _, d := range entries { + if d.IsDir() { + continue + } + if d.Mode()&fs.ModeSymlink != 0 { + if isDir(filepath.Join(absdir, d.Name())) { + // Symlinks to directories are not source files. + continue + } + } + + name := d.Name() + ext := nameExt(name) + + if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") { + continue + } + info, err := getFileInfo(absdir, name, fset) + if err != nil { + p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, error: err.Error()}) + continue + } else if info == nil { + p.sourceFiles = append(p.sourceFiles, &rawFile{name: name, ignoreFile: true}) + continue + } + rf := &rawFile{ + name: name, + goBuildConstraint: info.goBuildConstraint, + plusBuildConstraints: info.plusBuildConstraints, + binaryOnly: info.binaryOnly, + } + if info.parsed != nil { + rf.pkgName = info.parsed.Name.Name + } + + // Going to save the file. For non-Go files, can stop here. + p.sourceFiles = append(p.sourceFiles, rf) + if ext != ".go" { + continue + } + + if info.parseErr != nil { + rf.parseError = parseErrorToString(info.parseErr) + // Fall through: we might still have a partial AST in info.Parsed, + // and we want to list files with parse errors anyway. + } + + if info.parsed != nil && info.parsed.Doc != nil { + rf.synopsis = doc.Synopsis(info.parsed.Doc.Text()) + } + + var cgoDirectives []string + for _, imp := range info.imports { + if imp.path == "C" { + cgoDirectives = append(cgoDirectives, extractCgoDirectives(imp.doc.Text())...) + } + rf.imports = append(rf.imports, rawImport{path: imp.path, position: fset.Position(imp.pos)}) + } + rf.cgoDirectives = strings.Join(cgoDirectives, "\n") + for _, emb := range info.embeds { + rf.embeds = append(rf.embeds, embed{emb.pattern, emb.pos}) + } + + } + return p +} + +// extractCgoDirectives filters only the lines containing #cgo directives from the input, +// which is the comment on import "C". +func extractCgoDirectives(doc string) []string { + var out []string + for _, line := range strings.Split(doc, "\n") { + // Line is + // #cgo [GOOS/GOARCH...] LDFLAGS: stuff + // + line = strings.TrimSpace(line) + if len(line) < 5 || line[:4] != "#cgo" || (line[4] != ' ' && line[4] != '\t') { + continue + } + + out = append(out, line) + } + return out +} diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go new file mode 100644 index 0000000000..b3a41cb2bd --- /dev/null +++ b/src/cmd/go/internal/modindex/write.go @@ -0,0 +1,155 @@ +package modindex + +import ( + "cmd/go/internal/base" + "encoding/binary" + "go/token" + "math" + "sort" + "strings" +) + +const indexVersion = "go index v0\n" + +// encodeModule produces the encoded representation of the module index. +// encodeModule may modify the packages slice. +func encodeModule(packages []*rawPackage) ([]byte, error) { + e := newEncoder() + e.Bytes([]byte(indexVersion)) + stringTableOffsetPos := e.Pos() // fill this at the end + e.Uint32(0) // string table offset + e.Int(len(packages)) + sort.Slice(packages, func(i, j int) bool { + return packages[i].dir < packages[j].dir + }) + for _, p := range packages { + e.String(p.dir) + } + packagesOffsetPos := e.Pos() + for range packages { + e.Int(0) + } + for i, p := range packages { + e.IntAt(e.Pos(), packagesOffsetPos+4*i) + encodePackage(e, p) + } + e.IntAt(e.Pos(), stringTableOffsetPos) + e.Bytes(e.stringTable) + return e.b, nil +} + +func encodePackage(e *encoder, p *rawPackage) { + e.String(p.error) + e.String(p.dir) + e.Int(len(p.sourceFiles)) // number of source files + sourceFileOffsetPos := e.Pos() // the pos of the start of the source file offsets + for range p.sourceFiles { + e.Int(0) + } + for i, f := range p.sourceFiles { + e.IntAt(e.Pos(), sourceFileOffsetPos+4*i) + encodeFile(e, f) + } +} + +func encodeFile(e *encoder, f *rawFile) { + e.String(f.error) + e.String(f.parseError) + e.String(f.synopsis) + e.String(f.name) + e.String(f.pkgName) + e.Bool(f.ignoreFile) + e.Bool(f.binaryOnly) + e.String(f.cgoDirectives) + e.String(f.goBuildConstraint) + + e.Int(len(f.plusBuildConstraints)) + for _, s := range f.plusBuildConstraints { + e.String(s) + } + + e.Int(len(f.imports)) + for _, m := range f.imports { + e.String(m.path) + e.Position(m.position) + } + + e.Int(len(f.embeds)) + for _, embed := range f.embeds { + e.String(embed.pattern) + e.Position(embed.position) + } +} + +func newEncoder() *encoder { + e := &encoder{strings: make(map[string]int)} + + // place the empty string at position 0 in the string table + e.stringTable = append(e.stringTable, 0) + e.strings[""] = 0 + + return e +} + +func (e *encoder) Position(position token.Position) { + e.String(position.Filename) + e.Int(position.Offset) + e.Int(position.Line) + e.Int(position.Column) +} + +type encoder struct { + b []byte + stringTable []byte + strings map[string]int +} + +func (e *encoder) Pos() int { + return len(e.b) +} + +func (e *encoder) Bytes(b []byte) { + e.b = append(e.b, b...) +} + +func (e *encoder) String(s string) { + if strings.IndexByte(s, 0) >= 0 { + base.Fatalf("go: attempting to encode a string containing a null byte") + } + if n, ok := e.strings[s]; ok { + e.Int(n) + return + } + pos := len(e.stringTable) + e.strings[s] = pos + e.Int(pos) + e.stringTable = append(e.stringTable, []byte(s)...) + e.stringTable = append(e.stringTable, 0) +} + +func (e *encoder) Bool(b bool) { + if b { + e.Uint32(1) + } else { + e.Uint32(0) + } +} + +func (e *encoder) Uint32(n uint32) { + e.b = binary.LittleEndian.AppendUint32(e.b, n) +} + +// Int encodes n. Note that all ints are written to the index as uint32s. +func (e *encoder) Int(n int) { + if n < 0 || int64(n) > math.MaxUint32 { + base.Fatalf("go: attempting to write an int to the index that overflows uint32") + } + e.Uint32(uint32(n)) +} + +func (e *encoder) IntAt(n int, at int) { + if n < 0 || int64(n) > math.MaxUint32 { + base.Fatalf("go: attempting to write an int to the index that overflows uint32") + } + binary.LittleEndian.PutUint32(e.b[at:], uint32(n)) +}