1
0
mirror of https://github.com/golang/go synced 2024-09-30 14:28:33 -06:00

godoc: feed indexer concurrently, add selective indexing hook, tests

On big corpuses, the indexer was spending most of its time waiting
for filesystem operations (especially with network filesystems)
and not actually indexing.  This keeps the filesystem busy and indexer
running in different goroutines.

Also, add a hook to let godoc hosts disable indexing of certain
directories.

And finally, start adding tests for godoc, which required
fleshing out (and testing) the mapfs code.

R=golang-dev, adg, bgarcia
CC=golang-dev
https://golang.org/cl/21520045
This commit is contained in:
Brad Fitzpatrick 2013-11-05 09:35:58 -05:00
parent 2afbb1cd5f
commit 964f0f559c
5 changed files with 352 additions and 51 deletions

View File

@ -32,6 +32,11 @@ type Corpus struct {
// order.
IndexFiles string
// IndexThrottle specifies the indexing throttle value
// between 0.0 and 1.0. At 0.0, the indexer always sleeps.
// At 1.0, the indexer never sleeps. Because 0.0 is useless
// and redundant with setting IndexEnabled to false, the
// zero value for IndexThrottle means 0.9.
IndexThrottle float64
// MaxResults optionally specifies the maximum results for indexing.
@ -50,6 +55,13 @@ type Corpus struct {
// package listing.
SummarizePackage func(pkg string) (summary string, showList, ok bool)
// IndexDirectory optionally specifies a function to determine
// whether the provided directory should be indexed. The dir
// will be of the form "/src/cmd/6a", "/doc/play",
// "/src/pkg/io", etc.
// If nil, all directories are indexed if indexing is enabled.
IndexDirectory func(dir string) bool
testDir string // TODO(bradfitz,adg): migrate old godoc flag? looks unused.
// Send a value on this channel to trigger a metadata refresh.

View File

@ -56,10 +56,12 @@ import (
"runtime"
"sort"
"strings"
"sync"
"time"
"unicode"
"code.google.com/p/go.tools/godoc/util"
"code.google.com/p/go.tools/godoc/vfs"
)
// ----------------------------------------------------------------------------
@ -371,8 +373,11 @@ type Statistics struct {
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
c *Corpus
fset *token.FileSet // file set for all indexed files
c *Corpus
fset *token.FileSet // file set for all indexed files
fsOpenGate chan bool // send pre fs.Open; receive on close
mu sync.Mutex // guards all the following
sources bytes.Buffer // concatenated sources
packages map[string]*Pak // map of canonicalized *Paks
words map[string]*IndexResult // RunLists of Spots
@ -381,6 +386,7 @@ type Indexer struct {
file *File // AST for current file
decl ast.Decl // AST for current decl
stats Statistics
throttle *util.Throttle
}
func (x *Indexer) lookupPackage(path, name string) *Pak {
@ -535,12 +541,7 @@ func pkgName(filename string) string {
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
// open file
f, err := x.c.fs.Open(filename)
if err != nil {
return
}
func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
defer f.Close()
// The file set's base offset and x.sources size must be in lock-step;
@ -641,17 +642,17 @@ func isWhitelisted(filename string) bool {
return whitelisted[key]
}
func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
if f.IsDir() {
func (x *Indexer) visitFile(dirname string, fi os.FileInfo, fulltextIndex bool) {
if fi.IsDir() {
return
}
filename := pathpkg.Join(dirname, f.Name())
filename := pathpkg.Join(dirname, fi.Name())
goFile := false
switch {
case isGoFile(f):
if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
case isGoFile(fi):
if !includeTestFiles && (!isPkgFile(fi) || strings.HasPrefix(filename, "test/")) {
return
}
if !includeMainPackages && pkgName(filename) == "main" {
@ -659,11 +660,25 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
}
goFile = true
case !fulltextIndex || !isWhitelisted(f.Name()):
case !fulltextIndex || !isWhitelisted(fi.Name()):
return
}
file, fast := x.addFile(filename, goFile)
x.fsOpenGate <- true
defer func() { <-x.fsOpenGate }()
// open file
f, err := x.c.fs.Open(filename)
if err != nil {
return
}
x.mu.Lock()
defer x.mu.Unlock()
x.throttle.Throttle()
file, fast := x.addFile(f, filename, goFile)
if file == nil {
return // addFile failed
}
@ -672,7 +687,7 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) {
// we've got a Go file to index
x.current = file
pak := x.lookupPackage(dirname, fast.Name.Name)
x.file = &File{f.Name(), pak}
x.file = &File{fi.Name(), pak}
ast.Walk(x, fast)
}
@ -701,33 +716,59 @@ type Index struct {
func canonical(w string) string { return strings.ToLower(w) }
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
maxOpenFiles = 200
maxOpenDirs = 50
)
// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
// The throttle parameter specifies a value between 0.0 and 1.0 that controls
// artificial sleeping. If 0.0, the indexer always sleeps. If 1.0, the indexer
// never sleeps.
func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
var x Indexer
th := util.NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time
// initialize Indexer
// (use some reasonably sized maps to start)
x.c = c
x.fset = token.NewFileSet()
x.packages = make(map[string]*Pak, 256)
x.words = make(map[string]*IndexResult, 8192)
x := &Indexer{
c: c,
fset: token.NewFileSet(),
fsOpenGate: make(chan bool, maxOpenFiles),
packages: make(map[string]*Pak, 256),
words: make(map[string]*IndexResult, 8192),
throttle: util.NewThrottle(throttle, 100*time.Millisecond), // run at least 0.1s at a time
}
// index all files in the directories given by dirnames
var wg sync.WaitGroup // outstanding ReadDir + visitFile
dirGate := make(chan bool, maxOpenDirs)
for dirname := range dirnames {
list, err := c.fs.ReadDir(dirname)
if err != nil {
continue // ignore this directory
if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
continue
}
for _, f := range list {
if !f.IsDir() {
x.visitFile(dirname, f, fulltextIndex)
dirGate <- true
wg.Add(1)
go func(dirname string) {
defer func() { <-dirGate }()
defer wg.Done()
list, err := c.fs.ReadDir(dirname)
if err != nil {
log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
return // ignore this directory
}
th.Throttle()
}
for _, fi := range list {
wg.Add(1)
go func(fi os.FileInfo) {
defer wg.Done()
x.visitFile(dirname, fi, fulltextIndex)
}(fi)
}
}(dirname)
}
wg.Wait()
if !fulltextIndex {
// the file set, the current file, and the sources are
@ -751,7 +792,7 @@ func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle fl
Others: others,
}
wlist = append(wlist, &wordPair{canonical(w), w})
th.Throttle()
x.throttle.Throttle()
}
x.stats.Words = len(words)
@ -1094,7 +1135,13 @@ func (c *Corpus) UpdateIndex() {
log.Printf("updating index...")
}
start := time.Now()
index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, c.IndexThrottle)
throttle := c.IndexThrottle
if throttle <= 0 {
throttle = 0.9
} else if throttle > 1.0 {
throttle = 1.0
}
index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, throttle)
stop := time.Now()
c.searchIndex.Set(index)
if c.Verbose {
@ -1105,10 +1152,14 @@ func (c *Corpus) UpdateIndex() {
}
memstats := new(runtime.MemStats)
runtime.ReadMemStats(memstats)
log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
if c.Verbose {
log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
}
runtime.GC()
runtime.ReadMemStats(memstats)
log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
if c.Verbose {
log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
}
}
// RunIndexer runs forever, indexing.

56
godoc/index_test.go Normal file
View File

@ -0,0 +1,56 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package godoc
import (
"reflect"
"strings"
"testing"
"code.google.com/p/go.tools/godoc/vfs/mapfs"
)
func TestIndex(t *testing.T) {
c := NewCorpus(mapfs.New(map[string]string{
"src/pkg/foo/foo.go": `// Package foo is an example.
package foo
// Foo is stuff.
type Foo struct{}
func New() *Foo {
return new(Foo)
}
`,
"src/pkg/bar/bar.go": `// Package bar is another example to test races.
package bar
`,
"src/pkg/skip/skip.go": `// Package skip should be skipped.
package skip
func Skip() {}
`,
}))
c.IndexEnabled = true
c.IndexDirectory = func(dir string) bool {
return !strings.Contains(dir, "skip")
}
if err := c.Init(); err != nil {
t.Fatal(err)
}
c.UpdateIndex()
ix, _ := c.CurrentIndex()
if ix == nil {
t.Fatal("no index")
}
t.Logf("Got: %#v", ix)
wantStats := Statistics{Bytes: 179, Files: 2, Lines: 11, Words: 5, Spots: 7}
if !reflect.DeepEqual(ix.Stats(), wantStats) {
t.Errorf("Stats = %#v; want %#v", ix.Stats(), wantStats)
}
if _, ok := ix.words["Skip"]; ok {
t.Errorf("the word Skip was found; expected it to be skipped")
}
}

View File

@ -9,12 +9,17 @@ package mapfs
import (
"io"
"os"
pathpkg "path"
"sort"
"strings"
"time"
"code.google.com/p/go.tools/godoc/vfs"
)
// New returns a new FileSystem from the provided map.
// Map keys should be forward slash-separated pathnames
// and not contain a leading slash.
func New(m map[string]string) vfs.FileSystem {
return mapFS(m)
}
@ -27,10 +32,7 @@ func (fs mapFS) String() string { return "mapfs" }
func (fs mapFS) Close() error { return nil }
func filename(p string) string {
if len(p) > 0 && p[0] == '/' {
p = p[1:]
}
return p
return strings.TrimPrefix(p, "/")
}
func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) {
@ -41,22 +43,85 @@ func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) {
return nopCloser{strings.NewReader(b)}, nil
}
func fileInfo(name, contents string) os.FileInfo {
return mapFI{name: pathpkg.Base(name), size: len(contents)}
}
func dirInfo(name string) os.FileInfo {
return mapFI{name: pathpkg.Base(name), dir: true}
}
func (fs mapFS) Lstat(p string) (os.FileInfo, error) {
b, ok := fs[filename(p)]
if !ok {
return nil, os.ErrNotExist
if ok {
return fileInfo(p, b), nil
}
return mapFI{name: p, size: int64(len(b))}, nil
ents, _ := fs.ReadDir(p)
if len(ents) > 0 {
return dirInfo(p), nil
}
return nil, os.ErrNotExist
}
func (fs mapFS) Stat(p string) (os.FileInfo, error) {
return fs.Lstat(p)
}
// slashdir returns path.Dir(p), but special-cases paths not beginning
// with a slash to be in the root.
func slashdir(p string) string {
d := pathpkg.Dir(p)
if d == "." {
return "/"
}
if strings.HasPrefix(p, "/") {
return d
}
return "/" + d
}
func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) {
var list []os.FileInfo
p = pathpkg.Clean(p)
var ents []string
fim := make(map[string]os.FileInfo) // base -> fi
for fn, b := range fs {
list = append(list, mapFI{name: fn, size: int64(len(b))})
dir := slashdir(fn)
isFile := true
var lastBase string
for {
if dir == p {
base := lastBase
if isFile {
base = pathpkg.Base(fn)
}
if fim[base] == nil {
var fi os.FileInfo
if isFile {
fi = fileInfo(fn, b)
} else {
fi = dirInfo(base)
}
ents = append(ents, base)
fim[base] = fi
}
}
if dir == "/" {
break
} else {
isFile = false
lastBase = pathpkg.Base(dir)
dir = pathpkg.Dir(dir)
}
}
}
if len(ents) == 0 {
return nil, os.ErrNotExist
}
sort.Strings(ents)
var list []os.FileInfo
for _, dir := range ents {
list = append(list, fim[dir])
}
return list, nil
}
@ -64,15 +129,21 @@ func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) {
// mapFI is the map-based implementation of FileInfo.
type mapFI struct {
name string
size int64
size int
dir bool
}
func (fi mapFI) IsDir() bool { return false }
func (fi mapFI) IsDir() bool { return fi.dir }
func (fi mapFI) ModTime() time.Time { return time.Time{} }
func (fi mapFI) Mode() os.FileMode { return 0444 }
func (fi mapFI) Name() string { return fi.name }
func (fi mapFI) Size() int64 { return fi.size }
func (fi mapFI) Sys() interface{} { return nil }
func (fi mapFI) Mode() os.FileMode {
if fi.IsDir() {
return 0755 | os.ModeDir
}
return 0444
}
func (fi mapFI) Name() string { return pathpkg.Base(fi.name) }
func (fi mapFI) Size() int64 { return int64(fi.size) }
func (fi mapFI) Sys() interface{} { return nil }
type nopCloser struct {
io.ReadSeeker

View File

@ -0,0 +1,111 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mapfs
import (
"io/ioutil"
"os"
"reflect"
"testing"
)
func TestOpenRoot(t *testing.T) {
fs := New(map[string]string{
"foo/bar/three.txt": "a",
"foo/bar.txt": "b",
"top.txt": "c",
"other-top.txt": "d",
})
tests := []struct {
path string
want string
}{
{"/foo/bar/three.txt", "a"},
{"foo/bar/three.txt", "a"},
{"foo/bar.txt", "b"},
{"top.txt", "c"},
{"/top.txt", "c"},
{"other-top.txt", "d"},
{"/other-top.txt", "d"},
}
for _, tt := range tests {
rsc, err := fs.Open(tt.path)
if err != nil {
t.Errorf("Open(%q) = %v", tt.path, err)
continue
}
slurp, err := ioutil.ReadAll(rsc)
if err != nil {
t.Error(err)
}
if string(slurp) != tt.want {
t.Errorf("Read(%q) = %q; want %q", tt.path, tt.want, slurp)
}
rsc.Close()
}
_, err := fs.Open("/xxxx")
if !os.IsNotExist(err) {
t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err)
}
}
func TestReaddir(t *testing.T) {
fs := New(map[string]string{
"foo/bar/three.txt": "333",
"foo/bar.txt": "22",
"top.txt": "top.txt file",
"other-top.txt": "other-top.txt file",
})
tests := []struct {
dir string
want []os.FileInfo
}{
{
dir: "/",
want: []os.FileInfo{
mapFI{name: "foo", dir: true},
mapFI{name: "other-top.txt", size: len("other-top.txt file")},
mapFI{name: "top.txt", size: len("top.txt file")},
},
},
{
dir: "/foo",
want: []os.FileInfo{
mapFI{name: "bar", dir: true},
mapFI{name: "bar.txt", size: 2},
},
},
{
dir: "/foo/",
want: []os.FileInfo{
mapFI{name: "bar", dir: true},
mapFI{name: "bar.txt", size: 2},
},
},
{
dir: "/foo/bar",
want: []os.FileInfo{
mapFI{name: "three.txt", size: 3},
},
},
}
for _, tt := range tests {
fis, err := fs.ReadDir(tt.dir)
if err != nil {
t.Errorf("ReadDir(%q) = %v", tt.dir, err)
continue
}
if !reflect.DeepEqual(fis, tt.want) {
t.Errorf("ReadDir(%q) = %#v; want %#v", tt.dir, fis, tt.want)
continue
}
}
_, err := fs.ReadDir("/xxxx")
if !os.IsNotExist(err) {
t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err)
}
}