From 964f0f559cb5e33a8db407e71f00e7adb3a43c87 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 5 Nov 2013 09:35:58 -0500 Subject: [PATCH] godoc: feed indexer concurrently, add selective indexing hook, tests On big corpuses, the indexer was spending most of its time waiting for filesystem operations (especially with network filesystems) and not actually indexing. This keeps the filesystem busy and indexer running in different goroutines. Also, add a hook to let godoc hosts disable indexing of certain directories. And finally, start adding tests for godoc, which required fleshing out (and testing) the mapfs code. R=golang-dev, adg, bgarcia CC=golang-dev https://golang.org/cl/21520045 --- godoc/corpus.go | 12 ++++ godoc/index.go | 123 ++++++++++++++++++++++++---------- godoc/index_test.go | 56 ++++++++++++++++ godoc/vfs/mapfs/mapfs.go | 101 +++++++++++++++++++++++----- godoc/vfs/mapfs/mapfs_test.go | 111 ++++++++++++++++++++++++++++++ 5 files changed, 352 insertions(+), 51 deletions(-) create mode 100644 godoc/index_test.go create mode 100644 godoc/vfs/mapfs/mapfs_test.go diff --git a/godoc/corpus.go b/godoc/corpus.go index 106c6b62f4..ed26894707 100644 --- a/godoc/corpus.go +++ b/godoc/corpus.go @@ -32,6 +32,11 @@ type Corpus struct { // order. IndexFiles string + // IndexThrottle specifies the indexing throttle value + // between 0.0 and 1.0. At 0.0, the indexer always sleeps. + // At 1.0, the indexer never sleeps. Because 0.0 is useless + // and redundant with setting IndexEnabled to false, the + // zero value for IndexThrottle means 0.9. IndexThrottle float64 // MaxResults optionally specifies the maximum results for indexing. @@ -50,6 +55,13 @@ type Corpus struct { // package listing. SummarizePackage func(pkg string) (summary string, showList, ok bool) + // IndexDirectory optionally specifies a function to determine + // whether the provided directory should be indexed. The dir + // will be of the form "/src/cmd/6a", "/doc/play", + // "/src/pkg/io", etc. + // If nil, all directories are indexed if indexing is enabled. + IndexDirectory func(dir string) bool + testDir string // TODO(bradfitz,adg): migrate old godoc flag? looks unused. // Send a value on this channel to trigger a metadata refresh. diff --git a/godoc/index.go b/godoc/index.go index 5611489f40..da97ce0a57 100644 --- a/godoc/index.go +++ b/godoc/index.go @@ -56,10 +56,12 @@ import ( "runtime" "sort" "strings" + "sync" "time" "unicode" "code.google.com/p/go.tools/godoc/util" + "code.google.com/p/go.tools/godoc/vfs" ) // ---------------------------------------------------------------------------- @@ -371,8 +373,11 @@ type Statistics struct { // interface for walking file trees, and the ast.Visitor interface for // walking Go ASTs. type Indexer struct { - c *Corpus - fset *token.FileSet // file set for all indexed files + c *Corpus + fset *token.FileSet // file set for all indexed files + fsOpenGate chan bool // send pre fs.Open; receive on close + + mu sync.Mutex // guards all the following sources bytes.Buffer // concatenated sources packages map[string]*Pak // map of canonicalized *Paks words map[string]*IndexResult // RunLists of Spots @@ -381,6 +386,7 @@ type Indexer struct { file *File // AST for current file decl ast.Decl // AST for current decl stats Statistics + throttle *util.Throttle } func (x *Indexer) lookupPackage(path, name string) *Pak { @@ -535,12 +541,7 @@ func pkgName(filename string) string { // addFile adds a file to the index if possible and returns the file set file // and the file's AST if it was successfully parsed as a Go file. If addFile // failed (that is, if the file was not added), it returns file == nil. -func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) { - // open file - f, err := x.c.fs.Open(filename) - if err != nil { - return - } +func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) { defer f.Close() // The file set's base offset and x.sources size must be in lock-step; @@ -641,17 +642,17 @@ func isWhitelisted(filename string) bool { return whitelisted[key] } -func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { - if f.IsDir() { +func (x *Indexer) visitFile(dirname string, fi os.FileInfo, fulltextIndex bool) { + if fi.IsDir() { return } - filename := pathpkg.Join(dirname, f.Name()) + filename := pathpkg.Join(dirname, fi.Name()) goFile := false switch { - case isGoFile(f): - if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) { + case isGoFile(fi): + if !includeTestFiles && (!isPkgFile(fi) || strings.HasPrefix(filename, "test/")) { return } if !includeMainPackages && pkgName(filename) == "main" { @@ -659,11 +660,25 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { } goFile = true - case !fulltextIndex || !isWhitelisted(f.Name()): + case !fulltextIndex || !isWhitelisted(fi.Name()): return } - file, fast := x.addFile(filename, goFile) + x.fsOpenGate <- true + defer func() { <-x.fsOpenGate }() + + // open file + f, err := x.c.fs.Open(filename) + if err != nil { + return + } + + x.mu.Lock() + defer x.mu.Unlock() + + x.throttle.Throttle() + + file, fast := x.addFile(f, filename, goFile) if file == nil { return // addFile failed } @@ -672,7 +687,7 @@ func (x *Indexer) visitFile(dirname string, f os.FileInfo, fulltextIndex bool) { // we've got a Go file to index x.current = file pak := x.lookupPackage(dirname, fast.Name.Name) - x.file = &File{f.Name(), pak} + x.file = &File{fi.Name(), pak} ast.Walk(x, fast) } @@ -701,33 +716,59 @@ type Index struct { func canonical(w string) string { return strings.ToLower(w) } +// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems +// consuming file descriptors, where some systems have low 256 or 512 limits. +// Go should have a built-in way to cap fd usage under the ulimit. +const ( + maxOpenFiles = 200 + maxOpenDirs = 50 +) + // NewIndex creates a new index for the .go files // in the directories given by dirnames. -// +// The throttle parameter specifies a value between 0.0 and 1.0 that controls +// artificial sleeping. If 0.0, the indexer always sleeps. If 1.0, the indexer +// never sleeps. func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle float64) *Index { - var x Indexer - th := util.NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time - // initialize Indexer // (use some reasonably sized maps to start) - x.c = c - x.fset = token.NewFileSet() - x.packages = make(map[string]*Pak, 256) - x.words = make(map[string]*IndexResult, 8192) + x := &Indexer{ + c: c, + fset: token.NewFileSet(), + fsOpenGate: make(chan bool, maxOpenFiles), + packages: make(map[string]*Pak, 256), + words: make(map[string]*IndexResult, 8192), + throttle: util.NewThrottle(throttle, 100*time.Millisecond), // run at least 0.1s at a time + } // index all files in the directories given by dirnames + var wg sync.WaitGroup // outstanding ReadDir + visitFile + dirGate := make(chan bool, maxOpenDirs) for dirname := range dirnames { - list, err := c.fs.ReadDir(dirname) - if err != nil { - continue // ignore this directory + if c.IndexDirectory != nil && !c.IndexDirectory(dirname) { + continue } - for _, f := range list { - if !f.IsDir() { - x.visitFile(dirname, f, fulltextIndex) + dirGate <- true + wg.Add(1) + go func(dirname string) { + defer func() { <-dirGate }() + defer wg.Done() + + list, err := c.fs.ReadDir(dirname) + if err != nil { + log.Printf("ReadDir(%q): %v; skipping directory", dirname, err) + return // ignore this directory } - th.Throttle() - } + for _, fi := range list { + wg.Add(1) + go func(fi os.FileInfo) { + defer wg.Done() + x.visitFile(dirname, fi, fulltextIndex) + }(fi) + } + }(dirname) } + wg.Wait() if !fulltextIndex { // the file set, the current file, and the sources are @@ -751,7 +792,7 @@ func NewIndex(c *Corpus, dirnames <-chan string, fulltextIndex bool, throttle fl Others: others, } wlist = append(wlist, &wordPair{canonical(w), w}) - th.Throttle() + x.throttle.Throttle() } x.stats.Words = len(words) @@ -1094,7 +1135,13 @@ func (c *Corpus) UpdateIndex() { log.Printf("updating index...") } start := time.Now() - index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, c.IndexThrottle) + throttle := c.IndexThrottle + if throttle <= 0 { + throttle = 0.9 + } else if throttle > 1.0 { + throttle = 1.0 + } + index := NewIndex(c, c.fsDirnames(), c.MaxResults > 0, throttle) stop := time.Now() c.searchIndex.Set(index) if c.Verbose { @@ -1105,10 +1152,14 @@ func (c *Corpus) UpdateIndex() { } memstats := new(runtime.MemStats) runtime.ReadMemStats(memstats) - log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + if c.Verbose { + log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + } runtime.GC() runtime.ReadMemStats(memstats) - log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + if c.Verbose { + log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys) + } } // RunIndexer runs forever, indexing. diff --git a/godoc/index_test.go b/godoc/index_test.go new file mode 100644 index 0000000000..001d2a7752 --- /dev/null +++ b/godoc/index_test.go @@ -0,0 +1,56 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package godoc + +import ( + "reflect" + "strings" + "testing" + + "code.google.com/p/go.tools/godoc/vfs/mapfs" +) + +func TestIndex(t *testing.T) { + c := NewCorpus(mapfs.New(map[string]string{ + "src/pkg/foo/foo.go": `// Package foo is an example. +package foo + +// Foo is stuff. +type Foo struct{} + +func New() *Foo { + return new(Foo) +} +`, + "src/pkg/bar/bar.go": `// Package bar is another example to test races. +package bar +`, + "src/pkg/skip/skip.go": `// Package skip should be skipped. +package skip +func Skip() {} +`, + })) + c.IndexEnabled = true + c.IndexDirectory = func(dir string) bool { + return !strings.Contains(dir, "skip") + } + + if err := c.Init(); err != nil { + t.Fatal(err) + } + c.UpdateIndex() + ix, _ := c.CurrentIndex() + if ix == nil { + t.Fatal("no index") + } + t.Logf("Got: %#v", ix) + wantStats := Statistics{Bytes: 179, Files: 2, Lines: 11, Words: 5, Spots: 7} + if !reflect.DeepEqual(ix.Stats(), wantStats) { + t.Errorf("Stats = %#v; want %#v", ix.Stats(), wantStats) + } + if _, ok := ix.words["Skip"]; ok { + t.Errorf("the word Skip was found; expected it to be skipped") + } +} diff --git a/godoc/vfs/mapfs/mapfs.go b/godoc/vfs/mapfs/mapfs.go index 5dd20fdd10..550c4bafaa 100644 --- a/godoc/vfs/mapfs/mapfs.go +++ b/godoc/vfs/mapfs/mapfs.go @@ -9,12 +9,17 @@ package mapfs import ( "io" "os" + pathpkg "path" + "sort" "strings" "time" "code.google.com/p/go.tools/godoc/vfs" ) +// New returns a new FileSystem from the provided map. +// Map keys should be forward slash-separated pathnames +// and not contain a leading slash. func New(m map[string]string) vfs.FileSystem { return mapFS(m) } @@ -27,10 +32,7 @@ func (fs mapFS) String() string { return "mapfs" } func (fs mapFS) Close() error { return nil } func filename(p string) string { - if len(p) > 0 && p[0] == '/' { - p = p[1:] - } - return p + return strings.TrimPrefix(p, "/") } func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) { @@ -41,22 +43,85 @@ func (fs mapFS) Open(p string) (vfs.ReadSeekCloser, error) { return nopCloser{strings.NewReader(b)}, nil } +func fileInfo(name, contents string) os.FileInfo { + return mapFI{name: pathpkg.Base(name), size: len(contents)} +} + +func dirInfo(name string) os.FileInfo { + return mapFI{name: pathpkg.Base(name), dir: true} +} + func (fs mapFS) Lstat(p string) (os.FileInfo, error) { b, ok := fs[filename(p)] - if !ok { - return nil, os.ErrNotExist + if ok { + return fileInfo(p, b), nil } - return mapFI{name: p, size: int64(len(b))}, nil + ents, _ := fs.ReadDir(p) + if len(ents) > 0 { + return dirInfo(p), nil + } + return nil, os.ErrNotExist } func (fs mapFS) Stat(p string) (os.FileInfo, error) { return fs.Lstat(p) } +// slashdir returns path.Dir(p), but special-cases paths not beginning +// with a slash to be in the root. +func slashdir(p string) string { + d := pathpkg.Dir(p) + if d == "." { + return "/" + } + if strings.HasPrefix(p, "/") { + return d + } + return "/" + d +} + func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) { - var list []os.FileInfo + p = pathpkg.Clean(p) + var ents []string + fim := make(map[string]os.FileInfo) // base -> fi for fn, b := range fs { - list = append(list, mapFI{name: fn, size: int64(len(b))}) + dir := slashdir(fn) + isFile := true + var lastBase string + for { + if dir == p { + base := lastBase + if isFile { + base = pathpkg.Base(fn) + } + if fim[base] == nil { + var fi os.FileInfo + if isFile { + fi = fileInfo(fn, b) + } else { + fi = dirInfo(base) + } + ents = append(ents, base) + fim[base] = fi + } + } + if dir == "/" { + break + } else { + isFile = false + lastBase = pathpkg.Base(dir) + dir = pathpkg.Dir(dir) + } + } + } + if len(ents) == 0 { + return nil, os.ErrNotExist + } + + sort.Strings(ents) + var list []os.FileInfo + for _, dir := range ents { + list = append(list, fim[dir]) } return list, nil } @@ -64,15 +129,21 @@ func (fs mapFS) ReadDir(p string) ([]os.FileInfo, error) { // mapFI is the map-based implementation of FileInfo. type mapFI struct { name string - size int64 + size int + dir bool } -func (fi mapFI) IsDir() bool { return false } +func (fi mapFI) IsDir() bool { return fi.dir } func (fi mapFI) ModTime() time.Time { return time.Time{} } -func (fi mapFI) Mode() os.FileMode { return 0444 } -func (fi mapFI) Name() string { return fi.name } -func (fi mapFI) Size() int64 { return fi.size } -func (fi mapFI) Sys() interface{} { return nil } +func (fi mapFI) Mode() os.FileMode { + if fi.IsDir() { + return 0755 | os.ModeDir + } + return 0444 +} +func (fi mapFI) Name() string { return pathpkg.Base(fi.name) } +func (fi mapFI) Size() int64 { return int64(fi.size) } +func (fi mapFI) Sys() interface{} { return nil } type nopCloser struct { io.ReadSeeker diff --git a/godoc/vfs/mapfs/mapfs_test.go b/godoc/vfs/mapfs/mapfs_test.go new file mode 100644 index 0000000000..6b7db290ee --- /dev/null +++ b/godoc/vfs/mapfs/mapfs_test.go @@ -0,0 +1,111 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mapfs + +import ( + "io/ioutil" + "os" + "reflect" + "testing" +) + +func TestOpenRoot(t *testing.T) { + fs := New(map[string]string{ + "foo/bar/three.txt": "a", + "foo/bar.txt": "b", + "top.txt": "c", + "other-top.txt": "d", + }) + tests := []struct { + path string + want string + }{ + {"/foo/bar/three.txt", "a"}, + {"foo/bar/three.txt", "a"}, + {"foo/bar.txt", "b"}, + {"top.txt", "c"}, + {"/top.txt", "c"}, + {"other-top.txt", "d"}, + {"/other-top.txt", "d"}, + } + for _, tt := range tests { + rsc, err := fs.Open(tt.path) + if err != nil { + t.Errorf("Open(%q) = %v", tt.path, err) + continue + } + slurp, err := ioutil.ReadAll(rsc) + if err != nil { + t.Error(err) + } + if string(slurp) != tt.want { + t.Errorf("Read(%q) = %q; want %q", tt.path, tt.want, slurp) + } + rsc.Close() + } + + _, err := fs.Open("/xxxx") + if !os.IsNotExist(err) { + t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err) + } +} + +func TestReaddir(t *testing.T) { + fs := New(map[string]string{ + "foo/bar/three.txt": "333", + "foo/bar.txt": "22", + "top.txt": "top.txt file", + "other-top.txt": "other-top.txt file", + }) + tests := []struct { + dir string + want []os.FileInfo + }{ + { + dir: "/", + want: []os.FileInfo{ + mapFI{name: "foo", dir: true}, + mapFI{name: "other-top.txt", size: len("other-top.txt file")}, + mapFI{name: "top.txt", size: len("top.txt file")}, + }, + }, + { + dir: "/foo", + want: []os.FileInfo{ + mapFI{name: "bar", dir: true}, + mapFI{name: "bar.txt", size: 2}, + }, + }, + { + dir: "/foo/", + want: []os.FileInfo{ + mapFI{name: "bar", dir: true}, + mapFI{name: "bar.txt", size: 2}, + }, + }, + { + dir: "/foo/bar", + want: []os.FileInfo{ + mapFI{name: "three.txt", size: 3}, + }, + }, + } + for _, tt := range tests { + fis, err := fs.ReadDir(tt.dir) + if err != nil { + t.Errorf("ReadDir(%q) = %v", tt.dir, err) + continue + } + if !reflect.DeepEqual(fis, tt.want) { + t.Errorf("ReadDir(%q) = %#v; want %#v", tt.dir, fis, tt.want) + continue + } + } + + _, err := fs.ReadDir("/xxxx") + if !os.IsNotExist(err) { + t.Errorf("ReadDir /xxxx = %v; want os.IsNotExist error", err) + } +}