From d01ee38fb09bfc9f03f5930fa2054cc767d611d2 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Mon, 29 Aug 2011 17:22:20 -0700 Subject: [PATCH] godoc: support for reading/writing (splitted) index files. This CL implements a new godoc feature to save the search index on disk. Use -write_index to create the search index file named with -index_files. Use -index_files to provide a glob pattern specifying index file(s) when starting godoc; in this case the run-time indexer is not run. Known issues: - saving/restoring full text index is not yet supported - the list of flags and overall usage logic could use a cleanup R=rsc, dsymonds CC=golang-dev https://golang.org/cl/4974045 --- src/cmd/godoc/appconfig.go | 13 ++++++- src/cmd/godoc/appinit.go | 27 +++++++++---- src/cmd/godoc/doc.go | 6 +++ src/cmd/godoc/godoc.go | 80 ++++++++++++++++++++++++++++---------- src/cmd/godoc/index.go | 33 ++++++++++++++++ src/cmd/godoc/main.go | 46 ++++++++++++++++++++-- 6 files changed, 170 insertions(+), 35 deletions(-) diff --git a/src/cmd/godoc/appconfig.go b/src/cmd/godoc/appconfig.go index 9cbe7a4434..1f420fc6cf 100644 --- a/src/cmd/godoc/appconfig.go +++ b/src/cmd/godoc/appconfig.go @@ -11,9 +11,18 @@ package main const ( // zipFilename is the name of the .zip file // containing the file system served by godoc. - zipFilename = "go.zip" + zipFilename = "godoc.zip" // zipGoroot is the path of the goroot directory // in the .zip file. - zipGoroot = "/home/username/go" + zipGoroot = "/home/user/go" + + // indexFilenames is a glob pattern specifying + // files containing the search index served by + // godoc. The files are concatenated in sorted + // order (by filename). + // app-engine limit: file sizes must be <= 10MB; + // use "split -b8m indexfile index.split." to get + // smaller files. + indexFilenames = "index.split.*" ) diff --git a/src/cmd/godoc/appinit.go b/src/cmd/godoc/appinit.go index 20e0fdc30a..96f8d5e2ac 100644 --- a/src/cmd/godoc/appinit.go +++ b/src/cmd/godoc/appinit.go @@ -23,11 +23,12 @@ // strings // never version of the strings package // ... // // app.yaml // app engine control file -// go.zip // zip file containing the file system to serve +// godoc.zip // .zip file containing the file system to serve // godoc // contains godoc sources // appinit.go // this file instead of godoc/main.go // appconfig.go // godoc for app engine configuration // ... // +// index.split.* // index file(s) containing the search index to serve // // To run app the engine emulator locally: // @@ -43,6 +44,7 @@ import ( "http" "log" "os" + "path" ) func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) { @@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E func init() { log.Println("initializing godoc ...") + log.Printf(".zip file = %s", zipFilename) + log.Printf(".zip GOROOT = %s", zipGoroot) + log.Printf("index files = %s", indexFilenames) + + // initialize flags for app engine *goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/' + *indexEnabled = true + *indexFiles = indexFilenames + *maxResults = 0 // save space for now + *indexThrottle = 0.3 // in case *indexFiles is empty (and thus the indexer is run) // read .zip file and set up file systems const zipfile = zipFilename @@ -65,8 +76,8 @@ func init() { fsHttp = NewHttpZipFS(rc, *goroot) // initialize http handlers - initHandlers() readTemplates() + initHandlers() registerPublicHandlers(http.DefaultServeMux) // initialize default directory tree with corresponding timestamp. @@ -75,12 +86,12 @@ func init() { // initialize directory trees for user-defined file systems (-path flag). initDirTrees() - // create search index - // TODO(gri) Disabled for now as it takes too long. Find a solution for this. - /* - *indexEnabled = true - go indexer() - */ + // initialize search index + if *indexEnabled { + if err := initIndex(); err != nil { + log.Fatalf("error initializing index: %s", err) + } + } log.Println("godoc initialization complete") } diff --git a/src/cmd/godoc/doc.go b/src/cmd/godoc/doc.go index 57073ffb1f..813527d280 100644 --- a/src/cmd/godoc/doc.go +++ b/src/cmd/godoc/doc.go @@ -50,11 +50,17 @@ The flags are: -index enable identifier and full text search index (no search box is shown if -index is not set) + -index_files="" + glob pattern specifying index files; if not empty, + the index is read from these files in sorted order -index_throttle=0.75 index throttle value; a value of 0 means no time is allocated to the indexer (the indexer will never finish), a value of 1.0 means that index creation is running at full throttle (other goroutines may get no time while the index is built) + -write_index=false + write index to a file; the file name must be specified with + -index_files -maxresults=10000 maximum number of full text search results shown (no full text index is built if maxresults <= 0) diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index 9554d47b77..c172235168 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -63,7 +63,9 @@ var ( templateDir = flag.String("templates", "", "directory containing alternate template files") // search index - indexEnabled = flag.Bool("index", false, "enable search index") + indexEnabled = flag.Bool("index", false, "enable search index") + indexFiles = flag.String("index_files", "", "glob pattern specifying index files;"+ + "if not empty, the index is read from these files in sorted order") maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown") indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle") @@ -1062,10 +1064,12 @@ func lookup(query string) (result SearchResult) { // is the result accurate? if *indexEnabled { if _, ts := fsModified.get(); timestamp < ts { - // The index is older than the latest file system change - // under godoc's observation. Indexing may be in progress - // or start shortly (see indexer()). - result.Alert = "Indexing in progress: result may be inaccurate" + // The index is older than the latest file system change under godoc's observation. + if *indexFiles != "" { + result.Alert = "Index not automatically updated: result may be inaccurate" + } else { + result.Alert = "Indexing in progress: result may be inaccurate" + } } } else { result.Alert = "Search index disabled: no results available" @@ -1141,26 +1145,30 @@ func fsDirnames() <-chan string { return c } +func updateIndex() { + if *verbose { + log.Printf("updating index...") + } + start := time.Nanoseconds() + index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle) + stop := time.Nanoseconds() + searchIndex.set(index) + if *verbose { + secs := float64((stop-start)/1e6) / 1e3 + stats := index.Stats() + log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", + secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) + } + log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) + runtime.GC() + log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) +} + func indexer() { for { if !indexUpToDate() { // index possibly out of date - make a new one - if *verbose { - log.Printf("updating index...") - } - start := time.Nanoseconds() - index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle) - stop := time.Nanoseconds() - searchIndex.set(index) - if *verbose { - secs := float64((stop-start)/1e6) / 1e3 - stats := index.Stats() - log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", - secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) - } - log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) - runtime.GC() - log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) + updateIndex() } var delay int64 = 60 * 1e9 // by default, try every 60s if *testDir != "" { @@ -1170,3 +1178,33 @@ func indexer() { time.Sleep(delay) } } + +func initIndex() os.Error { + if *indexFiles == "" { + // run periodic indexer + go indexer() + return nil + } + + // get search index from files + matches, err := filepath.Glob(*indexFiles) + if err != nil { + return err + } + sort.Strings(matches) // make sure files are in the right order + files := make([]io.Reader, 0, len(matches)) + for _, filename := range matches { + f, err := os.Open(filename) + if err != nil { + return err + } + defer f.Close() + files = append(files, f) + } + x := new(Index) + if err := x.Read(io.MultiReader(files...)); err != nil { + return err + } + searchIndex.set(x) + return nil +} diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index f33ca05730..8bf1a9eb38 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -43,7 +43,9 @@ import ( "go/parser" "go/token" "go/scanner" + "gob" "index/suffixarray" + "io" "os" "path/filepath" "regexp" @@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats} } +type FileIndex struct { + Words map[string]*LookupResult + Alts map[string]*AltWords + Snippets []*Snippet +} + +// Write writes the index x to w. +func (x *Index) Write(w io.Writer) os.Error { + if x.suffixes != nil { + panic("no support for writing full text index yet") + } + fx := FileIndex{ + x.words, + x.alts, + x.snippets, + } + return gob.NewEncoder(w).Encode(fx) +} + +// Read reads the index from r into x; x must not be nil. +func (x *Index) Read(r io.Reader) os.Error { + var fx FileIndex + if err := gob.NewDecoder(r).Decode(&fx); err != nil { + return err + } + x.words = fx.Words + x.alts = fx.Alts + x.snippets = fx.Snippets + return nil +} + // Stats() returns index statistics. func (x *Index) Stats() Statistics { return x.stats diff --git a/src/cmd/godoc/main.go b/src/cmd/godoc/main.go index 48bfa2477e..20eff6dd3a 100644 --- a/src/cmd/godoc/main.go +++ b/src/cmd/godoc/main.go @@ -54,6 +54,9 @@ var ( // (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico) zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty") + // file-based index + writeIndex = flag.Bool("write_index", false, "write index to a file; the file name must be specified with -index_files") + // periodic sync syncCmd = flag.String("sync", "", "sync command; disabled if empty") syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0") @@ -221,8 +224,8 @@ func main() { flag.Usage = usage flag.Parse() - // Check usage: either server and no args, or command line and args - if (*httpAddr != "") != (flag.NArg() == 0) { + // Check usage: either server and no args, command line and args, or index creation mode + if (*httpAddr != "") != (flag.NArg() == 0) && !*writeIndex { usage() } @@ -253,6 +256,39 @@ func main() { readTemplates() initHandlers() + if (*indexEnabled || *writeIndex) && *indexFiles != "" && *maxResults > 0 { + log.Println("warning: no support for full-text index yet (setting -maxresults to 0)") + *maxResults = 0 + } + + if *writeIndex { + if *indexFiles == "" { + log.Fatal("no index files specified") + } + + log.Println("initialize file systems") + *verbose = true // want to see what happens + initFSTree() + initDirTrees() + + *indexThrottle = 1 + updateIndex() + + log.Println("writing index file", *indexFiles) + f, err := os.Create(*indexFiles) + if err != nil { + log.Fatal(err) + } + index, _ := searchIndex.get() + err = index.(*Index).Write(f) + if err != nil { + log.Fatal(err) + } + + log.Println("done") + return + } + if *httpAddr != "" { // HTTP server mode. var handler http.Handler = http.DefaultServeMux @@ -304,9 +340,11 @@ func main() { }() } - // Start indexing goroutine. + // Initialize search index. if *indexEnabled { - go indexer() + if err := initIndex(); err != nil { + log.Fatalf("error initializing index: %s", err) + } } // Start http server.