mirror of
https://github.com/golang/go
synced 2024-11-22 03:34:40 -07:00
godoc: support for reading/writing (split) index files.
This CL implements a new godoc feature to save the search index on disk. Use -write_index to create the search index file named with -index_files. Use -index_files to provide a glob pattern specifying index file(s) when starting godoc; in this case the run-time indexer is not run.

Known issues:
- saving/restoring full text index is not yet supported
- the list of flags and overall usage logic could use a cleanup

R=rsc, dsymonds
CC=golang-dev
https://golang.org/cl/4974045
This commit is contained in:
parent
6b90262870
commit
d01ee38fb0
@ -11,9 +11,18 @@ package main
|
|||||||
const (
|
const (
|
||||||
// zipFilename is the name of the .zip file
|
// zipFilename is the name of the .zip file
|
||||||
// containing the file system served by godoc.
|
// containing the file system served by godoc.
|
||||||
zipFilename = "go.zip"
|
zipFilename = "godoc.zip"
|
||||||
|
|
||||||
// zipGoroot is the path of the goroot directory
|
// zipGoroot is the path of the goroot directory
|
||||||
// in the .zip file.
|
// in the .zip file.
|
||||||
zipGoroot = "/home/username/go"
|
zipGoroot = "/home/user/go"
|
||||||
|
|
||||||
|
// indexFilenames is a glob pattern specifying
|
||||||
|
// files containing the search index served by
|
||||||
|
// godoc. The files are concatenated in sorted
|
||||||
|
// order (by filename).
|
||||||
|
// app-engine limit: file sizes must be <= 10MB;
|
||||||
|
// use "split -b8m indexfile index.split." to get
|
||||||
|
// smaller files.
|
||||||
|
indexFilenames = "index.split.*"
|
||||||
)
|
)
|
||||||
|
@ -23,11 +23,12 @@
|
|||||||
// strings // newer version of the strings package
|
// strings // newer version of the strings package
|
||||||
// ... //
|
// ... //
|
||||||
// app.yaml // app engine control file
|
// app.yaml // app engine control file
|
||||||
// go.zip // zip file containing the file system to serve
|
// godoc.zip // .zip file containing the file system to serve
|
||||||
// godoc // contains godoc sources
|
// godoc // contains godoc sources
|
||||||
// appinit.go // this file instead of godoc/main.go
|
// appinit.go // this file instead of godoc/main.go
|
||||||
// appconfig.go // godoc for app engine configuration
|
// appconfig.go // godoc for app engine configuration
|
||||||
// ... //
|
// ... //
|
||||||
|
// index.split.* // index file(s) containing the search index to serve
|
||||||
//
|
//
|
||||||
// To run the app engine emulator locally:
|
// To run the app engine emulator locally:
|
||||||
//
|
//
|
||||||
@ -43,6 +44,7 @@ import (
|
|||||||
"http"
|
"http"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"path"
|
||||||
)
|
)
|
||||||
|
|
||||||
func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) {
|
func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.Error) {
|
||||||
@ -53,7 +55,16 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
|
|||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
log.Println("initializing godoc ...")
|
log.Println("initializing godoc ...")
|
||||||
|
log.Printf(".zip file = %s", zipFilename)
|
||||||
|
log.Printf(".zip GOROOT = %s", zipGoroot)
|
||||||
|
log.Printf("index files = %s", indexFilenames)
|
||||||
|
|
||||||
|
// initialize flags for app engine
|
||||||
*goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/'
|
*goroot = path.Join("/", zipGoroot) // fsHttp paths are relative to '/'
|
||||||
|
*indexEnabled = true
|
||||||
|
*indexFiles = indexFilenames
|
||||||
|
*maxResults = 0 // save space for now
|
||||||
|
*indexThrottle = 0.3 // in case *indexFiles is empty (and thus the indexer is run)
|
||||||
|
|
||||||
// read .zip file and set up file systems
|
// read .zip file and set up file systems
|
||||||
const zipfile = zipFilename
|
const zipfile = zipFilename
|
||||||
@ -65,8 +76,8 @@ func init() {
|
|||||||
fsHttp = NewHttpZipFS(rc, *goroot)
|
fsHttp = NewHttpZipFS(rc, *goroot)
|
||||||
|
|
||||||
// initialize http handlers
|
// initialize http handlers
|
||||||
initHandlers()
|
|
||||||
readTemplates()
|
readTemplates()
|
||||||
|
initHandlers()
|
||||||
registerPublicHandlers(http.DefaultServeMux)
|
registerPublicHandlers(http.DefaultServeMux)
|
||||||
|
|
||||||
// initialize default directory tree with corresponding timestamp.
|
// initialize default directory tree with corresponding timestamp.
|
||||||
@ -75,12 +86,12 @@ func init() {
|
|||||||
// initialize directory trees for user-defined file systems (-path flag).
|
// initialize directory trees for user-defined file systems (-path flag).
|
||||||
initDirTrees()
|
initDirTrees()
|
||||||
|
|
||||||
// create search index
|
// initialize search index
|
||||||
// TODO(gri) Disabled for now as it takes too long. Find a solution for this.
|
if *indexEnabled {
|
||||||
/*
|
if err := initIndex(); err != nil {
|
||||||
*indexEnabled = true
|
log.Fatalf("error initializing index: %s", err)
|
||||||
go indexer()
|
}
|
||||||
*/
|
}
|
||||||
|
|
||||||
log.Println("godoc initialization complete")
|
log.Println("godoc initialization complete")
|
||||||
}
|
}
|
||||||
|
@ -50,11 +50,17 @@ The flags are:
|
|||||||
-index
|
-index
|
||||||
enable identifier and full text search index
|
enable identifier and full text search index
|
||||||
(no search box is shown if -index is not set)
|
(no search box is shown if -index is not set)
|
||||||
|
-index_files=""
|
||||||
|
glob pattern specifying index files; if not empty,
|
||||||
|
the index is read from these files in sorted order
|
||||||
-index_throttle=0.75
|
-index_throttle=0.75
|
||||||
index throttle value; a value of 0 means no time is allocated
|
index throttle value; a value of 0 means no time is allocated
|
||||||
to the indexer (the indexer will never finish), a value of 1.0
|
to the indexer (the indexer will never finish), a value of 1.0
|
||||||
means that index creation is running at full throttle (other
|
means that index creation is running at full throttle (other
|
||||||
goroutines may get no time while the index is built)
|
goroutines may get no time while the index is built)
|
||||||
|
-write_index=false
|
||||||
|
write index to a file; the file name must be specified with
|
||||||
|
-index_files
|
||||||
-maxresults=10000
|
-maxresults=10000
|
||||||
maximum number of full text search results shown
|
maximum number of full text search results shown
|
||||||
(no full text index is built if maxresults <= 0)
|
(no full text index is built if maxresults <= 0)
|
||||||
|
@ -63,7 +63,9 @@ var (
|
|||||||
templateDir = flag.String("templates", "", "directory containing alternate template files")
|
templateDir = flag.String("templates", "", "directory containing alternate template files")
|
||||||
|
|
||||||
// search index
|
// search index
|
||||||
indexEnabled = flag.Bool("index", false, "enable search index")
|
indexEnabled = flag.Bool("index", false, "enable search index")
|
||||||
|
// indexFiles is the -index_files flag: a glob pattern naming index files.
// The two literals below are concatenated into a single help string, so the
// first one must end in a space to keep "files; if" from running together.
indexFiles = flag.String("index_files", "", "glob pattern specifying index files; "+
|
||||||
|
"if not empty, the index is read from these files in sorted order")
|
||||||
maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown")
|
maxResults = flag.Int("maxresults", 10000, "maximum number of full text search results shown")
|
||||||
indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle")
|
indexThrottle = flag.Float64("index_throttle", 0.75, "index throttle value; 0.0 = no time allocated, 1.0 = full throttle")
|
||||||
|
|
||||||
@ -1062,10 +1064,12 @@ func lookup(query string) (result SearchResult) {
|
|||||||
// is the result accurate?
|
// is the result accurate?
|
||||||
if *indexEnabled {
|
if *indexEnabled {
|
||||||
if _, ts := fsModified.get(); timestamp < ts {
|
if _, ts := fsModified.get(); timestamp < ts {
|
||||||
// The index is older than the latest file system change
|
// The index is older than the latest file system change under godoc's observation.
|
||||||
// under godoc's observation. Indexing may be in progress
|
if *indexFiles != "" {
|
||||||
// or start shortly (see indexer()).
|
result.Alert = "Index not automatically updated: result may be inaccurate"
|
||||||
result.Alert = "Indexing in progress: result may be inaccurate"
|
} else {
|
||||||
|
result.Alert = "Indexing in progress: result may be inaccurate"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
result.Alert = "Search index disabled: no results available"
|
result.Alert = "Search index disabled: no results available"
|
||||||
@ -1141,26 +1145,30 @@ func fsDirnames() <-chan string {
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// updateIndex builds a new search index from the directories delivered by
// fsDirnames() and installs it with searchIndex.set. A full text index is
// requested from NewIndex only if *maxResults > 0; *indexThrottle is passed
// through as the throttle value. If *verbose is set, the start of the update
// and, afterwards, the elapsed time and index statistics are logged. The heap
// statistics before and after a forced runtime.GC() are logged regardless of
// *verbose.
func updateIndex() {
|
||||||
|
if *verbose {
|
||||||
|
log.Printf("updating index...")
|
||||||
|
}
|
||||||
|
start := time.Nanoseconds()
|
||||||
|
index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
|
||||||
|
stop := time.Nanoseconds()
|
||||||
|
searchIndex.set(index)
|
||||||
|
if *verbose {
|
||||||
|
// nanoseconds -> whole milliseconds (integer division) -> seconds
|
||||||
|
secs := float64((stop-start)/1e6) / 1e3
|
||||||
|
stats := index.Stats()
|
||||||
|
log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
|
||||||
|
secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
|
||||||
|
}
|
||||||
|
// not guarded by *verbose: memory statistics are always logged
|
||||||
log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
|
||||||
|
runtime.GC()
|
||||||
|
log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
|
||||||
|
}
|
||||||
|
|
||||||
func indexer() {
|
func indexer() {
|
||||||
for {
|
for {
|
||||||
if !indexUpToDate() {
|
if !indexUpToDate() {
|
||||||
// index possibly out of date - make a new one
|
// index possibly out of date - make a new one
|
||||||
if *verbose {
|
updateIndex()
|
||||||
log.Printf("updating index...")
|
|
||||||
}
|
|
||||||
start := time.Nanoseconds()
|
|
||||||
index := NewIndex(fsDirnames(), *maxResults > 0, *indexThrottle)
|
|
||||||
stop := time.Nanoseconds()
|
|
||||||
searchIndex.set(index)
|
|
||||||
if *verbose {
|
|
||||||
secs := float64((stop-start)/1e6) / 1e3
|
|
||||||
stats := index.Stats()
|
|
||||||
log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
|
|
||||||
secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
|
|
||||||
}
|
|
||||||
log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
|
|
||||||
runtime.GC()
|
|
||||||
log.Printf("after GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
|
|
||||||
}
|
}
|
||||||
var delay int64 = 60 * 1e9 // by default, try every 60s
|
var delay int64 = 60 * 1e9 // by default, try every 60s
|
||||||
if *testDir != "" {
|
if *testDir != "" {
|
||||||
@ -1170,3 +1178,33 @@ func indexer() {
|
|||||||
time.Sleep(delay)
|
time.Sleep(delay)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initIndex makes a search index available.
//
// If *indexFiles is empty, it starts indexer in its own goroutine and
// returns nil immediately. Otherwise *indexFiles is used as a glob pattern:
// all matching files are opened, a single index is read from their
// concatenation (in sorted filename order), and the result is installed with
// searchIndex.set. The first error from globbing, opening a file, or reading
// the index is returned and no index is installed.
func initIndex() os.Error {
|
||||||
|
if *indexFiles == "" {
|
||||||
|
// run periodic indexer
|
||||||
|
go indexer()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// get search index from files
|
||||||
|
// NOTE(review): if the pattern matches no files, x.Read below is handed an
// empty reader; presumably the decoder then reports an EOF-style error - confirm.
matches, err := filepath.Glob(*indexFiles)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
sort.Strings(matches) // make sure files are in the right order
|
||||||
|
files := make([]io.Reader, 0, len(matches))
|
||||||
|
for _, filename := range matches {
|
||||||
|
f, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// deliberately deferred to function return rather than closed per
// iteration: every file must stay open until x.Read has consumed it
defer f.Close()
|
||||||
|
files = append(files, f)
|
||||||
|
}
|
||||||
|
x := new(Index)
|
||||||
|
// the files are presented to Read as one continuous stream
if err := x.Read(io.MultiReader(files...)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
searchIndex.set(x)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@ -43,7 +43,9 @@ import (
|
|||||||
"go/parser"
|
"go/parser"
|
||||||
"go/token"
|
"go/token"
|
||||||
"go/scanner"
|
"go/scanner"
|
||||||
|
"gob"
|
||||||
"index/suffixarray"
|
"index/suffixarray"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
@ -804,6 +806,37 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind
|
|||||||
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
|
return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FileIndex is the serialized form of an Index: it carries the parts of an
// Index that (*Index).Write encodes and (*Index).Read decodes. The fields
// are exported; gob only transmits exported struct fields.
type FileIndex struct {
|
||||||
|
// Words is stored from / restored to Index.words.
Words map[string]*LookupResult
|
||||||
|
// Alts is stored from / restored to Index.alts.
Alts map[string]*AltWords
|
||||||
|
// Snippets is stored from / restored to Index.snippets.
Snippets []*Snippet
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write writes the index x to w.
// Only x.words, x.alts, and x.snippets are written, as a single gob-encoded
// FileIndex; the encoder's error, if any, is returned. Write panics if x has
// a full text index (x.suffixes != nil), since writing one is not supported.
|
||||||
|
func (x *Index) Write(w io.Writer) os.Error {
|
||||||
|
if x.suffixes != nil {
|
||||||
|
panic("no support for writing full text index yet")
|
||||||
|
}
|
||||||
|
// positional literal: values must stay in FileIndex field order
// (Words, Alts, Snippets)
fx := FileIndex{
|
||||||
|
x.words,
|
||||||
|
x.alts,
|
||||||
|
x.snippets,
|
||||||
|
}
|
||||||
|
return gob.NewEncoder(w).Encode(fx)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read reads the index from r into x; x must not be nil.
// It decodes one gob-encoded FileIndex from r and assigns its contents to
// x.words, x.alts, and x.snippets; no other field of x is set here. If
// decoding fails, the error is returned and x is left unmodified.
|
||||||
|
func (x *Index) Read(r io.Reader) os.Error {
|
||||||
|
// decode into a local first so x is only touched on success
var fx FileIndex
|
||||||
|
if err := gob.NewDecoder(r).Decode(&fx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
x.words = fx.Words
|
||||||
|
x.alts = fx.Alts
|
||||||
|
x.snippets = fx.Snippets
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Stats() returns index statistics.
|
// Stats() returns index statistics.
|
||||||
func (x *Index) Stats() Statistics {
|
func (x *Index) Stats() Statistics {
|
||||||
return x.stats
|
return x.stats
|
||||||
|
@ -54,6 +54,9 @@ var (
|
|||||||
// (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico)
|
// (with e.g.: zip -r go.zip $GOROOT -i \*.go -i \*.html -i \*.css -i \*.js -i \*.txt -i \*.c -i \*.h -i \*.s -i \*.png -i \*.jpg -i \*.sh -i favicon.ico)
|
||||||
zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty")
|
zipfile = flag.String("zip", "", "zip file providing the file system to serve; disabled if empty")
|
||||||
|
|
||||||
|
// file-based index
|
||||||
|
writeIndex = flag.Bool("write_index", false, "write index to a file; the file name must be specified with -index_files")
|
||||||
|
|
||||||
// periodic sync
|
// periodic sync
|
||||||
syncCmd = flag.String("sync", "", "sync command; disabled if empty")
|
syncCmd = flag.String("sync", "", "sync command; disabled if empty")
|
||||||
syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0")
|
syncMin = flag.Int("sync_minutes", 0, "sync interval in minutes; disabled if <= 0")
|
||||||
@ -221,8 +224,8 @@ func main() {
|
|||||||
flag.Usage = usage
|
flag.Usage = usage
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Check usage: either server and no args, or command line and args
|
// Check usage: either server and no args, command line and args, or index creation mode
|
||||||
if (*httpAddr != "") != (flag.NArg() == 0) {
|
if (*httpAddr != "") != (flag.NArg() == 0) && !*writeIndex {
|
||||||
usage()
|
usage()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,6 +256,39 @@ func main() {
|
|||||||
readTemplates()
|
readTemplates()
|
||||||
initHandlers()
|
initHandlers()
|
||||||
|
|
||||||
|
if (*indexEnabled || *writeIndex) && *indexFiles != "" && *maxResults > 0 {
|
||||||
|
log.Println("warning: no support for full-text index yet (setting -maxresults to 0)")
|
||||||
|
*maxResults = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if *writeIndex {
|
||||||
|
if *indexFiles == "" {
|
||||||
|
log.Fatal("no index files specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("initialize file systems")
|
||||||
|
*verbose = true // want to see what happens
|
||||||
|
initFSTree()
|
||||||
|
initDirTrees()
|
||||||
|
|
||||||
|
*indexThrottle = 1
|
||||||
|
updateIndex()
|
||||||
|
|
||||||
|
log.Println("writing index file", *indexFiles)
|
||||||
|
f, err := os.Create(*indexFiles)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
index, _ := searchIndex.get()
|
||||||
|
err = index.(*Index).Write(f)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("done")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if *httpAddr != "" {
|
if *httpAddr != "" {
|
||||||
// HTTP server mode.
|
// HTTP server mode.
|
||||||
var handler http.Handler = http.DefaultServeMux
|
var handler http.Handler = http.DefaultServeMux
|
||||||
@ -304,9 +340,11 @@ func main() {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start indexing goroutine.
|
// Initialize search index.
|
||||||
if *indexEnabled {
|
if *indexEnabled {
|
||||||
go indexer()
|
if err := initIndex(); err != nil {
|
||||||
|
log.Fatalf("error initializing index: %s", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start http server.
|
// Start http server.
|
||||||
|
Loading…
Reference in New Issue
Block a user