diff --git a/lib/godoc/search.html b/lib/godoc/search.html
index febd7e5693..29ea41cc92 100644
--- a/lib/godoc/search.html
+++ b/lib/godoc/search.html
@@ -66,3 +66,27 @@
or a qualified identifier (such as math.Sin).
{.end}
+{.section Textual}
+	<h2>Textual occurrences</h2>
+	<table>
+	<tr>
+	<th>File</th>
+	<th>Occurrences</th>
+	<th>Lines</th>
+	</tr>
+	{.repeated section @}
+	<tr>
+	<td>
+	<a href="{Filename|url-src}">{Filename|url-src}</a>:
+	</td>
+	{Lines|linelist}
+	</tr>
+	{.end}
+	</table>
+{.end}
+{.section Complete}
+{.or}
+	<p>
+	Incomplete list of results
+	</p>
+{.end}
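
Note: search.html is driven by the pre-Go 1 template package, in which {.section X} renders its body only when the field X is non-empty, {.or} introduces the alternative branch, and {.repeated section @} iterates over the elements of the current value. The {.section Complete}{.or} block above therefore prints the "Incomplete list of results" warning exactly when Complete is false. A minimal sketch of the same pattern (the Items and Name fields are hypothetical, not part of this change):

	{.section Items}
	{.repeated section @}
	{Name|html}
	{.end}
	{.or}
	no items found
	{.end}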
diff --git a/src/cmd/godoc/doc.go b/src/cmd/godoc/doc.go
index 53f05ceb47..cce095ade4 100644
--- a/src/cmd/godoc/doc.go
+++ b/src/cmd/godoc/doc.go
@@ -47,6 +47,8 @@ The flags are:
width of tabs in units of spaces
-timestamps=true
show timestamps with directory listings
+ -fulltext=false
+ build full text index for string search results
-path=""
additional package directories (colon-separated)
-html
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go
index ff51c4dd86..5dced1498b 100644
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@@ -63,6 +63,7 @@ var (
// layout control
tabwidth = flag.Int("tabwidth", 4, "tab width")
showTimestamps = flag.Bool("timestamps", true, "show timestamps with directory listings")
+ fulltextIndex = flag.Bool("fulltext", false, "build full text index for string search results")
// file system mapping
fsMap Mapping // user-defined mapping
@@ -736,6 +737,25 @@ func localnameFmt(w io.Writer, format string, x ...interface{}) {
}
+// Template formatter for "linelist" format.
+func linelistFmt(w io.Writer, format string, x ...interface{}) {
+	const max = 20 // show at most this many lines
+	list := x[0].([]int)
+	// print number of occurrences
+	fmt.Fprintf(w, "<td>%d</td>", len(list))
+	// print actual lines
+	// TODO(gri) should sort them
+	for i, line := range list {
+		if i < max {
+			fmt.Fprintf(w, "<td>%d</td>", line)
+		} else {
+			fmt.Fprint(w, "<td>...</td>")
+			break
+		}
+	}
+}
+
+
var fmap = template.FormatterMap{
"": textFmt,
"html": htmlFmt,
@@ -751,6 +771,7 @@ var fmap = template.FormatterMap{
"time": timeFmt,
"dir/": dirslashFmt,
"localname": localnameFmt,
+ "linelist": linelistFmt,
}
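
Because the template emits {Lines|linelist} directly inside a table row, linelistFmt writes complete <td> cells: first the occurrence count (the "Occurrences" column), then up to max line numbers (the "Lines" column). A rough illustration of the output, assuming the markup reconstructed above and a made-up hit list:

	var b bytes.Buffer
	linelistFmt(&b, "linelist", []int{42, 7, 203})
	// b.String() == "<td>3</td><td>42</td><td>7</td><td>203</td>"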
@@ -1309,17 +1330,23 @@ var searchIndex RWValue
type SearchResult struct {
Query string
- Hit *LookupResult
- Alt *AltWords
- Illegal bool
- Accurate bool
+	Hit      *LookupResult // identifier occurrences of Query
+	Alt      *AltWords     // alternative identifiers to look for
+	Illegal  bool          // true if Query for identifier search has incorrect syntax
+	Textual  []Positions   // textual occurrences of Query
+	Complete bool          // true if all textual occurrences of Query are reported
+	Accurate bool          // true if the index is not older than the indexed files
}
func lookup(query string) (result SearchResult) {
result.Query = query
if index, timestamp := searchIndex.get(); index != nil {
- result.Hit, result.Alt, result.Illegal = index.(*Index).Lookup(query)
+ index := index.(*Index)
+ result.Hit, result.Alt, result.Illegal = index.Lookup(query)
+ // TODO(gri) should max be a flag?
+ const max = 5000 // show at most this many fulltext results
+ result.Textual, result.Complete = index.LookupString(query, max)
_, ts := fsModified.get()
result.Accurate = timestamp >= ts
}
@@ -1338,7 +1365,7 @@ func search(w http.ResponseWriter, r *http.Request) {
}
var title string
- if result.Hit != nil {
+ if result.Hit != nil || len(result.Textual) > 0 {
title = fmt.Sprintf(`Results for query %q`, query)
} else {
title = fmt.Sprintf(`No results found for query %q`, query)
@@ -1407,17 +1434,18 @@ func indexer() {
log.Printf("updating index...")
}
start := time.Nanoseconds()
- index := NewIndex(fsDirnames())
+ index := NewIndex(fsDirnames(), *fulltextIndex)
stop := time.Nanoseconds()
searchIndex.set(index)
if *verbose {
secs := float64((stop-start)/1e6) / 1e3
- nwords, nspots := index.Size()
- log.Printf("index updated (%gs, %d unique words, %d spots)", secs, nwords, nspots)
+ stats := index.Stats()
+ log.Printf("index updated (%gs, %d bytes of source, %d files, %d unique words, %d spots)",
+ secs, stats.Bytes, stats.Files, stats.Words, stats.Spots)
}
- log.Printf("bytes=%d footprint=%d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+ log.Printf("before GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
runtime.GC()
- log.Printf("bytes=%d footprint=%d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+ log.Printf("after GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
}
time.Sleep(1 * 60e9) // try once a minute
}
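
searchIndex above is one of godoc's RWValue slots: a mutex-guarded value paired with the time it was last set, which is what lookup compares against fsModified to decide whether the index is still accurate. A minimal sketch of that pattern (an assumption about godoc's internals, not part of this change; sync and time imports assumed):

	type RWValue struct {
		mutex     sync.RWMutex
		value     interface{}
		timestamp int64 // time of last set()
	}

	func (v *RWValue) set(value interface{}) {
		v.mutex.Lock()
		v.value = value
		v.timestamp = time.Nanoseconds()
		v.mutex.Unlock()
	}

	func (v *RWValue) get() (interface{}, int64) {
		v.mutex.RLock()
		defer v.mutex.RUnlock()
		return v.value, v.timestamp
	}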
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go
index 6f41f1819d..ff51a278e4 100644
--- a/src/cmd/godoc/index.go
+++ b/src/cmd/godoc/index.go
@@ -3,9 +3,9 @@
// license that can be found in the LICENSE file.
// This file contains the infrastructure to create an
-// (identifier) index for a set of Go files.
+// identifier and full-text index for a set of Go files.
//
-// Basic indexing algorithm:
+// Algorithm for identifier index:
// - traverse all .go files of the file tree specified by root
// - for each word (identifier) encountered, collect all occurrences (spots)
// into a list; this produces a list of spots for each word
@@ -21,15 +21,30 @@
// (the line number for spots with snippets is stored in the snippet)
// - at the end, create lists of alternative spellings for a given
// word
+//
+// Algorithm for full text index:
+// - concatenate all source code in a byte buffer (in memory)
+// - add the files to a file set in lockstep as they are added to the byte
+// buffer such that a byte buffer offset corresponds to the Pos value for
+// that file location
+// - create a suffix array from the concatenated sources
+//
+// String lookup in full text index:
+// - use the suffix array to look up a string's offsets - the offsets
+// correspond to the Pos values relative to the file set
+// - translate the Pos values back into file and line information and
+// sort the result
package main
import (
+ "bytes"
"container/vector"
"go/ast"
"go/parser"
"go/token"
"go/scanner"
+ "index/suffixarray"
"io/ioutil"
"os"
pathutil "path"
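
The lockstep invariant described in the comment above can be demonstrated in isolation: as long as one pad byte is written before each file's contents, a raw offset into the concatenated buffer is also a valid token.Pos for the shared file set, so suffix array hits translate directly into file and line positions. A self-contained sketch (standard library only; file names and contents are made up):

	package main

	import (
		"bytes"
		"fmt"
		"go/parser"
		"go/token"
		"index/suffixarray"
	)

	func main() {
		fset := token.NewFileSet()
		var sources bytes.Buffer
		files := []struct{ name, src string }{
			{"a.go", "package a\nfunc A() {}\n"},
			{"b.go", "package b\nfunc B() {}\n"},
		}
		for _, f := range files {
			sources.WriteByte(0) // pad so that len(sources) == fset.Base()
			if sources.Len() != fset.Base() {
				panic("sources and file set out of sync")
			}
			sources.WriteString(f.src)
			parser.ParseFile(fset, f.name, f.src, 0) // adds the file to fset
		}
		index := suffixarray.New(sources.Bytes())
		for _, offs := range index.Lookup([]byte("func"), -1) {
			// an offset into sources is directly usable as a Pos value
			fmt.Println(fset.Position(token.Pos(offs)))
		}
	}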
@@ -424,18 +439,28 @@ type IndexResult struct {
}
+// Statistics provides statistical information about an index.
+type Statistics struct {
+	Bytes int // total size of indexed source files
+	Files int // number of indexed source files
+	Words int // number of different identifiers
+	Spots int // number of identifier occurrences
+}
+
+
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
fset *token.FileSet // file set for all indexed files
+ sources bytes.Buffer // concatenated sources
words map[string]*IndexResult // RunLists of Spots
snippets vector.Vector // vector of *Snippets, indexed by snippet indices
current *token.File // last file added to file set
- file *File // current file
- decl ast.Decl // current decl
- nspots int // number of spots encountered
+ file *File // AST for current file
+ decl ast.Decl // AST for current decl
+ stats Statistics
}
@@ -472,7 +497,7 @@ func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
lists.Decls.Push(Spot{x.file, info})
}
- x.nspots++
+ x.stats.Spots++
}
}
@@ -581,8 +606,10 @@ func (x *Indexer) Visit(node ast.Node) ast.Visitor {
}
-func pkgName(fset *token.FileSet, filename string) string {
- file, err := parser.ParseFile(fset, filename, nil, parser.PackageClauseOnly)
+func pkgName(filename string) string {
+ // use a new file set each time so as not to pollute the indexer's
+ // file set (which must stay in sync with the concatenated source code)
+ file, err := parser.ParseFile(token.NewFileSet(), filename, nil, parser.PackageClauseOnly)
if err != nil || file == nil {
return ""
}
@@ -590,7 +617,59 @@ func pkgName(fset *token.FileSet, filename string) string {
}
+func (x *Indexer) addFile(filename string) *ast.File {
+	// open file
+	f, err := os.Open(filename, os.O_RDONLY, 0)
+	if err != nil {
+		return nil
+	}
+	defer f.Close()
+
+	// The file set's base offset and x.sources size must be in lockstep;
+	// this permits the direct mapping of suffix array lookup results to
+	// the corresponding Pos values.
+	//
+	// When a file is added to the file set, its base offset increases by
+	// the size of the file + 1, and the initial base offset is 1. Add an
+	// extra byte to the sources here to maintain this invariant.
+	x.sources.WriteByte(0)
+
+	// If the sources length doesn't match the file set base at this point,
+	// the file set implementation changed or we have another error.
+	base := x.fset.Base()
+	if x.sources.Len() != base {
+		panic("internal error - file base incorrect")
+	}
+
+	// append file contents to x.sources
+	if _, err := x.sources.ReadFrom(f); err != nil {
+		x.sources.Truncate(base) // discard possibly added data
+		return nil               // ignore files with I/O errors
+	}
+
+	// parse the file and in the process add it to the file set
+	src := x.sources.Bytes()[base:] // no need to reread the file
+	file, err := parser.ParseFile(x.fset, filename, src, parser.ParseComments)
+	if err != nil {
+		// Do not discard the added source code in this case:
+		// the file has been added to the file set and the
+		// source size must match the file set base.
+		// TODO(gri): given a FileSet.RemoveFile() one might be
+		// able to discard the data here (worthwhile?)
+		return nil // ignore files with (parse) errors
+	}
+
+	return file
+}
+
+
func (x *Indexer) visitFile(dirname string, f *os.FileInfo) {
+ // for now, exclude bug257.go as it causes problems with suffixarray
+ // TODO fix index/suffixarray
+ if f.Name == "bug257.go" {
+ return
+ }
+
if !isGoFile(f) {
return
}
@@ -600,20 +679,26 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo) {
return
}
- if excludeMainPackages && pkgName(x.fset, path) == "main" {
+ if excludeMainPackages && pkgName(path) == "main" {
return
}
- file, err := parser.ParseFile(x.fset, path, nil, parser.ParseComments)
- if err != nil {
- return // ignore files with (parse) errors
+ file := x.addFile(path)
+ if file == nil {
+ return
}
+ // we've got a file to index
x.current = x.fset.File(file.Pos()) // file.Pos is in the current file
dir, _ := pathutil.Split(path)
pak := Pak{dir, file.Name.Name}
x.file = &File{path, pak}
ast.Walk(x, file)
+
+ // update statistics
+ // (count real file size as opposed to using the padded x.sources.Len())
+ x.stats.Bytes += x.current.Size()
+ x.stats.Files++
}
@@ -627,10 +712,12 @@ type LookupResult struct {
type Index struct {
+ fset *token.FileSet // file set used during indexing; nil if no textindex
+ suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex
words map[string]*LookupResult // maps words to hit lists
alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings
snippets []*Snippet // all snippets, indexed by snippet index
- nspots int // number of spots indexed (a measure of the index size)
+ stats Statistics
}
@@ -640,7 +727,7 @@ func canonical(w string) string { return strings.ToLower(w) }
// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
-func NewIndex(dirnames <-chan string) *Index {
+func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
var x Indexer
// initialize Indexer
@@ -660,9 +747,14 @@ func NewIndex(dirnames <-chan string) *Index {
}
}
- // the file set and current file are not needed after indexing - help GC and clear them
- x.fset = nil
- x.current = nil // contains reference to fset!
+ if !fulltextIndex {
+ // the file set, the current file, and the sources are
+ // not needed after indexing if no text index is built -
+ // help GC and clear them
+ x.fset = nil
+ x.sources.Reset()
+ x.current = nil // contains reference to fset!
+ }
// for each word, reduce the RunLists into a LookupResult;
// also collect the word with its canonical spelling in a
@@ -678,6 +770,7 @@ func NewIndex(dirnames <-chan string) *Index {
}
wlist.Push(&wordPair{canonical(w), w})
}
+ x.stats.Words = len(words)
// reduce the word list {canonical(w), w} into
// a list of AltWords runs {canonical(w), {w}}
@@ -696,14 +789,19 @@ func NewIndex(dirnames <-chan string) *Index {
snippets[i] = x.snippets.At(i).(*Snippet)
}
- return &Index{words, alts, snippets, x.nspots}
+ // create text index
+ var suffixes *suffixarray.Index
+ if fulltextIndex {
+ suffixes = suffixarray.New(x.sources.Bytes())
+ }
+
+ return &Index{x.fset, suffixes, words, alts, snippets, x.stats}
}
-// Size returns the number of different words and
-// spots indexed as a measure for the index size.
-func (x *Index) Size() (nwords int, nspots int) {
- return len(x.words), x.nspots
+// Stats returns index statistics.
+func (x *Index) Stats() Statistics {
+	return x.stats
+}
@@ -774,3 +872,71 @@ func (x *Index) Snippet(i int) *Snippet {
}
return nil
}
+
+
+type positionList []struct {
+ filename string
+ line int
+}
+
+func (list positionList) Len() int { return len(list) }
+func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
+func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
+
+
+// A Positions value specifies a file and a list of line numbers
+// within that file.
+type Positions struct {
+	Filename string
+	Lines    []int
+}
+
+
+// LookupString returns a list of positions where the string s is found
+// in the full text index, and reports whether the result is complete.
+// At most n positions (filename and line) are returned. The result is
+// not complete if the index is not present or there are more than n
+// occurrences of s.
+//
+func (x *Index) LookupString(s string, n int) (result []Positions, complete bool) {
+ if x.suffixes == nil {
+ return
+ }
+
+ offsets := x.suffixes.Lookup([]byte(s), n+1)
+ if len(offsets) <= n {
+ complete = true
+ } else {
+ offsets = offsets[0:n]
+ }
+
+ // compute file names and lines and sort the list by filename
+ list := make(positionList, len(offsets))
+ for i, offs := range offsets {
+ // by construction, each offset corresponds to
+ // a Pos value for the file set - use it to get
+ // the full Position information
+ pos := x.fset.Position(token.Pos(offs))
+ list[i].filename = pos.Filename
+ list[i].line = pos.Line
+ }
+ sort.Sort(list)
+
+ // compact positions with equal file names
+ var last string
+ var lines []int
+ for _, pos := range list {
+ if pos.filename != last {
+ if len(lines) > 0 {
+ result = append(result, Positions{last, lines})
+ lines = nil
+ }
+ last = pos.filename
+ }
+ lines = append(lines, pos.line)
+ }
+ if len(lines) > 0 {
+ result = append(result, Positions{last, lines})
+ }
+
+ return
+}
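
Putting the pieces together, a hypothetical caller (mirroring what lookup in godoc.go does above; the directory name and limit are made up):

	dirnames := make(chan string, 1)
	dirnames <- "/home/user/go/src/pkg"
	close(dirnames)
	index := NewIndex(dirnames, true) // build the full text index, too

	positions, complete := index.LookupString("suffixarray", 100)
	for _, p := range positions {
		fmt.Printf("%s: lines %v\n", p.Filename, p.Lines)
	}
	if !complete {
		fmt.Println("more than 100 matches - list truncated")
	}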