mirror of
https://github.com/golang/go
synced 2024-11-20 11:04:56 -07:00
parent
569a1cd545
commit
5a75ac88c9
664
src/cmd/godoc/index.go
Normal file
664
src/cmd/godoc/index.go
Normal file
@ -0,0 +1,664 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file contains the infrastructure to create an
|
||||
// (identifier) index for a set of Go files.
|
||||
//
|
||||
// Basic indexing algorithm:
|
||||
// - traverse all .go files of the file tree specified by root
|
||||
// - for each word (identifier) encountered, collect all occurrences (spots)
|
||||
// into a list; this produces a list of spots for each word
|
||||
// - reduce the lists: from a list of spots to a list of FileRuns,
|
||||
// and from a list of FileRuns into a list of PakRuns
|
||||
// - make a HitList from the PakRuns
|
||||
//
|
||||
// Details:
|
||||
// - keep two lists per word: one containing package-level declarations
|
||||
// that have snippets, and one containing all other spots
|
||||
// - keep the snippets in a separate table indexed by snippet index
|
||||
// and store the snippet index in place of the line number in a SpotInfo
|
||||
// (the line number for spots with snippets is stored in the snippet)
|
||||
// - at the end, create lists of alternative spellings for a given
|
||||
// word
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"container/vector";
|
||||
"go/ast";
|
||||
"go/parser";
|
||||
"go/token";
|
||||
"os";
|
||||
pathutil "path";
|
||||
"sort";
|
||||
"strings";
|
||||
)
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Data structures used during indexing
|
||||
|
||||
// A RunList is a vector of entries that can be sorted according to some
|
||||
// criteria. A RunList may be compressed by grouping "runs" of entries
|
||||
// which are equal (according to the sort critera) into a new RunList of
|
||||
// runs. For instance, a RunList containing pairs (x, y) may be compressed
|
||||
// into a RunList containing pair runs (x, {y}) where each run consists of
|
||||
// a list of y's with the same x.
|
||||
type RunList struct {
|
||||
vector.Vector;
|
||||
less func(x, y interface{}) bool;
|
||||
}
|
||||
|
||||
func (h *RunList) Less(i, j int) bool {
|
||||
return h.less(h.At(i), h.At(j));
|
||||
}
|
||||
|
||||
|
||||
func (h *RunList) sort(less func(x, y interface{}) bool) {
|
||||
h.less = less;
|
||||
sort.Sort(h);
|
||||
}
|
||||
|
||||
|
||||
// Compress entries which are the same according to a sort criteria
|
||||
// (specified by less) into "runs".
|
||||
func (h *RunList) reduce(less func(x, y interface{}) bool, newRun func(h *RunList, i, j int) interface{}) *RunList {
|
||||
// create runs of entries with equal values
|
||||
h.sort(less);
|
||||
|
||||
// for each run, make a new run object and collect them in a new RunList
|
||||
var hh RunList;
|
||||
i := 0;
|
||||
for j := 0; j < h.Len(); j++ {
|
||||
if less(h.At(i), h.At(j)) {
|
||||
hh.Push(newRun(h, i, j));
|
||||
i = j; // start a new run
|
||||
}
|
||||
}
|
||||
// add final run, if any
|
||||
if i < h.Len() {
|
||||
hh.Push(newRun(h, i, h.Len()));
|
||||
}
|
||||
|
||||
return &hh;
|
||||
}
|
||||
|
||||
|
||||
// A SpotInfo describes one identifier spot within a file. It packs three
// values into 32 bits: the SpotKind (which declaration, or a use), a line
// number or snippet index ("lori"), and a flag telling which of the two
// the lori value is.
//
// Bit layout:
//
//	bits    32   4    1       0
//	value    [lori|kind|isIndex]
//
type SpotInfo uint32

// A SpotKind tells whether an identifier is being declared (and by which
// kind of declaration) or merely used.
type SpotKind uint32

const (
	ImportDecl	SpotKind	= iota;
	ConstDecl;
	TypeDecl;
	VarDecl;
	FuncDecl;
	MethodDecl;
	Use;
	nKinds;
)


// makeSpotInfo packs kind, lori and isIndex into a SpotInfo.
// A lori value that does not survive the round trip through the
// 28 available bits is stored as 0.
func makeSpotInfo(kind SpotKind, lori int, isIndex bool) SpotInfo {
	// bit 0: isIndex
	var flag SpotInfo;
	if isIndex {
		flag = 1;
	}

	// bits [4..32): lori
	hi := SpotInfo(lori) << 4;
	if int(hi>>4) != lori {
		// lori doesn't fit - snippet indices are almost certainly
		// always smaller than 1<<28, so this can only happen for
		// line numbers; record "no line number" (= 0) instead
		hi = 0;
	}

	// bits [1..4): kind
	return hi | SpotInfo(kind)<<1 | flag;
}


// less reports whether the lori value of x is smaller than that of y.
func (x SpotInfo) less(y SpotInfo) bool {
	return int(x>>4) < int(y>>4);
}


// Kind extracts the SpotKind stored in bits [1..4).
func (x SpotInfo) Kind() SpotKind {
	return SpotKind((x >> 1) & 7);
}


// Lori extracts the line number or snippet index stored in bits [4..32).
func (x SpotInfo) Lori() int {
	lori := x >> 4;
	return int(lori);
}


// IsIndex reports whether Lori is a snippet index rather than a line number.
func (x SpotInfo) IsIndex() bool {
	return x&1 == 1;
}
|
||||
|
||||
|
||||
// A Pak describes a Go package.
type Pak struct {
	Path	string;	// directory name containing the package
	Name	string;	// package name as declared by package clause
}


// less reports whether p sorts before q. Packages are ordered by Path
// and, for equal paths, by Name.
//
// The comparison must be a proper ordering: if p is less than q, then q
// must not be less than p. Sorting and run detection in RunList.reduce
// both rely on this. Testing the two fields independently
// (Path < Path || Name < Name) violates it for pairs such as
// {"a", "z"} and {"b", "a"}.
func (p *Pak) less(q *Pak) bool {
	if p.Path != q.Path {
		return p.Path < q.Path;
	}
	return p.Name < q.Name;
}
|
||||
|
||||
|
||||
// A File describes a Go file.
|
||||
type File struct {
|
||||
Path string; // complete file name
|
||||
Pak Pak; // the package to which the file belongs
|
||||
}
|
||||
|
||||
|
||||
func (f *File) less(g *File) bool {
|
||||
return f.Path < g.Path;
|
||||
}
|
||||
|
||||
|
||||
// A Spot describes a single occurence of a word.
|
||||
type Spot struct {
|
||||
File *File;
|
||||
Info SpotInfo;
|
||||
}
|
||||
|
||||
|
||||
// Spots are sorted by filename.
|
||||
func lessSpot(x, y interface{}) bool {
|
||||
return x.(Spot).File.less(y.(Spot).File);
|
||||
}
|
||||
|
||||
|
||||
// A FileRun describes a run of Spots of a word in a single file.
|
||||
type FileRun struct {
|
||||
File *File;
|
||||
Infos []SpotInfo;
|
||||
}
|
||||
|
||||
|
||||
// newFileRun allocates a new *FileRun from the Spot run [i, j) in h.
|
||||
func newFileRun(h *RunList, i, j int) interface{} {
|
||||
file := h.At(i).(Spot).File;
|
||||
lines := make([]SpotInfo, j-i);
|
||||
prev := 0;
|
||||
k := 0;
|
||||
for ; i < j; i++ {
|
||||
info := h.At(i).(Spot).Info;
|
||||
// ignore line duplicates
|
||||
// (if lori is a snippet index it is unique - no need to check IsIndex())
|
||||
lori := info.Lori();
|
||||
if lori != prev {
|
||||
lines[k] = info;
|
||||
prev = lori;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
return &FileRun{file, lines[0:k]};
|
||||
}
|
||||
|
||||
|
||||
// FileRuns are sorted by package.
|
||||
func lessFileRun(x, y interface{}) bool {
|
||||
return x.(*FileRun).File.Pak.less(&y.(*FileRun).File.Pak);
|
||||
}
|
||||
|
||||
|
||||
// A PakRun describes a run of *FileRuns of a package.
|
||||
type PakRun struct {
|
||||
Pak Pak;
|
||||
Files []*FileRun;
|
||||
}
|
||||
|
||||
// Sorting support for files within a PakRun.
|
||||
func (p *PakRun) Len() int {
|
||||
return len(p.Files);
|
||||
}
|
||||
func (p *PakRun) Less(i, j int) bool {
|
||||
return p.Files[i].File.less(p.Files[j].File);
|
||||
}
|
||||
func (p *PakRun) Swap(i, j int) {
|
||||
p.Files[i], p.Files[j] = p.Files[j], p.Files[i];
|
||||
}
|
||||
|
||||
|
||||
// newPakRun allocates a new *PakRun from the *FileRun run [i, j) in h.
|
||||
func newPakRun(h *RunList, i, j int) interface{} {
|
||||
pak := h.At(i).(*FileRun).File.Pak;
|
||||
files := make([]*FileRun, j-i);
|
||||
k := 0;
|
||||
for ; i < j; i++ {
|
||||
files[k] = h.At(i).(*FileRun);
|
||||
k++;
|
||||
}
|
||||
run := &PakRun{pak, files};
|
||||
sort.Sort(run); // files were sorted by package; sort them by file now
|
||||
return run;
|
||||
}
|
||||
|
||||
|
||||
// PakRuns are sorted by package.
|
||||
func lessPakRun(x, y interface{}) bool {
|
||||
return x.(*PakRun).Pak.less(&y.(*PakRun).Pak);
|
||||
}
|
||||
|
||||
|
||||
// A HitList describes a list of PakRuns.
|
||||
type HitList []*PakRun
|
||||
|
||||
|
||||
func reduce(h0 *RunList) HitList {
|
||||
// reduce a list of Spots into a list of FileRuns
|
||||
h1 := h0.reduce(lessSpot, newFileRun);
|
||||
// reduce a list of FileRuns into a list of PakRuns
|
||||
h2 := h1.reduce(lessFileRun, newPakRun);
|
||||
// sort the list of PakRuns by package
|
||||
h2.sort(lessPakRun);
|
||||
// create a HitList
|
||||
h := make(HitList, h2.Len());
|
||||
for i := 0; i < h2.Len(); i++ {
|
||||
h[i] = h2.At(i).(*PakRun);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
func (h HitList) filter(pakname string) HitList {
|
||||
// determine number of matching packages (most of the time just one)
|
||||
n := 0;
|
||||
for _, p := range h {
|
||||
if p.Pak.Name == pakname {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
// create filtered HitList
|
||||
hh := make(HitList, n);
|
||||
i := 0;
|
||||
for _, p := range h {
|
||||
if p.Pak.Name == pakname {
|
||||
hh[i] = p;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return hh;
|
||||
}
|
||||
|
||||
|
||||
// A wordPair associates one spelling of a word with its canonical spelling.
type wordPair struct {
	canon	string;	// canonical word spelling (all lowercase)
	alt	string;	// alternative spelling
}
|
||||
|
||||
|
||||
// An AltWords holds all spellings that share one canonical
// (all lowercase) spelling of a word.
type AltWords struct {
	Canon	string;	// canonical word spelling (all lowercase)
	Alts	[]string;	// alternative spellings of the same word
}
|
||||
|
||||
|
||||
func lessWordPair(x, y interface{}) bool {
|
||||
return x.(*wordPair).canon < y.(*wordPair).canon;
|
||||
}
|
||||
|
||||
|
||||
// newAltWords allocates a new *AltWords from the *wordPair run [i, j) in h.
|
||||
func newAltWords(h *RunList, i, j int) interface{} {
|
||||
canon := h.At(i).(*wordPair).canon;
|
||||
alts := make([]string, j-i);
|
||||
k := 0;
|
||||
for ; i < j; i++ {
|
||||
alts[k] = h.At(i).(*wordPair).alt;
|
||||
k++;
|
||||
}
|
||||
return &AltWords{canon, alts};
|
||||
}
|
||||
|
||||
|
||||
func (a *AltWords) filter(s string) *AltWords {
|
||||
if len(a.Alts) == 1 && a.Alts[0] == s {
|
||||
// there are no different alternatives
|
||||
return nil;
|
||||
}
|
||||
|
||||
// make a new AltWords with the current spelling removed
|
||||
alts := make([]string, len(a.Alts));
|
||||
i := 0;
|
||||
for _, w := range a.Alts {
|
||||
if w != s {
|
||||
alts[i] = w;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return &AltWords{a.Canon, alts[0:i]};
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Indexer
|
||||
|
||||
type IndexResult struct {
|
||||
Decls RunList; // package-level declarations (with snippets)
|
||||
Others RunList; // all other occurences
|
||||
}
|
||||
|
||||
|
||||
// An Indexer maintains the data structures and provides the machinery
|
||||
// for indexing .go files under a file tree. It implements the path.Visitor
|
||||
// interface for walking file trees, and the ast.Visitor interface for
|
||||
// walking Go ASTs.
|
||||
type Indexer struct {
|
||||
words map[string]*IndexResult; // RunLists of Spots
|
||||
snippets vector.Vector; // vector of *Snippets, indexed by snippet indices
|
||||
file *File; // current file
|
||||
decl ast.Decl; // current decl
|
||||
nspots int; // number of spots encountered
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) addSnippet(s *Snippet) int {
|
||||
index := x.snippets.Len();
|
||||
x.snippets.Push(s);
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) visitComment(c *ast.CommentGroup) {
|
||||
if c != nil {
|
||||
ast.Walk(x, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
|
||||
if id != nil {
|
||||
lists, found := x.words[id.Value];
|
||||
if !found {
|
||||
lists = new(IndexResult);
|
||||
x.words[id.Value] = lists;
|
||||
}
|
||||
|
||||
if kind == Use || x.decl == nil {
|
||||
// not a declaration or no snippet required
|
||||
info := makeSpotInfo(kind, id.Pos().Line, false);
|
||||
lists.Others.Push(Spot{x.file, info});
|
||||
} else {
|
||||
// a declaration with snippet
|
||||
index := x.addSnippet(NewSnippet(x.decl, id));
|
||||
info := makeSpotInfo(kind, index, true);
|
||||
lists.Decls.Push(Spot{x.file, info});
|
||||
}
|
||||
|
||||
x.nspots++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) visitSpec(spec ast.Spec, isVarDecl bool) {
|
||||
switch n := spec.(type) {
|
||||
case *ast.ImportSpec:
|
||||
x.visitComment(n.Doc);
|
||||
x.visitIdent(ImportDecl, n.Name);
|
||||
for _, s := range n.Path {
|
||||
ast.Walk(x, s);
|
||||
}
|
||||
x.visitComment(n.Comment);
|
||||
|
||||
case *ast.ValueSpec:
|
||||
x.visitComment(n.Doc);
|
||||
kind := ConstDecl;
|
||||
if isVarDecl {
|
||||
kind = VarDecl;
|
||||
}
|
||||
for _, n := range n.Names {
|
||||
x.visitIdent(kind, n);
|
||||
}
|
||||
ast.Walk(x, n.Type);
|
||||
for _, v := range n.Values {
|
||||
ast.Walk(x, v);
|
||||
}
|
||||
x.visitComment(n.Comment);
|
||||
|
||||
case *ast.TypeSpec:
|
||||
x.visitComment(n.Doc);
|
||||
x.visitIdent(TypeDecl, n.Name);
|
||||
ast.Walk(x, n.Type);
|
||||
x.visitComment(n.Comment);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) Visit(node interface{}) bool {
|
||||
// TODO(gri): methods in interface types are categorized as VarDecl
|
||||
switch n := node.(type) {
|
||||
case *ast.Ident:
|
||||
x.visitIdent(Use, n);
|
||||
|
||||
case *ast.Field:
|
||||
x.decl = nil; // no snippets for fields
|
||||
x.visitComment(n.Doc);
|
||||
for _, m := range n.Names {
|
||||
x.visitIdent(VarDecl, m);
|
||||
}
|
||||
ast.Walk(x, n.Type);
|
||||
for _, s := range n.Tag {
|
||||
ast.Walk(x, s);
|
||||
}
|
||||
x.visitComment(n.Comment);
|
||||
|
||||
case *ast.DeclStmt:
|
||||
if decl, ok := n.Decl.(*ast.GenDecl); ok {
|
||||
// local declarations can only be *ast.GenDecls
|
||||
x.decl = nil; // no snippets for local declarations
|
||||
x.visitComment(decl.Doc);
|
||||
for _, s := range decl.Specs {
|
||||
x.visitSpec(s, decl.Tok == token.VAR);
|
||||
}
|
||||
} else {
|
||||
// handle error case gracefully
|
||||
ast.Walk(x, n.Decl);
|
||||
}
|
||||
|
||||
case *ast.GenDecl:
|
||||
x.decl = n;
|
||||
x.visitComment(n.Doc);
|
||||
for _, s := range n.Specs {
|
||||
x.visitSpec(s, n.Tok == token.VAR);
|
||||
}
|
||||
|
||||
case *ast.FuncDecl:
|
||||
x.visitComment(n.Doc);
|
||||
kind := FuncDecl;
|
||||
if n.Recv != nil {
|
||||
kind = MethodDecl;
|
||||
ast.Walk(x, n.Recv);
|
||||
}
|
||||
x.decl = n;
|
||||
x.visitIdent(kind, n.Name);
|
||||
ast.Walk(x, n.Type);
|
||||
if n.Body != nil {
|
||||
ast.Walk(x, n.Type);
|
||||
}
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) VisitDir(path string, d *os.Dir) bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) VisitFile(path string, d *os.Dir) {
|
||||
if !isGoFile(d) {
|
||||
return;
|
||||
}
|
||||
|
||||
file, err := parser.ParseFile(path, nil, parser.ParseComments);
|
||||
if err != nil {
|
||||
return; // ignore files with (parse) errors
|
||||
}
|
||||
|
||||
dir, _ := pathutil.Split(path);
|
||||
pak := Pak{dir, file.Name.Value};
|
||||
x.file = &File{path, pak};
|
||||
ast.Walk(x, file);
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Index
|
||||
|
||||
type LookupResult struct {
|
||||
Decls HitList; // package-level declarations (with snippets)
|
||||
Others HitList; // all other occurences
|
||||
}
|
||||
|
||||
|
||||
type Index struct {
|
||||
words map[string]*LookupResult; // maps words to hit lists
|
||||
alts map[string]*AltWords; // maps canonical(words) to lists of alternative spellings
|
||||
snippets []*Snippet; // all snippets, indexed by snippet index
|
||||
nspots int; // number of spots indexed (a measure of the index size)
|
||||
}
|
||||
|
||||
|
||||
// canonical returns the canonical (all lowercase) spelling of the word w.
func canonical(w string) string {
	lower := strings.ToLower(w);
	return lower;
}
|
||||
|
||||
|
||||
// NewIndex creates a new index for the file tree rooted at root.
|
||||
func NewIndex(root string) *Index {
|
||||
var x Indexer;
|
||||
|
||||
// initialize Indexer
|
||||
x.words = make(map[string]*IndexResult);
|
||||
|
||||
// collect all Spots
|
||||
pathutil.Walk(root, &x, nil);
|
||||
|
||||
// for each word, reduce the RunLists into a LookupResult;
|
||||
// also collect the word with its canonical spelling in a
|
||||
// word list for later computation of alternative spellings
|
||||
words := make(map[string]*LookupResult);
|
||||
var wlist RunList;
|
||||
for w, h := range x.words {
|
||||
decls := reduce(&h.Decls);
|
||||
others := reduce(&h.Others);
|
||||
words[w] = &LookupResult{
|
||||
Decls: decls,
|
||||
Others: others,
|
||||
};
|
||||
wlist.Push(&wordPair{canonical(w), w});
|
||||
}
|
||||
|
||||
// reduce the word list {canonical(w), w} into
|
||||
// a list of AltWords runs {canonical(w), {w}}
|
||||
alist := wlist.reduce(lessWordPair, newAltWords);
|
||||
|
||||
// convert alist into a map of alternative spellings
|
||||
alts := make(map[string]*AltWords);
|
||||
for i := 0; i < alist.Len(); i++ {
|
||||
a := alist.At(i).(*AltWords);
|
||||
alts[a.Canon] = a;
|
||||
}
|
||||
|
||||
// convert snippet vector into a list
|
||||
snippets := make([]*Snippet, x.snippets.Len());
|
||||
for i := 0; i < x.snippets.Len(); i++ {
|
||||
snippets[i] = x.snippets.At(i).(*Snippet);
|
||||
}
|
||||
|
||||
return &Index{words, alts, snippets, x.nspots};
|
||||
}
|
||||
|
||||
|
||||
// Size returns the number of different words and
|
||||
// spots indexed as a measure for the index size.
|
||||
func (x *Index) Size() (nwords int, nspots int) {
|
||||
return len(x.words), x.nspots;
|
||||
}
|
||||
|
||||
|
||||
func (x *Index) LookupWord(w string) (match *LookupResult, alt *AltWords) {
|
||||
match, _ = x.words[w];
|
||||
alt, _ = x.alts[canonical(w)];
|
||||
// remove current spelling from alternatives
|
||||
// (if there is no match, the alternatives do
|
||||
// not contain the current spelling)
|
||||
if match != nil && alt != nil {
|
||||
alt = alt.filter(w);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// For a given string s, which is either a single identifier or a qualified
|
||||
// identifier, Lookup returns a LookupResult, and a list of alternative
|
||||
// spellings, if any.
|
||||
func (x *Index) Lookup(s string) (match *LookupResult, alt *AltWords) {
|
||||
ss := strings.Split(s, ".", 0);
|
||||
|
||||
switch len(ss) {
|
||||
case 1:
|
||||
match, alt = x.LookupWord(ss[0]);
|
||||
|
||||
case 2:
|
||||
pakname := ss[0];
|
||||
match, alt = x.LookupWord(ss[1]);
|
||||
if match != nil {
|
||||
// found a match - filter by package name
|
||||
decls := match.Decls.filter(pakname);
|
||||
others := match.Others.filter(pakname);
|
||||
match = &LookupResult{decls, others};
|
||||
}
|
||||
if alt != nil {
|
||||
// alternative spellings found - add package name
|
||||
// TODO(gri): At the moment this is not very smart
|
||||
// and likely will produce suggestions that have
|
||||
// no match. Should filter incorrect alternatives.
|
||||
canon := pakname + "." + alt.Canon; // for completeness (currently not used)
|
||||
alts := make([]string, len(alt.Alts));
|
||||
for i, a := range alt.Alts {
|
||||
alts[i] = pakname+"."+a;
|
||||
}
|
||||
alt = &AltWords{canon, alts};
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
func (x *Index) Snippet(i int) *Snippet {
|
||||
// handle illegal snippet indices gracefully
|
||||
if 0 <= i && i < len(x.snippets) {
|
||||
return x.snippets[i];
|
||||
}
|
||||
return nil;
|
||||
}
|
Loading…
Reference in New Issue
Block a user