mirror of
https://github.com/golang/go
synced 2024-11-21 20:54:45 -07:00
godoc: full text index for whitelisted non-Go files
R=rsc CC=golang-dev https://golang.org/cl/4029046
This commit is contained in:
parent
c2ea38ac7e
commit
4497960b2d
@ -25,7 +25,6 @@ import (
|
||||
"strings"
|
||||
"template"
|
||||
"time"
|
||||
"utf8"
|
||||
)
|
||||
|
||||
|
||||
@ -56,7 +55,7 @@ var (
|
||||
// TODO(gri) consider the invariant that goroot always end in '/'
|
||||
goroot = flag.String("goroot", runtime.GOROOT(), "Go root directory")
|
||||
testDir = flag.String("testdir", "", "Go root subdirectory - for testing only (faster startups)")
|
||||
path = flag.String("path", "", "additional package directories (colon-separated)")
|
||||
pkgPath = flag.String("path", "", "additional package directories (colon-separated)")
|
||||
filter = flag.String("filter", "", "filter file containing permitted package directory paths")
|
||||
filterMin = flag.Int("filter_minutes", 0, "filter file update interval in minutes; disabled if <= 0")
|
||||
filterDelay delayTime // actual filter update interval in minutes; usually filterDelay == filterMin, but filterDelay may back off exponentially
|
||||
@ -80,7 +79,7 @@ var (
|
||||
|
||||
|
||||
func initHandlers() {
|
||||
fsMap.Init(*path)
|
||||
fsMap.Init(*pkgPath)
|
||||
fileServer = http.FileServer(*goroot, "")
|
||||
cmdHandler = httpHandler{"/cmd/", pathutil.Join(*goroot, "src/cmd"), false}
|
||||
pkgHandler = httpHandler{"/pkg/", pathutil.Join(*goroot, "src/pkg"), true}
|
||||
@ -768,53 +767,6 @@ func redirect(w http.ResponseWriter, r *http.Request) (redirected bool) {
|
||||
}
|
||||
|
||||
|
||||
// TODO(gri): Should have a mapping from extension to handler, eventually.
|
||||
|
||||
// textExt[x] is true if the extension x indicates a text file, and false otherwise.
|
||||
var textExt = map[string]bool{
|
||||
".css": false, // must be served raw
|
||||
".js": false, // must be served raw
|
||||
}
|
||||
|
||||
|
||||
func isTextFile(path string) bool {
|
||||
// if the extension is known, use it for decision making
|
||||
if isText, found := textExt[pathutil.Ext(path)]; found {
|
||||
return isText
|
||||
}
|
||||
|
||||
// the extension is not known; read an initial chunk of
|
||||
// file and check if it looks like correct UTF-8; if it
|
||||
// does, it's probably a text file
|
||||
f, err := os.Open(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var buf [1024]byte
|
||||
n, err := f.Read(buf[0:])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
s := string(buf[0:n])
|
||||
n -= utf8.UTFMax // make sure there's enough bytes for a complete unicode char
|
||||
for i, c := range s {
|
||||
if i > n {
|
||||
break
|
||||
}
|
||||
if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' {
|
||||
// decoding error or control character - not a text file
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// likely a text file
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) {
|
||||
src, err := ioutil.ReadFile(abspath)
|
||||
if err != nil {
|
||||
@ -922,7 +874,7 @@ type PageInfoMode uint
|
||||
|
||||
const (
|
||||
exportsOnly PageInfoMode = 1 << iota // only keep exported stuff
|
||||
genDoc // generate documentation
|
||||
genDoc // generate documentation
|
||||
)
|
||||
|
||||
|
||||
|
@ -47,7 +47,7 @@ import (
|
||||
"index/suffixarray"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
pathutil "path"
|
||||
"path"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
@ -430,8 +430,9 @@ func (a *AltWords) filter(s string) *AltWords {
|
||||
// Indexer
|
||||
|
||||
// Adjust these flags as seems best.
|
||||
const excludeMainPackages = false
|
||||
const excludeTestFiles = false
|
||||
const includeNonGoFiles = true
|
||||
const includeMainPackages = true
|
||||
const includeTestFiles = true
|
||||
|
||||
|
||||
type IndexResult struct {
|
||||
@ -619,11 +620,14 @@ func pkgName(filename string) string {
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) addFile(filename string) *ast.File {
|
||||
// addFile adds a file to the index if possible and returns the file set file
|
||||
// and the file's AST if it was successfully parsed as a Go file. If addFile
|
||||
// failed (that is, if the file was not added), it returns file == nil.
|
||||
func (x *Indexer) addFile(filename string, goFile bool) (file *token.File, ast *ast.File) {
|
||||
// open file
|
||||
f, err := os.Open(filename, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return nil
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
@ -643,59 +647,126 @@ func (x *Indexer) addFile(filename string) *ast.File {
|
||||
panic("internal error - file base incorrect")
|
||||
}
|
||||
|
||||
// append file contents to x.sources
|
||||
if _, err := x.sources.ReadFrom(f); err != nil {
|
||||
x.sources.Truncate(base) // discard possibly added data
|
||||
return nil // ignore files with I/O errors
|
||||
// append file contents (src) to x.sources
|
||||
if _, err := x.sources.ReadFrom(f); err == nil {
|
||||
src := x.sources.Bytes()[base:]
|
||||
|
||||
if goFile {
|
||||
// parse the file and in the process add it to the file set
|
||||
if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
|
||||
file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
|
||||
return
|
||||
}
|
||||
// file has parse errors, and the AST may be incorrect -
|
||||
// set lines information explicitly and index as ordinary
|
||||
// text file (cannot fall through to the text case below
|
||||
// because the file has already been added to the file set
|
||||
// by the parser)
|
||||
file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
|
||||
file.SetLinesForContent(src)
|
||||
ast = nil
|
||||
return
|
||||
}
|
||||
|
||||
if isText(src) {
|
||||
// only add the file to the file set (for the full text index)
|
||||
file = x.fset.AddFile(filename, x.fset.Base(), len(src))
|
||||
file.SetLinesForContent(src)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// parse the file and in the process add it to the file set
|
||||
src := x.sources.Bytes()[base:] // no need to reread the file
|
||||
file, err := parser.ParseFile(x.fset, filename, src, parser.ParseComments)
|
||||
if err != nil {
|
||||
// do not discard the added source code in this case
|
||||
// because the file has been added to the file set and
|
||||
// the source size must match the file set base
|
||||
// TODO(gri): given a FileSet.RemoveFile() one might be
|
||||
// able to discard the data here (worthwhile?)
|
||||
return nil // ignore files with (parse) errors
|
||||
}
|
||||
// discard possibly added data
|
||||
x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
|
||||
return
|
||||
}
|
||||
|
||||
return file
|
||||
|
||||
// Design note: Using an explicit white list of permitted files for indexing
|
||||
// makes sure that the important files are included and massively reduces the
|
||||
// number of files to index. The advantage over a blacklist is that unexpected
|
||||
// (non-blacklisted) files won't suddenly explode the index.
|
||||
//
|
||||
// TODO(gri): We may want to make this list customizable, perhaps via a flag.
|
||||
|
||||
// Files are whitelisted if they have a file name or extension
|
||||
// present as key in whitelisted.
|
||||
var whitelisted = map[string]bool{
|
||||
".bash": true,
|
||||
".c": true,
|
||||
".css": true,
|
||||
".go": true,
|
||||
".goc": true,
|
||||
".h": true,
|
||||
".html": true,
|
||||
".js": true,
|
||||
".out": true,
|
||||
".py": true,
|
||||
".s": true,
|
||||
".sh": true,
|
||||
".txt": true,
|
||||
".xml": true,
|
||||
"AUTHORS": true,
|
||||
"CONTRIBUTORS": true,
|
||||
"LICENSE": true,
|
||||
"Makefile": true,
|
||||
"PATENTS": true,
|
||||
"README": true,
|
||||
}
|
||||
|
||||
|
||||
// isWhitelisted returns true if a file is on the list
|
||||
// of "permitted" files for indexing.
|
||||
func isWhitelisted(filename string) bool {
|
||||
key := path.Ext(filename)
|
||||
if key == "" {
|
||||
// file has no extension - use entire filename
|
||||
key = filename
|
||||
}
|
||||
return whitelisted[key]
|
||||
}
|
||||
|
||||
|
||||
func (x *Indexer) visitFile(dirname string, f *os.FileInfo) {
|
||||
if !isGoFile(f) {
|
||||
if !f.IsRegular() {
|
||||
return
|
||||
}
|
||||
|
||||
path := pathutil.Join(dirname, f.Name)
|
||||
if excludeTestFiles && (!isPkgFile(f) || strings.HasPrefix(path, "test/")) {
|
||||
filename := path.Join(dirname, f.Name)
|
||||
goFile := false
|
||||
|
||||
switch {
|
||||
case isGoFile(f):
|
||||
if !includeTestFiles && (!isPkgFile(f) || strings.HasPrefix(filename, "test/")) {
|
||||
return
|
||||
}
|
||||
if !includeMainPackages && pkgName(filename) == "main" {
|
||||
return
|
||||
}
|
||||
goFile = true
|
||||
|
||||
case !includeNonGoFiles || !isWhitelisted(filename):
|
||||
return
|
||||
}
|
||||
|
||||
if excludeMainPackages && pkgName(path) == "main" {
|
||||
return
|
||||
}
|
||||
|
||||
file := x.addFile(path)
|
||||
file, fast := x.addFile(filename, goFile)
|
||||
if file == nil {
|
||||
return
|
||||
return // addFile failed
|
||||
}
|
||||
|
||||
// we've got a file to index
|
||||
x.current = x.fset.File(file.Pos()) // file.Pos is in the current file
|
||||
dir, _ := pathutil.Split(path)
|
||||
pak := Pak{dir, file.Name.Name}
|
||||
x.file = &File{path, pak}
|
||||
ast.Walk(x, file)
|
||||
if fast != nil {
|
||||
// we've got a Go file to index
|
||||
x.current = file
|
||||
dir, _ := path.Split(filename)
|
||||
pak := Pak{dir, fast.Name.Name}
|
||||
x.file = &File{filename, pak}
|
||||
ast.Walk(x, fast)
|
||||
}
|
||||
|
||||
// update statistics
|
||||
// (count real file size as opposed to using the padded x.sources.Len())
|
||||
x.stats.Bytes += x.current.Size()
|
||||
x.stats.Bytes += file.Size()
|
||||
x.stats.Files++
|
||||
x.stats.Lines += x.current.LineCount()
|
||||
x.stats.Lines += file.LineCount()
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,11 +15,13 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"utf8"
|
||||
)
|
||||
|
||||
|
||||
// An RWValue wraps a value and permits mutually exclusive
|
||||
// access to it and records the time the value was last set.
|
||||
//
|
||||
type RWValue struct {
|
||||
mutex sync.RWMutex
|
||||
value interface{}
|
||||
@ -107,3 +109,63 @@ func writeFileAtomically(filename string, data []byte) os.Error {
|
||||
}
|
||||
return os.Rename(f.Name(), filename)
|
||||
}
|
||||
|
||||
|
||||
// isText returns true if a significant prefix of s looks like correct UTF-8;
|
||||
// that is, if it is likely that s is human-readable text.
|
||||
//
|
||||
func isText(s []byte) bool {
|
||||
const max = 1024 // at least utf8.UTFMax
|
||||
if len(s) > max {
|
||||
s = s[0:max]
|
||||
}
|
||||
for i, c := range string(s) {
|
||||
if i+utf8.UTFMax > len(s) {
|
||||
// last char may be incomplete - ignore
|
||||
break
|
||||
}
|
||||
if c == 0xFFFD || c < ' ' && c != '\n' && c != '\t' {
|
||||
// decoding error or control character - not a text file
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
// TODO(gri): Should have a mapping from extension to handler, eventually.
|
||||
|
||||
// textExt[x] is true if the extension x indicates a text file, and false otherwise.
|
||||
var textExt = map[string]bool{
|
||||
".css": false, // must be served raw
|
||||
".js": false, // must be served raw
|
||||
}
|
||||
|
||||
|
||||
// isTextFile returns true if the file has a known extension indicating
|
||||
// a text file, or if a significant chunk of the specified file looks like
|
||||
// correct UTF-8; that is, if it is likely that the file contains human-
|
||||
// readable text.
|
||||
//
|
||||
func isTextFile(filename string) bool {
|
||||
// if the extension is known, use it for decision making
|
||||
if isText, found := textExt[pathutil.Ext(filename)]; found {
|
||||
return isText
|
||||
}
|
||||
|
||||
// the extension is not known; read an initial chunk
|
||||
// of the file and check if it looks like text
|
||||
f, err := os.Open(filename, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var buf [1024]byte
|
||||
n, err := f.Read(buf[0:])
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return isText(buf[0:n])
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user