godoc: support for regular expression full text search

Regular expressions may now be used in conjunction with full text search. Godoc will show the first 10000 occurrences in the source code and highlight the respective text segments. - added new flag -testdir to specify a small directory for testing (fast index creation; default = "") - use new FormatText function to format text and Go source code in HTML (supporting multiple kinds of text selections simultaneously); this replaces the uses of go/printer Stylers - for now removed currently unused mechanism for identifier-specific JS popups (will come back in some form once we have type or other useful information) - various typo fixes and minor cleanups throughout Missing: - indexing of non-.go files R=r, r2 CC=golang-dev, rsc https://golang.org/cl/3699041
2024-11-24 05:10:19 -07:00 · 2011-01-10 15:34:29 -08:00 · 2011-01-10 15:34:29 -08:00 · 4bdaf59cc9
commit 4bdaf59cc9
parent dd916be3d8
11 changed files with 573 additions and 422 deletions
--- a/doc/all.css
+++ b/doc/all.css
@ -161,9 +161,30 @@ span.comment {
  color: #002090;
 }
 span.highlight {
-  background: #FFFF90;
+  background: #FF9900;
  font-weight: bold;
 }
+span.highlight-comment {
+  background: #FF9900;
+  font-weight: bold;
+  color: #002090;
+}
+span.selection {
+  background: #FFFF00
+}
+span.selection-comment {
+  color: #002090;
+  background: #FFFF00
+}
+span.selection-highlight {
+  background: #FF9900;
+  font-weight: bold;
+}
+span.selection-highlight-comment {
+  background: #FF9900;
+  font-weight: bold;
+  color: #002090;
+}
 span.alert {
  color: #D00000;
 }
--- a/lib/godoc/search.html
+++ b/lib/godoc/search.html
@ -4,10 +4,9 @@
 	license that can be found in the LICENSE file.
 -->

-{.section Accurate}
-{.or}
+{.section Alert}
 	<p>
-	<span class="alert" style="font-size:120%">Indexing in progress - result may be inaccurate.</span>
+	<span class="alert" style="font-size:120%">{@}</span>
 	</p>
 {.end}
 {.section Alt}
@ -27,7 +26,7 @@
 				{.repeated section Groups}
 					{.repeated section Infos}
 						<a href="/{File.Path|url-src}?h={Query|urlquery-esc}#L{@|infoLine}">{File.Path|url-src}:{@|infoLine}</a>
-						<pre>{@|infoSnippet}</pre>
+						{@|infoSnippet}
 					{.end}
 				{.end}
 			{.end}
@ -59,11 +58,11 @@
 {.end}
 {.section Textual}
 	{.section Complete}
-		<h2 id="Textual">{Found|html-esc} textual occurences</h2>
+		<h2 id="Textual">{Found|html-esc} textual occurrences</h2>
 	{.or}
-		<h2 id="Textual">More than {Found|html-esc} textual occurences</h2>
+		<h2 id="Textual">More than {Found|html-esc} textual occurrences</h2>
 		<p>
-		<span class="alert" style="font-size:120%">Not all files or lines containing {Query|html-esc} are shown.</span>
+		<span class="alert" style="font-size:120%">Not all files or lines containing "{Query|html-esc}" are shown.</span>
 		</p>
 	{.end}
 	<p>
@ -71,12 +70,16 @@
 	{.repeated section @}
 		<tr>
 		<td align="left" valign="top">
-		<a href="/{Filename|url-src}?g={Query|urlquery-esc}">{Filename|url-src}</a>:
+		<a href="/{Filename|url-src}?h={Query|urlquery-esc}">{Filename|url-src}</a>:
 		</td>
 		<td align="left" width="4"></td>
 		<th align="left" valign="top">{Lines|numlines}</th>
 		<td align="left" width="4"></td>
-		<td align="left">{Lines Complete|linelist}</td>
+		<td align="left">
+		{.repeated section Lines}
+			<a href="/{Filename|url-src}?h={Query|urlquery-esc}#L{@|html-esc}">{@|html-esc}</a>
+		{.end}
+		</td>
 		</tr>
 	{.end}
 	{.section Complete}
--- a/lib/godoc/search.txt
+++ b/lib/godoc/search.txt
@ -1,9 +1,8 @@
 QUERY
 	{Query}

-{.section Accurate}
-{.or}
-INDEXING IN PROGRESS - RESULT MAY BE INACCURATE
+{.section Alert}
+{@}

 {.end}
 {.section Alt}
--- a/lib/godoc/source.html
+++ b/lib/godoc/source.html
@ -1,20 +0,0 @@
-<!--
-	Copyright 2009 The Go Authors. All rights reserved.
-	Use of this source code is governed by a BSD-style
-	license that can be found in the LICENSE file.
-->
-
-<script src="http://www.google.com/jsapi"></script>
-<script src="/doc/popups.js"></script>
-<script>
-{# IdList is HTML-escaped by godoc}
-var popup_data = {IdList}
-
-google.load("jquery", "1");
-google.setOnLoadCallback(function() {.meta-left}
-        godocs_bindPopups(popup_data);
-{.meta-right});
-</script>
-
-{# Source is HTML-escaped by godoc}
-<pre>{Source}</pre>
--- a/src/cmd/godoc/Makefile
+++ b/src/cmd/godoc/Makefile
@ -8,6 +8,7 @@ TARG=godoc
 GOFILES=\
 	codewalk.go\
 	dirtrees.go\
+	format.go\
 	godoc.go\
 	index.go\
 	main.go\
--- a/src/cmd/godoc/doc.go
+++ b/src/cmd/godoc/doc.go
@ -48,7 +48,7 @@ The flags are:
 	-timestamps=true
 		show timestamps with directory listings
 	-fulltext=false
-		build full text index for string search results
+		build full text index for regular expression queries
 	-path=""
 		additional package directories (colon-separated)
 	-html
--- a/src/cmd/godoc/format.go
+++ b/src/cmd/godoc/format.go
@ -0,0 +1,342 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements FormatSelections and FormatText.
+// FormatText is used to HTML-format Go and non-Go source
+// text with line numbers and highlighted sections. It is
+// built on top of FormatSelections, a generic formatter
+// for "selected" text.
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/scanner"
+	"go/token"
+	"io"
+	"regexp"
+	"strconv"
+	"template"
+)
+
+
+// ----------------------------------------------------------------------------
+// Implementation of FormatSelections
+
+// A Selection is a function returning offset pairs []int{a, b}
+// describing consecutive non-overlapping text segments [a, b).
+// If there are no more segments, a Selection must return nil.
+//
+// TODO It's more efficient to return a pair (a, b int) instead
+//      of creating lots of slices. Need to determine how to
+//      indicate the end of a Selection.
+//
+type Selection func() []int
+
+
+// A LinkWriter writes some start or end "tag" to w for the text offset offs.
+// It is called by FormatSelections at the start or end of each link segment.
+//
+type LinkWriter func(w io.Writer, offs int, start bool)
+
+
+// A SegmentWriter formats a text according to selections and writes it to w.
+// The selections parameter is a bit set indicating which selections provided
+// to FormatSelections overlap with the text segment: If the n'th bit is set
+// in selections, the n'th selection provided to FormatSelections is overlapping
+// with the text.
+//
+type SegmentWriter func(w io.Writer, text []byte, selections int)
+
+
+// FormatSelections takes a text and writes it to w using link and segment
+// writers lw and sw as follows: lw is invoked for consecutive segment starts
+// and ends as specified through the links selection, and sw is invoked for
+// consecutive segments of text overlapped by the same selections as specified
+// by selections. The link writer lw may be nil, in which case the links
+// Selection is ignored.
+//
+func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
+	if lw != nil {
+		selections = append(selections, links)
+	}
+	// compute the sequence of consecutive segment changes
+	changes := newMerger(selections)
+	// The i'th bit in bitset indicates that the text
+	// at the current offset is covered by selections[i].
+	bitset := 0
+	lastOffs := 0
+	for {
+		// get the next segment change
+		index, offs, start := changes.next()
+		if index < 0 || offs > len(text) {
+			// no more segment changes or the next change
+			// is past the end of the text - we're done
+			break
+		}
+		// determine the kind of segment change
+		if index == len(selections)-1 {
+			// we have a link segment change:
+			// format the previous selection segment, write the
+			// link tag and start a new selection segment
+			sw(w, text[lastOffs:offs], bitset)
+			lastOffs = offs
+			lw(w, offs, start)
+		} else {
+			// we have a selection change:
+			// format the previous selection segment, determine
+			// the new selection bitset and start a new segment 
+			sw(w, text[lastOffs:offs], bitset)
+			lastOffs = offs
+			mask := 1 << uint(index)
+			if start {
+				bitset |= mask
+			} else {
+				bitset &^= mask
+			}
+		}
+	}
+	sw(w, text[lastOffs:], bitset)
+}
+
+
+// A merger merges a slice of Selections and produces a sequence of
+// consecutive segment change events through repeated next() calls.
+//
+type merger struct {
+	selections []Selection
+	segments   [][]int // segments[i] is the next segment of selections[i]
+}
+
+
+const infinity int = 2e9
+
+func newMerger(selections []Selection) *merger {
+	segments := make([][]int, len(selections))
+	for i, sel := range selections {
+		segments[i] = []int{infinity, infinity}
+		if sel != nil {
+			if seg := sel(); seg != nil {
+				segments[i] = seg
+			}
+		}
+	}
+	return &merger{selections, segments}
+}
+
+
+// next returns the next segment change: index specifies the Selection
+// to which the segment belongs, offs is the segment start or end offset
+// as determined by the start value. If there are no more segment changes,
+// next returns an index value < 0.
+//
+func (m *merger) next() (index, offs int, start bool) {
+	// find the next smallest offset where a segment starts or ends
+	offs = infinity
+	index = -1
+	for i, seg := range m.segments {
+		switch {
+		case seg[0] < offs:
+			offs = seg[0]
+			index = i
+			start = true
+		case seg[1] < offs:
+			offs = seg[1]
+			index = i
+			start = false
+		}
+	}
+	if index < 0 {
+		// no offset found => all selections merged
+		return
+	}
+	// offset found - it's either the start or end offset but
+	// either way it is ok to consume the start offset: set it
+	// to infinity so it won't be considered in the following
+	// next call
+	m.segments[index][0] = infinity
+	if start {
+		return
+	}
+	// end offset found - consume it
+	m.segments[index][1] = infinity
+	// advance to the next segment for that selection
+	seg := m.selections[index]()
+	if seg == nil {
+		return
+	}
+	m.segments[index] = seg
+	return
+}
+
+
+// ----------------------------------------------------------------------------
+// Implementation of FormatText
+
+// lineSelection returns the line segments for text as a Selection.
+func lineSelection(text []byte) Selection {
+	i, j := 0, 0
+	return func() (seg []int) {
+		// find next newline, if any
+		for j < len(text) {
+			j++
+			if text[j-1] == '\n' {
+				break
+			}
+		}
+		if i < j {
+			// text[i:j] constitutes a line
+			seg = []int{i, j}
+			i = j
+		}
+		return
+	}
+}
+
+
+// commentSelection returns the sequence of consecutive comments
+// in the Go src text as a Selection.
+//
+func commentSelection(src []byte) Selection {
+	var s scanner.Scanner
+	file := s.Init(token.NewFileSet(), "", src, nil, scanner.ScanComments+scanner.InsertSemis)
+	return func() (seg []int) {
+		for {
+			pos, tok, lit := s.Scan()
+			if tok == token.EOF {
+				break
+			}
+			offs := file.Offset(pos)
+			if tok == token.COMMENT {
+				seg = []int{offs, offs + len(lit)}
+				break
+			}
+		}
+		return
+	}
+}
+
+
+// makeSelection is a helper function to make a Selection from a slice of pairs.
+func makeSelection(matches [][]int) Selection {
+	return func() (seg []int) {
+		if len(matches) > 0 {
+			seg = matches[0]
+			matches = matches[1:]
+		}
+		return
+	}
+}
+
+
+// regexpSelection computes the Selection for the regular expression expr in text.
+func regexpSelection(text []byte, expr string) Selection {
+	var matches [][]int
+	if rx, err := regexp.Compile(expr); err == nil {
+		matches = rx.FindAllIndex(text, -1)
+	}
+	return makeSelection(matches)
+}
+
+
+var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
+
+// rangeSelection computes the Selection for a text range described
+// by the argument str; the range description must match the selRx
+// regular expression.
+//
+func rangeSelection(str string) Selection {
+	m := selRx.FindStringSubmatch(str)
+	if len(m) >= 2 {
+		from, _ := strconv.Atoi(m[1])
+		to, _ := strconv.Atoi(m[2])
+		if from < to {
+			return makeSelection([][]int{[]int{from, to}})
+		}
+	}
+	return nil
+}
+
+
+// Span tags for all the possible selection combinations that may
+// be generated by FormatText. Selections are indicated by a bitset,
+// and the value of the bitset specifies the tag to be used.
+//
+// bit 0: comments
+// bit 1: highlights
+// bit 2: selections
+//
+var startTags = [][]byte{
+	/* 000 */ []byte(``),
+	/* 001 */ []byte(`<span class ="comment">`),
+	/* 010 */ []byte(`<span class="highlight">`),
+	/* 011 */ []byte(`<span class="highlight-comment">`),
+	/* 100 */ []byte(`<span class="selection">`),
+	/* 101 */ []byte(`<span class="selection-comment">`),
+	/* 110 */ []byte(`<span class="selection-highlight">`),
+	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
+}
+
+var endTag = []byte(`</span>`)
+
+
+func selectionTag(w io.Writer, text []byte, selections int) {
+	if len(text) > 0 {
+		if selections < len(startTags) {
+			if tag := startTags[selections]; len(tag) > 0 {
+				w.Write(tag)
+				template.HTMLEscape(w, text)
+				w.Write(endTag)
+				return
+			}
+		}
+		template.HTMLEscape(w, text)
+	}
+}
+
+
+// FormatText HTML-escapes text and returns it wrapped in <pre> tags.
+// Consecutive text segments are wrapped in HTML spans (with tags as
+// defined by startTags and endTag) as follows:
+//
+//	- if line >= 0, line numbers are printed before each line, starting
+//	  with the value of line
+//	- if the text is Go source, comments get the "comment" span class
+//	- each occurrence of the regular expression pattern gets the "highlight"
+//	  span class
+//	- text segments covered by selection get the "selection" span class
+//
+// Comments, highlights, and selections may overlap arbitrarily; the respective
+// HTML span classes are specified in the startTags variable.
+//
+func FormatText(text []byte, line int, goSource bool, pattern string, selection Selection) []byte {
+	var buf bytes.Buffer
+	buf.WriteString("<pre>\n")
+
+	var comments, highlights Selection
+	if goSource {
+		comments = commentSelection(text)
+	}
+	if pattern != "" {
+		highlights = regexpSelection(text, pattern)
+	}
+	if comments != nil || highlights != nil || selection != nil {
+		var lineTag LinkWriter
+		if line >= 0 {
+			lineTag = func(w io.Writer, _ int, start bool) {
+				if start {
+					fmt.Fprintf(w, "<a id=\"L%d\"></a>%5d\t", line, line)
+					line++
+				}
+			}
+		}
+		FormatSelections(&buf, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
+	} else {
+		template.HTMLEscape(&buf, text)
+	}
+
+	buf.WriteString("</pre>\n")
+	return buf.Bytes()
+}
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@ -55,6 +55,7 @@ var (
 	// file system roots
 	// TODO(gri) consider the invariant that goroot always end in '/'
 	goroot      = flag.String("goroot", runtime.GOROOT(), "Go root directory")
+	testDir     = flag.String("testdir", "", "Go root subdirectory - for testing only (faster startups)")
 	path        = flag.String("path", "", "additional package directories (colon-separated)")
 	filter      = flag.String("filter", "", "filter file containing permitted package directory paths")
 	filterMin   = flag.Int("filter_minutes", 0, "filter file update interval in minutes; disabled if <= 0")
@ -63,7 +64,7 @@ var (
 	// layout control
 	tabwidth       = flag.Int("tabwidth", 4, "tab width")
 	showTimestamps = flag.Bool("timestamps", true, "show timestamps with directory listings")
-	fulltextIndex  = flag.Bool("fulltext", false, "build full text index for string search results")
+	fulltextIndex  = flag.Bool("fulltext", false, "build full text index for regular expression queries")

 	// file system mapping
 	fsMap      Mapping // user-defined mapping
@ -95,6 +96,12 @@ func registerPublicHandlers(mux *http.ServeMux) {
 }


+func initFSTree() {
+	fsTree.set(newDirectory(pathutil.Join(*goroot, *testDir), nil, -1))
+	invalidateIndex()
+}
+
+
 // ----------------------------------------------------------------------------
 // Directory filters

@ -265,181 +272,6 @@ func relativePath(path string) string {
 }


-// ----------------------------------------------------------------------------
-// HTML formatting support
-
-// aposescaper implements an io.Writer that escapes single quotes:
-// ' is written as \' . It is used to escape text such that it can
-// be used as the content of single-quoted string literals.
-type aposescaper struct {
-	w io.Writer
-}
-
-
-func (e *aposescaper) Write(p []byte) (n int, err os.Error) {
-	backslash := []byte{'\\'}
-	var i, m int
-	for j, b := range p {
-		if b == '\'' {
-			m, err = e.w.Write(p[i:j])
-			n += m
-			if err != nil {
-				return
-			}
-			_, err = e.w.Write(backslash)
-			if err != nil {
-				return
-			}
-			i = j
-		}
-	}
-	m, err = e.w.Write(p[i:])
-	n += m
-	return
-}
-
-
-// Styler implements a printer.Styler.
-type Styler struct {
-	linetags  bool
-	highlight string
-	objmap    map[*ast.Object]int
-	idcount   int
-}
-
-
-func newStyler(highlight string) *Styler {
-	return &Styler{true, highlight, make(map[*ast.Object]int), 0}
-}
-
-
-// identId returns a number >= 0 identifying the *ast.Object
-// denoted by name. If no object is denoted, the result is < 0.
-//
-// TODO(gri): Consider making this a mapping from popup info
-//            (for that name) to id, instead of *ast.Object
-//            to id. If a lot of the popup info is the same
-//            (e.g. type information), this will reduce the
-//            size of the html generated.
-func (s *Styler) identId(name *ast.Ident) int {
-	obj := name.Obj
-	if obj == nil || s.objmap == nil {
-		return -1
-	}
-	id, found := s.objmap[obj]
-	if !found {
-		// first occurence
-		id = s.idcount
-		s.objmap[obj] = id
-		s.idcount++
-	}
-	return id
-}
-
-
-// writeObjInfo writes the popup info corresponding to obj to w.
-// The text is HTML-escaped and does not contain single quotes.
-func writeObjInfo(w io.Writer, fset *token.FileSet, obj *ast.Object) {
-	// for now, show object kind and name; eventually
-	// do something more interesting (show declaration,
-	// for instance)
-	if obj.Kind != ast.Bad {
-		fmt.Fprintf(w, "%s ", obj.Kind)
-	}
-	template.HTMLEscape(w, []byte(obj.Name))
-	// show type if we know it
-	if obj.Type != nil && obj.Type.Expr != nil {
-		fmt.Fprint(w, " ")
-		writeNode(&aposescaper{w}, fset, obj.Type.Expr, true, &defaultStyler)
-	}
-}
-
-
-// idList returns a Javascript array (source) with identifier popup
-// information: The i'th array entry is a single-quoted string with
-// the popup information for an identifier x with s.identId(x) == i,
-// for 0 <= i < s.idcount.
-func (s *Styler) idList(fset *token.FileSet) []byte {
-	var buf bytes.Buffer
-	buf.WriteString("[\n")
-
-	if s.idcount > 0 {
-		// invert objmap: create an array [id]obj from map[obj]id
-		a := make([]*ast.Object, s.idcount)
-		for obj, id := range s.objmap {
-			a[id] = obj
-		}
-
-		// for each id, print object info as single-quoted Javascript string
-		for id, obj := range a {
-			printIndex := false // enable for debugging (but longer html)
-			if printIndex {
-				fmt.Fprintf(&buf, "/* %4d */ ", id)
-			}
-			fmt.Fprint(&buf, "'")
-			writeObjInfo(&buf, fset, obj)
-			fmt.Fprint(&buf, "',\n")
-		}
-	}
-
-	buf.WriteString("]\n")
-	return buf.Bytes()
-}
-
-
-// Use the defaultStyler when there is no specific styler.
-// The defaultStyler does not emit line tags since they may
-// interfere with tags emitted by templates.
-// TODO(gri): Should emit line tags at the beginning of a line;
-//            never in the middle of code.
-var defaultStyler Styler
-
-
-func (s *Styler) LineTag(line int) (text []byte, tag printer.HTMLTag) {
-	if s.linetags {
-		tag = printer.HTMLTag{fmt.Sprintf(`<a id="L%d">`, line), "</a>"}
-	}
-	return
-}
-
-
-func (s *Styler) Comment(c *ast.Comment, line []byte) (text []byte, tag printer.HTMLTag) {
-	text = line
-	// minimal syntax-coloring of comments for now - people will want more
-	// (don't do anything more until there's a button to turn it on/off)
-	tag = printer.HTMLTag{`<span class="comment">`, "</span>"}
-	return
-}
-
-
-func (s *Styler) BasicLit(x *ast.BasicLit) (text []byte, tag printer.HTMLTag) {
-	text = x.Value
-	return
-}
-
-
-func (s *Styler) Ident(name *ast.Ident) (text []byte, tag printer.HTMLTag) {
-	text = []byte(name.Name)
-	var str string
-	if id := s.identId(name); id >= 0 {
-		str = fmt.Sprintf(` id="%d"`, id)
-	}
-	if s.highlight == name.Name {
-		str += ` class="highlight"`
-	}
-	if str != "" {
-		tag = printer.HTMLTag{"<span" + str + ">", "</span>"}
-	}
-	return
-}
-
-
-func (s *Styler) Token(tok token.Token) (text []byte, tag printer.HTMLTag) {
-	text = []byte(tok.String())
-	return
-}
-
-
 // ----------------------------------------------------------------------------
 // Tab conversion

@ -516,7 +348,7 @@ func (p *tconv) Write(data []byte) (n int, err os.Error) {
 // Templates

 // Write an AST-node to w; optionally html-escaped.
-func writeNode(w io.Writer, fset *token.FileSet, node interface{}, html bool, styler printer.Styler) {
+func writeNode(w io.Writer, fset *token.FileSet, node interface{}, html bool) {
 	mode := printer.TabIndent | printer.UseSpaces
 	if html {
 		mode |= printer.GenHTML
@ -525,7 +357,7 @@ func writeNode(w io.Writer, fset *token.FileSet, node interface{}, html bool, st
 	// to ensure a good outcome in most browsers (there may still
 	// be tabs in comments and strings, but converting those into
 	// the right number of spaces is much harder)
-	(&printer.Config{mode, *tabwidth, styler}).Fprint(&tconv{output: w}, fset, node)
+	(&printer.Config{mode, *tabwidth, nil}).Fprint(&tconv{output: w}, fset, node)
 }


@ -547,7 +379,7 @@ func writeAny(w io.Writer, fset *token.FileSet, html bool, x interface{}) {
 	case string:
 		writeText(w, []byte(v), html)
 	case ast.Decl, ast.Expr, ast.Stmt, *ast.File:
-		writeNode(w, fset, x, html, &defaultStyler)
+		writeNode(w, fset, x, html)
 	default:
 		if html {
 			var buf bytes.Buffer
@ -612,20 +444,26 @@ func urlQueryEscFmt(w io.Writer, format string, x ...interface{}) {
 func urlFmt(w io.Writer, format string, x ...interface{}) {
 	var path string
 	var line int
+	var low, high int // selection

 	// determine path and position info, if any
 	type positioner interface {
 		Pos() token.Pos
+		End() token.Pos
 	}
 	switch t := x[0].(type) {
 	case string:
 		path = t
 	case positioner:
-		pos := t.Pos()
-		if pos.IsValid() {
-			pos := fileset(x).Position(pos)
+		fset := fileset(x)
+		if p := t.Pos(); p.IsValid() {
+			pos := fset.Position(p)
 			path = pos.Filename
 			line = pos.Line
+			low = pos.Offset
+		}
+		if p := t.End(); p.IsValid() {
+			high = fset.Position(p).Offset
 		}
 	default:
 		// we should never reach here, but be resilient
@ -655,11 +493,23 @@ func urlFmt(w io.Writer, format string, x ...interface{}) {
 	case "url-src":
 		template.HTMLEscape(w, []byte(relpath))
 	case "url-pos":
+		template.HTMLEscape(w, []byte(relpath))
+		// selection ranges are of form "s=low:high"
+		if low < high {
+			fmt.Fprintf(w, "?s=%d:%d", low, high)
+			// if we have a selection, position the page
+			// such that the selection is a bit below the top
+			line -= 10
+			if line < 1 {
+				line = 1
+			}
+		}
 		// line id's in html-printed source are of the
 		// form "L%d" where %d stands for the line number
-		template.HTMLEscape(w, []byte(relpath))
+		if line > 0 {
 			fmt.Fprintf(w, "#L%d", line)
 		}
+	}
 }


@ -705,14 +555,14 @@ func infoLineFmt(w io.Writer, format string, x ...interface{}) {
 // Template formatter for "infoSnippet" format.
 func infoSnippetFmt(w io.Writer, format string, x ...interface{}) {
 	info := x[0].(SpotInfo)
-	text := `<span class="alert">no snippet text available</span>`
+	text := []byte(`<span class="alert">no snippet text available</span>`)
 	if info.IsIndex() {
 		index, _ := searchIndex.get()
 		// no escaping of snippet text needed;
 		// snippet text is escaped when generated
 		text = index.(*Index).Snippet(info.Lori()).Text
 	}
-	fmt.Fprint(w, text)
+	w.Write(text)
 }


@ -752,27 +602,6 @@ func numlinesFmt(w io.Writer, format string, x ...interface{}) {
 }


-// Template formatter for "linelist" format.
-func linelistFmt(w io.Writer, format string, x ...interface{}) {
-	list := x[0].([]int)
-	complete := x[1].(bool)
-
-	const max = 100 // show at most this many lines
-	if len(list) > max {
-		list = list[0:max]
-		complete = false
-	}
-	sort.SortInts(list)
-
-	for _, line := range list {
-		fmt.Fprintf(w, " %d", line)
-	}
-	if !complete {
-		fmt.Fprintf(w, " ...")
-	}
-}
-
-
 var fmap = template.FormatterMap{
 	"":             textFmt,
 	"html":         htmlFmt,
@ -790,7 +619,6 @@ var fmap = template.FormatterMap{
 	"dir/":         dirslashFmt,
 	"localname":    localnameFmt,
 	"numlines":     numlinesFmt,
-	"linelist":     linelistFmt,
 }


@ -817,8 +645,7 @@ var (
 	packageHTML,
 	packageText,
 	searchHTML,
-	searchText,
-	sourceHTML *template.Template
+	searchText *template.Template
 )

 func readTemplates() {
@ -832,7 +659,6 @@ func readTemplates() {
 	packageText = readTemplate("package.txt")
 	searchHTML = readTemplate("search.html")
 	searchText = readTemplate("search.txt")
-	sourceHTML = readTemplate("source.html")
 }


@ -933,34 +759,6 @@ func applyTemplate(t *template.Template, name string, data interface{}) []byte {
 }


-func serveGoSource(w http.ResponseWriter, r *http.Request, abspath, relpath string) {
-	fset := token.NewFileSet()
-	file, err := parser.ParseFile(fset, abspath, nil, parser.ParseComments)
-	if err != nil {
-		log.Printf("parser.ParseFile: %s", err)
-		serveError(w, r, relpath, err)
-		return
-	}
-
-	// TODO(gri) enable once we are confident it works for all files
-	// augment AST with types; ignore errors (partial type information ok)
-	// typechecker.CheckFile(file, nil)
-
-	var buf bytes.Buffer
-	styler := newStyler(r.FormValue("h"))
-	writeNode(&buf, fset, file, true, styler)
-
-	type SourceInfo struct {
-		IdList []byte
-		Source []byte
-	}
-	info := &SourceInfo{styler.idList(fset), buf.Bytes()}
-
-	contents := applyTemplate(sourceHTML, "sourceHTML", info)
-	servePage(w, "Source file "+relpath, "", "", contents)
-}
-
-
 func redirect(w http.ResponseWriter, r *http.Request) (redirected bool) {
 	if canonical := pathutil.Clean(r.URL.Path) + "/"; r.URL.Path != canonical {
 		http.Redirect(w, r, canonical, http.StatusMovedPermanently)
@ -1017,36 +815,7 @@ func isTextFile(path string) bool {
 }


-// HTMLSubst replaces all occurences of f in s with r and HTML-escapes
-// everything else in s (but not r). The result is written to w.
-//
-func HTMLSubst(w io.Writer, s, f, r []byte) {
-	for {
-		i := bytes.Index(s, f)
-		if i < 0 {
-			break
-		}
-		template.HTMLEscape(w, s[0:i])
-		w.Write(r)
-		s = s[i+len(f):]
-	}
-	template.HTMLEscape(w, s)
-}
-
-
-// highlight highlights all occurrences of h in s and writes the
-// HTML-escaped result to w.
-//
-func highlight(w io.Writer, s, h []byte) {
-	var r bytes.Buffer
-	r.WriteString(`<span class="highlight">`)
-	template.HTMLEscape(&r, h)
-	r.WriteString(`</span>`)
-	HTMLSubst(w, s, h, r.Bytes())
-}
-
-
-func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath string) {
+func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) {
 	src, err := ioutil.ReadFile(abspath)
 	if err != nil {
 		log.Printf("ioutil.ReadFile: %s", err)
@ -1054,17 +823,8 @@ func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath stri
 		return
 	}

-	var buf bytes.Buffer
-	buf.WriteString("<pre>\n")
-	g := r.FormValue("g")
-	if g != "" {
-		highlight(&buf, src, []byte(g))
-	} else {
-		template.HTMLEscape(&buf, src)
-	}
-	buf.WriteString("</pre>\n")
-
-	servePage(w, "Text file "+relpath, "", "", buf.Bytes())
+	contents := FormatText(src, 1, pathutil.Ext(abspath) == ".go", r.FormValue("h"), rangeSelection(r.FormValue("s")))
+	servePage(w, title+" "+relpath, "", "", contents)
 }


@ -1119,11 +879,7 @@ func serveFile(w http.ResponseWriter, r *http.Request) {
 		return

 	case ".go":
-		if r.FormValue("g") != "" {
-			serveTextFile(w, r, abspath, relpath)
-			return
-		}
-		serveGoSource(w, r, abspath, relpath)
+		serveTextFile(w, r, abspath, relpath, "Source file")
 		return
 	}

@ -1147,7 +903,7 @@ func serveFile(w http.ResponseWriter, r *http.Request) {
 	}

 	if isTextFile(abspath) {
-		serveTextFile(w, r, abspath, relpath)
+		serveTextFile(w, r, abspath, relpath, "Text file")
 		return
 	}

@ -1195,7 +951,7 @@ type httpHandler struct {
 // computed (PageInfo.PAst), otherwise package documentation (PageInfo.Doc)
 // is extracted from the AST. If there is no corresponding package in the
 // directory, PageInfo.PAst and PageInfo.PDoc are nil. If there are no sub-
-// directories, PageInfo.Dirs is nil. If a directory read error occured,
+// directories, PageInfo.Dirs is nil. If a directory read error occurred,
 // PageInfo.Err is set to the respective error but the error is not logged.
 //
 func (h *httpHandler) getPageInfo(abspath, relpath, pkgname string, mode PageInfoMode) PageInfo {
@ -1387,25 +1143,56 @@ var searchIndex RWValue

 type SearchResult struct {
 	Query string
-	Hit      *LookupResult // identifier occurences of Query
+	Alert string // error or warning message
+
+	// identifier matches
+	Hit *LookupResult // identifier matches of Query
 	Alt *AltWords     // alternative identifiers to look for
-	Found    int           // number of textual occurences found
-	Textual  []Positions   // textual occurences of Query
-	Complete bool          // true if all textual occurences of Query are reported
-	Accurate bool          // true if the index is not older than the indexed files
+
+	// textual matches
+	Found    int         // number of textual occurrences found
+	Textual  []FileLines // textual matches of Query
+	Complete bool        // true if all textual occurrences of Query are reported
 }


 func lookup(query string) (result SearchResult) {
 	result.Query = query
+
+	// determine identifier lookup string and full text regexp
+	lookupStr := ""
+	lookupRx, err := regexp.Compile(query)
+	if err != nil {
+		result.Alert = "Error in query regular expression: " + err.String()
+		return
+	}
+	if prefix, complete := lookupRx.LiteralPrefix(); complete {
+		// otherwise we lookup "" (with no result) because
+		// identifier lookup doesn't support regexp search
+		lookupStr = prefix
+	}
+
 	if index, timestamp := searchIndex.get(); index != nil {
+		// identifier search
 		index := index.(*Index)
-		result.Hit, result.Alt, _ = index.Lookup(query)
+		result.Hit, result.Alt, err = index.Lookup(lookupStr)
+		if err != nil && !*fulltextIndex {
+			// ignore the error if there is full text search
+			// since it accepts that query regular expression
+			result.Alert = "Error in query string: " + err.String()
+			return
+		}
+
+		// textual search
 		// TODO(gri) should max be a flag?
-		const max = 5000 // show at most this many fulltext results
-		result.Found, result.Textual, result.Complete = index.LookupString(query, max)
-		_, ts := fsModified.get()
-		result.Accurate = timestamp >= ts
+		const max = 10000 // show at most this many fulltext results
+		result.Found, result.Textual = index.LookupRegexp(lookupRx, max+1)
+		result.Complete = result.Found <= max
+
+		// is the result accurate?
+		if _, ts := fsModified.get(); timestamp < ts {
+			result.Alert = "Indexing in progress: result may be inaccurate"
+		}
 	}
 	return
 }
@ -1500,10 +1287,15 @@ func indexer() {
 				log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
 					secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
 			}
-			log.Printf("before GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+			log.Printf("before GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
 			runtime.GC()
-			log.Printf("after  GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
+			log.Printf("after  GC: bytes = %d footprint = %d", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
 		}
-		time.Sleep(1 * 60e9) // try once a minute
+		var delay int64 = 60 * 1e9 // by default, try every 60s
+		if *testDir != "" {
+			// in test mode, try once a second for fast startup
+			delay = 1 * 1e9
+		}
+		time.Sleep(delay)
 	}
 }
--- a/src/cmd/godoc/index.go
+++ b/src/cmd/godoc/index.go
@ -7,7 +7,7 @@
 //
 // Algorithm for identifier index:
 // - traverse all .go files of the file tree specified by root
-// - for each word (identifier) encountered, collect all occurences (spots)
+// - for each word (identifier) encountered, collect all occurrences (spots)
 //   into a list; this produces a list of spots for each word
 // - reduce the lists: from a list of spots to a list of FileRuns,
 //   and from a list of FileRuns into a list of PakRuns
@ -48,6 +48,7 @@ import (
 	"io/ioutil"
 	"os"
 	pathutil "path"
+	"regexp"
 	"sort"
 	"strings"
 )
@ -247,7 +248,7 @@ type File struct {
 }


-// A Spot describes a single occurence of a word.
+// A Spot describes a single occurrence of a word.
 type Spot struct {
 	File *File
 	Info SpotInfo
@ -435,7 +436,7 @@ const excludeTestFiles = false

 type IndexResult struct {
 	Decls  RunList // package-level declarations (with snippets)
-	Others RunList // all other occurences
+	Others RunList // all other occurrences
 }


@ -445,7 +446,7 @@ type Statistics struct {
 	Files int // number of indexed source files
 	Lines int // number of lines (all files)
 	Words int // number of different identifiers
-	Spots int // number of identifier occurences
+	Spots int // number of identifier occurrences
 }


@ -709,7 +710,7 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo) {

 type LookupResult struct {
 	Decls  HitList // package-level declarations (with snippets)
-	Others HitList // all other occurences
+	Others HitList // all other occurrences
 }


@ -833,14 +834,14 @@ func isIdentifier(s string) bool {

 // For a given query, which is either a single identifier or a qualified
 // identifier, Lookup returns a LookupResult, and a list of alternative
-// spellings, if any. If the query syntax is wrong, illegal is set.
-func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, illegal bool) {
+// spellings, if any. If the query syntax is wrong, an error is reported.
+func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, err os.Error) {
 	ss := strings.Split(query, ".", -1)

 	// check query syntax
 	for _, s := range ss {
 		if !isIdentifier(s) {
-			illegal = true
+			err = os.NewError("all query parts must be identifiers")
 			return
 		}
 	}
@ -860,7 +861,7 @@ func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, illega
 		}

 	default:
-		illegal = true
+		err = os.NewError("query is not a (qualified) identifier")
 	}

 	return
@ -886,60 +887,91 @@ func (list positionList) Less(i, j int) bool { return list[i].filename < list[j]
 func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }


-// A Positions value specifies a file and line numbers within that file.
-type Positions struct {
+// unique returns the list sorted and with duplicate entries removed
+func unique(list []int) []int {
+	sort.SortInts(list)
+	var last int
+	i := 0
+	for _, x := range list {
+		if i == 0 || x != last {
+			last = x
+			list[i] = x
+			i++
+		}
+	}
+	return list[0:i]
+}
+
+
+// A FileLines value specifies a file and line numbers within that file.
+type FileLines struct {
 	Filename string
 	Lines    []int
 }


-// LookupString returns the number and list of positions where a string
-// s is found in the full text index and whether the result is complete
-// or not. At most n positions (filename and line) are returned (and thus
-// found <= n). The result is incomplete if the index is not present or
-// if there are more than n occurrences of s.
+// LookupRegexp returns the number of matches and the matches where a regular
+// expression r is found in the full text index. At most n matches are
+// returned (thus found <= n).
 //
-func (x *Index) LookupString(s string, n int) (found int, result []Positions, complete bool) {
-	if x.suffixes == nil {
+func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
+	if x.suffixes == nil || n <= 0 {
 		return
 	}
+	// n > 0

-	offsets := x.suffixes.Lookup([]byte(s), n+1)
-	if len(offsets) <= n {
-		complete = true
-	} else {
-		offsets = offsets[0:n]
+	var list positionList
+	// FindAllIndex may return matches that span across file boundaries.
+	// Such matches are unlikely, but after eliminating them we may end up
+	// with fewer than n matches. If we don't have enough at the end, redo
+	// the search with an increased value n1, but only if FindAllIndex
+	// returned all the requested matches in the first place (if it
+	// returned fewer than that there cannot be more).
+	for n1 := n; found < n; n1 += n - found {
+		found = 0
+		matches := x.suffixes.FindAllIndex(r, n1)
+		// compute files, exclude matches that span file boundaries,
+		// and map offsets to file-local offsets
+		list = make(positionList, len(matches))
+		for _, m := range matches {
+			// by construction, an offset corresponds to the Pos value
+			// for the file set - use it to get the file and line
+			p := token.Pos(m[0])
+			if file := x.fset.File(p); file != nil {
+				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
+					// match [m[0], m[1]) is within the file boundaries
+					list[found].filename = file.Name()
+					list[found].line = file.Line(p)
+					found++
 				}
-	found = len(offsets)
-
-	// compute file names and lines and sort the list by filename
-	list := make(positionList, len(offsets))
-	for i, offs := range offsets {
-		// by construction, an offs corresponds to
-		// the Pos value for the file set - use it
-		// to get full Position information
-		pos := x.fset.Position(token.Pos(offs))
-		list[i].filename = pos.Filename
-		list[i].line = pos.Line
 			}
-	sort.Sort(list)
+		}
+		if found == n || len(matches) < n1 {
+			// found all matches or there's no chance to find more
+			break
+		}
+	}
+	list = list[0:found]
+	sort.Sort(list) // sort by filename

-	// compact positions with equal file names
+	// collect matches belonging to the same file
 	var last string
 	var lines []int
-	for _, pos := range list {
-		if pos.filename != last {
+	addLines := func() {
 		if len(lines) > 0 {
-				result = append(result, Positions{last, lines})
+			// remove duplicate lines
+			result = append(result, FileLines{last, unique(lines)})
 			lines = nil
 		}
-			last = pos.filename
 	}
-		lines = append(lines, pos.line)
+	for _, m := range list {
+		if m.filename != last {
+			addLines()
+			last = m.filename
 		}
-	if len(lines) > 0 {
-		result = append(result, Positions{last, lines})
+		lines = append(lines, m.line)
 	}
+	addLines()

 	return
 }
--- a/src/cmd/godoc/main.go
+++ b/src/cmd/godoc/main.go
@ -74,7 +74,7 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
 func exec(rw http.ResponseWriter, args []string) (status int) {
 	r, w, err := os.Pipe()
 	if err != nil {
-		log.Printf("os.Pipe(): %v\n", err)
+		log.Printf("os.Pipe(): %v", err)
 		return 2
 	}

@ -87,7 +87,7 @@ func exec(rw http.ResponseWriter, args []string) (status int) {
 	defer r.Close()
 	w.Close()
 	if err != nil {
-		log.Printf("os.ForkExec(%q): %v\n", bin, err)
+		log.Printf("os.ForkExec(%q): %v", bin, err)
 		return 2
 	}

@ -96,7 +96,7 @@ func exec(rw http.ResponseWriter, args []string) (status int) {
 	wait, err := os.Wait(pid, 0)
 	if err != nil {
 		os.Stderr.Write(buf.Bytes())
-		log.Printf("os.Wait(%d, 0): %v\n", pid, err)
+		log.Printf("os.Wait(%d, 0): %v", pid, err)
 		return 2
 	}
 	status = wait.ExitStatus()
@ -127,8 +127,7 @@ func dosync(w http.ResponseWriter, r *http.Request) {
 		// TODO(gri): The directory tree may be temporarily out-of-sync.
 		//            Consider keeping separate time stamps so the web-
 		//            page can indicate this discrepancy.
-		fsTree.set(newDirectory(*goroot, nil, -1))
-		invalidateIndex()
+		initFSTree()
 		fallthrough
 	case 1:
 		// sync failed because no files changed;
@ -238,11 +237,14 @@ func main() {
 		// HTTP server mode.
 		var handler http.Handler = http.DefaultServeMux
 		if *verbose {
-			log.Printf("Go Documentation Server\n")
-			log.Printf("version = %s\n", runtime.Version())
-			log.Printf("address = %s\n", *httpAddr)
-			log.Printf("goroot = %s\n", *goroot)
-			log.Printf("tabwidth = %d\n", *tabwidth)
+			log.Printf("Go Documentation Server")
+			log.Printf("version = %s", runtime.Version())
+			log.Printf("address = %s", *httpAddr)
+			log.Printf("goroot = %s", *goroot)
+			log.Printf("tabwidth = %d", *tabwidth)
+			if *fulltextIndex {
+				log.Print("full text index enabled")
+			}
 			if !fsMap.IsEmpty() {
 				log.Print("user-defined mapping:")
 				fsMap.Fprint(os.Stderr)
@ -257,10 +259,7 @@ func main() {

 		// Initialize default directory tree with corresponding timestamp.
 		// (Do it in a goroutine so that launch is quick.)
-		go func() {
-			fsTree.set(newDirectory(*goroot, nil, -1))
-			invalidateIndex()
-		}()
+		go initFSTree()

 		// Initialize directory trees for user-defined file systems (-path flag).
 		initDirTrees()
--- a/src/cmd/godoc/snippet.go
+++ b/src/cmd/godoc/snippet.go
@ -13,41 +13,21 @@ import (
 	"bytes"
 	"go/ast"
 	"go/token"
-	"go/printer"
 	"fmt"
 )


 type Snippet struct {
 	Line int
-	Text string
-}
-
-
-type snippetStyler struct {
-	Styler               // defined in godoc.go
-	highlight *ast.Ident // identifier to highlight
-}
-
-
-func (s *snippetStyler) LineTag(line int) (text []uint8, tag printer.HTMLTag) {
-	return // no LineTag for snippets
-}
-
-
-func (s *snippetStyler) Ident(id *ast.Ident) (text []byte, tag printer.HTMLTag) {
-	text = []byte(id.Name)
-	if s.highlight == id {
-		tag = printer.HTMLTag{"<span class=highlight>", "</span>"}
-	}
-	return
+	Text []byte
 }


 func newSnippet(fset *token.FileSet, decl ast.Decl, id *ast.Ident) *Snippet {
+	// TODO instead of pretty-printing the node, should use the original source
 	var buf bytes.Buffer
-	writeNode(&buf, fset, decl, true, &snippetStyler{highlight: id})
-	return &Snippet{fset.Position(id.Pos()).Line, buf.String()}
+	writeNode(&buf, fset, decl, true)
+	return &Snippet{fset.Position(id.Pos()).Line, FormatText(buf.Bytes(), -1, true, id.Name, nil)}
 }


@ -113,9 +93,11 @@ func NewSnippet(fset *token.FileSet, decl ast.Decl, id *ast.Ident) (s *Snippet)

 	// handle failure gracefully
 	if s == nil {
+		var buf bytes.Buffer
+		fmt.Fprintf(&buf, `<span class="alert">could not generate a snippet for <span class="highlight">%s</span></span>`, id.Name)
 		s = &Snippet{
 			fset.Position(id.Pos()).Line,
-			fmt.Sprintf(`could not generate a snippet for <span class="highlight">%s</span>`, id.Name),
+			buf.Bytes(),
 		}
 	}
 	return