
go/doc/comment: add paragraph parsing and test framework

[This CL is part of a sequence implementing the proposal #51082.
The design doc is at https://go.dev/s/godocfmt-design.]

Implement parsing of plain text doc paragraphs,
as well as a txtar-based test framework. Subsequent CLs will
implement the rest of the possible markup.

For #51082.

Change-Id: I449aac69b44089f241fde8050ac134e17cb25116
Reviewed-on: https://go-review.googlesource.com/c/go/+/397278
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Russ Cox 2022-04-03 08:15:40 -04:00
parent 7575811c2b
commit 98b17892a0
4 changed files with 356 additions and 0 deletions


@@ -1,5 +1,6 @@
pkg go/doc/comment, method (*List) BlankBefore() bool #51082
pkg go/doc/comment, method (*List) BlankBetween() bool #51082
pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082
pkg go/doc/comment, type Block interface, unexported methods #51082
pkg go/doc/comment, type Code struct #51082
pkg go/doc/comment, type Code struct, Text string #51082
@@ -31,5 +32,9 @@ pkg go/doc/comment, type ListItem struct, Content []Block #51082
pkg go/doc/comment, type ListItem struct, Number string #51082
pkg go/doc/comment, type Paragraph struct #51082
pkg go/doc/comment, type Paragraph struct, Text []Text #51082
pkg go/doc/comment, type Parser struct #51082
pkg go/doc/comment, type Parser struct, LookupPackage func(string) (string, bool) #51082
pkg go/doc/comment, type Parser struct, LookupSym func(string, string) bool #51082
pkg go/doc/comment, type Parser struct, Words map[string]string #51082
pkg go/doc/comment, type Plain string #51082
pkg go/doc/comment, type Text interface, unexported methods #51082
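
Taken together, the API above is a small configuration surface on Parser plus the single Parse entry point. A minimal sketch of how a documentation tool might use it; the lookup functions and inputs below are illustrative placeholders, and the lookups are merely declared here, since this CL's first pass does not yet interpret links:

package main

import (
	"fmt"
	"go/doc/comment"
)

func main() {
	// All Parser fields are optional: the zero Parser still splits
	// the comment into paragraphs.
	p := &comment.Parser{
		// Hypothetical lookups standing in for the package and symbol
		// tables a real documentation tool would supply.
		LookupPackage: func(name string) (importPath string, ok bool) {
			if name == "bytes" {
				return "bytes", true
			}
			return "", false
		},
		LookupSym: func(recv, name string) (ok bool) {
			return recv == "" && name == "Parse"
		},
	}

	d := p.Parse("Parse turns a doc comment into a Doc.\n\nIt never returns nil.")
	fmt.Println(len(d.Content)) // 2: one Paragraph per blank-line-separated chunk
}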


@@ -174,6 +174,152 @@ type DocLink struct {
func (*DocLink) text() {}
// A Parser is a doc comment parser.
// The fields in the struct can be filled in before calling Parse
// in order to customize the details of the parsing process.
type Parser struct {
// Words is a map of Go identifier words that
// should be italicized and potentially linked.
// If Words[w] is the empty string, then the word w
// is only italicized. Otherwise it is linked, using
// Words[w] as the link target.
// Words corresponds to the [go/doc.ToHTML] words parameter.
Words map[string]string
// LookupPackage resolves a package name to an import path.
//
// If LookupPackage(name) returns ok == true, then [name]
// (or [name.Sym] or [name.Sym.Method])
// is considered a documentation link to importPath's package docs.
// It is valid to return "", true, in which case name is considered
// to refer to the current package.
//
// If LookupPackage(name) returns ok == false,
// then [name] (or [name.Sym] or [name.Sym.Method])
// will not be considered a documentation link,
// except in the case where name is the full (but single-element) import path
// of a package in the standard library, such as in [math] or [io.Reader].
// LookupPackage is still called for such names,
// in order to permit references to imports of other packages
// with the same package names.
//
// Setting LookupPackage to nil is equivalent to setting it to
// a function that always returns "", false.
LookupPackage func(name string) (importPath string, ok bool)
// LookupSym reports whether a symbol name or method name
// exists in the current package.
//
// If LookupSym("", "Name") returns true, then [Name]
// is considered a documentation link for a const, func, type, or var.
//
// Similarly, if LookupSym("Recv", "Name") returns true,
// then [Recv.Name] is considered a documentation link for
// type Recv's method Name.
//
// Setting LookupSym to nil is equivalent to setting it to a function
// that always returns false.
LookupSym func(recv, name string) (ok bool)
}
// parseDoc is parsing state for a single doc comment.
type parseDoc struct {
*Parser
*Doc
links map[string]*LinkDef
lines []string
lookupSym func(recv, name string) bool
}
// Parse parses the doc comment text and returns the *Doc form.
// Comment markers (/* // and */) in the text must have already been removed.
func (p *Parser) Parse(text string) *Doc {
lines := unindent(strings.Split(text, "\n"))
d := &parseDoc{
Parser: p,
Doc: new(Doc),
links: make(map[string]*LinkDef),
lines: lines,
lookupSym: func(recv, name string) bool { return false },
}
if p.LookupSym != nil {
d.lookupSym = p.LookupSym
}
// First pass: break into block structure and collect known links.
// The text is all recorded as Plain for now.
// TODO: Break into actual block structure.
for len(lines) > 0 {
line := lines[0]
if line != "" {
var b Block
b, lines = d.paragraph(lines)
d.Content = append(d.Content, b)
} else {
lines = lines[1:]
}
}
// Second pass: interpret all the Plain text now that we know the links.
// TODO: Actually interpret the plain text.
return d.Doc
}
// unindent removes any common space/tab prefix
// from each line in lines, returning a copy of lines in which
// those prefixes have been trimmed from each line.
func unindent(lines []string) []string {
// Trim leading and trailing blank lines.
for len(lines) > 0 && isBlank(lines[0]) {
lines = lines[1:]
}
for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
lines = lines[:len(lines)-1]
}
if len(lines) == 0 {
return nil
}
// Compute and remove common indentation.
prefix := leadingSpace(lines[0])
for _, line := range lines[1:] {
if !isBlank(line) {
prefix = commonPrefix(prefix, leadingSpace(line))
}
}
out := make([]string, len(lines))
for i, line := range lines {
line = strings.TrimPrefix(line, prefix)
if strings.TrimSpace(line) == "" {
line = ""
}
out[i] = line
}
for len(out) > 0 && out[0] == "" {
out = out[1:]
}
for len(out) > 0 && out[len(out)-1] == "" {
out = out[:len(out)-1]
}
return out
}
// isBlank reports whether s is a blank line.
func isBlank(s string) bool {
return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
}
// commonPrefix returns the longest common prefix of a and b.
func commonPrefix(a, b string) string {
i := 0
for i < len(a) && i < len(b) && a[i] == b[i] {
i++
}
return a[0:i]
}
// leadingSpace returns the longest prefix of s consisting of spaces and tabs.
func leadingSpace(s string) string {
i := 0
@@ -234,6 +380,27 @@ func isOldHeading(line string, all []string, off int) bool {
return true
}
// paragraph returns a paragraph block built from the
// unindented text at the start of lines, along with the remainder of the lines.
// If there is no unindented text at the start of lines,
// then paragraph returns a nil Block.
func (d *parseDoc) paragraph(lines []string) (b Block, rest []string) {
// TODO: Paragraph should be interrupted by any indented line,
// which is either a list or a code block,
// and of course by a blank line.
// It should not be interrupted by a # line - headings must stand alone.
i := 0
for i < len(lines) && lines[i] != "" {
i++
}
lines, rest = lines[:i], lines[i:]
if len(lines) == 0 {
return nil, rest
}
return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}, rest
}
// autoURL checks whether s begins with a URL that should be hyperlinked.
// If so, it returns the URL, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
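
Because every Text node produced at this stage is a Plain string, recovering the raw text of each parsed paragraph is a short type switch; the dumpTo helper in parse_test.go further below does essentially the same walk, adding a tab per nesting level. A rough sketch, where flattenParagraphs is an illustrative helper of ours rather than package API:

package main

import (
	"fmt"
	"go/doc/comment"
	"strings"
)

// flattenParagraphs returns the plain text of each paragraph in d.
// Only *Paragraph blocks and Plain text are handled, because that is
// all this CL's parser produces so far.
func flattenParagraphs(d *comment.Doc) []string {
	var paras []string
	for _, blk := range d.Content {
		p, ok := blk.(*comment.Paragraph)
		if !ok {
			continue
		}
		var sb strings.Builder
		for _, t := range p.Text {
			if plain, ok := t.(comment.Plain); ok {
				sb.WriteString(string(plain))
			}
		}
		paras = append(paras, sb.String())
	}
	return paras
}

func main() {
	var p comment.Parser
	d := p.Parse("Hello,\nworld\n\nThis is\na test.")
	fmt.Printf("%q\n", flattenParagraphs(d))
	// Prints: ["Hello,\nworld" "This is\na test."]
}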

src/go/doc/comment/testdata/hello.txt

@@ -0,0 +1,16 @@
-- input --
Hello,
world

This is
a test.
-- dump --
Doc
	Paragraph
		Plain
			"Hello,\n"
			"world"
	Paragraph
		Plain
			"This is\n"
			"a test."


@@ -0,0 +1,168 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package comment
import (
"bytes"
"fmt"
"internal/diff"
"internal/txtar"
"path/filepath"
"strings"
"testing"
)
func TestTestdata(t *testing.T) {
files, _ := filepath.Glob("testdata/*.txt")
if len(files) == 0 {
t.Fatalf("no testdata")
}
var p Parser
stripDollars := func(b []byte) []byte {
// Remove trailing $ on lines.
// They make it easier to see lines with trailing spaces,
// as well as turning them into lines without trailing spaces,
// in case editors remove trailing spaces.
return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n"))
}
for _, file := range files {
t.Run(filepath.Base(file), func(t *testing.T) {
a, err := txtar.ParseFile(file)
if err != nil {
t.Fatal(err)
}
if len(a.Files) < 1 || a.Files[0].Name != "input" {
t.Fatalf("first file is not %q", "input")
}
d := p.Parse(string(stripDollars(a.Files[0].Data)))
for _, f := range a.Files[1:] {
want := stripDollars(f.Data)
for len(want) >= 2 && want[len(want)-1] == '\n' && want[len(want)-2] == '\n' {
want = want[:len(want)-1]
}
var out []byte
switch f.Name {
default:
t.Fatalf("unknown output file %q", f.Name)
case "dump":
out = dump(d)
}
if string(out) != string(want) {
t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out))
}
}
})
}
}
func dump(d *Doc) []byte {
var out bytes.Buffer
dumpTo(&out, 0, d)
return out.Bytes()
}
func dumpTo(out *bytes.Buffer, indent int, x any) {
switch x := x.(type) {
default:
fmt.Fprintf(out, "?%T", x)
case *Doc:
fmt.Fprintf(out, "Doc")
dumpTo(out, indent+1, x.Content)
if len(x.Links) > 0 {
dumpNL(out, indent+1)
fmt.Fprintf(out, "Links")
dumpTo(out, indent+2, x.Links)
}
fmt.Fprintf(out, "\n")
case []*LinkDef:
for _, def := range x {
dumpNL(out, indent)
dumpTo(out, indent, def)
}
case *LinkDef:
fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", x.Used, x.Text, x.URL)
case []Block:
for _, blk := range x {
dumpNL(out, indent)
dumpTo(out, indent, blk)
}
case *Heading:
fmt.Fprintf(out, "Heading")
dumpTo(out, indent+1, x.Text)
case *List:
fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", x.ForceBlankBefore, x.ForceBlankBetween)
dumpTo(out, indent+1, x.Items)
case []*ListItem:
for _, item := range x {
dumpNL(out, indent)
dumpTo(out, indent, item)
}
case *ListItem:
fmt.Fprintf(out, "Item Number=%q", x.Number)
dumpTo(out, indent+1, x.Content)
case *Paragraph:
fmt.Fprintf(out, "Paragraph")
dumpTo(out, indent+1, x.Text)
case *Code:
fmt.Fprintf(out, "Code")
dumpTo(out, indent+1, x.Text)
case []Text:
for _, t := range x {
dumpNL(out, indent)
dumpTo(out, indent, t)
}
case Plain:
if !strings.Contains(string(x), "\n") {
fmt.Fprintf(out, "Plain %q", string(x))
} else {
fmt.Fprintf(out, "Plain")
dumpTo(out, indent+1, string(x))
}
case Italic:
if !strings.Contains(string(x), "\n") {
fmt.Fprintf(out, "Italic %q", string(x))
} else {
fmt.Fprintf(out, "Italic")
dumpTo(out, indent+1, string(x))
}
case string:
for _, line := range strings.SplitAfter(x, "\n") {
if line != "" {
dumpNL(out, indent)
fmt.Fprintf(out, "%q", line)
}
}
case *Link:
fmt.Fprintf(out, "Link %q", x.URL)
dumpTo(out, indent+1, x.Text)
case *DocLink:
fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", x.ImportPath, x.Recv, x.Name)
dumpTo(out, indent+1, x.Text)
}
}
func dumpNL(out *bytes.Buffer, n int) {
out.WriteByte('\n')
for i := 0; i < n; i++ {
out.WriteByte('\t')
}
}
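
With this harness in place, new coverage is just another txtar file dropped into testdata; the filepath.Glob in TestTestdata picks it up automatically, and the indentation in a dump section is literal tabs, one per nesting level, as written by dumpNL. A hypothetical extra case (file name and contents are illustrative, not part of this commit) exercising the common-prefix trimming in unindent might look like:

-- input --
   Indented input
   is unindented.

   Second paragraph.
-- dump --
Doc
	Paragraph
		Plain
			"Indented input\n"
			"is unindented."
	Paragraph
		Plain "Second paragraph."

Note that a single-line Plain is printed inline with its quoted text, while a multi-line Plain nests its lines one level deeper, matching the two branches of the Plain case in dumpTo.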