go/doc/comment: add paragraph parsing and test framework

[This CL is part of a sequence implementing the proposal #51082. The design doc is at https://go.dev/s/godocfmt-design.] Implement parsing of plain text doc paragraphs, as well as a txtar-based test framework. Subsequent CLs will implement the rest of the possible markup. For #51082. Change-Id: I449aac69b44089f241fde8050ac134e17cb25116 Reviewed-on: https://go-review.googlesource.com/c/go/+/397278 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Jonathan Amsterdam <jba@google.com> Reviewed-by: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org>
2024-11-26 07:47:57 -07:00 · 2022-04-03 08:15:40 -04:00 · 2022-04-03 08:15:40 -04:00 · 98b17892a0
commit 98b17892a0
parent 7575811c2b
4 changed files with 356 additions and 0 deletions
--- a/api/next/51082.txt
+++ b/api/next/51082.txt
@ -1,5 +1,6 @@
 pkg go/doc/comment, method (*List) BlankBefore() bool #51082
 pkg go/doc/comment, method (*List) BlankBetween() bool #51082
 pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082
 pkg go/doc/comment, type Block interface, unexported methods #51082
 pkg go/doc/comment, type Code struct #51082
 pkg go/doc/comment, type Code struct, Text string #51082
@ -31,5 +32,9 @@ pkg go/doc/comment, type ListItem struct, Content []Block #51082
 pkg go/doc/comment, type ListItem struct, Number string #51082
 pkg go/doc/comment, type Paragraph struct #51082
 pkg go/doc/comment, type Paragraph struct, Text []Text #51082
 pkg go/doc/comment, type Parser struct #51082
 pkg go/doc/comment, type Parser struct, LookupPackage func(string) (string, bool) #51082
 pkg go/doc/comment, type Parser struct, LookupSym func(string, string) bool #51082
 pkg go/doc/comment, type Parser struct, Words map[string]string #51082
 pkg go/doc/comment, type Plain string #51082
 pkg go/doc/comment, type Text interface, unexported methods #51082
--- a/src/go/doc/comment/parse.go
+++ b/src/go/doc/comment/parse.go
@ -174,6 +174,152 @@ type DocLink struct {
 func (*DocLink) text() {}
 // A Parser is a doc comment parser.
 // The fields in the struct can be filled in before calling Parse
 // in order to customize the details of the parsing process.
 // All fields are optional: a zero Parser can be used to call Parse.
 type Parser struct {
 	// Words is a map of Go identifier words that
 	// should be italicized and potentially linked.
 	// If Words[w] is the empty string, then the word w
 	// is only italicized. Otherwise it is linked, using
 	// Words[w] as the link target.
 	// Words corresponds to the [go/doc.ToHTML] words parameter.
 	Words map[string]string
 	// LookupPackage resolves a package name to an import path.
 	//
 	// If LookupPackage(name) returns ok == true, then [name]
 	// (or [name.Sym] or [name.Sym.Method])
 	// is considered a documentation link to the package docs
 	// for the returned importPath.
 	// It is valid to return "", true, in which case name is considered
 	// to refer to the current package.
 	//
 	// If LookupPackage(name) returns ok == false,
 	// then [name] (or [name.Sym] or [name.Sym.Method])
 	// will not be considered a documentation link,
 	// except in the case where name is the full (but single-element) import path
 	// of a package in the standard library, such as in [math] or [io.Reader].
 	// LookupPackage is still called for such names,
 	// in order to permit references to imports of other packages
 	// with the same package names.
 	//
 	// Setting LookupPackage to nil is equivalent to setting it to
 	// a function that always returns "", false.
 	LookupPackage func(name string) (importPath string, ok bool)
 	// LookupSym reports whether a symbol name or method name
 	// exists in the current package.
 	//
 	// If LookupSym("", "Name") returns true, then [Name]
 	// is considered a documentation link for a const, func, type, or var.
 	//
 	// Similarly, if LookupSym("Recv", "Name") returns true,
 	// then [Recv.Name] is considered a documentation link for
 	// type Recv's method Name.
 	//
 	// Setting LookupSym to nil is equivalent to setting it to a function
 	// that always returns false; Parse substitutes such a function itself,
 	// so the parsing code never calls a nil LookupSym.
 	LookupSym func(recv, name string) (ok bool)
 }
 // parseDoc is parsing state for a single doc comment.
 // Parse allocates a fresh parseDoc on every call and returns
 // the embedded Doc once parsing is finished.
 type parseDoc struct {
 	// Parser is the configuration the parse was started with.
 	*Parser
 	// Doc is the document being built; Parse appends blocks to its Content.
 	*Doc
 	// links is created empty by Parse and is intended to hold the link
 	// definitions collected during the first pass.
 	// NOTE(review): nothing visible here adds entries yet - confirm
 	// against the code that handles link definitions.
 	links     map[string]*LinkDef
 	// lines holds the comment text split into lines, after unindent.
 	lines     []string
 	// lookupSym is the Parser's LookupSym, or a function that always
 	// returns false when LookupSym is nil, so it is always safe to call.
 	lookupSym func(recv, name string) bool
 }
 // Parse parses the doc comment text and returns the *Doc form.
 // Comment markers (/* // and */) in the text must have already been removed.
 //
 // The text is split into lines and unindented; each run of consecutive
 // non-empty lines then becomes one block in the returned Doc's Content,
 // and empty lines only act as separators.
 func (p *Parser) Parse(text string) *Doc {
 	// Resolve the symbol lookup once so later code can call it unconditionally.
 	lookup := p.LookupSym
 	if lookup == nil {
 		lookup = func(recv, name string) bool { return false }
 	}
 	lines := unindent(strings.Split(text, "\n"))
 	d := &parseDoc{
 		Parser:    p,
 		Doc:       new(Doc),
 		links:     make(map[string]*LinkDef),
 		lines:     lines,
 		lookupSym: lookup,
 	}
 	// First pass: break into block structure and collect known links.
 	// The text is all recorded as Plain for now.
 	// TODO: Break into actual block structure.
 	for len(lines) > 0 {
 		if lines[0] == "" {
 			// Blank separator line: skip it.
 			lines = lines[1:]
 			continue
 		}
 		var blk Block
 		blk, lines = d.paragraph(lines)
 		d.Content = append(d.Content, blk)
 	}
 	// Second pass: interpret all the Plain text now that we know the links.
 	// TODO: Actually interpret the plain text.
 	return d.Doc
 }
 // unindent removes any common space/tab prefix
 // from each line in lines, returning a copy of lines in which
 // those prefixes have been trimmed from each line.
 func unindent(lines []string) []string {
 	// Trim leading and trailing blank lines.
 	for len(lines) > 0 && isBlank(lines[0]) {
 		lines = lines[1:]
 	}
 	for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
 		lines = lines[:len(lines)-1]
 	}
 	if len(lines) == 0 {
 		return nil
 	}
 	// Compute and remove common indentation.
 	prefix := leadingSpace(lines[0])
 	for _, line := range lines[1:] {
 		if !isBlank(line) {
 			prefix = commonPrefix(prefix, leadingSpace(line))
 		}
 	}
 	out := make([]string, len(lines))
 	for i, line := range lines {
 		line = strings.TrimPrefix(line, prefix)
 		if strings.TrimSpace(line) == "" {
 			line = ""
 		}
 		out[i] = line
 	}
 	for len(out) > 0 && out[0] == "" {
 		out = out[1:]
 	}
 	for len(out) > 0 && out[len(out)-1] == "" {
 		out = out[:len(out)-1]
 	}
 	return out
 }
 // isBlank reports whether s is a blank line, meaning that s is
 // either empty or consists of exactly one newline character.
 // Lines holding other white space are not considered blank.
 func isBlank(s string) bool {
 	switch s {
 	case "", "\n":
 		return true
 	}
 	return false
 }
 // commonPrefix returns the longest common prefix of a and b.
 // The comparison is byte-wise and the result is a slice of a.
 func commonPrefix(a, b string) string {
 	// Only the first limit bytes can possibly match.
 	limit := len(a)
 	if len(b) < limit {
 		limit = len(b)
 	}
 	for k := 0; k < limit; k++ {
 		if a[k] != b[k] {
 			return a[:k]
 		}
 	}
 	return a[:limit]
 }
 // leadingSpace returns the longest prefix of s consisting of spaces and tabs.
 func leadingSpace(s string) string {
 	i := 0
@ -234,6 +380,27 @@ func isOldHeading(line string, all []string, off int) bool {
 	return true
 }
 // paragraph returns a paragraph block built from the text at the
 // start of lines, along with the remainder of the lines.
 // The paragraph consists of every line up to, but not including,
 // the first empty line, joined with newlines into a single Plain text.
 // If lines is empty or begins with an empty line,
 // paragraph returns a nil Block and lines unchanged.
 func (d *parseDoc) paragraph(lines []string) (b Block, rest []string) {
 	// TODO: Paragraph should be interrupted by any indented line,
 	// which is either a list or a code block,
 	// and of course by a blank line.
 	// It should not be interrupted by a # line - headings must stand alone.
 	// end is the index of the first empty line, or len(lines) if there is none.
 	end := len(lines)
 	for k, line := range lines {
 		if line == "" {
 			end = k
 			break
 		}
 	}
 	if end == 0 {
 		return nil, lines
 	}
 	text := strings.Join(lines[:end], "\n")
 	return &Paragraph{Text: []Text{Plain(text)}}, lines[end:]
 }
 // autoURL checks whether s begins with a URL that should be hyperlinked.
 // If so, it returns the URL, which is a prefix of s, and ok == true.
 // Otherwise it returns "", false.
--- a/src/go/doc/comment/testdata/hello.txt
+++ b/src/go/doc/comment/testdata/hello.txt
@ -0,0 +1,16 @@
 -- input --
 Hello,
 world
 This is
 a test.
 -- dump --
 Doc
 	Paragraph
 		Plain
 			"Hello,\n"
 			"world"
 	Paragraph
 		Plain
 			"This is\n"
 			"a test."
--- a/src/go/doc/comment/testdata_test.go
+++ b/src/go/doc/comment/testdata_test.go
@ -0,0 +1,168 @@
 // Copyright 2022 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 package comment
 import (
 	"bytes"
 	"fmt"
 	"internal/diff"
 	"internal/txtar"
 	"path/filepath"
 	"strings"
 	"testing"
 )
 // TestTestdata runs every txtar archive matching testdata/*.txt as a subtest.
 // The first file in each archive must be named "input" and holds the doc
 // comment text to parse; every following file names an output form
 // (currently only "dump") and holds the expected rendering of the parse.
 func TestTestdata(t *testing.T) {
 	files, _ := filepath.Glob("testdata/*.txt")
 	if len(files) == 0 {
 		t.Fatalf("no testdata")
 	}
 	// stripDollars removes a trailing $ from each line.
 	// The marker makes lines with trailing spaces easy to see and
 	// keeps editors that strip trailing spaces from damaging the test data.
 	stripDollars := func(b []byte) []byte {
 		return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n"))
 	}
 	var p Parser
 	for _, file := range files {
 		t.Run(filepath.Base(file), func(t *testing.T) {
 			a, err := txtar.ParseFile(file)
 			if err != nil {
 				t.Fatal(err)
 			}
 			if len(a.Files) == 0 || a.Files[0].Name != "input" {
 				t.Fatalf("first file is not %q", "input")
 			}
 			input, outputs := a.Files[0], a.Files[1:]
 			d := p.Parse(string(stripDollars(input.Data)))
 			for _, f := range outputs {
 				want := stripDollars(f.Data)
 				// Collapse a run of trailing newlines down to a single one.
 				for bytes.HasSuffix(want, []byte("\n\n")) {
 					want = want[:len(want)-1]
 				}
 				var out []byte
 				switch f.Name {
 				case "dump":
 					out = dump(d)
 				default:
 					t.Fatalf("unknown output file %q", f.Name)
 				}
 				if !bytes.Equal(out, want) {
 					t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out))
 				}
 			}
 		})
 	}
 }
 // dump returns the indented textual dump of d produced by dumpTo,
 // starting at indentation level zero.
 func dump(d *Doc) []byte {
 	buf := new(bytes.Buffer)
 	dumpTo(buf, 0, d)
 	return buf.Bytes()
 }
 // dumpTo writes a textual dump of x to out.
 // A node writes its own label on the current line and dumps its
 // children one level deeper; each element of a slice is placed on a
 // new line (see dumpNL) indented by indent tabs. Multi-line strings are
 // written one quoted line per output line. A value of an unrecognized
 // type is written as "?" followed by its type.
 func dumpTo(out *bytes.Buffer, indent int, x any) {
 	switch v := x.(type) {
 	case *Doc:
 		out.WriteString("Doc")
 		dumpTo(out, indent+1, v.Content)
 		if len(v.Links) > 0 {
 			dumpNL(out, indent+1)
 			out.WriteString("Links")
 			dumpTo(out, indent+2, v.Links)
 		}
 		out.WriteByte('\n')
 	case []*LinkDef:
 		for _, def := range v {
 			dumpNL(out, indent)
 			dumpTo(out, indent, def)
 		}
 	case *LinkDef:
 		fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", v.Used, v.Text, v.URL)
 	case []Block:
 		for _, blk := range v {
 			dumpNL(out, indent)
 			dumpTo(out, indent, blk)
 		}
 	case *Heading:
 		out.WriteString("Heading")
 		dumpTo(out, indent+1, v.Text)
 	case *List:
 		fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", v.ForceBlankBefore, v.ForceBlankBetween)
 		dumpTo(out, indent+1, v.Items)
 	case []*ListItem:
 		for _, item := range v {
 			dumpNL(out, indent)
 			dumpTo(out, indent, item)
 		}
 	case *ListItem:
 		fmt.Fprintf(out, "Item Number=%q", v.Number)
 		dumpTo(out, indent+1, v.Content)
 	case *Paragraph:
 		out.WriteString("Paragraph")
 		dumpTo(out, indent+1, v.Text)
 	case *Code:
 		out.WriteString("Code")
 		dumpTo(out, indent+1, v.Text)
 	case []Text:
 		for _, elem := range v {
 			dumpNL(out, indent)
 			dumpTo(out, indent, elem)
 		}
 	case Plain:
 		// Single-line text stays on the label's line;
 		// multi-line text is dumped line by line underneath.
 		if s := string(v); strings.Contains(s, "\n") {
 			out.WriteString("Plain")
 			dumpTo(out, indent+1, s)
 		} else {
 			fmt.Fprintf(out, "Plain %q", s)
 		}
 	case Italic:
 		if s := string(v); strings.Contains(s, "\n") {
 			out.WriteString("Italic")
 			dumpTo(out, indent+1, s)
 		} else {
 			fmt.Fprintf(out, "Italic %q", s)
 		}
 	case string:
 		// SplitAfter keeps each line's newline, so it shows up in the quoted form.
 		for _, line := range strings.SplitAfter(v, "\n") {
 			if line == "" {
 				continue
 			}
 			dumpNL(out, indent)
 			fmt.Fprintf(out, "%q", line)
 		}
 	case *Link:
 		fmt.Fprintf(out, "Link %q", v.URL)
 		dumpTo(out, indent+1, v.Text)
 	case *DocLink:
 		fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", v.ImportPath, v.Recv, v.Name)
 		dumpTo(out, indent+1, v.Text)
 	default:
 		fmt.Fprintf(out, "?%T", v)
 	}
 }
 // dumpNL starts a new line in out and indents it with n tabs.
 // If n is zero or negative, only the newline is written.
 func dumpNL(out *bytes.Buffer, n int) {
 	out.WriteByte('\n')
 	for ; n > 0; n-- {
 		out.WriteByte('\t')
 	}
 }