1
0
mirror of https://github.com/golang/go synced 2024-09-24 01:20:13 -06:00

go/doc/comment: parse and print doc links

[This CL is part of a sequence implementing the proposal #51082.
The design doc is at https://go.dev/s/godocfmt-design.]

Implement parsing and printing of documentation links,
like [math.Sqrt] or [*golang.org/x/text/runes.Set].

For #51082.

Change-Id: I1cc73afbac1c6568773f921ce4b73e52f17acef6
Reviewed-on: https://go-review.googlesource.com/c/go/+/397281
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Russ Cox 2022-04-05 14:12:41 -04:00
parent ae3d890202
commit 910a33a0ee
18 changed files with 538 additions and 6 deletions

View File

@ -1,3 +1,5 @@
pkg go/doc/comment, func DefaultLookupPackage(string) (string, bool) #51082
pkg go/doc/comment, method (*DocLink) DefaultURL(string) string #51082
pkg go/doc/comment, method (*List) BlankBefore() bool #51082
pkg go/doc/comment, method (*List) BlankBetween() bool #51082
pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082

View File

@ -54,6 +54,17 @@ func (p *htmlPrinter) text(out *bytes.Buffer, x []Text) {
out.WriteString(`">`)
p.text(out, t.Text)
out.WriteString("</a>")
case *DocLink:
url := p.docLinkURL(t)
if url != "" {
out.WriteString(`<a href="`)
p.escape(out, url)
out.WriteString(`">`)
}
p.text(out, t.Text)
if url != "" {
out.WriteString("</a>")
}
}
}
}

View File

@ -7,6 +7,7 @@ package comment
import (
"bytes"
"fmt"
"strings"
)
// An mdPrinter holds the state needed for printing a Doc as Markdown.
@ -87,6 +88,19 @@ func (p *mdPrinter) rawText(out *bytes.Buffer, x []Text) {
out.WriteString("](")
out.WriteString(t.URL)
out.WriteString(")")
case *DocLink:
url := p.docLinkURL(t)
if url != "" {
out.WriteString("[")
}
p.rawText(out, t.Text)
if url != "" {
out.WriteString("](")
url = strings.ReplaceAll(url, "(", "%28")
url = strings.ReplaceAll(url, ")", "%29")
out.WriteString(url)
out.WriteString(")")
}
}
}
}

24
src/go/doc/comment/mkstd.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# Copyright 2022 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# This could be a good use for embed but go/doc/comment
# is built into the bootstrap go command, so it can't use embed.
# Also not using embed lets us emit a string array directly
# and avoid init-time work.
#
# The subshell below writes the complete Go source for std.go to its
# standard output: a fixed header, one quoted entry per package, and
# the closing brace of the slice literal.
(
echo "// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by 'go generate' DO NOT EDIT.
//go:generate ./mkstd.sh
package comment
var stdPkgs = []string{"
# List the standard library, drop every import path containing a slash,
# sort what remains, and turn each line into a quoted, comma-terminated
# slice element.
go list std | grep -v / | sort | sed 's/.*/"&",/'
echo "}"
# gofmt formats the generated source into a temporary file, which is
# renamed to std.go only when gofmt exits successfully.
) | gofmt >std.go.tmp && mv std.go.tmp std.go

View File

@ -5,6 +5,7 @@
package comment
import (
"sort"
"strings"
"unicode"
"unicode/utf8"
@ -27,7 +28,7 @@ type LinkDef struct {
}
// A Block is block-level content in a doc comment,
// one of *[Code], *[Heading], *[List], or *[Paragraph].
// one of [*Code], [*Heading], [*List], or [*Paragraph].
type Block interface {
block()
}
@ -131,7 +132,7 @@ type Code struct {
func (*Code) block() {}
// A Text is text-level content in a doc comment,
// one of [Plain], [Italic], *[Link], or *[DocLink].
// one of [Plain], [Italic], [*Link], or [*DocLink].
type Text interface {
text()
}
@ -231,6 +232,54 @@ type parseDoc struct {
lookupSym func(recv, name string) bool
}
// lookupPkg resolves the package element of a doc link such as
// [pkg], [pkg.Name], or [pkg.Recv.Name] to an import path.
//
// A pkg containing a slash is taken to be a full import path: it is
// returned unchanged with ok = true when validImportPath accepts it,
// and rejected otherwise. No lookup function is consulted in that case.
//
// A pkg without a slash is probably a simple package name like "rand"
// (not "crypto/rand" or "math/rand"). d.LookupPackage, when set, lets the
// caller resolve such names, for example with reference to the imports
// of the surrounding package.
//
// Single-element standard library names like "math" are full import paths
// that contain no slash, so the two cases collide. d.LookupPackage is asked
// first, in case a different package is imported as math; if it is unset or
// declines, DefaultLookupPackage has the final say.
func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
	switch {
	case strings.Contains(pkg, "/"):
		// Full import path: accept or reject purely on syntax.
		if !validImportPath(pkg) {
			return "", false
		}
		return pkg, true
	case d.LookupPackage != nil:
		// The caller-supplied resolver gets the first chance.
		if resolved, found := d.LookupPackage(pkg); found {
			return resolved, true
		}
	}
	return DefaultLookupPackage(pkg)
}
// isStdPkg reports whether path is an element of stdPkgs.
// It uses a binary search, so stdPkgs must be sorted in increasing order.
func isStdPkg(path string) bool {
	// TODO(rsc): Use sort.Find.
	pos := sort.SearchStrings(stdPkgs, path)
	if pos == len(stdPkgs) {
		return false
	}
	return stdPkgs[pos] == path
}
// DefaultLookupPackage is the default package lookup function.
// It is used when [Parser].LookupPackage is nil, and also as the
// fallback when that function does not resolve a name.
// It recognizes the names of standard library packages that have
// single-element import paths, such as math, which would otherwise
// be impossible to name. For such a name it returns the name itself
// as the import path; for anything else it returns "", false.
//
// Note that the go/doc package provides a more sophisticated
// lookup based on the imports used in the current package.
func DefaultLookupPackage(name string) (importPath string, ok bool) {
	if !isStdPkg(name) {
		return "", false
	}
	return name, true
}
// Parse parses the doc comment text and returns the *Doc form.
// Comment markers (/* // and */) in the text must have already been removed.
func (p *Parser) Parse(text string) *Doc {
@ -264,7 +313,7 @@ func (p *Parser) Parse(text string) *Doc {
for _, b := range d.Content {
switch b := b.(type) {
case *Paragraph:
b.Text = d.parseText(string(b.Text[0].(Plain)))
b.Text = d.parseLinkedText(string(b.Text[0].(Plain)))
}
}
@ -406,9 +455,131 @@ func (d *parseDoc) paragraph(lines []string) (b Block, rest []string) {
return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}, rest
}
// parseText parses s as text and returns the parsed Text elements.
func (d *parseDoc) parseText(s string) []Text {
// parseLinkedText parses text that is allowed to contain explicit links,
// such as [math.Sin] or [Go home page], into a slice of Text items.
//
// Each bracketed span is tried first as a reference to a link definition
// in d.links, which marks the definition as used and yields a *Link, and
// then as a doc link via d.docLink, which yields a *DocLink. A span that is
// neither is left in place as ordinary text. Text outside links is handed
// to parseText with autoLink enabled; text inside a link is parsed with
// autoLink disabled.
//
// To avoid problems with maps, generics, and array types, doc links
// must be both preceded and followed by punctuation, spaces, tabs,
// or the start or end of a line; docLink enforces this. An example
// problem would be treating map[ast.Expr]TypeAndValue as containing a link.
func (d *parseDoc) parseLinkedText(text string) []Text {
	var out []Text
	wrote := 0 // text[:wrote] has already been emitted to out
	flush := func(i int) {
		if wrote < i {
			out = d.parseText(out, text[wrote:i], true)
			wrote = i
		}
	}

	start := -1    // index of the most recent unmatched '[', or -1 if none
	var buf []byte // text[start+1:i] with newlines and tabs replaced by spaces
	for i := 0; i < len(text); i++ {
		c := text[i]
		if c == '\n' || c == '\t' {
			c = ' '
		}
		switch c {
		case '[':
			start = i
			// A new '[' restarts the candidate span, so discard anything
			// collected after an earlier unmatched '['. Otherwise the
			// lookup key below would not match the link text
			// text[start+1:i] for input such as "[a [b]".
			buf = buf[:0]
		case ']':
			if start >= 0 {
				if def, ok := d.links[string(buf)]; ok {
					def.Used = true
					flush(start)
					out = append(out, &Link{
						Text: d.parseText(nil, text[start+1:i], false),
						URL:  def.URL,
					})
					wrote = i + 1
				} else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok {
					flush(start)
					link.Text = d.parseText(nil, text[start+1:i], false)
					out = append(out, link)
					wrote = i + 1
				}
			}
			start = -1
			buf = buf[:0]
		}
		// Collect the bytes strictly between the brackets.
		if start >= 0 && i != start {
			buf = append(buf, c)
		}
	}

	flush(len(text))
	return out
}
// docLink attempts to interpret text, which was found inside [ ] brackets,
// as a doc link. On success it returns the DocLink and true;
// otherwise it returns nil, false.
//
// before is the text preceding the [ and after is the text following the ].
// A doc link must be preceded and followed by punctuation, a space, a tab,
// a newline, or nothing at all (an empty before or after).
//
// A single leading * in text is ignored. The remainder is split into
// package, receiver, and name parts with splitDocName. When a package part
// is present it must resolve through d.lookupPkg; otherwise the symbol must
// be accepted by d.lookupSym.
func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) {
	// okBoundary reports whether r may sit directly next to the brackets.
	okBoundary := func(r rune) bool {
		return unicode.IsPunct(r) || r == ' ' || r == '\t' || r == '\n'
	}
	if before != "" {
		if prev, _ := utf8.DecodeLastRuneInString(before); !okBoundary(prev) {
			return nil, false
		}
	}
	if after != "" {
		if next, _ := utf8.DecodeRuneInString(after); !okBoundary(next) {
			return nil, false
		}
	}

	// Permit a pointer-style link such as [*bytes.Buffer].
	target := strings.TrimPrefix(text, "*")

	importPath, name, named := splitDocName(target)
	recv := ""
	if named {
		// What precedes the name may itself end in a receiver type.
		importPath, recv, _ = splitDocName(importPath)
	}

	if importPath == "" {
		// No package part: the symbol must exist in the current package.
		if !d.lookupSym(recv, name) {
			return nil, false
		}
	} else {
		resolved, found := d.lookupPkg(importPath)
		if !found {
			return nil, false
		}
		importPath = resolved
	}

	return &DocLink{
		ImportPath: importPath,
		Recv:       recv,
		Name:       name,
	}, true
}
// splitDocName splits text at its final dot into before.name.
// If the part after the final dot is accepted by isName, it returns
// before, name, true. When text contains no dot at all, the whole of
// text is the candidate name and before is empty; foundDot is still
// true in that case if the candidate is accepted.
// Otherwise it returns text, "", false.
func splitDocName(text string) (before, name string, foundDot bool) {
	dot := strings.LastIndex(text, ".")
	candidate := text[dot+1:] // all of text when dot == -1
	if !isName(candidate) {
		return text, "", false
	}
	if dot < 0 {
		return "", candidate, true
	}
	return text[:dot], candidate, true
}
// parseText parses s as text and returns the result of appending
// those parsed Text elements to out.
// parseText does not handle explicit links like [math.Sin] or [Go home page]:
// those are handled by parseLinkedText.
// If autoLink is true, then parseText recognizes URLs and words from d.Words
// and converts those to links as appropriate.
func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text {
var w strings.Builder
wrote := 0
writeUntil := func(i int) {
@ -424,7 +595,6 @@ func (d *parseDoc) parseText(s string) []Text {
}
for i := 0; i < len(s); {
t := s[i:]
const autoLink = true
if autoLink {
if url, ok := autoURL(t); ok {
flush(i)
@ -692,6 +862,10 @@ func ident(s string) (id string, ok bool) {
// isIdentASCII reports whether c is an ASCII identifier byte.
func isIdentASCII(c byte) bool {
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
(1<<26-1)<<'A' |
(1<<26-1)<<'a' |
@ -701,3 +875,64 @@ func isIdentASCII(c byte) bool {
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
}
// validImportPath reports whether path is a valid import path.
// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
//
// A valid path is non-empty, valid UTF-8, does not begin with '-',
// contains no empty element (so no leading, trailing, or doubled slash),
// and consists only of slash-separated elements that each satisfy
// validImportPathElem.
func validImportPath(path string) bool {
	if !utf8.ValidString(path) ||
		path == "" ||
		path[0] == '-' ||
		strings.Contains(path, "//") ||
		path[len(path)-1] == '/' {
		return false
	}
	for _, elem := range strings.Split(path, "/") {
		if !validImportPathElem(elem) {
			return false
		}
	}
	return true
}
// validImportPathElem reports whether elem is a valid single element
// of an import path: it must be non-empty, must not begin or end with
// a dot, and every byte must be accepted by importPathOK.
func validImportPathElem(elem string) bool {
	if n := len(elem); n == 0 || elem[0] == '.' || elem[n-1] == '.' {
		return false
	}
	for _, c := range []byte(elem) {
		if !importPathOK(c) {
			return false
		}
	}
	return true
}
// importPathOK reports whether c is a byte allowed in an import path
// element: an ASCII letter, an ASCII digit, or one of - . ~ _ +.
// Every other byte, including all bytes >= 128, is rejected.
func importPathOK(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '-', '.', '~', '_', '+':
		return true
	}
	return false
}

View File

@ -55,6 +55,54 @@ type Printer struct {
TextWidth int
}
// docLinkURL returns the URL to print for link. If the p.DocLinkURL hook
// is set, its result is used; otherwise the URL is
// link.DefaultURL(p.DocLinkBaseURL).
func (p *Printer) docLinkURL(link *DocLink) string {
	if hook := p.DocLinkURL; hook != nil {
		return hook(link)
	}
	return link.DefaultURL(p.DocLinkBaseURL)
}
// DefaultURL constructs and returns the documentation URL for l,
// using baseURL as a prefix for links to other packages.
//
// The possible forms returned by DefaultURL are:
//   - baseURL/ImportPath, for a link to another package
//   - baseURL/ImportPath#Name, for a link to a const, func, type, or var in another package
//   - baseURL/ImportPath#Recv.Name, for a link to a method in another package
//   - #Name, for a link to a const, func, type, or var in this package
//   - #Recv.Name, for a link to a method in this package
//
// If baseURL ends in a trailing slash, then DefaultURL also appends
// a slash after ImportPath (before the # in the anchored forms).
// If baseURL has no trailing slash, one is added between baseURL
// and ImportPath. For example, here are some baseURL values and
// URLs they can generate:
//
//	"/pkg/" → "/pkg/math/#Sqrt"
//	"/pkg"  → "/pkg/math#Sqrt"
//	"/"     → "/math/#Sqrt"
//	""      → "/math#Sqrt"
func (l *DocLink) DefaultURL(baseURL string) string {
	// The anchor is "#Name" or "#Recv.Name".
	anchor := "#" + l.Name
	if l.Recv != "" {
		anchor = "#" + l.Recv + "." + l.Name
	}
	if l.ImportPath == "" {
		// Link within the current package: the anchor is the whole URL.
		return anchor
	}
	if l.Name == "" {
		// Link to a package as a whole: no anchor.
		anchor = ""
	}

	prefix, sep := baseURL+"/", ""
	if strings.HasSuffix(baseURL, "/") {
		// Mirror the caller's trailing-slash style after the import path.
		prefix, sep = baseURL, "/"
	}
	return prefix + l.ImportPath + sep + anchor
}
type commentPrinter struct {
*Printer
headingPrefix string
@ -107,6 +155,10 @@ func (p *commentPrinter) text(out *bytes.Buffer, indent string, x []Text) {
p.indent(out, indent, string(t))
case *Link:
p.text(out, indent, t.Text)
case *DocLink:
out.WriteString("[")
p.text(out, indent, t.Text)
out.WriteString("]")
}
}
}

44
src/go/doc/comment/std.go Normal file
View File

@ -0,0 +1,44 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Code generated by 'go generate' DO NOT EDIT.
//go:generate ./mkstd.sh
package comment
var stdPkgs = []string{
"bufio",
"bytes",
"context",
"crypto",
"embed",
"encoding",
"errors",
"expvar",
"flag",
"fmt",
"hash",
"html",
"image",
"io",
"log",
"math",
"mime",
"net",
"os",
"path",
"plugin",
"reflect",
"regexp",
"runtime",
"sort",
"strconv",
"strings",
"sync",
"syscall",
"testing",
"time",
"unicode",
"unsafe",
}

View File

@ -0,0 +1,35 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package comment
import (
"internal/diff"
"internal/testenv"
"os/exec"
"sort"
"strings"
"testing"
)
// TestStd checks that stdPkgs matches the sorted list of packages printed
// by 'go list std' whose import paths contain no slash. On a mismatch it
// reports a diff and asks for the list to be regenerated.
func TestStd(t *testing.T) {
	cmd := exec.Command(testenv.GoToolPath(t), "list", "std")
	output, err := cmd.CombinedOutput()
	if err != nil {
		t.Fatalf("%v\n%s", err, output)
	}

	// Keep only the single-element import paths.
	var list []string
	for _, name := range strings.Fields(string(output)) {
		if strings.Contains(name, "/") {
			continue
		}
		list = append(list, name)
	}
	sort.Strings(list)

	have := strings.Join(stdPkgs, "\n") + "\n"
	want := strings.Join(list, "\n") + "\n"
	if have == want {
		return
	}
	t.Errorf("stdPkgs is out of date: regenerate with 'go generate'\n%s", diff.Diff("stdPkgs", []byte(have), "want", []byte(want)))
}

42
src/go/doc/comment/testdata/README.md vendored Normal file
View File

@ -0,0 +1,42 @@
This directory contains test files (*.txt) for the comment parser.
The files are in [txtar format](https://pkg.go.dev/golang.org/x/tools/txtar).
Consider this example:
-- input --
Hello.
-- gofmt --
Hello.
-- html --
<p>Hello.
-- markdown --
Hello.
-- text --
Hello.
Each `-- name --` line introduces a new file with the given name.
The file named “input” must be first and contains the input to
[comment.Parser](https://pkg.go.dev/go/doc/comment/#Parser).
The remaining files contain the expected output for the named format generated by
[comment.Printer](https://pkg.go.dev/go/doc/comment/#Printer):
“gofmt” for Printer.Comment (Go comment format, as used by gofmt),
“html” for Printer.HTML, “markdown” for Printer.Markdown, and “text” for Printer.Text.
The format can also be “dump” for a textual dump of the raw data structures.
The text before the `-- input --` line, if present, is JSON to be unmarshalled
to initialize a comment.Printer. For example, this test case sets the Printer's
TextWidth field to 20:
{"TextWidth": 20}
-- input --
Package gob manages streams of gobs - binary values exchanged between an
Encoder (transmitter) and a Decoder (receiver).
-- text --
Package gob
manages streams
of gobs - binary
values exchanged
between an Encoder
(transmitter) and a
Decoder (receiver).

19
src/go/doc/comment/testdata/doclink.txt vendored Normal file
View File

@ -0,0 +1,19 @@
-- input --
In this package, see [Doc] and [Parser.Parse].
There is no [Undef] or [Undef.Method].
See also the [comment] package,
especially [comment.Doc] and [comment.Parser.Parse].
-- gofmt --
In this package, see [Doc] and [Parser.Parse].
There is no [Undef] or [Undef.Method].
See also the [comment] package,
especially [comment.Doc] and [comment.Parser.Parse].
-- text --
In this package, see Doc and Parser.Parse. There is no [Undef] or [Undef.Method]. See also the comment package, especially comment.Doc and comment.Parser.Parse.
-- markdown --
In this package, see [Doc](#Doc) and [Parser.Parse](#Parser.Parse). There is no \[Undef] or \[Undef.Method]. See also the [comment](/go/doc/comment) package, especially [comment.Doc](/go/doc/comment#Doc) and [comment.Parser.Parse](/go/doc/comment#Parser.Parse).
-- html --
<p>In this package, see <a href="#Doc">Doc</a> and <a href="#Parser.Parse">Parser.Parse</a>.
There is no [Undef] or [Undef.Method].
See also the <a href="/go/doc/comment">comment</a> package,
especially <a href="/go/doc/comment#Doc">comment.Doc</a> and <a href="/go/doc/comment#Parser.Parse">comment.Parser.Parse</a>.

View File

@ -0,0 +1,8 @@
-- input --
We use [io.Reader] a lot, and also a few map[io.Reader]string.
Never [io.Reader]int or Slice[io.Reader] though.
-- markdown --
We use [io.Reader](/io#Reader) a lot, and also a few map\[io.Reader]string.
Never \[io.Reader]int or Slice\[io.Reader] though.

View File

@ -0,0 +1,8 @@
-- input --
[encoding/json.Marshal] is a doc link.
[rot13.Marshal] is not.
-- markdown --
[encoding/json.Marshal](/encoding/json#Marshal) is a doc link.
\[rot13.Marshal] is not.

View File

@ -0,0 +1,7 @@
-- input --
[io] at start of comment.
[io] at start of line.
At end of line: [io]
At end of comment: [io]
-- markdown --
[io](/io) at start of comment. [io](/io) at start of line. At end of line: [io](/io) At end of comment: [io](/io)

View File

@ -0,0 +1,5 @@
{"DocLinkBaseURL": "https://pkg.go.dev"}
-- input --
[encoding/json.Marshal] is a doc link.
-- markdown --
[encoding/json.Marshal](https://pkg.go.dev/encoding/json#Marshal) is a doc link.

View File

@ -0,0 +1,5 @@
{"DocLinkBaseURL": "https://go.dev/pkg/"}
-- input --
[encoding/json.Marshal] is a doc link, and so is [rsc.io/quote.NonExist].
-- markdown --
[encoding/json.Marshal](https://go.dev/pkg/encoding/json/#Marshal) is a doc link, and so is [rsc.io/quote.NonExist](https://go.dev/pkg/rsc.io/quote/#NonExist).

View File

@ -0,0 +1,4 @@
-- input --
You see more [*bytes.Buffer] than [bytes.Buffer].
-- markdown --
You see more [\*bytes.Buffer](/bytes#Buffer) than [bytes.Buffer](/bytes#Buffer).

View File

@ -25,6 +25,20 @@ func TestTestdata(t *testing.T) {
"italicword": "",
"linkedword": "https://example.com/linkedword",
}
p.LookupPackage = func(name string) (importPath string, ok bool) {
if name == "comment" {
return "go/doc/comment", true
}
return DefaultLookupPackage(name)
}
p.LookupSym = func(recv, name string) (ok bool) {
if recv == "Parser" && name == "Parse" ||
recv == "" && name == "Doc" ||
recv == "" && name == "NoURL" {
return true
}
return false
}
stripDollars := func(b []byte) []byte {
// Remove trailing $ on lines.

View File

@ -69,6 +69,7 @@ func (p *textPrinter) text(out *bytes.Buffer, x []Text) {
// oneLongLine prints the text sequence x to out as one long line,
// without worrying about line wrapping.
// Explicit links have the [ ] dropped to improve readability.
func (p *textPrinter) oneLongLine(out *bytes.Buffer, x []Text) {
for _, t := range x {
switch t := t.(type) {
@ -78,6 +79,8 @@ func (p *textPrinter) oneLongLine(out *bytes.Buffer, x []Text) {
out.WriteString(string(t))
case *Link:
p.oneLongLine(out, t.Text)
case *DocLink:
p.oneLongLine(out, t.Text)
}
}
}