internal/lsp: convert comments to markdown before sending to client

This converts all the comments from golang doc syntax to proper markdown. Fixes #34161 Change-Id: If513100170e7d8c159bfa93b0d1e36d293e9872f GitHub-Last-Rev: 093f82e6ad8567b3e41bcf5daf14e9c0b5832015 GitHub-Pull-Request: golang/tools#165 Reviewed-on: https://go-review.googlesource.com/c/tools/+/197760 Reviewed-by: Rebecca Stambler <rstambler@golang.org> Run-TryBot: Rebecca Stambler <rstambler@golang.org>
2024-11-18 14:14:46 -07:00 · 2019-10-04 17:06:35 +00:00 · 2019-10-04 17:06:35 +00:00 · 27eeabb020
commit 27eeabb020
parent b22818684c
3 changed files with 631 additions and 3 deletions
--- a/internal/lsp/hover.go
+++ b/internal/lsp/hover.go
@ -49,20 +49,33 @@ func (s *Server) toProtocolHoverContents(ctx context.Context, h *source.HoverInf
 	if content.Kind == protocol.Markdown {
 		signature = fmt.Sprintf("```go\n%s\n```", h.Signature)
 	}
+
 	switch options.HoverKind {
 	case source.SingleLine:
-		content.Value = h.SingleLine
+		doc := h.SingleLine
+		if content.Kind == protocol.Markdown {
+			doc = source.CommentToMarkdown(doc)
+		}
+		content.Value = doc
 	case source.NoDocumentation:
 		content.Value = signature
 	case source.SynopsisDocumentation:
 		if h.Synopsis != "" {
-			content.Value = fmt.Sprintf("%s\n%s", h.Synopsis, signature)
+			doc := h.Synopsis
+			if content.Kind == protocol.Markdown {
+				doc = source.CommentToMarkdown(h.Synopsis)
+			}
+			content.Value = fmt.Sprintf("%s\n%s", doc, signature)
 		} else {
 			content.Value = signature
 		}
 	case source.FullDocumentation:
 		if h.FullDocumentation != "" {
-			content.Value = fmt.Sprintf("%s\n%s", signature, h.FullDocumentation)
+			doc := h.FullDocumentation
+			if content.Kind == protocol.Markdown {
+				doc = source.CommentToMarkdown(h.FullDocumentation)
+			}
+			content.Value = fmt.Sprintf("%s\n%s", signature, doc)
 		} else {
 			content.Value = signature
 		}
--- a/internal/lsp/source/comment.go
+++ b/internal/lsp/source/comment.go
@ -0,0 +1,389 @@
+package source
+
+import (
+	"bytes"
+	"io"
+	"regexp"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// CommentToMarkdown converts comment text to formatted markdown.
+// The comment was prepared by DocReader,
+// so it is known not to have leading, trailing blank lines
+// nor to have trailing spaces at the end of lines.
+// The comment markers have already been removed.
+//
+// Each line is converted into a markdown line and empty lines are just converted to
+// newlines. Headings are prefixed with `### ` to make them markdown headings.
+//
+// A span of indented lines has its common indent prefix removed and each line is
+// prefixed with four non-breaking spaces; blank lines within it become bare newlines.
+//
+// URLs in the comment text are converted into links.
+func CommentToMarkdown(text string) string {
+	buf := &bytes.Buffer{}
+	commentToMarkdown(buf, text)
+	return buf.String()
+}
+
+var (
+	mdNewline   = []byte("\n")
+	mdHeader    = []byte("### ")
+	mdIndent    = []byte("&nbsp;&nbsp;&nbsp;&nbsp;")
+	mdLinkStart = []byte("[")
+	mdLinkDiv   = []byte("](")
+	mdLinkEnd   = []byte(")")
+)
+
+func commentToMarkdown(w io.Writer, text string) {
+	isFirstLine := true
+	for _, b := range blocks(text) {
+		switch b.op {
+		case opPara:
+			if !isFirstLine {
+				w.Write(mdNewline)
+			}
+
+			for _, line := range b.lines {
+				emphasize(w, line, true)
+			}
+		case opHead:
+			if !isFirstLine {
+				w.Write(mdNewline)
+			}
+			w.Write(mdNewline)
+
+			for _, line := range b.lines {
+				w.Write(mdHeader)
+				commentEscape(w, line, true)
+				w.Write(mdNewline)
+			}
+		case opPre:
+			if !isFirstLine {
+				w.Write(mdNewline)
+			}
+			w.Write(mdNewline)
+
+			for _, line := range b.lines {
+				if isBlank(line) {
+					w.Write(mdNewline)
+				} else {
+					w.Write(mdIndent)
+					w.Write([]byte(line))
+					w.Write(mdNewline)
+				}
+			}
+		}
+		isFirstLine = false
+	}
+}
+
+const (
+	ulquo = "“"
+	urquo = "”"
+)
+
+var (
+	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)
+
+	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
+)
+
+// commentEscape escapes comment text for markdown. If nice is set,
+// also turn `` into “ and '' into ”.
+func commentEscape(w io.Writer, text string, nice bool) {
+	if nice {
+		text = convertQuotes(text)
+	}
+	text = escapeRegex(text)
+	w.Write([]byte(text))
+}
+
+func convertQuotes(text string) string {
+	return unicodeQuoteReplacer.Replace(text)
+}
+
+func escapeRegex(text string) string {
+	return markdownEscape.ReplaceAllString(text, `\$1`)
+}
+
+func emphasize(w io.Writer, line string, nice bool) {
+	for {
+		m := matchRx.FindStringSubmatchIndex(line)
+		if m == nil {
+			break
+		}
+		// len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
+
+		// write text before match
+		commentEscape(w, line[0:m[0]], nice)
+
+		// adjust match for URLs
+		match := line[m[0]:m[1]]
+		if strings.Contains(match, "://") {
+			m0, m1 := m[0], m[1]
+			for _, s := range []string{"()", "{}", "[]"} {
+				open, close := s[:1], s[1:] // E.g., "(" and ")"
+				// require opening parentheses before closing parentheses (#22285)
+				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
+					m1 = m0 + i
+					match = line[m0:m1]
+				}
+				// require balanced pairs of parentheses (#5043)
+				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
+					m1 = strings.LastIndexAny(line[:m1], s)
+					match = line[m0:m1]
+				}
+			}
+			if m1 != m[1] {
+				// redo matching with shortened line for correct indices
+				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
+			}
+		}
+
+		// The following code has been modified from go/doc since words is always
+		// nil. All HTML formatting has also been transformed into markdown formatting.
+
+		// analyze match
+		url := ""
+		if m[2] >= 0 {
+			url = match
+		}
+
+		// write match
+		if len(url) > 0 {
+			w.Write(mdLinkStart)
+		}
+
+		commentEscape(w, match, nice)
+
+		if len(url) > 0 {
+			w.Write(mdLinkDiv)
+			w.Write([]byte(urlReplacer.Replace(url)))
+			w.Write(mdLinkEnd)
+		}
+
+		// advance
+		line = line[m[1]:]
+	}
+	commentEscape(w, line, nice)
+}
+
+// Everything from here on is a copy of go/doc/comment.go
+
+const (
+	// Regexp for Go identifiers
+	identRx = `[\pL_][\pL_0-9]*`
+
+	// Regexp for URLs
+	// Match parens, and check later for balance - see #5043, #22285
+	// Match .,:;?! within path, but not at end - see #18139, #16565
+	// This excludes some rare yet valid urls ending in common punctuation
+	// in order to allow sentences ending in URLs.
+
+	// protocol (required) e.g. http
+	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
+	// host (required) e.g. www.example.com or [::1]:8080
+	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
+	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
+	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
+
+	urlRx = protoPart + `://` + hostPart + pathPart
+)
+
+var (
+	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
+	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
+)
+
+func indentLen(s string) int {
+	i := 0
+	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
+		i++
+	}
+	return i
+}
+
+func isBlank(s string) bool {
+	return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
+}
+
+func commonPrefix(a, b string) string {
+	i := 0
+	for i < len(a) && i < len(b) && a[i] == b[i] {
+		i++
+	}
+	return a[0:i]
+}
+
+func unindent(block []string) {
+	if len(block) == 0 {
+		return
+	}
+
+	// compute maximum common white prefix
+	prefix := block[0][0:indentLen(block[0])]
+	for _, line := range block {
+		if !isBlank(line) {
+			prefix = commonPrefix(prefix, line[0:indentLen(line)])
+		}
+	}
+	n := len(prefix)
+
+	// remove
+	for i, line := range block {
+		if !isBlank(line) {
+			block[i] = line[n:]
+		}
+	}
+}
+
+// heading returns the trimmed line if it passes as a section heading;
+// otherwise it returns the empty string.
+func heading(line string) string {
+	line = strings.TrimSpace(line)
+	if len(line) == 0 {
+		return ""
+	}
+
+	// a heading must start with an uppercase letter
+	r, _ := utf8.DecodeRuneInString(line)
+	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
+		return ""
+	}
+
+	// it must end in a letter or digit:
+	r, _ = utf8.DecodeLastRuneInString(line)
+	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
+		return ""
+	}
+
+	// exclude lines with illegal characters. we allow "(),"
+	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
+		return ""
+	}
+
+	// allow "'" for possessive "'s" only
+	for b := line; ; {
+		i := strings.IndexRune(b, '\'')
+		if i < 0 {
+			break
+		}
+		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
+			return "" // not followed by "s "
+		}
+		b = b[i+2:]
+	}
+
+	// allow "." when followed by non-space
+	for b := line; ; {
+		i := strings.IndexRune(b, '.')
+		if i < 0 {
+			break
+		}
+		if i+1 >= len(b) || b[i+1] == ' ' {
+			return "" // not followed by non-space
+		}
+		b = b[i+1:]
+	}
+
+	return line
+}
+
+type op int
+
+const (
+	opPara op = iota
+	opHead
+	opPre
+)
+
+type block struct {
+	op    op
+	lines []string
+}
+
+var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
+
+func anchorID(line string) string {
+	// Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols.
+	return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_")
+}
+
+func blocks(text string) []block {
+	var (
+		out  []block
+		para []string
+
+		lastWasBlank   = false
+		lastWasHeading = false
+	)
+
+	close := func() {
+		if para != nil {
+			out = append(out, block{opPara, para})
+			para = nil
+		}
+	}
+
+	lines := strings.SplitAfter(text, "\n")
+	unindent(lines)
+	for i := 0; i < len(lines); {
+		line := lines[i]
+		if isBlank(line) {
+			// close paragraph
+			close()
+			i++
+			lastWasBlank = true
+			continue
+		}
+		if indentLen(line) > 0 {
+			// close paragraph
+			close()
+
+			// count indented or blank lines
+			j := i + 1
+			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
+				j++
+			}
+			// but not trailing blank lines
+			for j > i && isBlank(lines[j-1]) {
+				j--
+			}
+			pre := lines[i:j]
+			i = j
+
+			unindent(pre)
+
+			// put those lines in a pre block
+			out = append(out, block{opPre, pre})
+			lastWasHeading = false
+			continue
+		}
+
+		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
+			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
+			// current line is non-blank, surrounded by blank lines
+			// and the next non-blank line is not indented: this
+			// might be a heading.
+			if head := heading(line); head != "" {
+				close()
+				out = append(out, block{opHead, []string{head}})
+				i += 2
+				lastWasHeading = true
+				continue
+			}
+		}
+
+		// open paragraph
+		lastWasBlank = false
+		lastWasHeading = false
+		para = append(para, lines[i])
+		i++
+	}
+	close()
+
+	return out
+}
--- a/internal/lsp/source/comment_test.go
+++ b/internal/lsp/source/comment_test.go
@ -0,0 +1,226 @@
+package source
+
+import (
+	"bytes"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+// This file is a copy of go/doc/comment_test.go with the exception of
+// the test cases for TestEmphasize and TestCommentEscape.
+
+var headingTests = []struct {
+	line string
+	ok   bool
+}{
+	{"Section", true},
+	{"A typical usage", true},
+	{"ΔΛΞ is Greek", true},
+	{"Foo 42", true},
+	{"", false},
+	{"section", false},
+	{"A typical usage:", false},
+	{"This code:", false},
+	{"δ is Greek", false},
+	{"Foo §", false},
+	{"Fermat's Last Sentence", true},
+	{"Fermat's", true},
+	{"'sX", false},
+	{"Ted 'Too' Bar", false},
+	{"Use n+m", false},
+	{"Scanning:", false},
+	{"N:M", false},
+}
+
+func TestIsHeading(t *testing.T) {
+	for _, tt := range headingTests {
+		if h := heading(tt.line); (len(h) > 0) != tt.ok {
+			t.Errorf("isHeading(%q) = %v, want %v", tt.line, h, tt.ok)
+		}
+	}
+}
+
+var blocksTests = []struct {
+	in   string
+	out  []block
+	text string
+}{
+	{
+		in: `Para 1.
+Para 1 line 2.
+
+Para 2.
+
+Section
+
+Para 3.
+
+	pre
+	pre1
+
+Para 4.
+
+	pre
+	pre1
+
+	pre2
+
+Para 5.
+
+
+	pre
+
+
+	pre1
+	pre2
+
+Para 6.
+	pre
+	pre2
+`,
+		out: []block{
+			{opPara, []string{"Para 1.\n", "Para 1 line 2.\n"}},
+			{opPara, []string{"Para 2.\n"}},
+			{opHead, []string{"Section"}},
+			{opPara, []string{"Para 3.\n"}},
+			{opPre, []string{"pre\n", "pre1\n"}},
+			{opPara, []string{"Para 4.\n"}},
+			{opPre, []string{"pre\n", "pre1\n", "\n", "pre2\n"}},
+			{opPara, []string{"Para 5.\n"}},
+			{opPre, []string{"pre\n", "\n", "\n", "pre1\n", "pre2\n"}},
+			{opPara, []string{"Para 6.\n"}},
+			{opPre, []string{"pre\n", "pre2\n"}},
+		},
+		text: `.   Para 1. Para 1 line 2.
+
+.   Para 2.
+
+
+.   Section
+
+.   Para 3.
+
+$	pre
+$	pre1
+
+.   Para 4.
+
+$	pre
+$	pre1
+
+$	pre2
+
+.   Para 5.
+
+$	pre
+
+
+$	pre1
+$	pre2
+
+.   Para 6.
+
+$	pre
+$	pre2
+`,
+	},
+	{
+		in: "Para.\n\tshould not be ``escaped''",
+		out: []block{
+			{opPara, []string{"Para.\n"}},
+			{opPre, []string{"should not be ``escaped''"}},
+		},
+		text: ".   Para.\n\n$	should not be ``escaped''",
+	},
+	{
+		in: "// A very long line of 46 char for line wrapping.",
+		out: []block{
+			{opPara, []string{"// A very long line of 46 char for line wrapping."}},
+		},
+		text: `.   // A very long line of 46 char for line
+.   // wrapping.
+`,
+	},
+	{
+		in: `/* A very long line of 46 char for line wrapping.
+A very long line of 46 char for line wrapping. */`,
+		out: []block{
+			{opPara, []string{"/* A very long line of 46 char for line wrapping.\n", "A very long line of 46 char for line wrapping. */"}},
+		},
+		text: `.   /* A very long line of 46 char for line
+.   wrapping. A very long line of 46 char
+.   for line wrapping. */
+`,
+	},
+}
+
+func TestBlocks(t *testing.T) {
+	for i, tt := range blocksTests {
+		b := blocks(tt.in)
+		if !reflect.DeepEqual(b, tt.out) {
+			t.Errorf("#%d: mismatch\nhave: %v\nwant: %v", i, b, tt.out)
+		}
+	}
+}
+
+// This has been modified from go/doc to use markdown links instead of html ones
+// and use markdown escaping instead of html
+var emphasizeTests = []struct {
+	in, out string
+}{
+	{"", ""},
+	{"http://[::1]:8080/foo.txt", `[http\:\/\/\[\:\:1\]\:8080\/foo\.txt](http://[::1]:8080/foo.txt)`},
+	{"before (https://www.google.com) after", `before \([https\:\/\/www\.google\.com](https://www.google.com)\) after`},
+	{"before https://www.google.com:30/x/y/z:b::c. After", `before [https\:\/\/www\.google\.com\:30\/x\/y\/z\:b\:\:c](https://www.google.com:30/x/y/z:b::c)\. After`},
+	{"http://www.google.com/path/:;!-/?query=%34b#093124", `[http\:\/\/www\.google\.com\/path\/\:\;\!\-\/\?query\=\%34b\#093124](http://www.google.com/path/:;!-/?query=%34b#093124)`},
+	{"http://www.google.com/path/:;!-/?query=%34bar#093124", `[http\:\/\/www\.google\.com\/path\/\:\;\!\-\/\?query\=\%34bar\#093124](http://www.google.com/path/:;!-/?query=%34bar#093124)`},
+	{"http://www.google.com/index.html! After", `[http\:\/\/www\.google\.com\/index\.html](http://www.google.com/index.html)\! After`},
+	{"http://www.google.com/", `[http\:\/\/www\.google\.com\/](http://www.google.com/)`},
+	{"https://www.google.com/", `[https\:\/\/www\.google\.com\/](https://www.google.com/)`},
+	{"http://www.google.com/path.", `[http\:\/\/www\.google\.com\/path](http://www.google.com/path)\.`},
+	{"http://en.wikipedia.org/wiki/Camellia_(cipher)", `[http\:\/\/en\.wikipedia\.org\/wiki\/Camellia\_\(cipher\)](http://en.wikipedia.org/wiki/Camellia_\(cipher\))`},
+	{"(http://www.google.com/)", `\([http\:\/\/www\.google\.com\/](http://www.google.com/)\)`},
+	{"http://gmail.com)", `[http\:\/\/gmail\.com](http://gmail.com)\)`},
+	{"((http://gmail.com))", `\(\([http\:\/\/gmail\.com](http://gmail.com)\)\)`},
+	{"http://gmail.com ((http://gmail.com)) ()", `[http\:\/\/gmail\.com](http://gmail.com) \(\([http\:\/\/gmail\.com](http://gmail.com)\)\) \(\)`},
+	{"Foo bar http://example.com/ quux!", `Foo bar [http\:\/\/example\.com\/](http://example.com/) quux\!`},
+	{"Hello http://example.com/%2f/ /world.", `Hello [http\:\/\/example\.com\/\%2f\/](http://example.com/%2f/) \/world\.`},
+	{"Lorem http: ipsum //host/path", `Lorem http\: ipsum \/\/host\/path`},
+	{"javascript://is/not/linked", `javascript\:\/\/is\/not\/linked`},
+	{"http://foo", `[http\:\/\/foo](http://foo)`},
+	{"art by [[https://www.example.com/person/][Person Name]]", `art by \[\[[https\:\/\/www\.example\.com\/person\/](https://www.example.com/person/)\]\[Person Name\]\]`},
+	{"please visit (http://golang.org/)", `please visit \([http\:\/\/golang\.org\/](http://golang.org/)\)`},
+	{"please visit http://golang.org/hello())", `please visit [http\:\/\/golang\.org\/hello\(\)](http://golang.org/hello\(\))\)`},
+	{"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", `[http\:\/\/git\.qemu\.org\/\?p\=qemu\.git\;a\=blob\;f\=qapi\-schema\.json\;hb\=HEAD](http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD)`},
+	{"https://foo.bar/bal/x(])", `[https\:\/\/foo\.bar\/bal\/x\(](https://foo.bar/bal/x\()\]\)`},
+	{"foo [ http://bar(])", `foo \[ [http\:\/\/bar\(](http://bar\()\]\)`},
+}
+
+func TestEmphasize(t *testing.T) {
+	for i, tt := range emphasizeTests {
+		var buf bytes.Buffer
+		emphasize(&buf, tt.in, true)
+		out := buf.String()
+		if out != tt.out {
+			t.Errorf("#%d: mismatch\nhave: %v\nwant: %v", i, out, tt.out)
+		}
+	}
+}
+
+func TestCommentEscape(t *testing.T) {
+	//ldquo -> ulquo and rdquo -> urquo
+	commentTests := []struct {
+		in, out string
+	}{
+		{"typically invoked as ``go tool asm'',", "typically invoked as " + ulquo + "go tool asm" + urquo + ","},
+		{"For more detail, run ``go help test'' and ``go help testflag''", "For more detail, run " + ulquo + "go help test" + urquo + " and " + ulquo + "go help testflag" + urquo}}
+	for i, tt := range commentTests {
+		var buf strings.Builder
+		commentEscape(&buf, tt.in, true)
+		out := buf.String()
+		if out != tt.out {
+			t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
+		}
+	}
+}