From 27eeabb020795fa81d7dce3766c9028f32a0e5dc Mon Sep 17 00:00:00 2001 From: jaap aarts Date: Fri, 4 Oct 2019 17:06:35 +0000 Subject: [PATCH] internal/lsp: convert comments to markdown before sending to client This converts all the comments from golang doc syntax to proper markdown. Fixes #34161 Change-Id: If513100170e7d8c159bfa93b0d1e36d293e9872f GitHub-Last-Rev: 093f82e6ad8567b3e41bcf5daf14e9c0b5832015 GitHub-Pull-Request: golang/tools#165 Reviewed-on: https://go-review.googlesource.com/c/tools/+/197760 Reviewed-by: Rebecca Stambler Run-TryBot: Rebecca Stambler --- internal/lsp/hover.go | 19 +- internal/lsp/source/comment.go | 389 ++++++++++++++++++++++++++++ internal/lsp/source/comment_test.go | 226 ++++++++++++++++ 3 files changed, 631 insertions(+), 3 deletions(-) create mode 100644 internal/lsp/source/comment.go create mode 100644 internal/lsp/source/comment_test.go diff --git a/internal/lsp/hover.go b/internal/lsp/hover.go index ae51f46596..ddf1a32bbb 100644 --- a/internal/lsp/hover.go +++ b/internal/lsp/hover.go @@ -49,20 +49,33 @@ func (s *Server) toProtocolHoverContents(ctx context.Context, h *source.HoverInf if content.Kind == protocol.Markdown { signature = fmt.Sprintf("```go\n%s\n```", h.Signature) } + switch options.HoverKind { case source.SingleLine: - content.Value = h.SingleLine + doc := h.SingleLine + if content.Kind == protocol.Markdown { + doc = source.CommentToMarkdown(doc) + } + content.Value = doc case source.NoDocumentation: content.Value = signature case source.SynopsisDocumentation: if h.Synopsis != "" { - content.Value = fmt.Sprintf("%s\n%s", h.Synopsis, signature) + doc := h.Synopsis + if content.Kind == protocol.Markdown { + doc = source.CommentToMarkdown(h.Synopsis) + } + content.Value = fmt.Sprintf("%s\n%s", doc, signature) } else { content.Value = signature } case source.FullDocumentation: if h.FullDocumentation != "" { - content.Value = fmt.Sprintf("%s\n%s", signature, h.FullDocumentation) + doc := h.FullDocumentation + if 
content.Kind == protocol.Markdown { + doc = source.CommentToMarkdown(h.FullDocumentation) + } + content.Value = fmt.Sprintf("%s\n%s", signature, doc) } else { content.Value = signature } diff --git a/internal/lsp/source/comment.go b/internal/lsp/source/comment.go new file mode 100644 index 0000000000..42458cc2af --- /dev/null +++ b/internal/lsp/source/comment.go @@ -0,0 +1,389 @@ +package source + +import ( + "bytes" + "io" + "regexp" + "strings" + "unicode" + "unicode/utf8" +) + +// CommentToMarkdown converts comment text to formatted markdown. +// The comment was prepared by DocReader, +// so it is known not to have leading, trailing blank lines +// nor to have trailing spaces at the end of lines. +// The comment markers have already been removed. +// +// Each line is converted into a markdown line and empty lines are just converted to +// newlines. Headings are prefixed with `### ` to make it a markdown heading. +// +// A span of indented lines retains a 4 space prefix block, with the common indent +// prefix removed unless empty, in which case it will be converted to a newline. +// +// URLs in the comment text are converted into links. 
+func CommentToMarkdown(text string) string { + buf := &bytes.Buffer{} + commentToMarkdown(buf, text) + return buf.String() +} + +var ( + mdNewline = []byte("\n") + mdHeader = []byte("### ") + mdIndent = []byte("    ") + mdLinkStart = []byte("[") + mdLinkDiv = []byte("](") + mdLinkEnd = []byte(")") +) + +func commentToMarkdown(w io.Writer, text string) { + isFirstLine := true + for _, b := range blocks(text) { + switch b.op { + case opPara: + if !isFirstLine { + w.Write(mdNewline) + } + + for _, line := range b.lines { + emphasize(w, line, true) + } + case opHead: + if !isFirstLine { + w.Write(mdNewline) + } + w.Write(mdNewline) + + for _, line := range b.lines { + w.Write(mdHeader) + commentEscape(w, line, true) + w.Write(mdNewline) + } + case opPre: + if !isFirstLine { + w.Write(mdNewline) + } + w.Write(mdNewline) + + for _, line := range b.lines { + if isBlank(line) { + w.Write(mdNewline) + } else { + w.Write(mdIndent) + w.Write([]byte(line)) + w.Write(mdNewline) + } + } + } + isFirstLine = false + } +} + +const ( + ulquo = "“" + urquo = "”" +) + +var ( + markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`) + + unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo) +) + +// commentEscape escapes comment text for markdown. If nice is set, +// also turn `` into “; and '' into ”;. 
+func commentEscape(w io.Writer, text string, nice bool) { + if nice { + text = convertQuotes(text) + } + text = escapeRegex(text) + w.Write([]byte(text)) +} + +func convertQuotes(text string) string { + return unicodeQuoteReplacer.Replace(text) +} + +func escapeRegex(text string) string { + return markdownEscape.ReplaceAllString(text, `\$1`) +} + +func emphasize(w io.Writer, line string, nice bool) { + for { + m := matchRx.FindStringSubmatchIndex(line) + if m == nil { + break + } + // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx) + + // write text before match + commentEscape(w, line[0:m[0]], nice) + + // adjust match for URLs + match := line[m[0]:m[1]] + if strings.Contains(match, "://") { + m0, m1 := m[0], m[1] + for _, s := range []string{"()", "{}", "[]"} { + open, close := s[:1], s[1:] // E.g., "(" and ")" + // require opening parentheses before closing parentheses (#22285) + if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { + m1 = m0 + i + match = line[m0:m1] + } + // require balanced pairs of parentheses (#5043) + for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { + m1 = strings.LastIndexAny(line[:m1], s) + match = line[m0:m1] + } + } + if m1 != m[1] { + // redo matching with shortened line for correct indices + m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) + } + } + + // Following code has been modified from go/doc since words is always + // nil. 
All html formatting has also been transformed into markdown formatting + + // analyze match + url := "" + if m[2] >= 0 { + url = match + } + + // write match + if len(url) > 0 { + w.Write(mdLinkStart) + } + + commentEscape(w, match, nice) + + if len(url) > 0 { + w.Write(mdLinkDiv) + w.Write([]byte(urlReplacer.Replace(url))) + w.Write(mdLinkEnd) + } + + // advance + line = line[m[1]:] + } + commentEscape(w, line, nice) +} + +// Everything from here on is a copy of go/doc/comment.go + +const ( + // Regexp for Go identifiers + identRx = `[\pL_][\pL_0-9]*` + + // Regexp for URLs + // Match parens, and check later for balance - see #5043, #22285 + // Match .,:;?! within path, but not at end - see #18139, #16565 + // This excludes some rare yet valid urls ending in common punctuation + // in order to allow sentences ending in URLs. + + // protocol (required) e.g. http + protoPart = `(https?|ftp|file|gopher|mailto|nntp)` + // host (required) e.g. www.example.com or [::1]:8080 + hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` + // path+query+fragment (optional) e.g. 
/path/index.html?q=foo#bar + pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` + + urlRx = protoPart + `://` + hostPart + pathPart +) + +var ( + matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) + urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`) +) + +func indentLen(s string) int { + i := 0 + for i < len(s) && (s[i] == ' ' || s[i] == '\t') { + i++ + } + return i +} + +func isBlank(s string) bool { + return len(s) == 0 || (len(s) == 1 && s[0] == '\n') +} + +func commonPrefix(a, b string) string { + i := 0 + for i < len(a) && i < len(b) && a[i] == b[i] { + i++ + } + return a[0:i] +} + +func unindent(block []string) { + if len(block) == 0 { + return + } + + // compute maximum common white prefix + prefix := block[0][0:indentLen(block[0])] + for _, line := range block { + if !isBlank(line) { + prefix = commonPrefix(prefix, line[0:indentLen(line)]) + } + } + n := len(prefix) + + // remove + for i, line := range block { + if !isBlank(line) { + block[i] = line[n:] + } + } +} + +// heading returns the trimmed line if it passes as a section heading; +// otherwise it returns the empty string. +func heading(line string) string { + line = strings.TrimSpace(line) + if len(line) == 0 { + return "" + } + + // a heading must start with an uppercase letter + r, _ := utf8.DecodeRuneInString(line) + if !unicode.IsLetter(r) || !unicode.IsUpper(r) { + return "" + } + + // it must end in a letter or digit: + r, _ = utf8.DecodeLastRuneInString(line) + if !unicode.IsLetter(r) && !unicode.IsDigit(r) { + return "" + } + + // exclude lines with illegal characters. we allow "()," + if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { + return "" + } + + // allow "'" for possessive "'s" only + for b := line; ; { + i := strings.IndexRune(b, '\'') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') { + return "" // not followed by "s " + } + b = b[i+2:] + } + + // allow "." 
when followed by non-space + for b := line; ; { + i := strings.IndexRune(b, '.') + if i < 0 { + break + } + if i+1 >= len(b) || b[i+1] == ' ' { + return "" // not followed by non-space + } + b = b[i+1:] + } + + return line +} + +type op int + +const ( + opPara op = iota + opHead + opPre +) + +type block struct { + op op + lines []string +} + +var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`) + +func anchorID(line string) string { + // Add a "hdr-" prefix to avoid conflicting with IDs used for package symbols. + return "hdr-" + nonAlphaNumRx.ReplaceAllString(line, "_") +} + +func blocks(text string) []block { + var ( + out []block + para []string + + lastWasBlank = false + lastWasHeading = false + ) + + close := func() { + if para != nil { + out = append(out, block{opPara, para}) + para = nil + } + } + + lines := strings.SplitAfter(text, "\n") + unindent(lines) + for i := 0; i < len(lines); { + line := lines[i] + if isBlank(line) { + // close paragraph + close() + i++ + lastWasBlank = true + continue + } + if indentLen(line) > 0 { + // close paragraph + close() + + // count indented or blank lines + j := i + 1 + for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) { + j++ + } + // but not trailing blank lines + for j > i && isBlank(lines[j-1]) { + j-- + } + pre := lines[i:j] + i = j + + unindent(pre) + + // put those lines in a pre block + out = append(out, block{opPre, pre}) + lastWasHeading = false + continue + } + + if lastWasBlank && !lastWasHeading && i+2 < len(lines) && + isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 { + // current line is non-blank, surrounded by blank lines + // and the next non-blank line is not indented: this + // might be a heading. 
+ if head := heading(line); head != "" { + close() + out = append(out, block{opHead, []string{head}}) + i += 2 + lastWasHeading = true + continue + } + } + + // open paragraph + lastWasBlank = false + lastWasHeading = false + para = append(para, lines[i]) + i++ + } + close() + + return out +} diff --git a/internal/lsp/source/comment_test.go b/internal/lsp/source/comment_test.go new file mode 100644 index 0000000000..cc37464f8a --- /dev/null +++ b/internal/lsp/source/comment_test.go @@ -0,0 +1,226 @@ +package source + +import ( + "bytes" + "reflect" + "strings" + "testing" +) + +// This file is a copy of go/doc/comment_test.go with the exception for +// the test cases for TestEmphasize and TestCommentEscape + +var headingTests = []struct { + line string + ok bool +}{ + {"Section", true}, + {"A typical usage", true}, + {"ΔΛΞ is Greek", true}, + {"Foo 42", true}, + {"", false}, + {"section", false}, + {"A typical usage:", false}, + {"This code:", false}, + {"δ is Greek", false}, + {"Foo §", false}, + {"Fermat's Last Sentence", true}, + {"Fermat's", true}, + {"'sX", false}, + {"Ted 'Too' Bar", false}, + {"Use n+m", false}, + {"Scanning:", false}, + {"N:M", false}, +} + +func TestIsHeading(t *testing.T) { + for _, tt := range headingTests { + if h := heading(tt.line); (len(h) > 0) != tt.ok { + t.Errorf("isHeading(%q) = %v, want %v", tt.line, h, tt.ok) + } + } +} + +var blocksTests = []struct { + in string + out []block + text string +}{ + { + in: `Para 1. +Para 1 line 2. + +Para 2. + +Section + +Para 3. + + pre + pre1 + +Para 4. + + pre + pre1 + + pre2 + +Para 5. + + + pre + + + pre1 + pre2 + +Para 6. 
+ pre + pre2 +`, + out: []block{ + {opPara, []string{"Para 1.\n", "Para 1 line 2.\n"}}, + {opPara, []string{"Para 2.\n"}}, + {opHead, []string{"Section"}}, + {opPara, []string{"Para 3.\n"}}, + {opPre, []string{"pre\n", "pre1\n"}}, + {opPara, []string{"Para 4.\n"}}, + {opPre, []string{"pre\n", "pre1\n", "\n", "pre2\n"}}, + {opPara, []string{"Para 5.\n"}}, + {opPre, []string{"pre\n", "\n", "\n", "pre1\n", "pre2\n"}}, + {opPara, []string{"Para 6.\n"}}, + {opPre, []string{"pre\n", "pre2\n"}}, + }, + text: `. Para 1. Para 1 line 2. + +. Para 2. + + +. Section + +. Para 3. + +$ pre +$ pre1 + +. Para 4. + +$ pre +$ pre1 + +$ pre2 + +. Para 5. + +$ pre + + +$ pre1 +$ pre2 + +. Para 6. + +$ pre +$ pre2 +`, + }, + { + in: "Para.\n\tshould not be ``escaped''", + out: []block{ + {opPara, []string{"Para.\n"}}, + {opPre, []string{"should not be ``escaped''"}}, + }, + text: ". Para.\n\n$ should not be ``escaped''", + }, + { + in: "// A very long line of 46 char for line wrapping.", + out: []block{ + {opPara, []string{"// A very long line of 46 char for line wrapping."}}, + }, + text: `. // A very long line of 46 char for line +. // wrapping. +`, + }, + { + in: `/* A very long line of 46 char for line wrapping. +A very long line of 46 char for line wrapping. */`, + out: []block{ + {opPara, []string{"/* A very long line of 46 char for line wrapping.\n", "A very long line of 46 char for line wrapping. */"}}, + }, + text: `. /* A very long line of 46 char for line +. wrapping. A very long line of 46 char +. for line wrapping. 
*/ +`, + }, +} + +func TestBlocks(t *testing.T) { + for i, tt := range blocksTests { + b := blocks(tt.in) + if !reflect.DeepEqual(b, tt.out) { + t.Errorf("#%d: mismatch\nhave: %v\nwant: %v", i, b, tt.out) + } + } +} + +// This has been modified from go/doc to use markdown links instead of html ones +// and use markdown escaping instead oh html +var emphasizeTests = []struct { + in, out string +}{ + {"", ""}, + {"http://[::1]:8080/foo.txt", `[http\:\/\/\[\:\:1\]\:8080\/foo\.txt](http://[::1]:8080/foo.txt)`}, + {"before (https://www.google.com) after", `before \([https\:\/\/www\.google\.com](https://www.google.com)\) after`}, + {"before https://www.google.com:30/x/y/z:b::c. After", `before [https\:\/\/www\.google\.com\:30\/x\/y\/z\:b\:\:c](https://www.google.com:30/x/y/z:b::c)\. After`}, + {"http://www.google.com/path/:;!-/?query=%34b#093124", `[http\:\/\/www\.google\.com\/path\/\:\;\!\-\/\?query\=\%34b\#093124](http://www.google.com/path/:;!-/?query=%34b#093124)`}, + {"http://www.google.com/path/:;!-/?query=%34bar#093124", `[http\:\/\/www\.google\.com\/path\/\:\;\!\-\/\?query\=\%34bar\#093124](http://www.google.com/path/:;!-/?query=%34bar#093124)`}, + {"http://www.google.com/index.html! After", `[http\:\/\/www\.google\.com\/index\.html](http://www.google.com/index.html)\! 
After`}, + {"http://www.google.com/", `[http\:\/\/www\.google\.com\/](http://www.google.com/)`}, + {"https://www.google.com/", `[https\:\/\/www\.google\.com\/](https://www.google.com/)`}, + {"http://www.google.com/path.", `[http\:\/\/www\.google\.com\/path](http://www.google.com/path)\.`}, + {"http://en.wikipedia.org/wiki/Camellia_(cipher)", `[http\:\/\/en\.wikipedia\.org\/wiki\/Camellia\_\(cipher\)](http://en.wikipedia.org/wiki/Camellia_\(cipher\))`}, + {"(http://www.google.com/)", `\([http\:\/\/www\.google\.com\/](http://www.google.com/)\)`}, + {"http://gmail.com)", `[http\:\/\/gmail\.com](http://gmail.com)\)`}, + {"((http://gmail.com))", `\(\([http\:\/\/gmail\.com](http://gmail.com)\)\)`}, + {"http://gmail.com ((http://gmail.com)) ()", `[http\:\/\/gmail\.com](http://gmail.com) \(\([http\:\/\/gmail\.com](http://gmail.com)\)\) \(\)`}, + {"Foo bar http://example.com/ quux!", `Foo bar [http\:\/\/example\.com\/](http://example.com/) quux\!`}, + {"Hello http://example.com/%2f/ /world.", `Hello [http\:\/\/example\.com\/\%2f\/](http://example.com/%2f/) \/world\.`}, + {"Lorem http: ipsum //host/path", `Lorem http\: ipsum \/\/host\/path`}, + {"javascript://is/not/linked", `javascript\:\/\/is\/not\/linked`}, + {"http://foo", `[http\:\/\/foo](http://foo)`}, + {"art by [[https://www.example.com/person/][Person Name]]", `art by \[\[[https\:\/\/www\.example\.com\/person\/](https://www.example.com/person/)\]\[Person Name\]\]`}, + {"please visit (http://golang.org/)", `please visit \([http\:\/\/golang\.org\/](http://golang.org/)\)`}, + {"please visit http://golang.org/hello())", `please visit [http\:\/\/golang\.org\/hello\(\)](http://golang.org/hello\(\))\)`}, + {"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", `[http\:\/\/git\.qemu\.org\/\?p\=qemu\.git\;a\=blob\;f\=qapi\-schema\.json\;hb\=HEAD](http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD)`}, + {"https://foo.bar/bal/x(])", `[https\:\/\/foo\.bar\/bal\/x\(](https://foo.bar/bal/x\()\]\)`}, 
+ {"foo [ http://bar(])", `foo \[ [http\:\/\/bar\(](http://bar\()\]\)`}, +} + +func TestEmphasize(t *testing.T) { + for i, tt := range emphasizeTests { + var buf bytes.Buffer + emphasize(&buf, tt.in, true) + out := buf.String() + if out != tt.out { + t.Errorf("#%d: mismatch\nhave: %v\nwant: %v", i, out, tt.out) + } + } +} + +func TestCommentEscape(t *testing.T) { + //ldquo -> ulquo and rdquo -> urquo + commentTests := []struct { + in, out string + }{ + {"typically invoked as ``go tool asm'',", "typically invoked as " + ulquo + "go tool asm" + urquo + ","}, + {"For more detail, run ``go help test'' and ``go help testflag''", "For more detail, run " + ulquo + "go help test" + urquo + " and " + ulquo + "go help testflag" + urquo}} + for i, tt := range commentTests { + var buf strings.Builder + commentEscape(&buf, tt.in, true) + out := buf.String() + if out != tt.out { + t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out) + } + } +}