1
0
mirror of https://github.com/golang/go synced 2024-11-24 05:10:19 -07:00

go/doc: allow : in godoc links

The emphasize function used a complex regexp to find URLs, which
truncated some types of URL and did not match others.
This has been simplified and adjusted to allow valid punctuation
like :: or ! in the path part and :[] in the host part.
Comments were added to clarify what this regexp allows.
The path part matches query and fragment also so document this.
Removed news, telnet, wais, and prospero protocols.

Tests were added for:
 IPV6 URLs
 URLs surrounded by brackets
 URLs containing ::
 URLs containing :;!- in the path

In order to allow punctuation and yet preserve current behaviour,
URLs are not permitted to end in .,:;?! to allow the use of
normal punctuation surrounding URLs in comments.

Fixes #18139

Change-Id: I38b2d7a85fe0d171e4bf4aac420f8c2d3ced8a2f
Reviewed-on: https://go-review.googlesource.com/37192
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Kenny Grant 2016-12-18 07:24:58 +00:00 committed by Brad Fitzpatrick
parent a4a3d63dbe
commit 497b608fab
2 changed files with 19 additions and 6 deletions

View File

@ -48,12 +48,19 @@ const (
identRx = `[\pL_][\pL_0-9]*`
// Regexp for URLs
protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
hostPart = `[a-zA-Z0-9_@\-]+`
filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
urlRx = `(` + protocol + `)://` + // http://
hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
filePart + `([:.,;]` + filePart + `)*`
// Match parens, and check in pairedParensPrefixLen for balance - see #5043
// Match .,:;?! within path, but not at end - see #18139, #16565
// This excludes some rare yet valid urls ending in common punctuation
// in order to allow sentences ending in URLs.
// protocol (required) e.g. http
protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
// host (required) e.g. www.example.com or [::1]:8080
hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
urlRx = protoPart + `://` + hostPart + pathPart
)
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)

View File

@ -150,6 +150,12 @@ func TestToText(t *testing.T) {
var emphasizeTests = []struct {
in, out string
}{
{"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
{"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
{"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
{"http://www.google.com/path/:;!-/?query=%34b#093124", `<a href="http://www.google.com/path/:;!-/?query=%34b#093124">http://www.google.com/path/:;!-/?query=%34b#093124</a>`},
{"http://www.google.com/path/:;!-/?query=%34bar#093124", `<a href="http://www.google.com/path/:;!-/?query=%34bar#093124">http://www.google.com/path/:;!-/?query=%34bar#093124</a>`},
{"http://www.google.com/index.html! After", `<a href="http://www.google.com/index.html">http://www.google.com/index.html</a>! After`},
{"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
{"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
{"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},