mirror of
https://github.com/golang/go
synced 2024-11-24 05:10:19 -07:00
go/doc: allow : in godoc links
The emphasize function used a complex regexp to find URLs, which truncated some types of URL and did not match others. This has been simplified and adjusted to allow valid punctuation like :: or ! in the path part and :[] in the host part. Comments were added to clarify what this regexp allows. The path part matches query and fragment also so document this. Removed news, telnet, wais, and prospero protocols. Tests were added for: IPV6 URLs URLs surrounded by brackets URLs containing :: URLs containing :;!- in the path In order to allow punctuation and yet preserve current behaviour, URLs are not permitted to end in .,:;?! to allow the use of normal punctuation surrounding URLs in comments. Fixes #18139 Change-Id: I38b2d7a85fe0d171e4bf4aac420f8c2d3ced8a2f Reviewed-on: https://go-review.googlesource.com/37192 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
a4a3d63dbe
commit
497b608fab
@ -48,12 +48,19 @@ const (
|
||||
identRx = `[\pL_][\pL_0-9]*`
|
||||
|
||||
// Regexp for URLs
|
||||
protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
|
||||
hostPart = `[a-zA-Z0-9_@\-]+`
|
||||
filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
|
||||
urlRx = `(` + protocol + `)://` + // http://
|
||||
hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
|
||||
filePart + `([:.,;]` + filePart + `)*`
|
||||
// Match parens, and check in pairedParensPrefixLen for balance - see #5043
|
||||
// Match .,:;?! within path, but not at end - see #18139, #16565
|
||||
// This excludes some rare yet valid urls ending in common punctuation
|
||||
// in order to allow sentences ending in URLs.
|
||||
|
||||
// protocol (required) e.g. http
|
||||
protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
|
||||
// host (required) e.g. www.example.com or [::1]:8080
|
||||
hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
|
||||
// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
|
||||
pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
|
||||
|
||||
urlRx = protoPart + `://` + hostPart + pathPart
|
||||
)
|
||||
|
||||
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
|
||||
|
@ -150,6 +150,12 @@ func TestToText(t *testing.T) {
|
||||
var emphasizeTests = []struct {
|
||||
in, out string
|
||||
}{
|
||||
{"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
|
||||
{"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
|
||||
{"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
|
||||
{"http://www.google.com/path/:;!-/?query=%34b#093124", `<a href="http://www.google.com/path/:;!-/?query=%34b#093124">http://www.google.com/path/:;!-/?query=%34b#093124</a>`},
|
||||
{"http://www.google.com/path/:;!-/?query=%34bar#093124", `<a href="http://www.google.com/path/:;!-/?query=%34bar#093124">http://www.google.com/path/:;!-/?query=%34bar#093124</a>`},
|
||||
{"http://www.google.com/index.html! After", `<a href="http://www.google.com/index.html">http://www.google.com/index.html</a>! After`},
|
||||
{"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
|
||||
{"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
|
||||
{"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},
|
||||
|
Loading…
Reference in New Issue
Block a user