From 497b608faba2a9c450d03e2bca2ef005ea32b933 Mon Sep 17 00:00:00 2001 From: Kenny Grant Date: Sun, 18 Dec 2016 07:24:58 +0000 Subject: [PATCH] go/doc: allow : in godoc links The emphasize function used a complex regexp to find URLs, which truncated some types of URL and did not match others. This has been simplified and adjusted to allow valid punctuation like :: or ! in the path part and :[] in the host part. Comments were added to clarify what this regexp allows. The path part matches query and fragment also so document this. Removed news, telnet, wais, and prospero protocols. Tests were added for: IPV6 URLs URLs surrounded by brackets URLs containing :: URLs containing :;!- in the path In order to allow punctuation and yet preserve current behaviour, URLs are not permitted to end in .,:;?! to allow the use of normal punctuation surrounding URLs in comments. Fixes #18139 Change-Id: I38b2d7a85fe0d171e4bf4aac420f8c2d3ced8a2f Reviewed-on: https://go-review.googlesource.com/37192 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/go/doc/comment.go | 19 +++++++++++++------ src/go/doc/comment_test.go | 6 ++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/go/doc/comment.go b/src/go/doc/comment.go index 15e034b6df..4228e8cd9c 100644 --- a/src/go/doc/comment.go +++ b/src/go/doc/comment.go @@ -48,12 +48,19 @@ const ( identRx = `[\pL_][\pL_0-9]*` // Regexp for URLs - protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero` - hostPart = `[a-zA-Z0-9_@\-]+` - filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen - urlRx = `(` + protocol + `)://` + // http:// - hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ - filePart + `([:.,;]` + filePart + `)*` + // Match parens, and check in pairedParensPrefixLen for balance - see #5043 + // Match .,:;?! within path, but not at end - see #18139, #16565 + // This excludes some rare yet valid urls ending in common punctuation + // in order to allow sentences ending in URLs. + + // protocol (required) e.g. http + protoPart = `(https?|ftp|file|gopher|mailto|nntp)` + // host (required) e.g. www.example.com or [::1]:8080 + hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)` + // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar + pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*` + + urlRx = protoPart + `://` + hostPart + pathPart ) var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`) diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go index 76dfbeac79..0523ab899e 100644 --- a/src/go/doc/comment_test.go +++ b/src/go/doc/comment_test.go @@ -150,6 +150,12 @@ func TestToText(t *testing.T) { var emphasizeTests = []struct { in, out string }{ + {"http://[::1]:8080/foo.txt", `http://[::1]:8080/foo.txt`}, + {"before (https://www.google.com) after", `before (https://www.google.com) after`}, + {"before https://www.google.com:30/x/y/z:b::c. After", `before https://www.google.com:30/x/y/z:b::c. After`}, + {"http://www.google.com/path/:;!-/?query=%34b#093124", `http://www.google.com/path/:;!-/?query=%34b#093124`}, + {"http://www.google.com/path/:;!-/?query=%34bar#093124", `http://www.google.com/path/:;!-/?query=%34bar#093124`}, + {"http://www.google.com/index.html! After", `http://www.google.com/index.html! After`}, {"http://www.google.com/", `http://www.google.com/`}, {"https://www.google.com/", `https://www.google.com/`}, {"http://www.google.com/path.", `http://www.google.com/path.`},