From aaac279de09eb1fe48b32fd95e61453f0d602ac4 Mon Sep 17 00:00:00 2001
From: Peter Zhu
Date: Mon, 11 Apr 2022 15:37:29 -0400
Subject: [ruby/rdoc] Only parse valid URLs

Only valid characters for URLs should be used for generating URLs. A
list of valid characters can be found in sections 2.2 and 2.3 of IETF
RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt).

https://github.com/ruby/rdoc/commit/2bd8fcdd4f
---
 lib/rdoc/markup/to_html.rb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'lib/rdoc/markup/to_html.rb')

diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb
index 7c2e91cecf..2bfabc8942 100644
--- a/lib/rdoc/markup/to_html.rb
+++ b/lib/rdoc/markup/to_html.rb
@@ -61,12 +61,14 @@ class RDoc::Markup::ToHtml < RDoc::Markup::Formatter
   #
   # These methods are used by regexp handling markup added by RDoc::Markup#add_regexp_handling.
 
+  URL_CHARACTERS_REGEXP_STR = /[A-Za-z0-9\-._~:\/\?#\[\]@!$&'\(\)*+,;%=]/.source
+
   ##
   # Adds regexp handlings.
 
   def init_regexp_handlings
     # external links
-    @markup.add_regexp_handling(/(?:link:|https?:|mailto:|ftp:|irc:|www\.)\S+\w/,
+    @markup.add_regexp_handling(/(?:link:|https?:|mailto:|ftp:|irc:|www\.)#{URL_CHARACTERS_REGEXP_STR}+\w/,
                                 :HYPERLINK)
     init_link_notation_regexp_handlings
   end
-- 
cgit v1.2.3