summaryrefslogtreecommitdiff
path: root/lib/uri/common.rb
diff options
context:
space:
mode:
author Jeremy Evans <code@jeremyevans.net> 2021-03-04 14:05:18 -0800
committer git <svn-admin@ruby-lang.org> 2022-05-12 14:54:37 +0900
commit fbebfe1697938a684f460cd28af36cf1f056513c (patch)
tree 22771001917053627d5562d7725ead5344220fb6 /lib/uri/common.rb
parent 054ae999dc5dfcb182f407bffceec5a52ae7ff6c (diff)
[ruby/uri] Add URI::Generic#decoded_#{user,password}
URI::Generic#{user,password} return the encoded values, which are not that useful if you want to do authentication with them. Automatic decoding by default would break backwards compatibility. Optional automatic decoding via a keyword to URI.parse would require threading the option through at least 3 other methods, and would make semantics confusing (user= takes encoded or unencoded password?) or require more work. Thus, adding this as a separate method seemed the simplest approach.

Unfortunately, URI lacks a method for correct decoding. Unlike in www form components, + in earlier parts of the URI such as the userinfo section is treated verbatim and not as an encoded space. Add URI.#{en,de}code_uri_component methods, which are almost the same as URI.#{en,de}code_www_form_component, but without the special SP => + handling.

Implements [Feature #9045]

https://github.com/ruby/uri/commit/16cfc4e92f
Diffstat (limited to 'lib/uri/common.rb')
-rw-r--r-- lib/uri/common.rb 41
1 files changed, 33 insertions, 8 deletions
diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index d592fdc9ba..a6d08aa26f 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -295,6 +295,7 @@ module URI
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
+ TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
@@ -320,6 +321,33 @@ module URI
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded form data.
+ #
+ # This decodes + to SP.
+ #
+ # See URI.encode_www_form_component, URI.decode_www_form.
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/\+|%\h\h/, str, enc)
+ end
+
+ # Encodes +str+ using URL encoding
+ #
+ # This encodes SP to %20 instead of +.
+ def self.encode_uri_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded data.
+ #
+ # This does not decode + to SP.
+ def self.decode_uri_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/%\h\h/, str, enc)
+ end
+
+ def self._encode_uri_component(regexp, table, str, enc)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
@@ -328,19 +356,16 @@ module URI
end
str.force_encoding(Encoding::ASCII_8BIT)
end
- str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
+ str.gsub!(regexp, table)
str.force_encoding(Encoding::US_ASCII)
end
+ private_class_method :_encode_uri_component
- # Decodes given +str+ of URL-encoded form data.
- #
- # This decodes + to SP.
- #
- # See URI.encode_www_form_component, URI.decode_www_form.
- def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ def self._decode_uri_component(regexp, str, enc)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
- str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+ str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
+ private_class_method :_decode_uri_component
# Generates URL-encoded form data from given +enum+.
#