From fbebfe1697938a684f460cd28af36cf1f056513c Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Thu, 4 Mar 2021 14:05:18 -0800 Subject: [ruby/uri] Add URI::Generic#decoded_#{user,password} URI::Generic#{user,password} return the encoded values, which are not that useful if you want to do authentication with them. Automatic decoding by default would break backwards compatibility. Optional automatic decoding via a keyword to URI.parse would require threading the option through at least 3 other methods, and would make semantics confusing (user= takes encoded or unencoded password?) or require more work. Thus, adding this as a separate method seemed the simplest approach. Unfortunately, URI lacks a method for correct decoding. Unlike in www form components, + in earlier parts of the URI such as the userinfo section is treated verbatim and not as an encoded space. Add URI.#{en,de}code_uri_component methods, which are almost the same as URI.#{en,de}code_www_form_component, but without the special SP => + handling. Implements [Feature #9045] https://github.com/ruby/uri/commit/16cfc4e92f --- lib/uri/common.rb | 41 +++++++++++++++++++++++++++++++++-------- lib/uri/generic.rb | 14 ++++++++++++-- 2 files changed, 45 insertions(+), 10 deletions(-) (limited to 'lib/uri') diff --git a/lib/uri/common.rb b/lib/uri/common.rb index d592fdc9ba..a6d08aa26f 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -295,6 +295,7 @@ module URI 256.times do |i| TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i) end + TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze TBLDECWWWCOMP_ = {} # :nodoc: @@ -320,6 +321,33 @@ module URI # # See URI.decode_www_form_component, URI.encode_www_form. def self.encode_www_form_component(str, enc=nil) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc) + end + + # Decodes given +str+ of URL-encoded form data. + # + # This decodes + to SP. + # + # See URI.encode_www_form_component, URI.decode_www_form. + def self.decode_www_form_component(str, enc=Encoding::UTF_8) + _decode_uri_component(/\+|%\h\h/, str, enc) + end + + # Encodes +str+ using URL encoding + # + # This encodes SP to %20 instead of +. + def self.encode_uri_component(str, enc=nil) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc) + end + + # Decodes given +str+ of URL-encoded data. + # + # This does not decode + to SP. + def self.decode_uri_component(str, enc=Encoding::UTF_8) + _decode_uri_component(/%\h\h/, str, enc) + end + + def self._encode_uri_component(regexp, table, str, enc) str = str.to_s.dup if str.encoding != Encoding::ASCII_8BIT if enc && enc != Encoding::ASCII_8BIT @@ -328,19 +356,16 @@ module URI end str.force_encoding(Encoding::ASCII_8BIT) end - str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_) + str.gsub!(regexp, table) str.force_encoding(Encoding::US_ASCII) end + private_class_method :_encode_uri_component - # Decodes given +str+ of URL-encoded form data. - # - # This decodes + to SP. - # - # See URI.encode_www_form_component, URI.decode_www_form. - def self.decode_www_form_component(str, enc=Encoding::UTF_8) + def self._decode_uri_component(regexp, str, enc) raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) - str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc) + str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) end + private_class_method :_decode_uri_component # Generates URL-encoded form data from given +enum+. # diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index cfa0de6b74..69698c4e2d 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -564,16 +564,26 @@ module URI end end - # Returns the user component. + # Returns the user component (without URI decoding). def user @user end - # Returns the password component. + # Returns the password component (without URI decoding). def password @password end + # Returns the user component after URI decoding. + def decoded_user + URI.decode_uri_component(@user) if @user + end + + # Returns the password component after URI decoding. + def decoded_password + URI.decode_uri_component(@password) if @password + end + # # Checks the host +v+ component for RFC2396 compliance # and against the URI::Parser Regexp for :HOST. -- cgit v1.2.3