diff options
| author | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2023-02-07 16:22:16 +0900 |
|---|---|---|
| committer | nagachika <nagachika@ruby-lang.org> | 2023-03-26 14:22:38 +0900 |
| commit | da27583cf364c0d69c085db4abf358c334a8eca1 (patch) | |
| tree | bb2e5ff28214241b49e63c9f2dd28db0965dc257 /lib | |
| parent | 19af12ff195aba64bdca7a83f564f2c0e46061c0 (diff) | |
Merge URI-0.12.0
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/uri.rb | 1 | ||||
| -rw-r--r-- | lib/uri/common.rb | 52 | ||||
| -rw-r--r-- | lib/uri/file.rb | 6 | ||||
| -rw-r--r-- | lib/uri/generic.rb | 14 | ||||
| -rw-r--r-- | lib/uri/mailto.rb | 2 | ||||
| -rw-r--r-- | lib/uri/rfc3986_parser.rb | 5 | ||||
| -rw-r--r-- | lib/uri/version.rb | 2 |
7 files changed, 64 insertions, 18 deletions
diff --git a/lib/uri.rb b/lib/uri.rb index 394c156ac5..59a7c4ad28 100644 --- a/lib/uri.rb +++ b/lib/uri.rb @@ -101,3 +101,4 @@ require_relative 'uri/ldap' require_relative 'uri/ldaps' require_relative 'uri/mailto' require_relative 'uri/ws' +require_relative 'uri/wss' diff --git a/lib/uri/common.rb b/lib/uri/common.rb index 26b179add2..ca38bec7ec 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -13,6 +13,8 @@ require_relative "rfc2396_parser" require_relative "rfc3986_parser" module URI + include RFC2396_REGEXP + REGEXP = RFC2396_REGEXP Parser = RFC2396_Parser RFC3986_PARSER = RFC3986_Parser.new @@ -62,14 +64,17 @@ module URI module_function :make_components_hash end - include REGEXP - module Schemes end private_constant :Schemes + # + # Register the given +klass+ to be instantiated when parsing URLs with the given +scheme+. + # Note that currently only schemes which after .upcase are valid constant names + # can be registered (no -/+/. allowed). + # def self.register_scheme(scheme, klass) - Schemes.const_set(scheme, klass) + Schemes.const_set(scheme.to_s.upcase, klass) end # Returns a Hash of the defined schemes. @@ -295,6 +300,7 @@ module URI 256.times do |i| TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i) end + TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze TBLDECWWWCOMP_ = {} # :nodoc: @@ -320,6 +326,33 @@ module URI # # See URI.decode_www_form_component, URI.encode_www_form. def self.encode_www_form_component(str, enc=nil) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc) + end + + # Decodes given +str+ of URL-encoded form data. + # + # This decodes + to SP. + # + # See URI.encode_www_form_component, URI.decode_www_form. + def self.decode_www_form_component(str, enc=Encoding::UTF_8) + _decode_uri_component(/\+|%\h\h/, str, enc) + end + + # Encodes +str+ using URL encoding + # + # This encodes SP to %20 instead of +. + def self.encode_uri_component(str, enc=nil) + _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc) + end + + # Decodes given +str+ of URL-encoded data. + # + # This does not decode + to SP. + def self.decode_uri_component(str, enc=Encoding::UTF_8) + _decode_uri_component(/%\h\h/, str, enc) + end + + def self._encode_uri_component(regexp, table, str, enc) str = str.to_s.dup if str.encoding != Encoding::ASCII_8BIT if enc && enc != Encoding::ASCII_8BIT @@ -328,19 +361,16 @@ module URI end str.force_encoding(Encoding::ASCII_8BIT) end - str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_) + str.gsub!(regexp, table) str.force_encoding(Encoding::US_ASCII) end + private_class_method :_encode_uri_component - # Decodes given +str+ of URL-encoded form data. - # - # This decodes + to SP. - # - # See URI.encode_www_form_component, URI.decode_www_form. - def self.decode_www_form_component(str, enc=Encoding::UTF_8) + def self._decode_uri_component(regexp, str, enc) raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) - str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc) + str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) end + private_class_method :_decode_uri_component # Generates URL-encoded form data from given +enum+. # diff --git a/lib/uri/file.rb b/lib/uri/file.rb index 7671ad6470..4ff0bc097e 100644 --- a/lib/uri/file.rb +++ b/lib/uri/file.rb @@ -33,6 +33,9 @@ module URI # If an Array is used, the components must be passed in the # order <code>[host, path]</code>. # + # A path from e.g. the File class should be escaped before + # being passed. + # # Examples: # # require 'uri' @@ -44,6 +47,9 @@ module URI # :path => '/ruby/src'}) # uri2.to_s # => "file://host.example.com/ruby/src" # + # uri3 = URI::File.build({:path => URI::escape('/path/my file.txt')}) + # uri3.to_s # => "file:///path/my%20file.txt" + # def self.build(args) tmp = Util::make_components_hash(self, args) super(tmp) diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb index cfa0de6b74..69698c4e2d 100644 --- a/lib/uri/generic.rb +++ b/lib/uri/generic.rb @@ -564,16 +564,26 @@ module URI end end - # Returns the user component. + # Returns the user component (without URI decoding). def user @user end - # Returns the password component. + # Returns the password component (without URI decoding). def password @password end + # Returns the user component after URI decoding. + def decoded_user + URI.decode_uri_component(@user) if @user + end + + # Returns the password component after URI decoding. + def decoded_password + URI.decode_uri_component(@password) if @password + end + # # Checks the host +v+ component for RFC2396 compliance # and against the URI::Parser Regexp for :HOST. diff --git a/lib/uri/mailto.rb b/lib/uri/mailto.rb index 87cb99656f..cb8024f301 100644 --- a/lib/uri/mailto.rb +++ b/lib/uri/mailto.rb @@ -15,7 +15,7 @@ module URI # RFC6068, the mailto URL scheme. # class MailTo < Generic - include REGEXP + include RFC2396_REGEXP # A Default port of nil for URI::MailTo. DEFAULT_PORT = nil diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb index 3e07de4805..f3816d9ae5 100644 --- a/lib/uri/rfc3986_parser.rb +++ b/lib/uri/rfc3986_parser.rb @@ -2,9 +2,8 @@ module URI class RFC3986_Parser # :nodoc: # URI defined in RFC3986 - # this regexp is modified not to host is not empty string - RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ - RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+)\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ + RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ + RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{1,4}?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>[^#]*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~\/?])*))?)\z/ attr_reader :regexp def initialize diff --git a/lib/uri/version.rb b/lib/uri/version.rb index 82188e25ad..a9643ef8bc 100644 --- a/lib/uri/version.rb +++ b/lib/uri/version.rb @@ -1,6 +1,6 @@ module URI # :stopdoc: - VERSION_CODE = '001100'.freeze + VERSION_CODE = '001200'.freeze VERSION = VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze # :startdoc: end |
