diff options
Diffstat (limited to 'lib/uri/common.rb')
| -rw-r--r-- | lib/uri/common.rb | 70 |
1 files changed, 56 insertions, 14 deletions
diff --git a/lib/uri/common.rb b/lib/uri/common.rb index 1115736297..a2fb531631 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -30,6 +30,9 @@ module URI remove_const(:Parser) if defined?(::URI::Parser) const_set("Parser", parser.class) + remove_const(:PARSER) if defined?(::URI::PARSER) + const_set("PARSER", parser) + remove_const(:REGEXP) if defined?(::URI::REGEXP) remove_const(:PATTERN) if defined?(::URI::PATTERN) if Parser == RFC2396_Parser @@ -92,6 +95,40 @@ module URI end module Schemes # :nodoc: + class << self + ReservedChars = ".+-" + EscapedChars = "\u01C0\u01C1\u01C2" + # Use Lo category chars as escaped chars for TruffleRuby, which + # does not allow Symbol categories as identifiers. + + def escape(name) + unless name and name.ascii_only? + return nil + end + name.upcase.tr(ReservedChars, EscapedChars) + end + + def unescape(name) + name.tr(EscapedChars, ReservedChars).encode(Encoding::US_ASCII).upcase + end + + def find(name) + const_get(name, false) if name and const_defined?(name, false) + end + + def register(name, klass) + unless scheme = escape(name) + raise ArgumentError, "invalid character as scheme - #{name}" + end + const_set(scheme, klass) + end + + def list + constants.map { |name| + [unescape(name.to_s), const_get(name)] + }.to_h + end + end end private_constant :Schemes @@ -104,7 +141,7 @@ module URI # Note that after calling String#upcase on +scheme+, it must be a valid # constant name. def self.register_scheme(scheme, klass) - Schemes.const_set(scheme.to_s.upcase, klass) + Schemes.register(scheme, klass) end # Returns a hash of the defined schemes: @@ -122,14 +159,14 @@ module URI # # Related: URI.register_scheme. def self.scheme_list - Schemes.constants.map { |name| - [name.to_s.upcase, Schemes.const_get(name)] - }.to_h + Schemes.list end + # :stopdoc: INITIAL_SCHEMES = scheme_list private_constant :INITIAL_SCHEMES Ractor.make_shareable(INITIAL_SCHEMES) if defined?(Ractor) + # :startdoc: # Returns a new object constructed from the given +scheme+, +arguments+, # and +default+: @@ -148,12 +185,10 @@ module URI # # => #<URI::HTTP foo://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top> # def self.for(scheme, *arguments, default: Generic) - const_name = scheme.to_s.upcase + const_name = Schemes.escape(scheme) uri_class = INITIAL_SCHEMES[const_name] - uri_class ||= if /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false) - Schemes.const_get(const_name, false) - end + uri_class ||= Schemes.find(const_name) uri_class ||= default return uri_class.new(scheme, *arguments) @@ -195,7 +230,7 @@ module URI # ["fragment", "top"]] # def self.split(uri) - DEFAULT_PARSER.split(uri) + PARSER.split(uri) end # Returns a new \URI object constructed from the given string +uri+: @@ -205,11 +240,11 @@ module URI # URI.parse('http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top') # # => #<URI::HTTP http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top> # - # It's recommended to first ::escape string +uri+ + # It's recommended to first URI::RFC2396_PARSER.escape string +uri+ # if it may contain invalid URI characters. # def self.parse(uri) - DEFAULT_PARSER.parse(uri) + PARSER.parse(uri) end # Merges the given URI strings +str+ @@ -236,7 +271,7 @@ module URI # # => #<URI::HTTP http://example.com/foo/bar> # def self.join(*str) - DEFAULT_PARSER.join(*str) + PARSER.join(*str) end # @@ -265,7 +300,7 @@ module URI # def self.extract(str, schemes = nil, &block) # :nodoc: warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE - DEFAULT_PARSER.extract(str, schemes, &block) + PARSER.extract(str, schemes, &block) end # @@ -302,7 +337,7 @@ module URI # def self.regexp(schemes = nil)# :nodoc: warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE - DEFAULT_PARSER.make_regexp(schemes) + PARSER.make_regexp(schemes) end TBLENCWWWCOMP_ = {} # :nodoc: @@ -407,6 +442,8 @@ module URI _decode_uri_component(/%\h\h/, str, enc) end + # Returns a string derived from the given string +str+ with + # URI-encoded characters matching +regexp+ according to +table+. def self._encode_uri_component(regexp, table, str, enc) str = str.to_s.dup if str.encoding != Encoding::ASCII_8BIT @@ -421,6 +458,8 @@ module URI end private_class_method :_encode_uri_component + # Returns a string decoding characters matching +regexp+ from the + # given \URL-encoded string +str+. def self._decode_uri_component(regexp, str, enc) raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) @@ -859,6 +898,7 @@ module Kernel # Returns a \URI object derived from the given +uri+, # which may be a \URI string or an existing \URI object: # + # require 'uri' # # Returns a new URI. # uri = URI('http://github.com/ruby/ruby') # # => #<URI::HTTP http://github.com/ruby/ruby> @@ -866,6 +906,8 @@ module Kernel # URI(uri) # # => #<URI::HTTP http://github.com/ruby/ruby> # + # You must require 'uri' to use this method. + # def URI(uri) if uri.is_a?(URI::Generic) uri |
