diff options
Diffstat (limited to 'lib/uri/common.rb')
| -rw-r--r-- | lib/uri/common.rb | 123 |
1 files changed, 96 insertions, 27 deletions
diff --git a/lib/uri/common.rb b/lib/uri/common.rb index dce09fbc1e..a2fb531631 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -13,24 +13,54 @@ require_relative "rfc2396_parser" require_relative "rfc3986_parser" module URI - include RFC2396_REGEXP + # The default parser instance for RFC 2396. + RFC2396_PARSER = RFC2396_Parser.new + Ractor.make_shareable(RFC2396_PARSER) if defined?(Ractor) - REGEXP = RFC2396_REGEXP - Parser = RFC2396_Parser + # The default parser instance for RFC 3986. RFC3986_PARSER = RFC3986_Parser.new Ractor.make_shareable(RFC3986_PARSER) if defined?(Ractor) - # URI::Parser.new - DEFAULT_PARSER = Parser.new - DEFAULT_PARSER.pattern.each_pair do |sym, str| - unless REGEXP::PATTERN.const_defined?(sym) - REGEXP::PATTERN.const_set(sym, str) + # The default parser instance. + DEFAULT_PARSER = RFC3986_PARSER + Ractor.make_shareable(DEFAULT_PARSER) if defined?(Ractor) + + # Set the default parser instance. + def self.parser=(parser = RFC3986_PARSER) + remove_const(:Parser) if defined?(::URI::Parser) + const_set("Parser", parser.class) + + remove_const(:PARSER) if defined?(::URI::PARSER) + const_set("PARSER", parser) + + remove_const(:REGEXP) if defined?(::URI::REGEXP) + remove_const(:PATTERN) if defined?(::URI::PATTERN) + if Parser == RFC2396_Parser + const_set("REGEXP", URI::RFC2396_REGEXP) + const_set("PATTERN", URI::RFC2396_REGEXP::PATTERN) + end + + Parser.new.regexp.each_pair do |sym, str| + remove_const(sym) if const_defined?(sym, false) + const_set(sym, str) end end - DEFAULT_PARSER.regexp.each_pair do |sym, str| - const_set(sym, str) + self.parser = RFC3986_PARSER + + def self.const_missing(const) # :nodoc: + if const == :REGEXP + warn "URI::REGEXP is obsolete. Use URI::RFC2396_REGEXP explicitly.", uplevel: 1 if $VERBOSE + URI::RFC2396_REGEXP + elsif value = RFC2396_PARSER.regexp[const] + warn "URI::#{const} is obsolete. Use URI::RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE + value + elsif value = RFC2396_Parser.const_get(const) + warn "URI::#{const} is obsolete. Use URI::RFC2396_Parser::#{const} explicitly.", uplevel: 1 if $VERBOSE + value + else + super + end end - Ractor.make_shareable(DEFAULT_PARSER) if defined?(Ractor) module Util # :nodoc: def make_components_hash(klass, array_hash) @@ -64,7 +94,41 @@ module URI module_function :make_components_hash end - module Schemes + module Schemes # :nodoc: + class << self + ReservedChars = ".+-" + EscapedChars = "\u01C0\u01C1\u01C2" + # Use Lo category chars as escaped chars for TruffleRuby, which + # does not allow Symbol categories as identifiers. + + def escape(name) + unless name and name.ascii_only? + return nil + end + name.upcase.tr(ReservedChars, EscapedChars) + end + + def unescape(name) + name.tr(EscapedChars, ReservedChars).encode(Encoding::US_ASCII).upcase + end + + def find(name) + const_get(name, false) if name and const_defined?(name, false) + end + + def register(name, klass) + unless scheme = escape(name) + raise ArgumentError, "invalid character as scheme - #{name}" + end + const_set(scheme, klass) + end + + def list + constants.map { |name| + [unescape(name.to_s), const_get(name)] + }.to_h + end + end end private_constant :Schemes @@ -77,7 +141,7 @@ module URI # Note that after calling String#upcase on +scheme+, it must be a valid # constant name. def self.register_scheme(scheme, klass) - Schemes.const_set(scheme.to_s.upcase, klass) + Schemes.register(scheme, klass) end # Returns a hash of the defined schemes: @@ -95,14 +159,14 @@ module URI # # Related: URI.register_scheme. def self.scheme_list - Schemes.constants.map { |name| - [name.to_s.upcase, Schemes.const_get(name)] - }.to_h + Schemes.list end + # :stopdoc: INITIAL_SCHEMES = scheme_list private_constant :INITIAL_SCHEMES Ractor.make_shareable(INITIAL_SCHEMES) if defined?(Ractor) + # :startdoc: # Returns a new object constructed from the given +scheme+, +arguments+, # and +default+: @@ -121,12 +185,10 @@ module URI # # => #<URI::HTTP foo://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top> # def self.for(scheme, *arguments, default: Generic) - const_name = scheme.to_s.upcase + const_name = Schemes.escape(scheme) uri_class = INITIAL_SCHEMES[const_name] - uri_class ||= if /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false) - Schemes.const_get(const_name, false) - end + uri_class ||= Schemes.find(const_name) uri_class ||= default return uri_class.new(scheme, *arguments) @@ -168,7 +230,7 @@ module URI # ["fragment", "top"]] # def self.split(uri) - RFC3986_PARSER.split(uri) + PARSER.split(uri) end # Returns a new \URI object constructed from the given string +uri+: @@ -178,11 +240,11 @@ module URI # URI.parse('http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top') # # => #<URI::HTTP http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top> # - # It's recommended to first ::escape string +uri+ + # It's recommended to first URI::RFC2396_PARSER.escape string +uri+ # if it may contain invalid URI characters. # def self.parse(uri) - RFC3986_PARSER.parse(uri) + PARSER.parse(uri) end # Merges the given URI strings +str+ @@ -209,7 +271,7 @@ module URI # # => #<URI::HTTP http://example.com/foo/bar> # def self.join(*str) - RFC3986_PARSER.join(*str) + PARSER.join(*str) end # @@ -238,7 +300,7 @@ module URI # def self.extract(str, schemes = nil, &block) # :nodoc: warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE - DEFAULT_PARSER.extract(str, schemes, &block) + PARSER.extract(str, schemes, &block) end # @@ -275,14 +337,14 @@ module URI # def self.regexp(schemes = nil)# :nodoc: warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE - DEFAULT_PARSER.make_regexp(schemes) + PARSER.make_regexp(schemes) end TBLENCWWWCOMP_ = {} # :nodoc: 256.times do |i| TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i) end - TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze + TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze # :nodoc: TBLENCWWWCOMP_[' '] = '+' TBLENCWWWCOMP_.freeze TBLDECWWWCOMP_ = {} # :nodoc: @@ -380,6 +442,8 @@ module URI _decode_uri_component(/%\h\h/, str, enc) end + # Returns a string derived from the given string +str+ with + # URI-encoded characters matching +regexp+ according to +table+. def self._encode_uri_component(regexp, table, str, enc) str = str.to_s.dup if str.encoding != Encoding::ASCII_8BIT @@ -394,6 +458,8 @@ module URI end private_class_method :_encode_uri_component + # Returns a string decoding characters matching +regexp+ from the + # given \URL-encoded string +str+. def self._decode_uri_component(regexp, str, enc) raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str) str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc) @@ -832,6 +898,7 @@ module Kernel # Returns a \URI object derived from the given +uri+, # which may be a \URI string or an existing \URI object: # + # require 'uri' # # Returns a new URI. # uri = URI('http://github.com/ruby/ruby') # # => #<URI::HTTP http://github.com/ruby/ruby> @@ -839,6 +906,8 @@ module Kernel # URI(uri) # # => #<URI::HTTP http://github.com/ruby/ruby> # + # You must require 'uri' to use this method. + # def URI(uri) if uri.is_a?(URI::Generic) uri |
