diff options
Diffstat (limited to 'lib/uri/rfc2396_parser.rb')
| -rw-r--r-- | lib/uri/rfc2396_parser.rb | 109 |
1 files changed, 56 insertions, 53 deletions
diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb index b9e7b2b26e..cefd126cc6 100644 --- a/lib/uri/rfc2396_parser.rb +++ b/lib/uri/rfc2396_parser.rb @@ -3,7 +3,6 @@ # = uri/common.rb # # Author:: Akira Yamada <akira@ruby-lang.org> -# Revision:: $Id$ # License:: # You can redistribute it and/or modify it under the same term as Ruby. # @@ -58,7 +57,7 @@ module URI # :startdoc: end # REGEXP - # class that Parses String's into URI's + # Class that parses String's into URI's. # # It contains a Hash set of patterns and Regexp's that match and validate. # @@ -68,7 +67,7 @@ module URI # # == Synopsis # - # URI::Parser.new([opts]) + # URI::RFC2396_Parser.new([opts]) # # == Args # @@ -87,13 +86,13 @@ module URI # # == Examples # - # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") - # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD> + # p = URI::RFC2396_Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})") + # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD> # URI.parse(u.to_s) #=> raises URI::InvalidURIError # # s = "http://example.com/ABCD" - # u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD> - # u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD> + # u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD> + # u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD> # u1 == u2 #=> true # u1.eql?(u2) #=> false # @@ -109,15 +108,15 @@ module URI # The Hash of patterns. # - # see also URI::Parser.initialize_pattern + # See also #initialize_pattern. attr_reader :pattern - # The Hash of Regexp + # The Hash of Regexp. # - # see also URI::Parser.initialize_regexp + # See also #initialize_regexp. attr_reader :regexp - # Returns a split URI against regexp[:ABS_URI] + # Returns a split URI against +regexp[:ABS_URI]+. def split(uri) case uri when '' @@ -141,11 +140,11 @@ module URI if !scheme raise InvalidURIError, - "bad URI(absolute but no scheme): #{uri}" + "bad URI (absolute but no scheme): #{uri}" end if !opaque && (!path && (!host && !registry)) raise InvalidURIError, - "bad URI(absolute but no path): #{uri}" + "bad URI (absolute but no path): #{uri}" end when @regexp[:REL_URI] @@ -174,7 +173,7 @@ module URI # server = [ [ userinfo "@" ] hostport ] else - raise InvalidURIError, "bad URI(is not URI?): #{uri}" + raise InvalidURIError, "bad URI (is not URI?): #{uri}" end path = '' if !path && !opaque # (see RFC2396 Section 5.2) @@ -198,31 +197,18 @@ module URI # # == Description # - # parses +uri+ and constructs either matching URI scheme object - # (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic + # Parses +uri+ and constructs either matching URI scheme object + # (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic. # # == Usage # - # p = URI::Parser.new - # p.parse("ldap://ldap.example.com/dc=example?user=john") - # #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john> + # URI::RFC2396_PARSER.parse("ldap://ldap.example.com/dc=example?user=john") + # #=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john> # def parse(uri) - scheme, userinfo, host, port, - registry, path, opaque, query, fragment = self.split(uri) - - if scheme && URI.scheme_list.include?(scheme.upcase) - URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) - else - Generic.new(scheme, userinfo, host, port, - registry, path, opaque, query, - fragment, self) - end + URI.for(*self.split(uri), self) end - # # == Args # @@ -231,7 +217,7 @@ module URI # # == Description # - # Attempts to parse and merge a set of URIs + # Attempts to parse and merge a set of URIs. # def join(*uris) uris[0] = convert_to_uri(uris[0]) @@ -253,11 +239,11 @@ module URI # # == Description # - # Attempts to parse and merge a set of URIs - # If no +block+ given , then returns the result, + # Attempts to parse and merge a set of URIs. + # If no +block+ given, then returns the result, # else it calls +block+ for each element in result. # - # see also URI::Parser.make_regexp + # See also #make_regexp. # def extract(str, schemes = nil) if block_given? @@ -270,13 +256,13 @@ module URI end end - # returns Regexp that is default self.regexp[:ABS_URI_REF], - # unless +schemes+ is provided. Then it is a Regexp.union with self.pattern[:X_ABS_URI] + # Returns Regexp that is default +self.regexp[:ABS_URI_REF]+, + # unless +schemes+ is provided. Then it is a Regexp.union with +self.pattern[:X_ABS_URI]+. def make_regexp(schemes = nil) unless schemes @regexp[:ABS_URI_REF] else - /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x + /(?=(?i:#{Regexp.union(*schemes).source}):)#{@pattern[:X_ABS_URI]}/x end end @@ -290,11 +276,11 @@ module URI # +str+:: # String to make safe # +unsafe+:: - # Regexp to apply. Defaults to self.regexp[:UNSAFE] + # Regexp to apply. Defaults to +self.regexp[:UNSAFE]+ # # == Description # - # constructs a safe String from +str+, removing unsafe characters, + # Constructs a safe String from +str+, removing unsafe characters, # replacing them with codes. # def escape(str, unsafe = @regexp[:UNSAFE]) @@ -315,31 +301,39 @@ module URI # # :call-seq: # unescape( str ) - # unescape( str, unsafe ) + # unescape( str, escaped ) # # == Args # # +str+:: # String to remove escapes from - # +unsafe+:: - # Regexp to apply. Defaults to self.regexp[:ESCAPED] + # +escaped+:: + # Regexp to apply. Defaults to +self.regexp[:ESCAPED]+ # # == Description # - # Removes escapes from +str+ + # Removes escapes from +str+. # def unescape(str, escaped = @regexp[:ESCAPED]) - str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(str.encoding) + enc = str.encoding + enc = Encoding::UTF_8 if enc == Encoding::US_ASCII + str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(enc) } end - @@to_s = Kernel.instance_method(:to_s) - def inspect - @@to_s.bind(self).call + TO_S = Kernel.instance_method(:to_s) # :nodoc: + if TO_S.respond_to?(:bind_call) + def inspect # :nodoc: + TO_S.bind_call(self) + end + else + def inspect # :nodoc: + TO_S.bind(self).call + end end private - # Constructs the default Hash of patterns + # Constructs the default Hash of patterns. def initialize_pattern(opts = {}) ret = {} ret[:ESCAPED] = escaped = (opts.delete(:ESCAPED) || PATTERN::ESCAPED) @@ -497,13 +491,13 @@ module URI ret end - # Constructs the default Hash of Regexp's + # Constructs the default Hash of Regexp's. def initialize_regexp(pattern) ret = {} # for URI::split - ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED) - ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED) + ret[:ABS_URI] = Regexp.new('\A\s*+' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED) + ret[:REL_URI] = Regexp.new('\A\s*+' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED) # for URI::extract ret[:URI_REF] = Regexp.new(pattern[:URI_REF]) @@ -529,6 +523,8 @@ module URI ret end + # Returns +uri+ as-is if it is URI, or convert it to URI if it is + # a String. def convert_to_uri(uri) if uri.is_a?(URI::Generic) uri @@ -541,4 +537,11 @@ module URI end end # class Parser + + # Backward compatibility for URI::REGEXP::PATTERN::* + RFC2396_Parser.new.pattern.each_pair do |sym, str| + unless RFC2396_REGEXP::PATTERN.const_defined?(sym, false) + RFC2396_REGEXP::PATTERN.const_set(sym, str) + end + end end # module URI |
