summary refs log tree commit diff
path: root/lib/uri/rfc2396_parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/uri/rfc2396_parser.rb')
-rw-r--r-- lib/uri/rfc2396_parser.rb 109
1 files changed, 56 insertions, 53 deletions
diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb
index b9e7b2b26e..cefd126cc6 100644
--- a/lib/uri/rfc2396_parser.rb
+++ b/lib/uri/rfc2396_parser.rb
@@ -3,7 +3,6 @@
# = uri/common.rb
#
# Author:: Akira Yamada <akira@ruby-lang.org>
-# Revision:: $Id$
# License::
# You can redistribute it and/or modify it under the same term as Ruby.
#
@@ -58,7 +57,7 @@ module URI
# :startdoc:
end # REGEXP
- # class that Parses String's into URI's
+ # Class that parses String's into URI's.
#
# It contains a Hash set of patterns and Regexp's that match and validate.
#
@@ -68,7 +67,7 @@ module URI
#
# == Synopsis
#
- # URI::Parser.new([opts])
+ # URI::RFC2396_Parser.new([opts])
#
# == Args
#
@@ -87,13 +86,13 @@ module URI
#
# == Examples
#
- # p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
- # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
+ # p = URI::RFC2396_Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
+ # u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD>
# URI.parse(u.to_s) #=> raises URI::InvalidURIError
#
# s = "http://example.com/ABCD"
- # u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
- # u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
+ # u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
+ # u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
# u1 == u2 #=> true
# u1.eql?(u2) #=> false
#
@@ -109,15 +108,15 @@ module URI
# The Hash of patterns.
#
- # see also URI::Parser.initialize_pattern
+ # See also #initialize_pattern.
attr_reader :pattern
- # The Hash of Regexp
+ # The Hash of Regexp.
#
- # see also URI::Parser.initialize_regexp
+ # See also #initialize_regexp.
attr_reader :regexp
- # Returns a split URI against regexp[:ABS_URI]
+ # Returns a split URI against +regexp[:ABS_URI]+.
def split(uri)
case uri
when ''
@@ -141,11 +140,11 @@ module URI
if !scheme
raise InvalidURIError,
- "bad URI(absolute but no scheme): #{uri}"
+ "bad URI (absolute but no scheme): #{uri}"
end
if !opaque && (!path && (!host && !registry))
raise InvalidURIError,
- "bad URI(absolute but no path): #{uri}"
+ "bad URI (absolute but no path): #{uri}"
end
when @regexp[:REL_URI]
@@ -174,7 +173,7 @@ module URI
# server = [ [ userinfo "@" ] hostport ]
else
- raise InvalidURIError, "bad URI(is not URI?): #{uri}"
+ raise InvalidURIError, "bad URI (is not URI?): #{uri}"
end
path = '' if !path && !opaque # (see RFC2396 Section 5.2)
@@ -198,31 +197,18 @@ module URI
#
# == Description
#
- # parses +uri+ and constructs either matching URI scheme object
- # (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic
+ # Parses +uri+ and constructs either matching URI scheme object
+ # (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic.
#
# == Usage
#
- # p = URI::Parser.new
- # p.parse("ldap://ldap.example.com/dc=example?user=john")
- # #=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>
+ # URI::RFC2396_PARSER.parse("ldap://ldap.example.com/dc=example?user=john")
+ # #=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>
#
def parse(uri)
- scheme, userinfo, host, port,
- registry, path, opaque, query, fragment = self.split(uri)
-
- if scheme && URI.scheme_list.include?(scheme.upcase)
- URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
- registry, path, opaque, query,
- fragment, self)
- else
- Generic.new(scheme, userinfo, host, port,
- registry, path, opaque, query,
- fragment, self)
- end
+ URI.for(*self.split(uri), self)
end
-
#
# == Args
#
@@ -231,7 +217,7 @@ module URI
#
# == Description
#
- # Attempts to parse and merge a set of URIs
+ # Attempts to parse and merge a set of URIs.
#
def join(*uris)
uris[0] = convert_to_uri(uris[0])
@@ -253,11 +239,11 @@ module URI
#
# == Description
#
- # Attempts to parse and merge a set of URIs
- # If no +block+ given , then returns the result,
+ # Attempts to parse and merge a set of URIs.
+ # If no +block+ given, then returns the result,
# else it calls +block+ for each element in result.
#
- # see also URI::Parser.make_regexp
+ # See also #make_regexp.
#
def extract(str, schemes = nil)
if block_given?
@@ -270,13 +256,13 @@ module URI
end
end
- # returns Regexp that is default self.regexp[:ABS_URI_REF],
- # unless +schemes+ is provided. Then it is a Regexp.union with self.pattern[:X_ABS_URI]
+ # Returns Regexp that is default +self.regexp[:ABS_URI_REF]+,
+ # unless +schemes+ is provided. Then it is a Regexp.union with +self.pattern[:X_ABS_URI]+.
def make_regexp(schemes = nil)
unless schemes
@regexp[:ABS_URI_REF]
else
- /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
+ /(?=(?i:#{Regexp.union(*schemes).source}):)#{@pattern[:X_ABS_URI]}/x
end
end
@@ -290,11 +276,11 @@ module URI
# +str+::
# String to make safe
# +unsafe+::
- # Regexp to apply. Defaults to self.regexp[:UNSAFE]
+ # Regexp to apply. Defaults to +self.regexp[:UNSAFE]+
#
# == Description
#
- # constructs a safe String from +str+, removing unsafe characters,
+ # Constructs a safe String from +str+, removing unsafe characters,
# replacing them with codes.
#
def escape(str, unsafe = @regexp[:UNSAFE])
@@ -315,31 +301,39 @@ module URI
#
# :call-seq:
# unescape( str )
- # unescape( str, unsafe )
+ # unescape( str, escaped )
#
# == Args
#
# +str+::
# String to remove escapes from
- # +unsafe+::
- # Regexp to apply. Defaults to self.regexp[:ESCAPED]
+ # +escaped+::
+ # Regexp to apply. Defaults to +self.regexp[:ESCAPED]+
#
# == Description
#
- # Removes escapes from +str+
+ # Removes escapes from +str+.
#
def unescape(str, escaped = @regexp[:ESCAPED])
- str.gsub(escaped) { [$&[1, 2].hex].pack('C') }.force_encoding(str.encoding)
+ enc = str.encoding
+ enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
+ str.gsub(escaped) { [$&[1, 2]].pack('H2').force_encoding(enc) }
end
- @@to_s = Kernel.instance_method(:to_s)
- def inspect
- @@to_s.bind(self).call
+ TO_S = Kernel.instance_method(:to_s) # :nodoc:
+ if TO_S.respond_to?(:bind_call)
+ def inspect # :nodoc:
+ TO_S.bind_call(self)
+ end
+ else
+ def inspect # :nodoc:
+ TO_S.bind(self).call
+ end
end
private
- # Constructs the default Hash of patterns
+ # Constructs the default Hash of patterns.
def initialize_pattern(opts = {})
ret = {}
ret[:ESCAPED] = escaped = (opts.delete(:ESCAPED) || PATTERN::ESCAPED)
@@ -497,13 +491,13 @@ module URI
ret
end
- # Constructs the default Hash of Regexp's
+ # Constructs the default Hash of Regexp's.
def initialize_regexp(pattern)
ret = {}
# for URI::split
- ret[:ABS_URI] = Regexp.new('\A\s*' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
- ret[:REL_URI] = Regexp.new('\A\s*' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)
+ ret[:ABS_URI] = Regexp.new('\A\s*+' + pattern[:X_ABS_URI] + '\s*\z', Regexp::EXTENDED)
+ ret[:REL_URI] = Regexp.new('\A\s*+' + pattern[:X_REL_URI] + '\s*\z', Regexp::EXTENDED)
# for URI::extract
ret[:URI_REF] = Regexp.new(pattern[:URI_REF])
@@ -529,6 +523,8 @@ module URI
ret
end
+ # Returns +uri+ as-is if it is URI, or convert it to URI if it is
+ # a String.
def convert_to_uri(uri)
if uri.is_a?(URI::Generic)
uri
@@ -541,4 +537,11 @@ module URI
end
end # class Parser
+
+ # Backward compatibility for URI::REGEXP::PATTERN::*
+ RFC2396_Parser.new.pattern.each_pair do |sym, str|
+ unless RFC2396_REGEXP::PATTERN.const_defined?(sym, false)
+ RFC2396_REGEXP::PATTERN.const_set(sym, str)
+ end
+ end
end # module URI