* lib/uri.rb: Documented (thanks Dmitry V. Sabanin).

* lib/uri/common.rb: Ditto. * lib/uri/ftp.rb: Ditto. * lib/uri/generic.rb: Ditto. * lib/uri/http.rb: Ditto. * lib/uri/https.rb: Ditto. * lib/uri/ldap.rb: Ditto. * lib/uri/mailto.rb: Ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6015 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-03-24 11:53:31 +0000
committer: gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-03-24 11:53:31 +0000
commit: 42ad5216ec57a5890eb20230efb09e1aa53ed528 (patch)
tree: e165885640ee8aee2f1b0c76d5a295acb75430c9 /lib/uri/common.rb
parent: 5e23ff603f53c99ac45e331e24498bfa124b4d72 (diff)
1 files changed, 281 insertions, 126 deletions
diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index c9b3e0de70..175ef97304 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -1,19 +1,19 @@
+# = uri/common.rb
 #
-# $Id$
+# Author:: Akira Yamada <akira@ruby-lang.org>
+# Revision:: $Id$
+# License:: 
+#   You can redistribute it and/or modify it under the same term as Ruby.
 #
-# Copyright (c) 2001 akira yamada <akira@ruby-lang.org>
-# You can redistribute it and/or modify it under the same term as Ruby.
-#
-
-=begin
-
-== URI
-
-=end
 
 module URI
   module REGEXP
+    #
+    # Patterns used to parse URIs
+    #
     module PATTERN
+      # :stopdoc:
+
       # RFC 2396 (URI Generic Syntax)
       # RFC 2732 (IPv6 Literal Addresses in URL's)
       # RFC 2373 (IPv6 Addressing Architecture)
@@ -143,23 +143,23 @@ module URI
 
       # XXX:
       X_ABS_URI = "
-	(#{PATTERN::SCHEME}):                     (?# 1: scheme)
-	(?:
-	   (#{PATTERN::OPAQUE_PART})              (?# 2: opaque)
-	|
-	   (?:(?:
-	     //(?:
-		 (?:(?:(#{PATTERN::USERINFO})@)?  (?# 3: userinfo)
-		   (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
-	       |
-		 (#{PATTERN::REG_NAME})           (?# 6: registry)
-	       )
-	     |
-	     (?!//))                              (?# XXX: '//' is the mark for hostport)
-	     (#{PATTERN::ABS_PATH})?              (?# 7: path)
-	   )(?:\\?(#{PATTERN::QUERY}))?           (?# 8: query)
-	)
-	(?:\\#(#{PATTERN::FRAGMENT}))?            (?# 9: fragment)
+        (#{PATTERN::SCHEME}):                     (?# 1: scheme)
+        (?:
+           (#{PATTERN::OPAQUE_PART})              (?# 2: opaque)
+        |
+           (?:(?:
+             //(?:
+                 (?:(?:(#{PATTERN::USERINFO})@)?  (?# 3: userinfo)
+                   (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port)
+               |
+                 (#{PATTERN::REG_NAME})           (?# 6: registry)
+               )
+             |
+             (?!//))                              (?# XXX: '//' is the mark for hostport)
+             (#{PATTERN::ABS_PATH})?              (?# 7: path)
+           )(?:\\?(#{PATTERN::QUERY}))?           (?# 8: query)
+        )
+        (?:\\#(#{PATTERN::FRAGMENT}))?            (?# 9: fragment)
       "
       X_REL_URI = "
         (?:
@@ -179,13 +179,16 @@ module URI
         (?:\\?(#{PATTERN::QUERY}))?              (?# 7: query)
         (?:\\#(#{PATTERN::FRAGMENT}))?           (?# 8: fragment)
       "
+      # :startdoc:
     end # PATTERN
 
+    # :stopdoc:
+
     # for URI::split
     ABS_URI = Regexp.new('^' + PATTERN::X_ABS_URI + '$', #'
-			 Regexp::EXTENDED, 'N').freeze
+                         Regexp::EXTENDED, 'N').freeze
     REL_URI = Regexp.new('^' + PATTERN::X_REL_URI + '$', #'
-			 Regexp::EXTENDED, 'N').freeze
+                         Regexp::EXTENDED, 'N').freeze
 
     # for URI::extract
     URI_REF     = Regexp.new(PATTERN::URI_REF, false, 'N').freeze
@@ -195,7 +198,7 @@ module URI
     # for URI::escape/unescape
     ESCAPED = Regexp.new(PATTERN::ESCAPED, false, 'N').freeze
     UNSAFE  = Regexp.new("[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]",
-			 false, 'N').freeze
+                         false, 'N').freeze
 
     # for Generic#initialize
     SCHEME   = Regexp.new("^#{PATTERN::SCHEME}$", false, 'N').freeze #"
@@ -208,32 +211,33 @@ module URI
     REL_PATH = Regexp.new("^#{PATTERN::REL_PATH}$", false, 'N').freeze #"
     QUERY    = Regexp.new("^#{PATTERN::QUERY}$", false, 'N').freeze #"
     FRAGMENT = Regexp.new("^#{PATTERN::FRAGMENT}$", false, 'N').freeze #"
+    # :startdoc:
   end # REGEXP
 
   module Util
     def make_components_hash(klass, array_hash)
       tmp = {}
       if array_hash.kind_of?(Array) &&
-	  array_hash.size == klass.component.size - 1
-	klass.component[1..-1].each_index do |i|
-	  begin
-	    tmp[klass.component[i + 1]] = array_hash[i].clone
-	  rescue TypeError
-	    tmp[klass.component[i + 1]] = array_hash[i]
-	  end
-	end
+          array_hash.size == klass.component.size - 1
+        klass.component[1..-1].each_index do |i|
+          begin
+            tmp[klass.component[i + 1]] = array_hash[i].clone
+          rescue TypeError
+            tmp[klass.component[i + 1]] = array_hash[i]
+          end
+        end
 
       elsif array_hash.kind_of?(Hash)
-	array_hash.each do |key, value|
-	  begin
-	    tmp[key] = value.clone
-	  rescue TypeError
-	    tmp[key] = value
-	  end
-	end
+        array_hash.each do |key, value|
+          begin
+            tmp[key] = value.clone
+          rescue TypeError
+            tmp[key] = value
+          end
+        end
       else
-	raise ArgumentError, 
-	  "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
+        raise ArgumentError, 
+          "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})"
       end
       tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase
 
@@ -245,24 +249,72 @@ module URI
   module Escape
     include REGEXP
 
+    #
+    # == Synopsis
+    #
+    #   URI.escape(str [, unsafe])
+    #
+    # == Args
+    #
+    # +str+::
+    #   String to perform the replacements in.
+    # +unsafe+::
+    #   Regexp that matches all symbols that must be replaced with codes.
+    #   By default uses <tt>REGEXP::UNSAFE</tt>.
+    #
+    # == Description
+    #
+    # Escapes the string, replacing all unsafe characters with codes.
+    #
+    # == Usage
+    #
+    #   require 'uri'
+    #
+    #   enc_uri = URI.escape("http://foobar.com/?a=\11\15")
+    #   p enc_uri
+    #   # => "http://foobar.com/?a=%09%0D"
+    #
+    #   p URI.unescape(enc_uri)
+    #   # => "http://foobar.com/?a=\t\r"
+    #
     def escape(str, unsafe = UNSAFE)
       unless unsafe.kind_of?(Regexp)
-	# perhaps unsafe is String object
-	unsafe = Regexp.new(Regexp.quote(unsafe), false, 'N')
+        # perhaps unsafe is String object
+        unsafe = Regexp.new(Regexp.quote(unsafe), false, 'N')
       end
       str.gsub(unsafe) do |us|
-	tmp = ''
-	us.each_byte do |uc|
-	  tmp << sprintf('%%%02X', uc)
-	end
-	tmp
+        tmp = ''
+        us.each_byte do |uc|
+          tmp << sprintf('%%%02X', uc)
+        end
+        tmp
       end
     end
     alias encode escape
-
+    #
+    # == Synopsis
+    #
+    #   URI.unescape(str)
+    #
+    # == Args
+    #
+    # +str+::
+    #   String to unescape.
+    #
+    # == Usage
+    #
+    #   require 'uri'
+    #
+    #   enc_uri = URI.escape("http://foobar.com/?a=\11\15")
+    #   p enc_uri
+    #   # => "http://foobar.com/?a=%09%0D"
+    #
+    #   p URI.unescape(enc_uri)
+    #   # => "http://foobar.com/?a=\t\r"
+    #
     def unescape(str)
       str.gsub(ESCAPED) do
-	$&[1,2].hex.chr
+        $&[1,2].hex.chr
       end
     end
     alias decode unescape
@@ -272,20 +324,55 @@ module URI
   extend Escape
 
   @@schemes = {}
-
+  
+  #
+  # Base class for all URI exceptions.
+  #
   class Error < StandardError; end
-  class InvalidURIError < Error; end # it is not URI.
-  class InvalidComponentError < Error; end # it is not component of URI.
-  class BadURIError < Error; end # the URI is valid but it is bad for the position.
-
-=begin
-
-=== Methods
-
---- URI::split(uri)
-
-=end
-
+  #
+  # Not a URI.
+  #
+  class InvalidURIError < Error; end
+  #
+  # Not a URI component.
+  #
+  class InvalidComponentError < Error; end
+  #
+  # URI is valid, but it is bad for the position in which it is used.
+  #
+  class BadURIError < Error; end
+
+  #
+  # == Synopsis
+  #
+  #   URI::split(uri)
+  #
+  # == Args
+  #
+  # +uri+::
+  #   String with URI.
+  #
+  # == Description
+  #
+  # Splits the string into the following parts and returns an array with the result:
+  #
+  #   * Scheme
+  #   * Userinfo
+  #   * Host
+  #   * Port
+  #   * Registry
+  #   * Path
+  #   * Opaque
+  #   * Query
+  #   * Fragment
+  # 
+  # == Usage
+  #
+  #   require 'uri'
+  #
+  #   p URI.split("http://www.ruby-lang.org/")
+  #   # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]
+  #
   def self.split(uri)
     case uri
     when ''
@@ -293,7 +380,7 @@ module URI
 
     when ABS_URI
       scheme, opaque, userinfo, host, port, 
-	registry, path, query, fragment = $~[1..-1]
+        registry, path, query, fragment = $~[1..-1]
 
       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 
@@ -308,12 +395,12 @@ module URI
       # server        = [ [ userinfo "@" ] hostport ]
 
       if !scheme
-	raise InvalidURIError, 
-	  "bad URI(absolute but no scheme): #{uri}"
+        raise InvalidURIError, 
+          "bad URI(absolute but no scheme): #{uri}"
       end
       if !opaque && (!path && (!host && !registry))
-	raise InvalidURIError,
-	  "bad URI(absolute but no path): #{uri}" 
+        raise InvalidURIError,
+          "bad URI(absolute but no path): #{uri}" 
       end
 
     when REL_URI
@@ -321,13 +408,13 @@ module URI
       opaque = nil
 
       userinfo, host, port, registry, 
-	rel_segment, abs_path, query, fragment = $~[1..-1]
+        rel_segment, abs_path, query, fragment = $~[1..-1]
       if rel_segment && abs_path
-	path = rel_segment + abs_path
+        path = rel_segment + abs_path
       elsif rel_segment
-	path = rel_segment
+        path = rel_segment
       elsif abs_path
-	path = abs_path
+        path = abs_path
       end
 
       # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
@@ -348,41 +435,83 @@ module URI
     path = '' if !path && !opaque # (see RFC2396 Section 5.2)
     ret = [
       scheme, 
-      userinfo, host, port, 	# X
-      registry,			# X
-      path, 			# Y
-      opaque,			# Y
+      userinfo, host, port,         # X
+      registry,                        # X
+      path,                         # Y
+      opaque,                        # Y
       query,
       fragment
     ]
     return ret
   end
 
-=begin
-
---- URI::parse(uri_str)
-
-=end
+  #
+  # == Synopsis
+  #
+  #   URI::parse(uri_str)
+  #
+  # == Args
+  #
+  # +uri_str+::
+  #   String with URI.
+  #
+  # == Description
+  #
+  # Creates an instance of one of URI's subclasses from the string.
+  #  
+  # == Raises
+  #
+  # URI::InvalidURIError
+  #   Raised if the given URI is not valid.
+  #
+  # == Usage
+  #
+  #   require 'uri'
+  #
+  #   uri = URI.parse("http://www.ruby-lang.org/")
+  #   p uri
+  #   # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/>
+  #   p uri.scheme 
+  #   # => "http" 
+  #   p uri.host 
+  #   # => "www.ruby-lang.org" 
+  # 
   def self.parse(uri)
     scheme, userinfo, host, port, 
       registry, path, opaque, query, fragment = self.split(uri)
 
     if scheme && @@schemes.include?(scheme.upcase)
       @@schemes[scheme.upcase].new(scheme, userinfo, host, port, 
-				   registry, path, opaque, query, 
-				   fragment)
+                                   registry, path, opaque, query, 
+                                   fragment)
     else
       Generic.new(scheme, userinfo, host, port, 
-		  registry, path, opaque, query, 
-		  fragment)
+                  registry, path, opaque, query, 
+                  fragment)
     end
   end
 
-=begin
-
---- URI::join(str[, str, ...])
-
-=end
+  #
+  # == Synopsis
+  #
+  #   URI::join(str[, str, ...])
+  #
+  # == Args
+  #
+  # +str+::
+  #   String(s) to work with
+  #
+  # == Description
+  #
+  # Joins URIs.
+  #
+  # == Usage
+  #
+  #   require 'uri'
+  #
+  #   p URI.join("http://localhost/","main.rbx")
+  #   # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx>
+  #
   def self.join(*str)
     u = self.parse(str[0])
     str[1 .. -1].each do |x|
@@ -391,11 +520,30 @@ module URI
     u
   end
 
-=begin
-
---- URI::extract(str[, schemes])
-
-=end
+  #
+  # == Synopsis
+  #
+  #   URI::extract(str[, schemes][,&blk])
+  #
+  # == Args
+  #
+  # +str+:: 
+  #   String to extract URIs from.
+  # +schemes+::
+  #   Limit URI matching to specific schemes.
+  #
+  # == Description
+  #
+  # Extracts URIs from a string. If block given, iterates through all matched URIs.
+  # Returns nil if a block is given; otherwise returns an array with the matches.
+  #
+  # == Usage
+  #
+  #   require "uri"
+  #
+  #   URI.extract("text here http://foo.bar.org/bla and here mailto:test@ruby.com and here also.")
+  #   # => ["http://foo.bar.org/bla", "mailto:test@ruby.com"]
+  #
   def self.extract(str, schemes = nil, &block)
     if block_given?
       str.scan(regexp(schemes)) { yield $& }
@@ -407,30 +555,37 @@ module URI
     end
   end
 
-=begin
-
---- URI::regexp([match_schemes])
-
-    Returns a Regexp object which matches to URI-like strings.
-    If MATCH_SCHEMES given, resulting regexp matches to URIs
-    whose scheme is one of the MATCH_SCHEMES.
-
-    The Regexp object returned by this method includes arbitrary
-    number of capture group (parentheses).  Never rely on its
-    number.
-
-      # extract first URI from html_string
-      html_string.slice(URI.regexp)
-
-      # remove ftp URIs
-      html_string.sub(URI.regexp(['ftp'])
-
-      # You should not rely on the number of parentheses
-      html_string.scan(URI.regexp) do |*matches|
-        p $&
-      end
-
-=end
+  #
+  # == Synopsis
+  #
+  #   URI::regexp([match_schemes])
+  #
+  # == Args
+  #
+  # +match_schemes+:: 
+  #   Array of schemes. If given, resulting regexp matches to URIs
+  #   whose scheme is one of the match_schemes.
+  # 
+  # == Description
+  # Returns a Regexp object which matches to URI-like strings.
+  # The Regexp object returned by this method includes arbitrary
+  # number of capture group (parentheses).  Never rely on its number.
+  # 
+  # == Usage
+  #
+  #   require 'uri'
+  #
+  #   # extract first URI from html_string
+  #   html_string.slice(URI.regexp)
+  # 
+  #   # remove ftp URIs
+  #   html_string.sub(URI.regexp(['ftp']), '')
+  # 
+  #   # You should not rely on the number of parentheses
+  #   html_string.scan(URI.regexp) do |*matches|
+  #     p $&
+  #   end
+  #
   def self.regexp(schemes = nil)
     unless schemes
       ABS_URI_REF
@@ -439,4 +594,4 @@ module URI
     end
   end
 
-end # URI
+end
author	gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2004-03-24 11:53:31 +0000
committer	gsinclair <gsinclair@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2004-03-24 11:53:31 +0000
commit	42ad5216ec57a5890eb20230efb09e1aa53ed528 (patch)
tree	e165885640ee8aee2f1b0c76d5a295acb75430c9 /lib/uri/common.rb
parent	5e23ff603f53c99ac45e331e24498bfa124b4d72 (diff)