diff options
author | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:02:05 +0000 |
---|---|---|
committer | yugui <yugui@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-25 15:02:05 +0000 |
commit | 0dc342de848a642ecce8db697b8fecd83a63e117 (patch) | |
tree | 2b7ed4724aff1f86073e4740134bda9c4aac1a39 /trunk/lib/uri | |
parent | ef70cf7138ab8034b5b806f466e4b484b24f0f88 (diff) |
added tag v1_9_0_4
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/tags/v1_9_0_4@18845 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'trunk/lib/uri')
-rw-r--r-- | trunk/lib/uri/.document | 7 | ||||
-rw-r--r-- | trunk/lib/uri/common.rb | 611 | ||||
-rw-r--r-- | trunk/lib/uri/ftp.rb | 198 | ||||
-rw-r--r-- | trunk/lib/uri/generic.rb | 1122 | ||||
-rw-r--r-- | trunk/lib/uri/http.rb | 100 | ||||
-rw-r--r-- | trunk/lib/uri/https.rb | 20 | ||||
-rw-r--r-- | trunk/lib/uri/ldap.rb | 190 | ||||
-rw-r--r-- | trunk/lib/uri/ldaps.rb | 12 | ||||
-rw-r--r-- | trunk/lib/uri/mailto.rb | 266 |
9 files changed, 2526 insertions, 0 deletions
diff --git a/trunk/lib/uri/.document b/trunk/lib/uri/.document new file mode 100644 index 0000000000..214dd2e48f --- /dev/null +++ b/trunk/lib/uri/.document @@ -0,0 +1,7 @@ +common.rb +ftp.rb +generic.rb +http.rb +https.rb +ldap.rb +mailto.rb diff --git a/trunk/lib/uri/common.rb b/trunk/lib/uri/common.rb new file mode 100644 index 0000000000..f0d68884de --- /dev/null +++ b/trunk/lib/uri/common.rb @@ -0,0 +1,611 @@ +# = uri/common.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# Revision:: $Id$ +# License:: +# You can redistribute it and/or modify it under the same term as Ruby. +# + +module URI + module REGEXP + # + # Patterns used to parse URI's + # + module PATTERN + # :stopdoc: + + # RFC 2396 (URI Generic Syntax) + # RFC 2732 (IPv6 Literal Addresses in URL's) + # RFC 2373 (IPv6 Addressing Architecture) + + # alpha = lowalpha | upalpha + ALPHA = "a-zA-Z" + # alphanum = alpha | digit + ALNUM = "#{ALPHA}\\d" + + # hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | + # "a" | "b" | "c" | "d" | "e" | "f" + HEX = "a-fA-F\\d" + # escaped = "%" hex hex + ESCAPED = "%[#{HEX}]{2}" + # mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | + # "(" | ")" + # unreserved = alphanum | mark + UNRESERVED = "-_.!~*'()#{ALNUM}" + # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + # "$" | "," + # reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + # "$" | "," | "[" | "]" (RFC 2732) + RESERVED = ";/?:@&=+$,\\[\\]" + + # uric = reserved | unreserved | escaped + URIC = "(?:[#{UNRESERVED}#{RESERVED}]|#{ESCAPED})" + # uric_no_slash = unreserved | escaped | ";" | "?" 
| ":" | "@" | + # "&" | "=" | "+" | "$" | "," + URIC_NO_SLASH = "(?:[#{UNRESERVED};?:@&=+$,]|#{ESCAPED})" + # query = *uric + QUERY = "#{URIC}*" + # fragment = *uric + FRAGMENT = "#{URIC}*" + + # domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + DOMLABEL = "(?:[#{ALNUM}](?:[-#{ALNUM}]*[#{ALNUM}])?)" + # toplabel = alpha | alpha *( alphanum | "-" ) alphanum + TOPLABEL = "(?:[#{ALPHA}](?:[-#{ALNUM}]*[#{ALNUM}])?)" + # hostname = *( domainlabel "." ) toplabel [ "." ] + HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?" + + # RFC 2373, APPENDIX B: + # IPv6address = hexpart [ ":" IPv4address ] + # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + # hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ] + # hexseq = hex4 *( ":" hex4) + # hex4 = 1*4HEXDIG + # + # XXX: This definition has a flaw. "::" + IPv4address must be + # allowed too. Here is a replacement. + # + # IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + IPV4ADDR = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" + # hex4 = 1*4HEXDIG + HEX4 = "[#{HEX}]{1,4}" + # lastpart = hex4 | IPv4address + LASTPART = "(?:#{HEX4}|#{IPV4ADDR})" + # hexseq1 = *( hex4 ":" ) hex4 + HEXSEQ1 = "(?:#{HEX4}:)*#{HEX4}" + # hexseq2 = *( hex4 ":" ) lastpart + HEXSEQ2 = "(?:#{HEX4}:)*#{LASTPART}" + # IPv6address = hexseq2 | [ hexseq1 ] "::" [ hexseq2 ] + IPV6ADDR = "(?:#{HEXSEQ2}|(?:#{HEXSEQ1})?::(?:#{HEXSEQ2})?)" + + # IPv6prefix = ( hexseq1 | [ hexseq1 ] "::" [ hexseq1 ] ) "/" 1*2DIGIT + # unused + + # ipv6reference = "[" IPv6address "]" (RFC 2732) + IPV6REF = "\\[#{IPV6ADDR}\\]" + + # host = hostname | IPv4address + # host = hostname | IPv4address | IPv6reference (RFC 2732) + HOST = "(?:#{HOSTNAME}|#{IPV4ADDR}|#{IPV6REF})" + # port = *digit + PORT = '\d*' + # hostport = host [ ":" port ] + HOSTPORT = "#{HOST}(?::#{PORT})?" 
+ + # userinfo = *( unreserved | escaped | + # ";" | ":" | "&" | "=" | "+" | "$" | "," ) + USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})*" + + # pchar = unreserved | escaped | + # ":" | "@" | "&" | "=" | "+" | "$" | "," + PCHAR = "(?:[#{UNRESERVED}:@&=+$,]|#{ESCAPED})" + # param = *pchar + PARAM = "#{PCHAR}*" + # segment = *pchar *( ";" param ) + SEGMENT = "#{PCHAR}*(?:;#{PARAM})*" + # path_segments = segment *( "/" segment ) + PATH_SEGMENTS = "#{SEGMENT}(?:/#{SEGMENT})*" + + # server = [ [ userinfo "@" ] hostport ] + SERVER = "(?:#{USERINFO}@)?#{HOSTPORT}" + # reg_name = 1*( unreserved | escaped | "$" | "," | + # ";" | ":" | "@" | "&" | "=" | "+" ) + REG_NAME = "(?:[#{UNRESERVED}$,;:@&=+]|#{ESCAPED})+" + # authority = server | reg_name + AUTHORITY = "(?:#{SERVER}|#{REG_NAME})" + + # rel_segment = 1*( unreserved | escaped | + # ";" | "@" | "&" | "=" | "+" | "$" | "," ) + REL_SEGMENT = "(?:[#{UNRESERVED};@&=+$,]|#{ESCAPED})+" + + # scheme = alpha *( alpha | digit | "+" | "-" | "." ) + SCHEME = "[#{ALPHA}][-+.#{ALPHA}\\d]*" + + # abs_path = "/" path_segments + ABS_PATH = "/#{PATH_SEGMENTS}" + # rel_path = rel_segment [ abs_path ] + REL_PATH = "#{REL_SEGMENT}(?:#{ABS_PATH})?" + # net_path = "//" authority [ abs_path ] + NET_PATH = "//#{AUTHORITY}(?:#{ABS_PATH})?" + + # hier_part = ( net_path | abs_path ) [ "?" query ] + HIER_PART = "(?:#{NET_PATH}|#{ABS_PATH})(?:\\?(?:#{QUERY}))?" + # opaque_part = uric_no_slash *uric + OPAQUE_PART = "#{URIC_NO_SLASH}#{URIC}*" + + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + ABS_URI = "#{SCHEME}:(?:#{HIER_PART}|#{OPAQUE_PART})" + # relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + REL_URI = "(?:#{NET_PATH}|#{ABS_PATH}|#{REL_PATH})(?:\\?#{QUERY})?" + + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + URI_REF = "(?:#{ABS_URI}|#{REL_URI})?(?:##{FRAGMENT})?" 
+ + # XXX: + X_ABS_URI = " + (#{PATTERN::SCHEME}): (?# 1: scheme) + (?: + (#{PATTERN::OPAQUE_PART}) (?# 2: opaque) + | + (?:(?: + //(?: + (?:(?:(#{PATTERN::USERINFO})@)? (?# 3: userinfo) + (?:(#{PATTERN::HOST})(?::(\\d*))?))?(?# 4: host, 5: port) + | + (#{PATTERN::REG_NAME}) (?# 6: registry) + ) + | + (?!//)) (?# XXX: '//' is the mark for hostport) + (#{PATTERN::ABS_PATH})? (?# 7: path) + )(?:\\?(#{PATTERN::QUERY}))? (?# 8: query) + ) + (?:\\#(#{PATTERN::FRAGMENT}))? (?# 9: fragment) + " + X_REL_URI = " + (?: + (?: + // + (?: + (?:(#{PATTERN::USERINFO})@)? (?# 1: userinfo) + (#{PATTERN::HOST})?(?::(\\d*))? (?# 2: host, 3: port) + | + (#{PATTERN::REG_NAME}) (?# 4: registry) + ) + ) + | + (#{PATTERN::REL_SEGMENT}) (?# 5: rel_segment) + )? + (#{PATTERN::ABS_PATH})? (?# 6: abs_path) + (?:\\?(#{PATTERN::QUERY}))? (?# 7: query) + (?:\\#(#{PATTERN::FRAGMENT}))? (?# 8: fragment) + " + # :startdoc: + end # PATTERN + + # :stopdoc: + + # for URI::split + ABS_URI = Regexp.new('^' + PATTERN::X_ABS_URI + '$', #' + Regexp::EXTENDED).freeze + REL_URI = Regexp.new('^' + PATTERN::X_REL_URI + '$', #' + Regexp::EXTENDED).freeze + + # for URI::extract + URI_REF = Regexp.new(PATTERN::URI_REF).freeze + ABS_URI_REF = Regexp.new(PATTERN::X_ABS_URI, Regexp::EXTENDED).freeze + REL_URI_REF = Regexp.new(PATTERN::X_REL_URI, Regexp::EXTENDED).freeze + + # for URI::escape/unescape + ESCAPED = Regexp.new(PATTERN::ESCAPED).freeze + UNSAFE = Regexp.new("[^#{PATTERN::UNRESERVED}#{PATTERN::RESERVED}]").freeze + + # for Generic#initialize + SCHEME = Regexp.new("^#{PATTERN::SCHEME}$").freeze #" + USERINFO = Regexp.new("^#{PATTERN::USERINFO}$").freeze #" + HOST = Regexp.new("^#{PATTERN::HOST}$").freeze #" + PORT = Regexp.new("^#{PATTERN::PORT}$").freeze #" + OPAQUE = Regexp.new("^#{PATTERN::OPAQUE_PART}$").freeze #" + REGISTRY = Regexp.new("^#{PATTERN::REG_NAME}$").freeze #" + ABS_PATH = Regexp.new("^#{PATTERN::ABS_PATH}$").freeze #" + REL_PATH = Regexp.new("^#{PATTERN::REL_PATH}$").freeze #" + QUERY = 
Regexp.new("^#{PATTERN::QUERY}$").freeze #" + FRAGMENT = Regexp.new("^#{PATTERN::FRAGMENT}$").freeze #" + # :startdoc: + end # REGEXP + + module Util # :nodoc: + def make_components_hash(klass, array_hash) + tmp = {} + if array_hash.kind_of?(Array) && + array_hash.size == klass.component.size - 1 + klass.component[1..-1].each_index do |i| + begin + tmp[klass.component[i + 1]] = array_hash[i].clone + rescue TypeError + tmp[klass.component[i + 1]] = array_hash[i] + end + end + + elsif array_hash.kind_of?(Hash) + array_hash.each do |key, value| + begin + tmp[key] = value.clone + rescue TypeError + tmp[key] = value + end + end + else + raise ArgumentError, + "expected Array of or Hash of components of #{klass.to_s} (#{klass.component[1..-1].join(', ')})" + end + tmp[:scheme] = klass.to_s.sub(/\A.*::/, '').downcase + + return tmp + end + module_function :make_components_hash + end + + module Escape + include REGEXP + + # + # == Synopsis + # + # URI.escape(str [, unsafe]) + # + # == Args + # + # +str+:: + # String to replaces in. + # +unsafe+:: + # Regexp that matches all symbols that must be replaced with codes. + # By default uses <tt>REGEXP::UNSAFE</tt>. + # When this argument is a String, it represents a character set. + # + # == Description + # + # Escapes the string, replacing all unsafe characters with codes. 
+ # + # == Usage + # + # require 'uri' + # + # enc_uri = URI.escape("http://example.com/?a=\11\15") + # p enc_uri + # # => "http://example.com/?a=%09%0D" + # + # p URI.unescape(enc_uri) + # # => "http://example.com/?a=\t\r" + # + # p URI.escape("@?@!", "!?") + # # => "@%3F@%21" + # + def escape(str, unsafe = UNSAFE) + unless unsafe.kind_of?(Regexp) + # perhaps unsafe is String object + unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false, 'N') + end + str.gsub(unsafe) do + us = $& + tmp = '' + us.each_byte do |uc| + tmp << sprintf('%%%02X', uc) + end + tmp + end + end + alias encode escape + # + # == Synopsis + # + # URI.unescape(str) + # + # == Args + # + # +str+:: + # Unescapes the string. + # + # == Usage + # + # require 'uri' + # + # enc_uri = URI.escape("http://example.com/?a=\11\15") + # p enc_uri + # # => "http://example.com/?a=%09%0D" + # + # p URI.unescape(enc_uri) + # # => "http://example.com/?a=\t\r" + # + def unescape(str) + str.gsub(ESCAPED) do + $&[1,2].hex.chr + end + end + alias decode unescape + end + + include REGEXP + extend Escape + + @@schemes = {} + + # + # Base class for all URI exceptions. + # + class Error < StandardError; end + # + # Not a URI. + # + class InvalidURIError < Error; end + # + # Not a URI component. + # + class InvalidComponentError < Error; end + # + # URI is valid, bad usage is not. + # + class BadURIError < Error; end + + # + # == Synopsis + # + # URI::split(uri) + # + # == Args + # + # +uri+:: + # String with URI. 
+ # + # == Description + # + # Splits the string on following parts and returns array with result: + # + # * Scheme + # * Userinfo + # * Host + # * Port + # * Registry + # * Path + # * Opaque + # * Query + # * Fragment + # + # == Usage + # + # require 'uri' + # + # p URI.split("http://www.ruby-lang.org/") + # # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil] + # + def self.split(uri) + case uri + when '' + # null uri + + when ABS_URI + scheme, opaque, userinfo, host, port, + registry, path, query, fragment = $~[1..-1] + + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" query ] + # opaque_part = uric_no_slash *uric + + # abs_path = "/" path_segments + # net_path = "//" authority [ abs_path ] + + # authority = server | reg_name + # server = [ [ userinfo "@" ] hostport ] + + if !scheme + raise InvalidURIError, + "bad URI(absolute but no scheme): #{uri}" + end + if !opaque && (!path && (!host && !registry)) + raise InvalidURIError, + "bad URI(absolute but no path): #{uri}" + end + + when REL_URI + scheme = nil + opaque = nil + + userinfo, host, port, registry, + rel_segment, abs_path, query, fragment = $~[1..-1] + if rel_segment && abs_path + path = rel_segment + abs_path + elsif rel_segment + path = rel_segment + elsif abs_path + path = abs_path + end + + # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + + # relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] + + # net_path = "//" authority [ abs_path ] + # abs_path = "/" path_segments + # rel_path = rel_segment [ abs_path ] + + # authority = server | reg_name + # server = [ [ userinfo "@" ] hostport ] + + else + raise InvalidURIError, "bad URI(is not URI?): #{uri}" + end + + path = '' if !path && !opaque # (see RFC2396 Section 5.2) + ret = [ + scheme, + userinfo, host, port, # X + registry, # X + path, # Y + opaque, # Y + query, + fragment + ] + return ret + end + + # + # == Synopsis + # + # URI::parse(uri_str) + # + # == Args + # + # +uri_str+:: + # String with URI. + # + # == Description + # + # Creates one of the URI's subclasses instance from the string. + # + # == Raises + # + # URI::InvalidURIError + # Raised if URI given is not a correct one. + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse("http://www.ruby-lang.org/") + # p uri + # # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/> + # p uri.scheme + # # => "http" + # p uri.host + # # => "www.ruby-lang.org" + # + def self.parse(uri) + scheme, userinfo, host, port, + registry, path, opaque, query, fragment = self.split(uri) + + if scheme && @@schemes.include?(scheme.upcase) + @@schemes[scheme.upcase].new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment) + else + Generic.new(scheme, userinfo, host, port, + registry, path, opaque, query, + fragment) + end + end + + # + # == Synopsis + # + # URI::join(str[, str, ...]) + # + # == Args + # + # +str+:: + # String(s) to work with + # + # == Description + # + # Joins URIs. + # + # == Usage + # + # require 'uri' + # + # p URI.join("http://localhost/","main.rbx") + # # => #<URI::HTTP:0x2022ac02 URL:http://localhost/main.rbx> + # + def self.join(*str) + u = self.parse(str[0]) + str[1 .. -1].each do |x| + u = u.merge(x) + end + u + end + + # + # == Synopsis + # + # URI::extract(str[, schemes][,&blk]) + # + # == Args + # + # +str+:: + # String to extract URIs from. 
+ # +schemes+:: + # Limit URI matching to a specific schemes. + # + # == Description + # + # Extracts URIs from a string. If block given, iterates through all matched URIs. + # Returns nil if block given or array with matches. + # + # == Usage + # + # require "uri" + # + # URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.") + # # => ["http://foo.example.com/bla", "mailto:test@example.com"] + # + def self.extract(str, schemes = nil, &block) + if block_given? + str.scan(regexp(schemes)) { yield $& } + nil + else + result = [] + str.scan(regexp(schemes)) { result.push $& } + result + end + end + + # + # == Synopsis + # + # URI::regexp([match_schemes]) + # + # == Args + # + # +match_schemes+:: + # Array of schemes. If given, resulting regexp matches to URIs + # whose scheme is one of the match_schemes. + # + # == Description + # Returns a Regexp object which matches to URI-like strings. + # The Regexp object returned by this method includes arbitrary + # number of capture group (parentheses). Never rely on it's number. + # + # == Usage + # + # require 'uri' + # + # # extract first URI from html_string + # html_string.slice(URI.regexp) + # + # # remove ftp URIs + # html_string.sub(URI.regexp(['ftp']) + # + # # You should not rely on the number of parentheses + # html_string.scan(URI.regexp) do |*matches| + # p $& + # end + # + def self.regexp(schemes = nil) + unless schemes + ABS_URI_REF + else + /(?=#{Regexp.union(*schemes)}:)#{PATTERN::X_ABS_URI}/xn + end + end + +end + +module Kernel + # alias for URI.parse. + # + # This method is introduced at 1.8.2. 
+ def URI(uri_str) # :doc: + URI.parse(uri_str) + end + module_function :URI +end diff --git a/trunk/lib/uri/ftp.rb b/trunk/lib/uri/ftp.rb new file mode 100644 index 0000000000..3afdce01b4 --- /dev/null +++ b/trunk/lib/uri/ftp.rb @@ -0,0 +1,198 @@ +# +# = uri/ftp.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id$ +# + +require 'uri/generic' + +module URI + + # + # FTP URI syntax is defined by RFC1738 section 3.2. + # + class FTP < Generic + DEFAULT_PORT = 21 + + COMPONENT = [ + :scheme, + :userinfo, :host, :port, + :path, :typecode + ].freeze + # + # Typecode is "a", "i" or "d". + # + # * "a" indicates a text file (the FTP command was ASCII) + # * "i" indicates a binary file (FTP command IMAGE) + # * "d" indicates the contents of a directory should be displayed + # + TYPECODE = ['a', 'i', 'd'].freeze + TYPECODE_PREFIX = ';type='.freeze + + def self.new2(user, password, host, port, path, + typecode = nil, arg_check = true) + typecode = nil if typecode.size == 0 + if typecode && !TYPECODE.include?(typecode) + raise ArgumentError, + "bad typecode is specified: #{typecode}" + end + + # do escape + + self.new('ftp', + [user, password], + host, port, nil, + typecode ? path + TYPECODE_PREFIX + typecode : path, + nil, nil, nil, arg_check) + end + + # + # == Description + # + # Creates a new URI::FTP object from components, with syntax checking. + # + # The components accepted are +userinfo+, +host+, +port+, +path+ and + # +typecode+. + # + # The components should be provided either as an Array, or as a Hash + # with keys formed by preceding the component names with a colon. + # + # If an Array is used, the components must be passed in the order + # [userinfo, host, port, path, typecode] + # + # If the path supplied is absolute, it will be escaped in order to + # make it absolute in the URI. 
# Examples:
#
#     require 'uri'
#
#     uri = URI::FTP.build(['user:password', 'ftp.example.com', nil,
#       '/path/file.zip', 'i'])
#     puts uri.to_s  ->  ftp://user:password@ftp.example.com/%2Fpath/file.zip;type=i
#
#     uri2 = URI::FTP.build({:host => 'ftp.example.com',
#       :path => 'ruby/src'})
#     puts uri2.to_s  ->  ftp://ftp.example.com/ruby/src
#
# The +args+ container itself is never modified, so the same Array or Hash
# can safely be reused for several calls. A missing (nil) path is treated
# as an empty path.
#
# Anything that is neither an Array nor a Hash is handed on unchanged to
# Util::make_components_hash, which rejects it with an ArgumentError.
#
def self.build(args)

  # Fix the incoming path to be generic URL syntax
  # FTP path  ->  URL path
  # foo/bar       /foo/bar
  # /foo/bar      /%2Ffoo/bar
  #
  # Work on a shallow copy: assigning the rewritten path into the caller's
  # own container would escape the path a second time on the next call.
  if args.kind_of?(Array)
    args = args.dup
    args[3] = '/' + args[3].to_s.sub(/\A\//, '%2F')
  elsif args.kind_of?(Hash)
    args = args.dup
    args[:path] = '/' + args[:path].to_s.sub(/\A\//, '%2F')
  end

  tmp = Util::make_components_hash(self, args)

  if tmp[:typecode]
    # A bare one-letter typecode gets the ";type=" prefix before it is
    # appended to the path; a longer value is appended as given.
    if tmp[:typecode].size == 1
      tmp[:typecode] = TYPECODE_PREFIX + tmp[:typecode]
    end
    tmp[:path] << tmp[:typecode]
  end

  return super(tmp)
end

#
# == Description
#
# Creates a new URI::FTP object from generic URL components with no
# syntax checking.
#
# Unlike build(), this method does not escape the path component as
# required by RFC1738; instead it is treated as per RFC2396.
#
# Arguments are +scheme+, +userinfo+, +host+, +port+, +registry+, +path+,
# +opaque+, +query+ and +fragment+, in that order.
#
def initialize(*arg)
  super(*arg)
  @typecode = nil
  # @path can still be nil here: Generic#initialize only defaults it to ''
  # when the URI has no opaque part.
  tmp = @path && @path.index(TYPECODE_PREFIX)
  if tmp
    typecode = @path[tmp + TYPECODE_PREFIX.size..-1]
    # Slice by length: @path[0..tmp - 1] would keep the whole path when
    # the ";type=" prefix sits at index 0.
    self.set_path(@path[0, tmp])

    # arg_check is the optional tenth positional argument. With the
    # nine-argument form arg[-1] is the fragment, so index it explicitly.
    if arg[9]
      self.typecode = typecode
    else
      self.set_typecode(typecode)
    end
  end
end
attr_reader :typecode

# Returns true when +v+ is one of TYPECODE; raises InvalidComponentError
# otherwise.
def check_typecode(v)
  if TYPECODE.include?(v)
    return true
  else
    raise InvalidComponentError,
      "bad typecode(expected #{TYPECODE.join(', ')}): #{v}"
  end
end
private :check_typecode

# Stores +v+ as the typecode without validating it.
def set_typecode(v)
  @typecode = v
end
protected :set_typecode

# Validates +typecode+ with check_typecode, stores it and returns it.
def typecode=(typecode)
  check_typecode(typecode)
  set_typecode(typecode)
  typecode
end

def merge(oth) # :nodoc:
  tmp = super(oth)
  # +oth+ may be a String or a URI without a typecode; only carry the
  # typecode over when the other side actually provides one.
  # NOTE(review): the merged URI otherwise keeps whatever typecode it
  # already had -- confirm this is the intended result.
  if self != tmp && oth.respond_to?(:typecode)
    tmp.set_typecode(oth.typecode)
  end

  return tmp
end

# Returns the path from an FTP URI.
#
# RFC 1738 specifically states that the path for an FTP URI does not
# include the / which separates the URI path from the URI host. Example:
#
#     ftp://ftp.example.com/pub/ruby
#
# The above URI indicates that the client should connect to
# ftp.example.com then cd pub/ruby from the initial login directory.
#
# If you want to cd to an absolute directory, you must include an
# escaped / (%2F) in the path.
Example: + # + # ftp://ftp.example.com/%2Fpub/ruby + # + # This method will then return "/pub/ruby" + # + def path + return @path.sub(/^\//,'').sub(/^%2F/,'/') + end + + def to_s + save_path = nil + if @typecode + save_path = @path + @path = @path + TYPECODE_PREFIX + @typecode + end + str = super + if @typecode + @path = save_path + end + + return str + end + end + @@schemes['FTP'] = FTP +end diff --git a/trunk/lib/uri/generic.rb b/trunk/lib/uri/generic.rb new file mode 100644 index 0000000000..d907e0b4b2 --- /dev/null +++ b/trunk/lib/uri/generic.rb @@ -0,0 +1,1122 @@ +# +# = uri/generic.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id$ +# + +require 'uri/common' + +module URI + + # + # Base class for all URI classes. + # Implements generic URI syntax as per RFC 2396. + # + class Generic + include URI + include REGEXP + + DEFAULT_PORT = nil + + # + # Returns default port + # + def self.default_port + self::DEFAULT_PORT + end + + def default_port + self.class.default_port + end + + COMPONENT = [ + :scheme, + :userinfo, :host, :port, :registry, + :path, :opaque, + :query, + :fragment + ].freeze + + # + # Components of the URI in the order. + # + def self.component + self::COMPONENT + end + + USE_REGISTRY = false + + # + # DOC: FIXME! + # + def self.use_registry + self::USE_REGISTRY + end + + # + # == Synopsis + # + # See #new + # + # == Description + # + # At first, tries to create a new URI::Generic instance using + # URI::Generic::build. But, if exception URI::InvalidComponentError is raised, + # then it URI::Escape.escape all URI components and tries again. 
#
#
def self.build2(args)
  self.build(args)
rescue InvalidComponentError
  # Retry once with every String component escaped. Non-String values
  # (nil, an Integer port, ...) are passed through untouched, because
  # URI.escape only works on Strings.
  escape_component = lambda { |v| v.kind_of?(String) ? URI.escape(v) : v }

  if args.kind_of?(Array)
    self.build(args.collect { |v| escape_component.call(v) })
  elsif args.kind_of?(Hash)
    escaped = {}
    args.each do |key, value|
      escaped[key] = escape_component.call(value)
    end
    self.build(escaped)
  end
end

#
# == Synopsis
#
# See #new
#
# == Description
#
# Creates a new URI::Generic instance from components of URI::Generic
# with check. Components are: scheme, userinfo, host, port, registry, path,
# opaque, query and fragment. You can provide arguments either by an Array or a Hash.
# See #new for hash keys to use or for order of array items.
#
# The Array or Hash passed in is not modified.
#
# Raises ArgumentError when +args+ is neither a Hash nor an Array holding
# exactly one entry per generic component.
#
def self.build(args)
  if args.kind_of?(Array) &&
      args.size == ::URI::Generic::COMPONENT.size
    # Copy first: the arg_check flag appended below must not leak into
    # the caller's array.
    tmp = args.dup
  elsif args.kind_of?(Hash)
    tmp = ::URI::Generic::COMPONENT.collect do |c|
      args.include?(c) ? args[c] : nil
    end
  else
    # Inside a class method self.class is Class, so name the receiver
    # itself and list the components the size check above is based on.
    raise ArgumentError,
      "expected Array of or Hash of components of #{self} (#{::URI::Generic::COMPONENT.join(', ')})"
  end

  # Trailing true switches on argument checking in #new.
  tmp << true
  return self.new(*tmp)
end
#
# == Args
#
# +scheme+::
#   Protocol scheme, i.e. 'http','ftp','mailto' and so on.
# +userinfo+::
#   User name and password, i.e. 'sdmitry:bla'
# +host+::
#   Server host name
# +port+::
#   Server port
# +registry+::
#   DOC: FIXME!
# +path+::
#   Path on server
# +opaque+::
#   DOC: FIXME!
# +query+::
#   Query data
# +fragment+::
#   A part of URI after '#' sign
# +arg_check+::
#   Check arguments [false by default]
#
# == Description
#
# Creates a new URI::Generic instance from ``generic'' components without check.
+ # + def initialize(scheme, + userinfo, host, port, registry, + path, opaque, + query, + fragment, + arg_check = false) + @scheme = nil + @user = nil + @password = nil + @host = nil + @port = nil + @path = nil + @query = nil + @opaque = nil + @registry = nil + @fragment = nil + + if arg_check + self.scheme = scheme + self.userinfo = userinfo + self.host = host + self.port = port + self.path = path + self.query = query + self.opaque = opaque + self.registry = registry + self.fragment = fragment + else + self.set_scheme(scheme) + self.set_userinfo(userinfo) + self.set_host(host) + self.set_port(port) + self.set_path(path) + self.set_query(query) + self.set_opaque(opaque) + self.set_registry(registry) + self.set_fragment(fragment) + end + if @registry && !self.class.use_registry + raise InvalidURIError, + "the scheme #{@scheme} does not accept registry part: #{@registry} (or bad hostname?)" + end + + @scheme.freeze if @scheme + self.set_path('') if !@path && !@opaque # (see RFC2396 Section 5.2) + self.set_port(self.default_port) if self.default_port && !@port + end + attr_reader :scheme + attr_reader :host + attr_reader :port + attr_reader :registry + attr_reader :path + attr_reader :query + attr_reader :opaque + attr_reader :fragment + + # replace self by other URI object + def replace!(oth) + if self.class != oth.class + raise ArgumentError, "expected #{self.class} object" + end + + component.each do |c| + self.__send__("#{c}=", oth.__send__(c)) + end + end + private :replace! 
+ + def component + self.class.component + end + + def check_scheme(v) + if v && SCHEME !~ v + raise InvalidComponentError, + "bad component(expected scheme component): #{v}" + end + + return true + end + private :check_scheme + + def set_scheme(v) + @scheme = v + end + protected :set_scheme + + def scheme=(v) + check_scheme(v) + set_scheme(v) + v + end + + def check_userinfo(user, password = nil) + if !password + user, password = split_userinfo(user) + end + check_user(user) + check_password(password, user) + + return true + end + private :check_userinfo + + def check_user(v) + if @registry || @opaque + raise InvalidURIError, + "can not set user with registry or opaque" + end + + return v unless v + + if USERINFO !~ v + raise InvalidComponentError, + "bad component(expected userinfo component or user component): #{v}" + end + + return true + end + private :check_user + + def check_password(v, user = @user) + if @registry || @opaque + raise InvalidURIError, + "can not set password with registry or opaque" + end + return v unless v + + if !user + raise InvalidURIError, + "password component depends user component" + end + + if USERINFO !~ v + raise InvalidComponentError, + "bad component(expected user component): #{v}" + end + + return true + end + private :check_password + + # + # Sets userinfo, argument is string like 'name:pass' + # + def userinfo=(userinfo) + if userinfo.nil? 
+ return nil + end + check_userinfo(*userinfo) + set_userinfo(*userinfo) + # returns userinfo + end + + def user=(user) + check_user(user) + set_user(user) + # returns user + end + + def password=(password) + check_password(password) + set_password(password) + # returns password + end + + def set_userinfo(user, password = nil) + unless password + user, password = split_userinfo(user) + end + @user = user + @password = password if password + + [@user, @password] + end + protected :set_userinfo + + def set_user(v) + set_userinfo(v, @password) + v + end + protected :set_user + + def set_password(v) + @password = v + # returns v + end + protected :set_password + + def split_userinfo(ui) + return nil, nil unless ui + user, password = ui.split(/:/, 2) + + return user, password + end + private :split_userinfo + + def escape_userpass(v) + v = URI.escape(v, /[@:\/]/o) # RFC 1738 section 3.1 #/ + end + private :escape_userpass + + def userinfo + if @user.nil? + nil + elsif @password.nil? + @user + else + @user + ':' + @password + end + end + + def user + @user + end + + def password + @password + end + + def check_host(v) + return v unless v + + if @registry || @opaque + raise InvalidURIError, + "can not set host with registry or opaque" + elsif HOST !~ v + raise InvalidComponentError, + "bad component(expected host component): #{v}" + end + + return true + end + private :check_host + + def set_host(v) + @host = v + end + protected :set_host + + def host=(v) + check_host(v) + set_host(v) + v + end + + def check_port(v) + return v unless v + + if @registry || @opaque + raise InvalidURIError, + "can not set port with registry or opaque" + elsif !v.kind_of?(Fixnum) && PORT !~ v + raise InvalidComponentError, + "bad component(expected port component): #{v}" + end + + return true + end + private :check_port + + def set_port(v) + unless !v || v.kind_of?(Fixnum) + if v.empty? 
+ v = nil + else + v = v.to_i + end + end + @port = v + end + protected :set_port + + def port=(v) + check_port(v) + set_port(v) + port + end + + def check_registry(v) + return v unless v + + # raise if both server and registry are not nil, because: + # authority = server | reg_name + # server = [ [ userinfo "@" ] hostport ] + if @host || @port || @user # userinfo = @user + ':' + @password + raise InvalidURIError, + "can not set registry with host, port, or userinfo" + elsif v && REGISTRY !~ v + raise InvalidComponentError, + "bad component(expected registry component): #{v}" + end + + return true + end + private :check_registry + + def set_registry(v) + @registry = v + end + protected :set_registry + + def registry=(v) + check_registry(v) + set_registry(v) + v + end + + def check_path(v) + # raise if both hier and opaque are not nil, because: + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" query ] + if v && @opaque + raise InvalidURIError, + "path conflicts with opaque" + end + + if @scheme + if v && v != '' && ABS_PATH !~ v + raise InvalidComponentError, + "bad component(expected absolute path component): #{v}" + end + else + if v && v != '' && ABS_PATH !~ v && REL_PATH !~ v + raise InvalidComponentError, + "bad component(expected relative path component): #{v}" + end + end + + return true + end + private :check_path + + def set_path(v) + @path = v + end + protected :set_path + + def path=(v) + check_path(v) + set_path(v) + v + end + + def check_query(v) + return v unless v + + # raise if both hier and opaque are not nil, because: + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" 
query ] + if @opaque + raise InvalidURIError, + "query conflicts with opaque" + end + + if v && v != '' && QUERY !~ v + raise InvalidComponentError, + "bad component(expected query component): #{v}" + end + + return true + end + private :check_query + + def set_query(v) + @query = v + end + protected :set_query + + def query=(v) + check_query(v) + set_query(v) + v + end + + def check_opaque(v) + return v unless v + + # raise if both hier and opaque are not nil, because: + # absoluteURI = scheme ":" ( hier_part | opaque_part ) + # hier_part = ( net_path | abs_path ) [ "?" query ] + if @host || @port || @user || @path # userinfo = @user + ':' + @password + raise InvalidURIError, + "can not set opaque with host, port, userinfo or path" + elsif v && OPAQUE !~ v + raise InvalidComponentError, + "bad component(expected opaque component): #{v}" + end + + return true + end + private :check_opaque + + def set_opaque(v) + @opaque = v + end + protected :set_opaque + + def opaque=(v) + check_opaque(v) + set_opaque(v) + v + end + + def check_fragment(v) + return v unless v + + if v && v != '' && FRAGMENT !~ v + raise InvalidComponentError, + "bad component(expected fragment component): #{v}" + end + + return true + end + private :check_fragment + + def set_fragment(v) + @fragment = v + end + protected :set_fragment + + def fragment=(v) + check_fragment(v) + set_fragment(v) + v + end + + # + # Checks if URI has a path + # + def hierarchical? + if @path + true + else + false + end + end + + # + # Checks if URI is an absolute one + # + def absolute? + if @scheme + true + else + false + end + end + alias absolute absolute? + + # + # Checks if URI is relative + # + def relative? + !absolute? 
+ end + + def split_path(path) + path.split(%r{/+}, -1) + end + private :split_path + + def merge_path(base, rel) + + # RFC2396, Section 5.2, 5) + # RFC2396, Section 5.2, 6) + base_path = split_path(base) + rel_path = split_path(rel) + + # RFC2396, Section 5.2, 6), a) + base_path << '' if base_path.last == '..' + while i = base_path.index('..') + base_path.slice!(i - 1, 2) + end + + if (first = rel_path.first) and first.empty? + base_path.clear + rel_path.shift + end + + # RFC2396, Section 5.2, 6), c) + # RFC2396, Section 5.2, 6), d) + rel_path.push('') if rel_path.last == '.' || rel_path.last == '..' + rel_path.delete('.') + + # RFC2396, Section 5.2, 6), e) + tmp = [] + rel_path.each do |x| + if x == '..' && + !(tmp.empty? || tmp.last == '..') + tmp.pop + else + tmp << x + end + end + + add_trailer_slash = !tmp.empty? + if base_path.empty? + base_path = [''] # keep '/' for root directory + elsif add_trailer_slash + base_path.pop + end + while x = tmp.shift + if x == '..' + # RFC2396, Section 4 + # a .. or . in an absolute path has no special meaning + base_path.pop if base_path.size > 1 + else + # if x == '..' + # valid absolute (but abnormal) path "/../..." + # else + # valid absolute path + # end + base_path << x + tmp.each {|t| base_path << t} + add_trailer_slash = false + break + end + end + base_path.push('') if add_trailer_slash + + return base_path.join('/') + end + private :merge_path + + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Destructive form of #merge + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse("http://my.example.com") + # uri.merge!("/main.rbx?page=1") + # p uri + # # => #<URI::HTTP:0x2021f3b0 URL:http://my.example.com/main.rbx?page=1> + # + def merge!(oth) + t = merge(oth) + if self == t + nil + else + replace!(t) + self + end + end + + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Merges two URI's. 
+ # + # == Usage + # + # require 'uri' + # + # uri = URI.parse("http://my.example.com") + # p uri.merge("/main.rbx?page=1") + # # => #<URI::HTTP:0x2021f3b0 URL:http://my.example.com/main.rbx?page=1> + # + def merge(oth) + begin + base, rel = merge0(oth) + rescue + raise $!.class, $!.message + end + + if base == rel + return base + end + + authority = rel.userinfo || rel.host || rel.port + + # RFC2396, Section 5.2, 2) + if (rel.path.nil? || rel.path.empty?) && !authority && !rel.query + base.set_fragment(rel.fragment) if rel.fragment + return base + end + + base.set_query(nil) + base.set_fragment(nil) + + # RFC2396, Section 5.2, 4) + if !authority + base.set_path(merge_path(base.path, rel.path)) if base.path && rel.path + else + # RFC2396, Section 5.2, 4) + base.set_path(rel.path) if rel.path + end + + # RFC2396, Section 5.2, 7) + base.set_userinfo(rel.userinfo) if rel.userinfo + base.set_host(rel.host) if rel.host + base.set_port(rel.port) if rel.port + base.set_query(rel.query) if rel.query + base.set_fragment(rel.fragment) if rel.fragment + + return base + end # merge + alias + merge + + # return base and rel. + # you can modify `base', but can not `rel'. + def merge0(oth) + case oth + when Generic + when String + oth = URI.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + if self.relative? && oth.relative? + raise BadURIError, + "both URI are relative" + end + + if self.absolute? && oth.absolute? + #raise BadURIError, + # "both URI are absolute" + # hmm... should return oth for usability? + return oth, oth + end + + if self.absolute? + return self.dup, oth + else + return oth, oth + end + end + private :merge0 + + def route_from_path(src, dst) + # RFC2396, Section 4.2 + return '' if src == dst + + src_path = split_path(src) + dst_path = split_path(dst) + + # hmm... dst has abnormal absolute path, + # like "/./", "/../", "/x/../", ... 
+ if dst_path.include?('..') || + dst_path.include?('.') + return dst.dup + end + + src_path.pop + + # discard same parts + while dst_path.first == src_path.first + break if dst_path.empty? + + src_path.shift + dst_path.shift + end + + tmp = dst_path.join('/') + + # calculate + if src_path.empty? + if tmp.empty? + return './' + elsif dst_path.first.include?(':') # (see RFC2396 Section 5) + return './' + tmp + else + return tmp + end + end + + return '../' * src_path.size + tmp + end + private :route_from_path + + def route_from0(oth) + case oth + when Generic + when String + oth = URI.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + if self.relative? + raise BadURIError, + "relative URI: #{self}" + end + if oth.relative? + raise BadURIError, + "relative URI: #{oth}" + end + + if self.scheme != oth.scheme + return self, self.dup + end + rel = URI::Generic.new(nil, # it is relative URI + self.userinfo, self.host, self.port, + self.registry, self.path, self.opaque, + self.query, self.fragment) + + if rel.userinfo != oth.userinfo || + rel.host.to_s.downcase != oth.host.to_s.downcase || + rel.port != oth.port + if self.userinfo.nil? && self.host.nil? + return self, self.dup + end + rel.set_port(nil) if rel.port == oth.default_port + return rel, rel + end + rel.set_userinfo(nil) + rel.set_host(nil) + rel.set_port(nil) + + if rel.path && rel.path == oth.path + rel.set_path('') + rel.set_query(nil) if rel.query == oth.query + return rel, rel + elsif rel.opaque && rel.opaque == oth.opaque + rel.set_opaque('') + rel.set_query(nil) if rel.query == oth.query + return rel, rel + end + + # you can modify `rel', but can not `oth'. 
+ return oth, rel + end + private :route_from0 + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Calculates relative path from oth to self + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://my.example.com/main.rbx?page=1') + # p uri.route_from('http://my.example.com') + # #=> #<URI::Generic:0x20218858 URL:/main.rbx?page=1> + # + def route_from(oth) + # you can modify `rel', but can not `oth'. + begin + oth, rel = route_from0(oth) + rescue + raise $!.class, $!.message + end + if oth == rel + return rel + end + + rel.set_path(route_from_path(oth.path, self.path)) + if rel.path == './' && self.query + # "./?foo" -> "?foo" + rel.set_path('') + end + + return rel + end + + alias - route_from + + # + # == Args + # + # +oth+:: + # URI or String + # + # == Description + # + # Calculates relative path to oth from self + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://my.example.com') + # p uri.route_to('http://my.example.com/main.rbx?page=1') + # #=> #<URI::Generic:0x2020c2f6 URL:/main.rbx?page=1> + # + def route_to(oth) + case oth + when Generic + when String + oth = URI.parse(oth) + else + raise ArgumentError, + "bad argument(expected URI object or URI string)" + end + + oth.route_from(self) + end + + # + # Returns normalized URI + # + def normalize + uri = dup + uri.normalize! + uri + end + + # + # Destructive version of #normalize + # + def normalize! + if path && path == '' + set_path('/') + end + if host && host != host.downcase + set_host(self.host.downcase) + end + end + + def path_query + str = @path + if @query + str += '?' 
+ @query + end + str + end + private :path_query + + # + # Constructs String from URI + # + def to_s + str = '' + if @scheme + str << @scheme + str << ':' + end + + if @opaque + str << @opaque + + else + if @registry + str << @registry + else + if @host + str << '//' + end + if self.userinfo + str << self.userinfo + str << '@' + end + if @host + str << @host + end + if @port && @port != self.default_port + str << ':' + str << @port.to_s + end + end + + str << path_query + end + + if @fragment + str << '#' + str << @fragment + end + + str + end + + # + # Compares to URI's + # + def ==(oth) + if self.class == oth.class + self.normalize.component_ary == oth.normalize.component_ary + else + false + end + end + + def hash + self.component_ary.hash + end + + def eql?(oth) + self.component_ary.eql?(oth.component_ary) + end + +=begin + +--- URI::Generic#===(oth) + +=end +# def ===(oth) +# raise NotImplementedError +# end + +=begin +=end + def component_ary + component.collect do |x| + self.send(x) + end + end + protected :component_ary + + # == Args + # + # +components+:: + # Multiple Symbol arguments defined in URI::HTTP + # + # == Description + # + # Selects specified components from URI + # + # == Usage + # + # require 'uri' + # + # uri = URI.parse('http://myuser:mypass@my.example.com/test.rbx') + # p uri.select(:userinfo, :host, :path) + # # => ["myuser:mypass", "my.example.com", "/test.rbx"] + # + def select(*components) + components.collect do |c| + if component.include?(c) + self.send(c) + else + raise ArgumentError, + "expected of components of #{self.class} (#{self.class.component.join(', ')})" + end + end + end + + @@to_s = Kernel.instance_method(:to_s) + def inspect + @@to_s.bind(self).call.sub!(/>\z/) {" URL:#{self}>"} + end + + def coerce(oth) + case oth + when String + oth = URI.parse(oth) + else + super + end + + return oth, self + end + end +end diff --git a/trunk/lib/uri/http.rb b/trunk/lib/uri/http.rb new file mode 100644 index 0000000000..87eb8893f2 --- 
/dev/null +++ b/trunk/lib/uri/http.rb @@ -0,0 +1,100 @@ +# +# = uri/http.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id$ +# + +require 'uri/generic' + +module URI + + # + # The syntax of HTTP URIs is defined in RFC1738 section 3.3. + # + # Note that the Ruby URI library allows HTTP URLs containing usernames and + # passwords. This is not legal as per the RFC, but used to be + # supported in Internet Explorer 5 and 6, before the MS04-004 security + # update. See <URL:http://support.microsoft.com/kb/834489>. + # + class HTTP < Generic + DEFAULT_PORT = 80 + + COMPONENT = [ + :scheme, + :userinfo, :host, :port, + :path, + :query, + :fragment + ].freeze + + # + # == Description + # + # Create a new URI::HTTP object from components, with syntax checking. + # + # The components accepted are userinfo, host, port, path, query and + # fragment. + # + # The components should be provided either as an Array, or as a Hash + # with keys formed by preceding the component names with a colon. + # + # If an Array is used, the components must be passed in the order + # [userinfo, host, port, path, query, fragment]. + # + # Example: + # + # newuri = URI::HTTP.build({:host => 'www.example.com', + # :path> => '/foo/bar'}) + # + # newuri = URI::HTTP.build([nil, "www.example.com", nil, "/path", + # "query", 'fragment']) + # + # Currently, if passed userinfo components this method generates + # invalid HTTP URIs as per RFC 1738. + # + def self.build(args) + tmp = Util::make_components_hash(self, args) + return super(tmp) + end + + # + # == Description + # + # Create a new URI::HTTP object from generic URI components as per + # RFC 2396. No HTTP-specific syntax checking (as per RFC 1738) is + # performed. + # + # Arguments are +scheme+, +userinfo+, +host+, +port+, +registry+, +path+, + # +opaque+, +query+ and +fragment+, in that order. 
+ # + # Example: + # + # uri = URI::HTTP.new(['http', nil, "www.example.com", nil, "/path", + # "query", 'fragment']) + # + def initialize(*arg) + super(*arg) + end + + # + # == Description + # + # Returns the full path for an HTTP request, as required by Net::HTTP::Get. + # + # If the URI contains a query, the full path is URI#path + '?' + URI#query. + # Otherwise, the path is simply URI#path. + # + def request_uri + r = path_query + if r[0] != ?/ + r = '/' + r + end + + r + end + end + + @@schemes['HTTP'] = HTTP +end diff --git a/trunk/lib/uri/https.rb b/trunk/lib/uri/https.rb new file mode 100644 index 0000000000..9761636304 --- /dev/null +++ b/trunk/lib/uri/https.rb @@ -0,0 +1,20 @@ +# +# = uri/https.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id$ +# + +require 'uri/http' + +module URI + + # The default port for HTTPS URIs is 443, and the scheme is 'https:' rather + # than 'http:'. Other than that, HTTPS URIs are identical to HTTP URIs; + # see URI::HTTP. + class HTTPS < HTTP + DEFAULT_PORT = 443 + end + @@schemes['HTTPS'] = HTTPS +end diff --git a/trunk/lib/uri/ldap.rb b/trunk/lib/uri/ldap.rb new file mode 100644 index 0000000000..163d2cda24 --- /dev/null +++ b/trunk/lib/uri/ldap.rb @@ -0,0 +1,190 @@ +# +# = uri/ldap.rb +# +# Author:: +# Takaaki Tateishi <ttate@jaist.ac.jp> +# Akira Yamada <akira@ruby-lang.org> +# License:: +# URI::LDAP is copyrighted free software by Takaaki Tateishi and Akira Yamada. +# You can redistribute it and/or modify it under the same term as Ruby. 
+# Revision:: $Id$ +# + +require 'uri/generic' + +module URI + + # + # LDAP URI SCHEMA (described in RFC2255) + # ldap://<host>/<dn>[?<attrs>[?<scope>[?<filter>[?<extensions>]]]] + # + class LDAP < Generic + + DEFAULT_PORT = 389 + + COMPONENT = [ + :scheme, + :host, :port, + :dn, + :attributes, + :scope, + :filter, + :extensions, + ].freeze + + SCOPE = [ + SCOPE_ONE = 'one', + SCOPE_SUB = 'sub', + SCOPE_BASE = 'base', + ].freeze + + def self.build(args) + tmp = Util::make_components_hash(self, args) + + if tmp[:dn] + tmp[:path] = tmp[:dn] + end + + query = [] + [:extensions, :filter, :scope, :attributes].collect do |x| + next if !tmp[x] && query.size == 0 + query.unshift(tmp[x]) + end + + tmp[:query] = query.join('?') + + return super(tmp) + end + + def initialize(*arg) + super(*arg) + + if @fragment + raise InvalidURIError, 'bad LDAP URL' + end + + parse_dn + parse_query + end + + def parse_dn + @dn = @path[1..-1] + end + private :parse_dn + + def parse_query + @attributes = nil + @scope = nil + @filter = nil + @extensions = nil + + if @query + attrs, scope, filter, extensions = @query.split('?') + + @attributes = attrs if attrs && attrs.size > 0 + @scope = scope if scope && scope.size > 0 + @filter = filter if filter && filter.size > 0 + @extensions = extensions if extensions && extensions.size > 0 + end + end + private :parse_query + + def build_path_query + @path = '/' + @dn + + query = [] + [@extensions, @filter, @scope, @attributes].each do |x| + next if !x && query.size == 0 + query.unshift(x) + end + @query = query.join('?') + end + private :build_path_query + + def dn + @dn + end + + def set_dn(val) + @dn = val + build_path_query + @dn + end + protected :set_dn + + def dn=(val) + set_dn(val) + val + end + + def attributes + @attributes + end + + def set_attributes(val) + @attributes = val + build_path_query + @attributes + end + protected :set_attributes + + def attributes=(val) + set_attributes(val) + val + end + + def scope + @scope + end + + def 
set_scope(val) + @scope = val + build_path_query + @scope + end + protected :set_scope + + def scope=(val) + set_scope(val) + val + end + + def filter + @filter + end + + def set_filter(val) + @filter = val + build_path_query + @filter + end + protected :set_filter + + def filter=(val) + set_filter(val) + val + end + + def extensions + @extensions + end + + def set_extensions(val) + @extensions = val + build_path_query + @extensions + end + protected :set_extensions + + def extensions=(val) + set_extensions(val) + val + end + + def hierarchical? + false + end + end + + @@schemes['LDAP'] = LDAP +end diff --git a/trunk/lib/uri/ldaps.rb b/trunk/lib/uri/ldaps.rb new file mode 100644 index 0000000000..6da333150f --- /dev/null +++ b/trunk/lib/uri/ldaps.rb @@ -0,0 +1,12 @@ +require 'uri/ldap' + +module URI + + # The default port for LDAPS URIs is 636, and the scheme is 'ldaps:' rather + # than 'ldap:'. Other than that, LDAPS URIs are identical to LDAP URIs; + # see URI::LDAP. + class LDAPS < LDAP + DEFAULT_PORT = 636 + end + @@schemes['LDAPS'] = LDAPS +end diff --git a/trunk/lib/uri/mailto.rb b/trunk/lib/uri/mailto.rb new file mode 100644 index 0000000000..3a9d15318f --- /dev/null +++ b/trunk/lib/uri/mailto.rb @@ -0,0 +1,266 @@ +# +# = uri/mailto.rb +# +# Author:: Akira Yamada <akira@ruby-lang.org> +# License:: You can redistribute it and/or modify it under the same term as Ruby. +# Revision:: $Id$ +# + +require 'uri/generic' + +module URI + + # + # RFC2368, The mailto URL scheme + # + class MailTo < Generic + include REGEXP + + DEFAULT_PORT = nil + + COMPONENT = [ :scheme, :to, :headers ].freeze + + # :stopdoc: + # "hname" and "hvalue" are encodings of an RFC 822 header name and + # value, respectively. As with "to", all URL reserved characters must + # be encoded. + # + # "#mailbox" is as specified in RFC 822 [RFC822]. This means that it + # consists of zero or more comma-separated mail addresses, possibly + # including "phrase" and "comment" components. 
Note that all URL + # reserved characters in "to" must be encoded: in particular, + # parentheses, commas, and the percent sign ("%"), which commonly occur + # in the "mailbox" syntax. + # + # Within mailto URLs, the characters "?", "=", "&" are reserved. + + # hname = *urlc + # hvalue = *urlc + # header = hname "=" hvalue + HEADER_PATTERN = "(?:[^?=&]*=[^?=&]*)".freeze + HEADER_REGEXP = Regexp.new(HEADER_PATTERN, 'N').freeze + # headers = "?" header *( "&" header ) + # to = #mailbox + # mailtoURL = "mailto:" [ to ] [ headers ] + MAILBOX_PATTERN = "(?:#{PATTERN::ESCAPED}|[^(),%?=&])".freeze + MAILTO_REGEXP = Regexp.new(" # :nodoc: + \\A + (#{MAILBOX_PATTERN}*?) (?# 1: to) + (?: + \\? + (#{HEADER_PATTERN}(?:\\&#{HEADER_PATTERN})*) (?# 2: headers) + )? + (?: + \\# + (#{PATTERN::FRAGMENT}) (?# 3: fragment) + )? + \\z + ", Regexp::EXTENDED).freeze + # :startdoc: + + # + # == Description + # + # Creates a new URI::MailTo object from components, with syntax checking. + # + # Components can be provided as an Array or Hash. If an Array is used, + # the components must be supplied as [to, headers]. + # + # If a Hash is used, the keys are the component names preceded by colons. 
+ # + # The headers can be supplied as a pre-encoded string, such as + # "subject=subscribe&cc=address", or as an Array of Arrays like + # [['subject', 'subscribe'], ['cc', 'address']] + # + # Examples: + # + # require 'uri' + # + # m1 = URI::MailTo.build(['joe@example.com', 'subject=Ruby']) + # puts m1.to_s -> mailto:joe@example.com?subject=Ruby + # + # m2 = URI::MailTo.build(['john@example.com', [['Subject', 'Ruby'], ['Cc', 'jack@example.com']]]) + # puts m2.to_s -> mailto:john@example.com?Subject=Ruby&Cc=jack@example.com + # + # m3 = URI::MailTo.build({:to => 'listman@example.com', :headers => [['subject', 'subscribe']]}) + # puts m3.to_s -> mailto:listman@example.com?subject=subscribe + # + def self.build(args) + tmp = Util::make_components_hash(self, args) + + if tmp[:to] + tmp[:opaque] = tmp[:to] + else + tmp[:opaque] = '' + end + + if tmp[:headers] + tmp[:opaque] << '?' + + if tmp[:headers].kind_of?(Array) + tmp[:opaque] << tmp[:headers].collect { |x| + if x.kind_of?(Array) + x[0] + '=' + x[1..-1].to_s + else + x.to_s + end + }.join('&') + + elsif tmp[:headers].kind_of?(Hash) + tmp[:opaque] << tmp[:headers].collect { |h,v| + h + '=' + v + }.join('&') + + else + tmp[:opaque] << tmp[:headers].to_s + end + end + + return super(tmp) + end + + # + # == Description + # + # Creates a new URI::MailTo object from generic URL components with + # no syntax checking. + # + # This method is usually called from URI::parse, which checks + # the validity of each component. 
+ # + def initialize(*arg) + super(*arg) + + @to = nil + @headers = [] + + if MAILTO_REGEXP =~ @opaque + if arg[-1] + self.to = $1 + self.headers = $2 + else + set_to($1) + set_headers($2) + end + + else + raise InvalidComponentError, + "unrecognised opaque part for mailtoURL: #{@opaque}" + end + end + + # The primary e-mail address of the URL, as a String + attr_reader :to + + # E-mail headers set by the URL, as an Array of Arrays + attr_reader :headers + + def check_to(v) + return true unless v + return true if v.size == 0 + + if OPAQUE !~ v || /\A#{MAILBOX_PATTERN}*\z/o !~ v + raise InvalidComponentError, + "bad component(expected opaque component): #{v}" + end + + return true + end + private :check_to + + def set_to(v) + @to = v + end + protected :set_to + + def to=(v) + check_to(v) + set_to(v) + v + end + + def check_headers(v) + return true unless v + return true if v.size == 0 + + if OPAQUE !~ v || + /\A(#{HEADER_PATTERN}(?:\&#{HEADER_PATTERN})*)\z/o !~ v + raise InvalidComponentError, + "bad component(expected opaque component): #{v}" + end + + return true + end + private :check_headers + + def set_headers(v) + @headers = [] + if v + v.scan(HEADER_REGEXP) do |x| + @headers << x.split(/=/o, 2) + end + end + end + protected :set_headers + + def headers=(v) + check_headers(v) + set_headers(v) + v + end + + def to_s + @scheme + ':' + + if @to + @to + else + '' + end + + if @headers.size > 0 + '?' + @headers.collect{|x| x.join('=')}.join('&') + else + '' + end + + if @fragment + '#' + @fragment + else + '' + end + end + + # Returns the RFC822 e-mail text equivalent of the URL, as a String. 
+ # + # Example: + # + # require 'uri' + # + # uri = URI.parse("mailto:ruby-list@ruby-lang.org?Subject=subscribe&cc=myaddr") + # uri.to_mailtext + # # => "To: ruby-list@ruby-lang.org\nSubject: subscribe\nCc: myaddr\n\n\n" + # + def to_mailtext + to = URI::unescape(@to) + head = '' + body = '' + @headers.each do |x| + case x[0] + when 'body' + body = URI::unescape(x[1]) + when 'to' + to << ', ' + URI::unescape(x[1]) + else + head << URI::unescape(x[0]).capitalize + ': ' + + URI::unescape(x[1]) + "\n" + end + end + + return "To: #{to} +#{head} +#{body} +" + end + alias to_rfc822text to_mailtext + end + + @@schemes['MAILTO'] = MailTo +end |