diff options
Diffstat (limited to 'lib/open-uri.rb')
| -rw-r--r-- | lib/open-uri.rb | 622 |
1 files changed, 419 insertions, 203 deletions
diff --git a/lib/open-uri.rb b/lib/open-uri.rb index d69f7dbe41..844865b13a 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -1,27 +1,29 @@ +# frozen_string_literal: true require 'uri' require 'stringio' require 'time' -module Kernel - private - alias open_uri_original_open open # :nodoc: - - # makes possible to open various resources including URIs. - # If the first argument respond to `open' method, - # the method is called with the rest arguments. +module URI + # Allows the opening of various resources including URIs. Example: + # + # require "open-uri" + # URI.open("http://example.com") { |f| f.read } + # + # If the first argument responds to the +open+ method, +open+ is called on + # it with the rest of the arguments. + # + # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by + # URI.parse. If the parsed object responds to the +open+ method, + # +open+ is called on it with the rest of the arguments. # - # If the first argument is a string which begins with xxx://, - # it is parsed by URI.parse. If the parsed object respond to `open' method, - # the method is called with the rest arguments. + # Otherwise, Kernel#open is called. # - # Otherwise original open is called. + # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and + # URI::FTP#open, Kernel#open. # - # Since open-uri.rb provides URI::HTTP#open, URI::HTTPS#open and - # URI::FTP#open, - # Kernel[#.]open can accepts such URIs and strings which begins with - # http://, https:// and ftp://. - # In these case, the opened file object is extended by OpenURI::Meta. - def open(name, *rest, &block) # :doc: + # We can accept URIs and strings that begin with <code>http://</code>, <code>https://</code> and + # <code>ftp://</code>. In these cases, the opened file object is extended by OpenURI::Meta. + def self.open(name, *rest, &block) if name.respond_to?(:open) name.open(*rest, &block) elsif name.respond_to?(:to_str) && @@ -29,26 +31,26 @@ module Kernel (uri = URI.parse(name)).respond_to?(:open) uri.open(*rest, &block) else - open_uri_original_open(name, *rest, &block) + super end end - module_function :open + singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true) end -# OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp. +# OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP. # -#== Example +# == Example # -# It is possible to open http/https/ftp URL as usual like opening a file: +# It is possible to open an http, https or ftp URL as though it were a file: # -# open("http://www.ruby-lang.org/") {|f| +# URI.open("http://www.ruby-lang.org/") {|f| # f.each_line {|line| p line} # } # -# The opened file has several methods for meta information as follows since -# it is extended by OpenURI::Meta. +# The opened file has several getter methods for its meta-information, as +# follows, since it is extended by OpenURI::Meta. # -# open("http://www.ruby-lang.org/en") {|f| +# URI.open("http://www.ruby-lang.org/en") {|f| # f.each_line {|line| p line} # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/> # p f.content_type # "text/html" @@ -59,7 +61,7 @@ end # # Additional header fields can be specified by an optional hash argument. # -# open("http://www.ruby-lang.org/en/", +# URI.open("http://www.ruby-lang.org/en/", # "User-Agent" => "Ruby/#{RUBY_VERSION}", # "From" => "foo@bar.invalid", # "Referer" => "http://www.ruby-lang.org/") {|f| @@ -67,12 +69,14 @@ end # } # # The environment variables such as http_proxy, https_proxy and ftp_proxy -# are in effect by default. :proxy => nil disables proxy. +# are in effect by default. Here we disable proxy: # -# open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f| +# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f| # # ... # } # +# See OpenURI::OpenRead.open and URI.open for more on available options. +# # URI objects can be opened in a similar way. # # uri = URI.parse("http://www.ruby-lang.org/en/") @@ -89,11 +93,28 @@ end # Author:: Tanaka Akira <akr@m17n.org> module OpenURI + + # The version string + VERSION = "0.5.0" + + # The default options Options = { :proxy => true, + :proxy_http_basic_authentication => true, :progress_proc => true, :content_length_proc => true, :http_basic_authentication => true, + :read_timeout => true, + :open_timeout => true, + :ssl_ca_cert => nil, + :ssl_verify_mode => nil, + :ssl_min_version => nil, + :ssl_max_version => nil, + :ftp_active_mode => false, + :redirect => true, + :encoding => nil, + :max_redirects => 64, + :request_specific_fields => nil, } def OpenURI.check_options(options) # :nodoc: @@ -117,12 +138,27 @@ module OpenURI def OpenURI.open_uri(name, *rest) # :nodoc: uri = URI::Generic === name ? name : URI.parse(name) - mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest) + mode, _, rest = OpenURI.scan_open_optional_arguments(*rest) options = rest.shift if !rest.empty? && Hash === rest.first raise ArgumentError.new("extra arguments") if !rest.empty? options ||= {} OpenURI.check_options(options) + if /\Arb?(?:\Z|:([^:]+))/ =~ mode + encoding, = $1,Encoding.find($1) if $1 + mode = nil + end + if options.has_key? :encoding + if !encoding.nil? + raise ArgumentError, "encoding specified twice" + end + encoding = Encoding.find(options[:encoding]) + end + if options.has_key? :request_specific_fields + if !(options[:request_specific_fields].is_a?(Hash) || options[:request_specific_fields].is_a?(Proc)) + raise ArgumentError, "Invalid request_specific_fields option: #{options[:request_specific_fields].inspect}" + end + end unless mode == nil || mode == 'r' || mode == 'rb' || mode == File::RDONLY @@ -130,11 +166,16 @@ module OpenURI end io = open_loop(uri, options) + io.set_encoding(encoding) if encoding if block_given? begin yield io ensure - io.close + if io.respond_to? :close! + io.close! # Tempfile + else + io.close if !io.closed? + end end else io @@ -142,26 +183,59 @@ module OpenURI end def OpenURI.open_loop(uri, options) # :nodoc: - case opt_proxy = options.fetch(:proxy, true) + proxy_opts = [] + proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication + proxy_opts << :proxy if options.include? :proxy + proxy_opts.compact! + if 1 < proxy_opts.length + raise ArgumentError, "multiple proxy options specified" + end + case proxy_opts.first + when :proxy_http_basic_authentication + opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication) + proxy_user = proxy_user.to_str + proxy_pass = proxy_pass.to_str + if opt_proxy == true + raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}") + end + when :proxy + opt_proxy = options.fetch(:proxy) + proxy_user = nil + proxy_pass = nil + when nil + opt_proxy = true + proxy_user = nil + proxy_pass = nil + end + case opt_proxy when true - find_proxy = lambda {|u| u.find_proxy} + find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil} when nil, false find_proxy = lambda {|u| nil} when String opt_proxy = URI.parse(opt_proxy) - find_proxy = lambda {|u| opt_proxy} + find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]} when URI::Generic - find_proxy = lambda {|u| opt_proxy} + find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]} else raise ArgumentError.new("Invalid proxy option: #{opt_proxy}") end uri_set = {} + max_redirects = options[:max_redirects] || Options.fetch(:max_redirects) buf = nil while true + request_specific_fields = {} + if options.has_key? :request_specific_fields + request_specific_fields = if options[:request_specific_fields].is_a?(Hash) + options[:request_specific_fields] + else options[:request_specific_fields].is_a?(Proc) + options[:request_specific_fields].call(uri) + end + end redirect = catch(:open_uri_redirect) { buf = Buffer.new - uri.buffer_open(buf, find_proxy.call(uri), options) + uri.buffer_open(buf, find_proxy.call(uri), options.merge(request_specific_fields)) nil } if redirect @@ -170,6 +244,9 @@ module OpenURI # URI. It is converted to absolute URI using uri as a base URI. redirect = uri + redirect end + if !options.fetch(:redirect, true) + raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect) + end unless OpenURI.redirectable?(uri, redirect) raise "redirection forbidden: #{uri} -> #{redirect}" end @@ -178,9 +255,14 @@ module OpenURI options = options.dup options.delete :http_basic_authentication end + if options.include?(:request_specific_fields) && options[:request_specific_fields].is_a?(Hash) + # Send request specific headers only for the initial request. + options.delete :request_specific_fields + end uri = redirect raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s uri_set[uri.to_s] = true + raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects else break end @@ -192,64 +274,86 @@ module OpenURI def OpenURI.redirectable?(uri1, uri2) # :nodoc: # This test is intended to forbid a redirection from http://... to - # file:///etc/passwd. + # file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521 + # https to http redirect is also forbidden intentionally. + # It avoids sending secure cookie or referer by non-secure HTTP protocol. + # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3) # However this is ad hoc. It should be extensible/configurable. uri1.scheme.downcase == uri2.scheme.downcase || - (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme) + (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:https?|ftp)\z/i =~ uri2.scheme) end def OpenURI.open_http(buf, target, proxy, options) # :nodoc: if proxy - raise "Non-HTTP proxy URI: #{proxy}" if proxy.class != URI::HTTP + proxy_uri, proxy_user, proxy_pass = proxy + raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP end - if target.userinfo && "1.9.0" <= RUBY_VERSION - # don't raise for 1.8 because compatibility. + if target.userinfo raise ArgumentError, "userinfo not supported. [RFC3986]" end + header = {} + options.each {|k, v| header[k] = v if String === k } + require 'net/http' klass = Net::HTTP if URI::HTTP === target # HTTP or HTTPS if proxy - klass = Net::HTTP::Proxy(proxy.host, proxy.port) + unless proxy_user && proxy_pass + proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo + end + if proxy_user && proxy_pass + klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass) + else + klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port) + end end - target_host = target.host + target_host = target.hostname target_port = target.port request_uri = target.request_uri else # FTP over HTTP proxy - target_host = proxy.host - target_port = proxy.port + target_host = proxy_uri.hostname + target_port = proxy_uri.port request_uri = target.to_s + if proxy_user && proxy_pass + header["Proxy-Authorization"] = + 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0') + end end - http = klass.new(target_host, target_port) + http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil) if target.class == URI::HTTPS require 'net/https' http.use_ssl = true - http.verify_mode = OpenSSL::SSL::VERIFY_PEER + http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER + http.min_version = options[:ssl_min_version] + http.max_version = options[:ssl_max_version] store = OpenSSL::X509::Store.new - store.set_default_paths + if options[:ssl_ca_cert] + Array(options[:ssl_ca_cert]).each do |cert| + if File.directory? cert + store.add_path cert + else + store.add_file cert + end + end + else + store.set_default_paths + end http.cert_store = store end - - header = {} - options.each {|k, v| header[k] = v if String === k } + if options.include? :read_timeout + http.read_timeout = options[:read_timeout] + end + if options.include? :open_timeout + http.open_timeout = options[:open_timeout] + end resp = nil http.start { - if target.class == URI::HTTPS - # xxx: information hiding violation - sock = http.instance_variable_get(:@socket) - if sock.respond_to?(:io) - sock = sock.io # 1.9 - else - sock = sock.instance_variable_get(:@socket) # 1.8 - end - sock.post_connection_check(target_host) - end req = Net::HTTP::Get.new(request_uri, header) if options.include? :http_basic_authentication user, pass = options[:http_basic_authentication] @@ -269,34 +373,58 @@ module OpenURI if options[:progress_proc] && Net::HTTPSuccess === resp options[:progress_proc].call(buf.size) end + str.clear } } } io = buf.io io.rewind io.status = [resp.code, resp.message] - resp.each {|name,value| buf.io.meta_add_field name, value } + resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) } case resp when Net::HTTPSuccess when Net::HTTPMovedPermanently, # 301 Net::HTTPFound, # 302 Net::HTTPSeeOther, # 303 - Net::HTTPTemporaryRedirect # 307 - throw :open_uri_redirect, URI.parse(resp['location']) + Net::HTTPTemporaryRedirect, # 307 + Net::HTTPPermanentRedirect # 308 + begin + loc_uri = URI.parse(resp['location']) + rescue URI::InvalidURIError + raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io) + end + throw :open_uri_redirect, loc_uri else raise OpenURI::HTTPError.new(io.status.join(' '), io) end end + # Raised on HTTP session failure class HTTPError < StandardError - def initialize(message, io) + def initialize(message, io) # :nodoc: super(message) @io = io end + # StringIO having the received data attr_reader :io end - class Buffer # :nodoc: + # Raised on redirection, + # only occurs when +redirect+ option for HTTP is +false+. + class HTTPRedirect < HTTPError + def initialize(message, io, uri) # :nodoc: + super(message, io) + @uri = uri + end + # URI to redirect + attr_reader :uri + end + + # Raised on too many redirection, + class TooManyRedirects < HTTPError + end + + class Buffer # :nodoc: all def initialize @io = StringIO.new @size = 0 @@ -311,73 +439,113 @@ module OpenURI require 'tempfile' io = Tempfile.new('open-uri') io.binmode - Meta.init io, @io if @io.respond_to? :meta + Meta.init io, @io if Meta === @io io << @io.string @io = io end end def io - Meta.init @io unless @io.respond_to? :meta + Meta.init @io unless Meta === @io @io end end + # :stopdoc: + RE_LWS = /[\r\n\t ]+/n + RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n + RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n + RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n + # :startdoc: + # Mixin for holding meta-information. module Meta def Meta.init(obj, src=nil) # :nodoc: obj.extend Meta obj.instance_eval { @base_uri = nil - @meta = {} + @meta = {} # name to string. legacy. + @metas = {} # name to array of strings. } if src obj.status = src.status obj.base_uri = src.base_uri - src.meta.each {|name, value| - obj.meta_add_field(name, value) + src.metas.each {|name, values| + obj.meta_add_field2(name, values) } end end - # returns an Array which consists status code and message. + # returns an Array that consists of status code and message. attr_accessor :status - # returns a URI which is base of relative URIs in the data. - # It may differ from the URI supplied by a user because redirection. + # returns a URI that is the base of relative URIs in the data. + # It may differ from the URI supplied by a user due to redirection. attr_accessor :base_uri - # returns a Hash which represents header fields. + # returns a Hash that represents header fields. # The Hash keys are downcased for canonicalization. + # The Hash values are a field body. + # If there are multiple field with same field name, + # the field values are concatenated with a comma. attr_reader :meta + # returns a Hash that represents header fields. + # The Hash keys are downcased for canonicalization. + # The Hash value are an array of field values. + attr_reader :metas + + def meta_setup_encoding # :nodoc: + charset = self.charset + enc = nil + if charset + begin + enc = Encoding.find(charset) + rescue ArgumentError + end + end + enc = Encoding::ASCII_8BIT unless enc + if self.respond_to? :force_encoding + self.force_encoding(enc) + elsif self.respond_to? :string + self.string.force_encoding(enc) + else # Tempfile + self.set_encoding enc + end + end + + def meta_add_field2(name, values) # :nodoc: + name = name.downcase + @metas[name] = values + @meta[name] = values.join(', ') + meta_setup_encoding if name == 'content-type' + end + def meta_add_field(name, value) # :nodoc: - @meta[name.downcase] = value + meta_add_field2(name, [value]) end - # returns a Time which represents Last-Modified field. + # returns a Time that represents the Last-Modified field. def last_modified - if v = @meta['last-modified'] + if vs = @metas['last-modified'] + v = vs.join(', ') Time.httpdate(v) else nil end end - RE_LWS = /[\r\n\t ]+/n - RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n - RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n - RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n - def content_type_parse # :nodoc: - v = @meta['content-type'] + vs = @metas['content-type'] # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045. - if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ v + if vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ') type = $1.downcase subtype = $2.downcase parameters = [] $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval| - val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/) { $1 ? $1[1,1] : $& } if qval + if qval + val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& } + end parameters << [att.downcase, val] } ["#{type}/#{subtype}", *parameters] @@ -390,7 +558,7 @@ module OpenURI # It is downcased for canonicalization. # Content-Type parameters are stripped. def content_type - type, *parameters = content_type_parse + type, *_ = content_type_parse type || 'application/octet-stream' end @@ -402,28 +570,28 @@ module OpenURI # It can be used to guess charset. # # If charset parameter and block is not given, - # nil is returned except text type in HTTP. - # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1. + # nil is returned except text type. + # In that case, "utf-8" is returned as defined by RFC6838 4.2.1 def charset type, *parameters = content_type_parse if pair = parameters.assoc('charset') pair.last.downcase elsif block_given? yield - elsif type && %r{\Atext/} =~ type && - @base_uri && /\Ahttp\z/i =~ @base_uri.scheme - "iso-8859-1" # RFC2616 3.7.1 + elsif type && %r{\Atext/} =~ type + "utf-8" # RFC6838 4.2.1 else nil end end - # returns a list of encodings in Content-Encoding field - # as an Array of String. + # Returns a list of encodings in Content-Encoding field as an array of + # strings. + # # The encodings are downcased for canonicalization. def content_encoding - v = @meta['content-encoding'] - if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v + vs = @metas['content-encoding'] + if vs && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ (v = vs.join(', ')) v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase} else [] @@ -436,22 +604,25 @@ module OpenURI # OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP. # # OpenURI::OpenRead#open takes optional 3 arguments as: - # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }] # - # `mode', `perm' is same as Kernel#open. + # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }] # - # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't + # OpenURI::OpenRead#open returns an IO-like object if block is not given. + # Otherwise it yields the IO object and return the value of the block. + # The IO object is extended with OpenURI::Meta. + # + # +mode+ and +perm+ are the same as Kernel#open. + # + # However, +mode+ must be read mode because OpenURI::OpenRead#open doesn't # support write mode (yet). - # Also `perm' is just ignored because it is meaningful only for file - # creation. + # Also +perm+ is ignored because it is meaningful only for file creation. # - # `options' must be a hash. + # +options+ must be a hash. # - # Each pairs which key is a string in the hash specify a extra header - # field for HTTP. - # I.e. it is ignored for FTP without HTTP proxy. + # Each option with a string key specifies an extra header field for HTTP. + # I.e., it is ignored for FTP without HTTP proxy. # - # The hash may include other options which key is a symbol: + # The hash may include other options, where keys are symbols: # # [:proxy] # Synopsis: @@ -460,16 +631,37 @@ module OpenURI # :proxy => true # :proxy => false # :proxy => nil - # + # # If :proxy option is specified, the value should be String, URI, # boolean or nil. + # # When String or URI is given, it is treated as proxy URI. + # # When true is given or the option itself is not specified, # environment variable `scheme_proxy' is examined. # `scheme' is replaced by `http', `https' or `ftp'. + # # When false or nil is given, the environment variables are ignored and # connection will be made to a server directly. # + # [:proxy_http_basic_authentication] + # Synopsis: + # :proxy_http_basic_authentication => + # ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"] + # :proxy_http_basic_authentication => + # [URI.parse("http://proxy.foo.com:8000/"), + # "proxy-user", "proxy-password"] + # + # If :proxy option is specified, the value should be an Array with 3 + # elements. It should contain a proxy URI, a proxy user name and a proxy + # password. The proxy URI should be a String, an URI or nil. The proxy + # user name and password should be a String. + # + # If nil is given for the proxy URI, this option is just ignored. + # + # If :proxy and :proxy_http_basic_authentication is specified, + # ArgumentError is raised. + # # [:http_basic_authentication] # Synopsis: # :http_basic_authentication=>[user, password] @@ -482,17 +674,16 @@ module OpenURI # [:content_length_proc] # Synopsis: # :content_length_proc => lambda {|content_length| ... } - # + # # If :content_length_proc option is specified, the option value procedure # is called before actual transfer is started. - # It takes one argument which is expected content length in bytes. - # - # If two or more transfer is done by HTTP redirection, the procedure - # is called only one for a last transfer. - # + # It takes one argument, which is expected content length in bytes. + # + # If two or more transfers are performed by HTTP redirection, the + # procedure is called only once for the last transfer. + # # When expected content length is unknown, the procedure is called with - # nil. - # It is happen when HTTP response has no Content-Length header. + # nil. This happens when the HTTP response has no Content-Length header. # # [:progress_proc] # Synopsis: @@ -500,7 +691,7 @@ module OpenURI # # If :progress_proc option is specified, the proc is called with one # argument each time when `open' gets content fragment from network. - # The argument `size' `size' is a accumulated transfered size in bytes. + # The argument +size+ is the accumulated transferred size in bytes. # # If two or more transfer is done by HTTP redirection, the procedure # is called only one for a last transfer. @@ -521,17 +712,112 @@ module OpenURI # pbar.set s if pbar # }) {|f| ... } # - # OpenURI::OpenRead#open returns an IO like object if block is not given. - # Otherwise it yields the IO object and return the value of the block. - # The IO object is extended with OpenURI::Meta. + # [:read_timeout] + # Synopsis: + # :read_timeout=>nil (no timeout) + # :read_timeout=>10 (10 second) + # + # :read_timeout option specifies a timeout of read for http connections. + # + # [:open_timeout] + # Synopsis: + # :open_timeout=>nil (no timeout) + # :open_timeout=>10 (10 second) + # + # :open_timeout option specifies a timeout of open for http connections. + # + # [:ssl_ca_cert] + # Synopsis: + # :ssl_ca_cert=>filename or an Array of filenames + # + # :ssl_ca_cert is used to specify CA certificate for SSL. + # If it is given, default certificates are not used. + # + # [:ssl_verify_mode] + # Synopsis: + # :ssl_verify_mode=>mode + # + # :ssl_verify_mode is used to specify openssl verify mode. + # + # [:ssl_min_version] + # Synopsis: + # :ssl_min_version=>:TLS1_2 + # + # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol + # version. See also OpenSSL::SSL::SSLContext#min_version=. + # + # [:ssl_max_version] + # Synopsis: + # :ssl_max_version=>:TLS1_2 + # + # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol + # version. See also OpenSSL::SSL::SSLContext#max_version=. + # + # [:ftp_active_mode] + # Synopsis: + # :ftp_active_mode=>bool + # + # <tt>:ftp_active_mode => true</tt> is used to make ftp active mode. + # Ruby 1.9 uses passive mode by default. + # Note that the active mode is default in Ruby 1.8 or prior. + # + # [:redirect] + # Synopsis: + # :redirect=>bool + # + # +:redirect+ is true by default. <tt>:redirect => false</tt> is used to + # disable all HTTP redirects. + # + # OpenURI::HTTPRedirect exception raised on redirection. + # Using +true+ also means that redirections between http and ftp are + # permitted. + # + # [:max_redirects] + # Synopsis: + # :max_redirects=>int + # + # Number of HTTP redirects allowed before OpenURI::TooManyRedirects is raised. + # The default is 64. + # + # [:request_specific_fields] + # Synopsis: + # :request_specific_fields => {} + # :request_specific_fields => lambda {|url| ...} + # + # :request_specific_fields option allows specifying custom header fields that + # are sent with the HTTP request. It can be passed as a Hash or a Proc that + # gets evaluated on each request and returns a Hash of header fields. + # + # If a Hash is provided, it specifies the headers only for the initial + # request and these headers will not be sent on redirects. + # + # If a Proc is provided, it will be executed for each request including + # redirects, allowing dynamic header customization based on the request URL. + # It is important that the Proc returns a Hash. And this Hash specifies the + # headers to be sent with the request. + # + # For Example with Hash + # URI.open("http://...", + # request_specific_fields: {"Authorization" => "token dummy"}) {|f| ... } + # + # For Example with Proc: + # URI.open("http://...", + # request_specific_fields: lambda { |uri| + # if uri.host == "example.com" + # {"Authorization" => "token dummy"} + # else + # {} + # end + # }) {|f| ... } + # def open(*rest, &block) OpenURI.open_uri(self, *rest, &block) end - # OpenURI::OpenRead#read([options]) reads a content referenced by self and + # OpenURI::OpenRead#read([ options ]) reads a content referenced by self and # returns the content as string. # The string is extended with OpenURI::Meta. - # The argument `options' is same as OpenURI::OpenRead#open. + # The argument +options+ is same as OpenURI::OpenRead#open. def read(options={}) self.open(options) {|f| str = f.read @@ -543,84 +829,6 @@ module OpenURI end module URI - class Generic - # returns a proxy URI. - # The proxy URI is obtained from environment variables such as http_proxy, - # ftp_proxy, no_proxy, etc. - # If there is no proper proxy, nil is returned. - # - # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.) - # are examined too. - # - # But http_proxy and HTTP_PROXY is treated specially under CGI environment. - # It's because HTTP_PROXY may be set by Proxy: header. - # So HTTP_PROXY is not used. - # http_proxy is not used too if the variable is case insensitive. - # CGI_HTTP_PROXY can be used instead. - def find_proxy - name = self.scheme.downcase + '_proxy' - proxy_uri = nil - if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI? - # HTTP_PROXY conflicts with *_proxy for proxy settings and - # HTTP_* for header information in CGI. - # So it should be careful to use it. - pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k } - case pairs.length - when 0 # no proxy setting anyway. - proxy_uri = nil - when 1 - k, v = pairs.shift - if k == 'http_proxy' && ENV[k.upcase] == nil - # http_proxy is safe to use because ENV is case sensitive. - proxy_uri = ENV[name] - else - proxy_uri = nil - end - else # http_proxy is safe to use because ENV is case sensitive. - proxy_uri = ENV[name] - end - if !proxy_uri - # Use CGI_HTTP_PROXY. cf. libwww-perl. - proxy_uri = ENV["CGI_#{name.upcase}"] - end - elsif name == 'http_proxy' - unless proxy_uri = ENV[name] - if proxy_uri = ENV[name.upcase] - warn 'The environment variable HTTP_PROXY is discouraged. Use http_proxy.' - end - end - else - proxy_uri = ENV[name] || ENV[name.upcase] - end - - if proxy_uri && self.host - require 'socket' - begin - addr = IPSocket.getaddress(self.host) - proxy_uri = nil if /\A127\.|\A::1\z/ =~ addr - rescue SocketError - end - end - - if proxy_uri - proxy_uri = URI.parse(proxy_uri) - name = 'no_proxy' - if no_proxy = ENV[name] || ENV[name.upcase] - no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port| - if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host && - (!port || self.port == port.to_i) - proxy_uri = nil - break - end - } - end - proxy_uri - else - nil - end - end - end - class HTTP def buffer_open(buf, proxy, options) # :nodoc: OpenURI.open_http(buf, self, proxy, options) @@ -635,10 +843,16 @@ module URI OpenURI.open_http(buf, self, proxy, options) return end - require 'net/ftp' - directories = self.path.split(%r{/}, -1) - directories.shift if directories[0] == '' # strip a field before leading slash + begin + require 'net/ftp' + rescue LoadError + abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp." + end + + path = self.path + path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it. + directories = path.split(%r{/}, -1) directories.each {|d| d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") } } @@ -659,7 +873,9 @@ module URI end # The access sequence is defined by RFC 1738 - ftp = Net::FTP.open(self.host) + ftp = Net::FTP.new + ftp.connect(self.hostname, self.port) + ftp.passive = !options[:ftp_active_mode] # todo: extract user/passwd from .netrc. user = 'anonymous' passwd = nil |
