diff options
Diffstat (limited to 'lib/open-uri.rb')
| -rw-r--r-- | lib/open-uri.rb | 175 |
1 file changed, 133 insertions, 42 deletions
diff --git a/lib/open-uri.rb b/lib/open-uri.rb index a68413e8f8..844865b13a 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -3,30 +3,27 @@ require 'uri' require 'stringio' require 'time' -module Kernel - private - alias open_uri_original_open open # :nodoc: - class << self - alias open_uri_original_open open # :nodoc: - end - - # Allows the opening of various resources including URIs. +module URI + # Allows the opening of various resources including URIs. Example: + # + # require "open-uri" + # URI.open("http://example.com") { |f| f.read } # - # If the first argument responds to the 'open' method, 'open' is called on + # If the first argument responds to the +open+ method, +open+ is called on # it with the rest of the arguments. # - # If the first argument is a string that begins with xxx://, it is parsed by - # URI.parse. If the parsed object responds to the 'open' method, - # 'open' is called on it with the rest of the arguments. + # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by + # URI.parse. If the parsed object responds to the +open+ method, + # +open+ is called on it with the rest of the arguments. # - # Otherwise, the original Kernel#open is called. + # Otherwise, Kernel#open is called. # # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and # URI::FTP#open, Kernel#open. # - # We can accept URIs and strings that begin with http://, https:// and - # ftp://. In these cases, the opened file object is extended by OpenURI::Meta. - def open(name, *rest, &block) # :doc: + # We can accept URIs and strings that begin with <code>http://</code>, <code>https://</code> and + # <code>ftp://</code>. In these cases, the opened file object is extended by OpenURI::Meta. 
+ def self.open(name, *rest, &block) if name.respond_to?(:open) name.open(*rest, &block) elsif name.respond_to?(:to_str) && @@ -34,10 +31,10 @@ module Kernel (uri = URI.parse(name)).respond_to?(:open) uri.open(*rest, &block) else - open_uri_original_open(name, *rest, &block) + super end end - module_function :open + singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true) end # OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP. @@ -46,14 +43,14 @@ end # # It is possible to open an http, https or ftp URL as though it were a file: # -# open("http://www.ruby-lang.org/") {|f| +# URI.open("http://www.ruby-lang.org/") {|f| # f.each_line {|line| p line} # } # # The opened file has several getter methods for its meta-information, as # follows, since it is extended by OpenURI::Meta. # -# open("http://www.ruby-lang.org/en") {|f| +# URI.open("http://www.ruby-lang.org/en") {|f| # f.each_line {|line| p line} # p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/> # p f.content_type # "text/html" @@ -64,7 +61,7 @@ end # # Additional header fields can be specified by an optional hash argument. # -# open("http://www.ruby-lang.org/en/", +# URI.open("http://www.ruby-lang.org/en/", # "User-Agent" => "Ruby/#{RUBY_VERSION}", # "From" => "foo@bar.invalid", # "Referer" => "http://www.ruby-lang.org/") {|f| @@ -74,11 +71,11 @@ end # The environment variables such as http_proxy, https_proxy and ftp_proxy # are in effect by default. Here we disable proxy: # -# open("http://www.ruby-lang.org/en/", :proxy => nil) {|f| +# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f| # # ... # } # -# See OpenURI::OpenRead.open and Kernel#open for more on available options. +# See OpenURI::OpenRead.open and URI.open for more on available options. # # URI objects can be opened in a similar way. 
# @@ -96,6 +93,11 @@ end # Author:: Tanaka Akira <akr@m17n.org> module OpenURI + + # The version string + VERSION = "0.5.0" + + # The default options Options = { :proxy => true, :proxy_http_basic_authentication => true, @@ -106,9 +108,13 @@ module OpenURI :open_timeout => true, :ssl_ca_cert => nil, :ssl_verify_mode => nil, + :ssl_min_version => nil, + :ssl_max_version => nil, :ftp_active_mode => false, :redirect => true, :encoding => nil, + :max_redirects => 64, + :request_specific_fields => nil, } def OpenURI.check_options(options) # :nodoc: @@ -148,7 +154,11 @@ module OpenURI end encoding = Encoding.find(options[:encoding]) end - + if options.has_key? :request_specific_fields + if !(options[:request_specific_fields].is_a?(Hash) || options[:request_specific_fields].is_a?(Proc)) + raise ArgumentError, "Invalid request_specific_fields option: #{options[:request_specific_fields].inspect}" + end + end unless mode == nil || mode == 'r' || mode == 'rb' || mode == File::RDONLY @@ -212,11 +222,20 @@ module OpenURI end uri_set = {} + max_redirects = options[:max_redirects] || Options.fetch(:max_redirects) buf = nil while true + request_specific_fields = {} + if options.has_key? :request_specific_fields + request_specific_fields = if options[:request_specific_fields].is_a?(Hash) + options[:request_specific_fields] + else options[:request_specific_fields].is_a?(Proc) + options[:request_specific_fields].call(uri) + end + end redirect = catch(:open_uri_redirect) { buf = Buffer.new - uri.buffer_open(buf, find_proxy.call(uri), options) + uri.buffer_open(buf, find_proxy.call(uri), options.merge(request_specific_fields)) nil } if redirect @@ -236,9 +255,14 @@ module OpenURI options = options.dup options.delete :http_basic_authentication end + if options.include?(:request_specific_fields) && options[:request_specific_fields].is_a?(Hash) + # Send request specific headers only for the initial request. 
+ options.delete :request_specific_fields + end uri = redirect raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s uri_set[uri.to_s] = true + raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects else break end @@ -305,6 +329,8 @@ module OpenURI require 'net/https' http.use_ssl = true http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER + http.min_version = options[:ssl_min_version] + http.max_version = options[:ssl_max_version] store = OpenSSL::X509::Store.new if options[:ssl_ca_cert] Array(options[:ssl_ca_cert]).each do |cert| @@ -347,6 +373,7 @@ module OpenURI if options[:progress_proc] && Net::HTTPSuccess === resp options[:progress_proc].call(buf.size) end + str.clear } } } @@ -359,7 +386,8 @@ module OpenURI when Net::HTTPMovedPermanently, # 301 Net::HTTPFound, # 302 Net::HTTPSeeOther, # 303 - Net::HTTPTemporaryRedirect # 307 + Net::HTTPTemporaryRedirect, # 307 + Net::HTTPPermanentRedirect # 308 begin loc_uri = URI.parse(resp['location']) rescue URI::InvalidURIError @@ -371,24 +399,31 @@ module OpenURI end end + # Raised on HTTP session failure class HTTPError < StandardError - def initialize(message, io) + def initialize(message, io) # :nodoc: super(message) @io = io end + # StringIO having the received data attr_reader :io end # Raised on redirection, # only occurs when +redirect+ option for HTTP is +false+. 
class HTTPRedirect < HTTPError - def initialize(message, io, uri) + def initialize(message, io, uri) # :nodoc: super(message, io) @uri = uri end + # URI to redirect attr_reader :uri end + # Raised on too many redirection, + class TooManyRedirects < HTTPError + end + class Buffer # :nodoc: all def initialize @io = StringIO.new @@ -416,6 +451,13 @@ module OpenURI end end + # :stopdoc: + RE_LWS = /[\r\n\t ]+/n + RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n + RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n + RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n + # :startdoc: + # Mixin for holding meta-information. module Meta def Meta.init(obj, src=nil) # :nodoc: @@ -493,13 +535,6 @@ module OpenURI end end - # :stopdoc: - RE_LWS = /[\r\n\t ]+/n - RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n - RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n - RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n - # :startdoc: - def content_type_parse # :nodoc: vs = @metas['content-type'] # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045. @@ -535,17 +570,16 @@ module OpenURI # It can be used to guess charset. # # If charset parameter and block is not given, - # nil is returned except text type in HTTP. - # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1. + # nil is returned except text type. + # In that case, "utf-8" is returned as defined by RFC6838 4.2.1 def charset type, *parameters = content_type_parse if pair = parameters.assoc('charset') pair.last.downcase elsif block_given? 
yield - elsif type && %r{\Atext/} =~ type && - @base_uri && /\Ahttp\z/i =~ @base_uri.scheme - "iso-8859-1" # RFC2616 3.7.1 + elsif type && %r{\Atext/} =~ type + "utf-8" # RFC6838 4.2.1 else nil end @@ -705,6 +739,20 @@ module OpenURI # # :ssl_verify_mode is used to specify openssl verify mode. # + # [:ssl_min_version] + # Synopsis: + # :ssl_min_version=>:TLS1_2 + # + # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol + # version. See also OpenSSL::SSL::SSLContext#min_version=. + # + # [:ssl_max_version] + # Synopsis: + # :ssl_max_version=>:TLS1_2 + # + # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol + # version. See also OpenSSL::SSL::SSLContext#max_version=. + # # [:ftp_active_mode] # Synopsis: # :ftp_active_mode=>bool @@ -724,11 +772,49 @@ module OpenURI # Using +true+ also means that redirections between http and ftp are # permitted. # + # [:max_redirects] + # Synopsis: + # :max_redirects=>int + # + # Number of HTTP redirects allowed before OpenURI::TooManyRedirects is raised. + # The default is 64. + # + # [:request_specific_fields] + # Synopsis: + # :request_specific_fields => {} + # :request_specific_fields => lambda {|url| ...} + # + # :request_specific_fields option allows specifying custom header fields that + # are sent with the HTTP request. It can be passed as a Hash or a Proc that + # gets evaluated on each request and returns a Hash of header fields. + # + # If a Hash is provided, it specifies the headers only for the initial + # request and these headers will not be sent on redirects. + # + # If a Proc is provided, it will be executed for each request including + # redirects, allowing dynamic header customization based on the request URL. + # It is important that the Proc returns a Hash. And this Hash specifies the + # headers to be sent with the request. + # + # For Example with Hash + # URI.open("http://...", + # request_specific_fields: {"Authorization" => "token dummy"}) {|f| ... 
} + # + # For Example with Proc: + # URI.open("http://...", + # request_specific_fields: lambda { |uri| + # if uri.host == "example.com" + # {"Authorization" => "token dummy"} + # else + # {} + # end + # }) {|f| ... } + # def open(*rest, &block) OpenURI.open_uri(self, *rest, &block) end - # OpenURI::OpenRead#read([options]) reads a content referenced by self and + # OpenURI::OpenRead#read([ options ]) reads a content referenced by self and # returns the content as string. # The string is extended with OpenURI::Meta. # The argument +options+ is same as OpenURI::OpenRead#open. @@ -757,7 +843,12 @@ module URI OpenURI.open_http(buf, self, proxy, options) return end - require 'net/ftp' + + begin + require 'net/ftp' + rescue LoadError + abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp." + end path = self.path path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it. |
