summaryrefslogtreecommitdiff
path: root/lib/open-uri.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/open-uri.rb')
-rw-r--r--lib/open-uri.rb175
1 files changed, 133 insertions, 42 deletions
diff --git a/lib/open-uri.rb b/lib/open-uri.rb
index a68413e8f8..844865b13a 100644
--- a/lib/open-uri.rb
+++ b/lib/open-uri.rb
@@ -3,30 +3,27 @@ require 'uri'
require 'stringio'
require 'time'
-module Kernel
- private
- alias open_uri_original_open open # :nodoc:
- class << self
- alias open_uri_original_open open # :nodoc:
- end
-
- # Allows the opening of various resources including URIs.
+module URI
+ # Allows the opening of various resources including URIs. Example:
+ #
+ # require "open-uri"
+ # URI.open("http://example.com") { |f| f.read }
#
- # If the first argument responds to the 'open' method, 'open' is called on
+ # If the first argument responds to the +open+ method, +open+ is called on
# it with the rest of the arguments.
#
- # If the first argument is a string that begins with xxx://, it is parsed by
- # URI.parse. If the parsed object responds to the 'open' method,
- # 'open' is called on it with the rest of the arguments.
+ # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by
+ # URI.parse. If the parsed object responds to the +open+ method,
+ # +open+ is called on it with the rest of the arguments.
#
- # Otherwise, the original Kernel#open is called.
+ # Otherwise, Kernel#open is called.
#
# OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
# URI::FTP#open, Kernel#open.
#
- # We can accept URIs and strings that begin with http://, https:// and
- # ftp://. In these cases, the opened file object is extended by OpenURI::Meta.
- def open(name, *rest, &block) # :doc:
+ # We can accept URIs and strings that begin with <code>http://</code>, <code>https://</code> and
+ # <code>ftp://</code>. In these cases, the opened file object is extended by OpenURI::Meta.
+ def self.open(name, *rest, &block)
if name.respond_to?(:open)
name.open(*rest, &block)
elsif name.respond_to?(:to_str) &&
@@ -34,10 +31,10 @@ module Kernel
(uri = URI.parse(name)).respond_to?(:open)
uri.open(*rest, &block)
else
- open_uri_original_open(name, *rest, &block)
+ super
end
end
- module_function :open
+ singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true)
end
# OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
@@ -46,14 +43,14 @@ end
#
# It is possible to open an http, https or ftp URL as though it were a file:
#
-# open("http://www.ruby-lang.org/") {|f|
+# URI.open("http://www.ruby-lang.org/") {|f|
# f.each_line {|line| p line}
# }
#
# The opened file has several getter methods for its meta-information, as
# follows, since it is extended by OpenURI::Meta.
#
-# open("http://www.ruby-lang.org/en") {|f|
+# URI.open("http://www.ruby-lang.org/en") {|f|
# f.each_line {|line| p line}
# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
# p f.content_type # "text/html"
@@ -64,7 +61,7 @@ end
#
# Additional header fields can be specified by an optional hash argument.
#
-# open("http://www.ruby-lang.org/en/",
+# URI.open("http://www.ruby-lang.org/en/",
# "User-Agent" => "Ruby/#{RUBY_VERSION}",
# "From" => "foo@bar.invalid",
# "Referer" => "http://www.ruby-lang.org/") {|f|
@@ -74,11 +71,11 @@ end
# The environment variables such as http_proxy, https_proxy and ftp_proxy
# are in effect by default. Here we disable proxy:
#
-# open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
+# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
# # ...
# }
#
-# See OpenURI::OpenRead.open and Kernel#open for more on available options.
+# See OpenURI::OpenRead.open and URI.open for more on available options.
#
# URI objects can be opened in a similar way.
#
@@ -96,6 +93,11 @@ end
# Author:: Tanaka Akira <akr@m17n.org>
module OpenURI
+
+ # The version string
+ VERSION = "0.5.0"
+
+ # The default options
Options = {
:proxy => true,
:proxy_http_basic_authentication => true,
@@ -106,9 +108,13 @@ module OpenURI
:open_timeout => true,
:ssl_ca_cert => nil,
:ssl_verify_mode => nil,
+ :ssl_min_version => nil,
+ :ssl_max_version => nil,
:ftp_active_mode => false,
:redirect => true,
:encoding => nil,
+ :max_redirects => 64,
+ :request_specific_fields => nil,
}
def OpenURI.check_options(options) # :nodoc:
@@ -148,7 +154,11 @@ module OpenURI
end
encoding = Encoding.find(options[:encoding])
end
-
+ if options.has_key? :request_specific_fields
+ if !(options[:request_specific_fields].is_a?(Hash) || options[:request_specific_fields].is_a?(Proc))
+ raise ArgumentError, "Invalid request_specific_fields option: #{options[:request_specific_fields].inspect}"
+ end
+ end
unless mode == nil ||
mode == 'r' || mode == 'rb' ||
mode == File::RDONLY
@@ -212,11 +222,20 @@ module OpenURI
end
uri_set = {}
+ max_redirects = options[:max_redirects] || Options.fetch(:max_redirects)
buf = nil
while true
+ request_specific_fields = {}
+ if options.has_key? :request_specific_fields
+ request_specific_fields = if options[:request_specific_fields].is_a?(Hash)
+ options[:request_specific_fields]
+ else options[:request_specific_fields].is_a?(Proc)
+ options[:request_specific_fields].call(uri)
+ end
+ end
redirect = catch(:open_uri_redirect) {
buf = Buffer.new
- uri.buffer_open(buf, find_proxy.call(uri), options)
+ uri.buffer_open(buf, find_proxy.call(uri), options.merge(request_specific_fields))
nil
}
if redirect
@@ -236,9 +255,14 @@ module OpenURI
options = options.dup
options.delete :http_basic_authentication
end
+ if options.include?(:request_specific_fields) && options[:request_specific_fields].is_a?(Hash)
+ # Send request specific headers only for the initial request.
+ options.delete :request_specific_fields
+ end
uri = redirect
raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
uri_set[uri.to_s] = true
+ raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects
else
break
end
@@ -305,6 +329,8 @@ module OpenURI
require 'net/https'
http.use_ssl = true
http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
+ http.min_version = options[:ssl_min_version]
+ http.max_version = options[:ssl_max_version]
store = OpenSSL::X509::Store.new
if options[:ssl_ca_cert]
Array(options[:ssl_ca_cert]).each do |cert|
@@ -347,6 +373,7 @@ module OpenURI
if options[:progress_proc] && Net::HTTPSuccess === resp
options[:progress_proc].call(buf.size)
end
+ str.clear
}
}
}
@@ -359,7 +386,8 @@ module OpenURI
when Net::HTTPMovedPermanently, # 301
Net::HTTPFound, # 302
Net::HTTPSeeOther, # 303
- Net::HTTPTemporaryRedirect # 307
+ Net::HTTPTemporaryRedirect, # 307
+ Net::HTTPPermanentRedirect # 308
begin
loc_uri = URI.parse(resp['location'])
rescue URI::InvalidURIError
@@ -371,24 +399,31 @@ module OpenURI
end
end
+ # Raised on HTTP session failure
class HTTPError < StandardError
- def initialize(message, io)
+ def initialize(message, io) # :nodoc:
super(message)
@io = io
end
+ # StringIO having the received data
attr_reader :io
end
# Raised on redirection,
# only occurs when +redirect+ option for HTTP is +false+.
class HTTPRedirect < HTTPError
- def initialize(message, io, uri)
+ def initialize(message, io, uri) # :nodoc:
super(message, io)
@uri = uri
end
+ # URI to redirect
attr_reader :uri
end
+ # Raised on too many redirection,
+ class TooManyRedirects < HTTPError
+ end
+
class Buffer # :nodoc: all
def initialize
@io = StringIO.new
@@ -416,6 +451,13 @@ module OpenURI
end
end
+ # :stopdoc:
+ RE_LWS = /[\r\n\t ]+/n
+ RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
+ RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
+ RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
+ # :startdoc:
+
# Mixin for holding meta-information.
module Meta
def Meta.init(obj, src=nil) # :nodoc:
@@ -493,13 +535,6 @@ module OpenURI
end
end
- # :stopdoc:
- RE_LWS = /[\r\n\t ]+/n
- RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
- RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
- RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
- # :startdoc:
-
def content_type_parse # :nodoc:
vs = @metas['content-type']
# The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
@@ -535,17 +570,16 @@ module OpenURI
# It can be used to guess charset.
#
# If charset parameter and block is not given,
- # nil is returned except text type in HTTP.
- # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
+ # nil is returned except text type.
+ # In that case, "utf-8" is returned as defined by RFC6838 4.2.1
def charset
type, *parameters = content_type_parse
if pair = parameters.assoc('charset')
pair.last.downcase
elsif block_given?
yield
- elsif type && %r{\Atext/} =~ type &&
- @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
- "iso-8859-1" # RFC2616 3.7.1
+ elsif type && %r{\Atext/} =~ type
+ "utf-8" # RFC6838 4.2.1
else
nil
end
@@ -705,6 +739,20 @@ module OpenURI
#
# :ssl_verify_mode is used to specify openssl verify mode.
#
+ # [:ssl_min_version]
+ # Synopsis:
+ # :ssl_min_version=>:TLS1_2
+ #
+ # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol
+ # version. See also OpenSSL::SSL::SSLContext#min_version=.
+ #
+ # [:ssl_max_version]
+ # Synopsis:
+ # :ssl_max_version=>:TLS1_2
+ #
+ # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol
+ # version. See also OpenSSL::SSL::SSLContext#max_version=.
+ #
# [:ftp_active_mode]
# Synopsis:
# :ftp_active_mode=>bool
@@ -724,11 +772,49 @@ module OpenURI
# Using +true+ also means that redirections between http and ftp are
# permitted.
#
+ # [:max_redirects]
+ # Synopsis:
+ # :max_redirects=>int
+ #
+ # Number of HTTP redirects allowed before OpenURI::TooManyRedirects is raised.
+ # The default is 64.
+ #
+ # [:request_specific_fields]
+ # Synopsis:
+ # :request_specific_fields => {}
+ # :request_specific_fields => lambda {|url| ...}
+ #
+ # :request_specific_fields option allows specifying custom header fields that
+ # are sent with the HTTP request. It can be passed as a Hash or a Proc that
+ # gets evaluated on each request and returns a Hash of header fields.
+ #
+ # If a Hash is provided, it specifies the headers only for the initial
+ # request and these headers will not be sent on redirects.
+ #
+ # If a Proc is provided, it will be executed for each request including
+ # redirects, allowing dynamic header customization based on the request URL.
+ # It is important that the Proc returns a Hash. And this Hash specifies the
+ # headers to be sent with the request.
+ #
+ # For Example with Hash
+ # URI.open("http://...",
+ # request_specific_fields: {"Authorization" => "token dummy"}) {|f| ... }
+ #
+ # For Example with Proc:
+ # URI.open("http://...",
+ # request_specific_fields: lambda { |uri|
+ # if uri.host == "example.com"
+ # {"Authorization" => "token dummy"}
+ # else
+ # {}
+ # end
+ # }) {|f| ... }
+ #
def open(*rest, &block)
OpenURI.open_uri(self, *rest, &block)
end
- # OpenURI::OpenRead#read([options]) reads a content referenced by self and
+ # OpenURI::OpenRead#read([ options ]) reads a content referenced by self and
# returns the content as string.
# The string is extended with OpenURI::Meta.
# The argument +options+ is same as OpenURI::OpenRead#open.
@@ -757,7 +843,12 @@ module URI
OpenURI.open_http(buf, self, proxy, options)
return
end
- require 'net/ftp'
+
+ begin
+ require 'net/ftp'
+ rescue LoadError
+ abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp."
+ end
path = self.path
path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it.