summaryrefslogtreecommitdiff
path: root/lib/open-uri.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/open-uri.rb')
-rw-r--r--lib/open-uri.rb940
1 files changed, 635 insertions, 305 deletions
diff --git a/lib/open-uri.rb b/lib/open-uri.rb
index 116644ce07..844865b13a 100644
--- a/lib/open-uri.rb
+++ b/lib/open-uri.rb
@@ -1,103 +1,122 @@
-#= open-uri.rb
-#
-#open-uri.rb is easy-to-use wrapper for net/http and net/ftp.
-#
-#== Example
-#
-#It is possible to open http/ftp URL as usual a file:
-#
-# open("http://www.ruby-lang.org/") {|f|
-# f.each_line {|line| p line}
-# }
-#
-#The opened file has several methods for meta information as follows since
-#it is extended by OpenURI::Meta.
-#
-# open("http://www.ruby-lang.org/en") {|f|
-# f.each_line {|line| p line}
-# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
-# p f.content_type # "text/html"
-# p f.charset # "iso-8859-1"
-# p f.content_encoding # []
-# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
-# }
-#
-#Additional header fields can be specified by an optional hash argument.
-#
-# open("http://www.ruby-lang.org/en/",
-# "User-Agent" => "Ruby/#{RUBY_VERSION}",
-# "From" => "foo@bar.invalid",
-# "Referer" => "http://www.ruby-lang.org/") {|f|
-# ...
-# }
-#
-#The environment variables such as http_proxy and ftp_proxy are in effect by
-#default. :proxy => nil disables proxy.
-#
-# open("http://www.ruby-lang.org/en/raa.html",
-# :proxy => nil) {|f|
-# ...
-# }
-#
-#URI objects can be opened in similar way.
-#
-# uri = URI.parse("http://www.ruby-lang.org/en/")
-# uri.open {|f|
-# ...
-# }
-#
-#URI objects can be read directly.
-#The returned string is also extended by OpenURI::Meta.
-#
-# str = uri.read
-# p str.base_uri
-#
-#Author:: Tanaka Akira <akr@m17n.org>
-
+# frozen_string_literal: true
require 'uri'
require 'stringio'
require 'time'
-module Kernel
- private
- alias open_uri_original_open open # :nodoc:
-
- # makes possible to open various resources including URIs.
- # If the first argument respond to `open' method,
- # the method is called with the rest arguments.
+module URI
+ # Allows the opening of various resources including URIs. Example:
+ #
+ # require "open-uri"
+ # URI.open("http://example.com") { |f| f.read }
+ #
+ # If the first argument responds to the +open+ method, +open+ is called on
+ # it with the rest of the arguments.
+ #
+ # If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by
+ # URI.parse. If the parsed object responds to the +open+ method,
+ # +open+ is called on it with the rest of the arguments.
#
- # If the first argument is a string and begins with xxx://,
- # it is parsed by URI.parse. If the parsed object respond to `open' method,
- # the method is called with the rest arguments.
+ # Otherwise, Kernel#open is called.
#
- # Otherwise original open is called.
+ # OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
+ # URI::FTP#open, Kernel#open.
#
- # Since open-uri.rb provides URI::HTTP#open and URI::FTP#open,
- # Kernel[#.]open can accepts such URIs and strings which begins with
- # http:// and ftp://. In this http and ftp case, the opened file object
- # is extended by OpenURI::Meta.
- def open(name, *rest, &block)
+ # We can accept URIs and strings that begin with <code>http://</code>, <code>https://</code> and
+ # <code>ftp://</code>. In these cases, the opened file object is extended by OpenURI::Meta.
+ def self.open(name, *rest, &block)
if name.respond_to?(:open)
name.open(*rest, &block)
- elsif name.respond_to?("to_str") &&
+ elsif name.respond_to?(:to_str) &&
%r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
(uri = URI.parse(name)).respond_to?(:open)
uri.open(*rest, &block)
else
- open_uri_original_open(name, *rest, &block)
+ super
end
end
- module_function :open
+ singleton_class.send(:ruby2_keywords, :open) if respond_to?(:ruby2_keywords, true)
end
+# OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
+#
+# == Example
+#
+# It is possible to open an http, https or ftp URL as though it were a file:
+#
+# URI.open("http://www.ruby-lang.org/") {|f|
+# f.each_line {|line| p line}
+# }
+#
+# The opened file has several getter methods for its meta-information, as
+# follows, since it is extended by OpenURI::Meta.
+#
+# URI.open("http://www.ruby-lang.org/en") {|f|
+# f.each_line {|line| p line}
+# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
+# p f.content_type # "text/html"
+# p f.charset # "iso-8859-1"
+# p f.content_encoding # []
+# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
+# }
+#
+# Additional header fields can be specified by an optional hash argument.
+#
+# URI.open("http://www.ruby-lang.org/en/",
+# "User-Agent" => "Ruby/#{RUBY_VERSION}",
+# "From" => "foo@bar.invalid",
+# "Referer" => "http://www.ruby-lang.org/") {|f|
+# # ...
+# }
+#
+# The environment variables such as http_proxy, https_proxy and ftp_proxy
+# are in effect by default. Here we disable proxy:
+#
+# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
+# # ...
+# }
+#
+# See OpenURI::OpenRead.open and URI.open for more on available options.
+#
+# URI objects can be opened in a similar way.
+#
+# uri = URI.parse("http://www.ruby-lang.org/en/")
+# uri.open {|f|
+# # ...
+# }
+#
+# URI objects can be read directly. The returned string is also extended by
+# OpenURI::Meta.
+#
+# str = uri.read
+# p str.base_uri
+#
+# Author:: Tanaka Akira <akr@m17n.org>
+
module OpenURI
+
+ # The version string
+ VERSION = "0.5.0"
+
+ # The default options
Options = {
:proxy => true,
+ :proxy_http_basic_authentication => true,
:progress_proc => true,
:content_length_proc => true,
+ :http_basic_authentication => true,
+ :read_timeout => true,
+ :open_timeout => true,
+ :ssl_ca_cert => nil,
+ :ssl_verify_mode => nil,
+ :ssl_min_version => nil,
+ :ssl_max_version => nil,
+ :ftp_active_mode => false,
+ :redirect => true,
+ :encoding => nil,
+ :max_redirects => 64,
+ :request_specific_fields => nil,
}
-
def OpenURI.check_options(options) # :nodoc:
options.each {|k, v|
next unless Symbol === k
@@ -119,12 +138,27 @@ module OpenURI
def OpenURI.open_uri(name, *rest) # :nodoc:
uri = URI::Generic === name ? name : URI.parse(name)
- mode, perm, rest = OpenURI.scan_open_optional_arguments(*rest)
+ mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
options = rest.shift if !rest.empty? && Hash === rest.first
raise ArgumentError.new("extra arguments") if !rest.empty?
options ||= {}
OpenURI.check_options(options)
+ if /\Arb?(?:\Z|:([^:]+))/ =~ mode
+ encoding, = $1,Encoding.find($1) if $1
+ mode = nil
+ end
+ if options.has_key? :encoding
+ if !encoding.nil?
+ raise ArgumentError, "encoding specified twice"
+ end
+ encoding = Encoding.find(options[:encoding])
+ end
+ if options.has_key? :request_specific_fields
+ if !(options[:request_specific_fields].is_a?(Hash) || options[:request_specific_fields].is_a?(Proc))
+ raise ArgumentError, "Invalid request_specific_fields option: #{options[:request_specific_fields].inspect}"
+ end
+ end
unless mode == nil ||
mode == 'r' || mode == 'rb' ||
mode == File::RDONLY
@@ -132,11 +166,16 @@ module OpenURI
end
io = open_loop(uri, options)
+ io.set_encoding(encoding) if encoding
if block_given?
begin
yield io
ensure
- io.close
+ if io.respond_to? :close!
+ io.close! # Tempfile
+ else
+ io.close if !io.closed?
+ end
end
else
io
@@ -144,44 +183,86 @@ module OpenURI
end
def OpenURI.open_loop(uri, options) # :nodoc:
- case opt_proxy = options.fetch(:proxy, true)
+ proxy_opts = []
+ proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
+ proxy_opts << :proxy if options.include? :proxy
+ proxy_opts.compact!
+ if 1 < proxy_opts.length
+ raise ArgumentError, "multiple proxy options specified"
+ end
+ case proxy_opts.first
+ when :proxy_http_basic_authentication
+ opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
+ proxy_user = proxy_user.to_str
+ proxy_pass = proxy_pass.to_str
+ if opt_proxy == true
+ raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
+ end
+ when :proxy
+ opt_proxy = options.fetch(:proxy)
+ proxy_user = nil
+ proxy_pass = nil
+ when nil
+ opt_proxy = true
+ proxy_user = nil
+ proxy_pass = nil
+ end
+ case opt_proxy
when true
- find_proxy = lambda {|u| u.find_proxy}
+ find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
when nil, false
find_proxy = lambda {|u| nil}
when String
opt_proxy = URI.parse(opt_proxy)
- find_proxy = lambda {|u| opt_proxy}
+ find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
when URI::Generic
- find_proxy = lambda {|u| opt_proxy}
+ find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
else
raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
end
uri_set = {}
+ max_redirects = options[:max_redirects] || Options.fetch(:max_redirects)
buf = nil
while true
+ request_specific_fields = {}
+ if options.has_key? :request_specific_fields
+ request_specific_fields = if options[:request_specific_fields].is_a?(Hash)
+ options[:request_specific_fields]
+ else options[:request_specific_fields].is_a?(Proc)
+ options[:request_specific_fields].call(uri)
+ end
+ end
redirect = catch(:open_uri_redirect) {
buf = Buffer.new
- if proxy_uri = find_proxy.call(uri)
- proxy_uri.proxy_open(buf, uri, options)
- else
- uri.direct_open(buf, options)
- end
+ uri.buffer_open(buf, find_proxy.call(uri), options.merge(request_specific_fields))
nil
}
if redirect
if redirect.relative?
- # Although it violates RFC 2616, Location: field may have relative
- # URI. It is converted to absolute URI using uri.
+ # Although it violates RFC2616, Location: field may have relative
+ # URI. It is converted to absolute URI using uri as a base URI.
redirect = uri + redirect
end
+ if !options.fetch(:redirect, true)
+ raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
+ end
unless OpenURI.redirectable?(uri, redirect)
raise "redirection forbidden: #{uri} -> #{redirect}"
end
+ if options.include? :http_basic_authentication
+ # send authentication only for the URI directly specified.
+ options = options.dup
+ options.delete :http_basic_authentication
+ end
+ if options.include?(:request_specific_fields) && options[:request_specific_fields].is_a?(Hash)
+ # Send request specific headers only for the initial request.
+ options.delete :request_specific_fields
+ end
uri = redirect
raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
- uri_set[uri.to_s] = true
+ uri_set[uri.to_s] = true
+ raise TooManyRedirects.new("Too many redirects", buf.io) if max_redirects && uri_set.size > max_redirects
else
break
end
@@ -193,21 +274,157 @@ module OpenURI
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
# This test is intended to forbid a redirection from http://... to
- # file:///etc/passwd.
+ # file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521
+ # https to http redirect is also forbidden intentionally.
+ # It avoids sending secure cookie or referer by non-secure HTTP protocol.
+ # (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
# However this is ad hoc. It should be extensible/configurable.
uri1.scheme.downcase == uri2.scheme.downcase ||
- (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:http|ftp)\z/i =~ uri2.scheme)
+ (/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:https?|ftp)\z/i =~ uri2.scheme)
end
+ def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
+ if proxy
+ proxy_uri, proxy_user, proxy_pass = proxy
+ raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
+ end
+
+ if target.userinfo
+ raise ArgumentError, "userinfo not supported. [RFC3986]"
+ end
+
+ header = {}
+ options.each {|k, v| header[k] = v if String === k }
+
+ require 'net/http'
+ klass = Net::HTTP
+ if URI::HTTP === target
+ # HTTP or HTTPS
+ if proxy
+ unless proxy_user && proxy_pass
+ proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo
+ end
+ if proxy_user && proxy_pass
+ klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
+ else
+ klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
+ end
+ end
+ target_host = target.hostname
+ target_port = target.port
+ request_uri = target.request_uri
+ else
+ # FTP over HTTP proxy
+ target_host = proxy_uri.hostname
+ target_port = proxy_uri.port
+ request_uri = target.to_s
+ if proxy_user && proxy_pass
+ header["Proxy-Authorization"] =
+ 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0')
+ end
+ end
+
+ http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
+ if target.class == URI::HTTPS
+ require 'net/https'
+ http.use_ssl = true
+ http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
+ http.min_version = options[:ssl_min_version]
+ http.max_version = options[:ssl_max_version]
+ store = OpenSSL::X509::Store.new
+ if options[:ssl_ca_cert]
+ Array(options[:ssl_ca_cert]).each do |cert|
+ if File.directory? cert
+ store.add_path cert
+ else
+ store.add_file cert
+ end
+ end
+ else
+ store.set_default_paths
+ end
+ http.cert_store = store
+ end
+ if options.include? :read_timeout
+ http.read_timeout = options[:read_timeout]
+ end
+ if options.include? :open_timeout
+ http.open_timeout = options[:open_timeout]
+ end
+
+ resp = nil
+ http.start {
+ req = Net::HTTP::Get.new(request_uri, header)
+ if options.include? :http_basic_authentication
+ user, pass = options[:http_basic_authentication]
+ req.basic_auth user, pass
+ end
+ http.request(req) {|response|
+ resp = response
+ if options[:content_length_proc] && Net::HTTPSuccess === resp
+ if resp.key?('Content-Length')
+ options[:content_length_proc].call(resp['Content-Length'].to_i)
+ else
+ options[:content_length_proc].call(nil)
+ end
+ end
+ resp.read_body {|str|
+ buf << str
+ if options[:progress_proc] && Net::HTTPSuccess === resp
+ options[:progress_proc].call(buf.size)
+ end
+ str.clear
+ }
+ }
+ }
+ io = buf.io
+ io.rewind
+ io.status = [resp.code, resp.message]
+ resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
+ case resp
+ when Net::HTTPSuccess
+ when Net::HTTPMovedPermanently, # 301
+ Net::HTTPFound, # 302
+ Net::HTTPSeeOther, # 303
+ Net::HTTPTemporaryRedirect, # 307
+ Net::HTTPPermanentRedirect # 308
+ begin
+ loc_uri = URI.parse(resp['location'])
+ rescue URI::InvalidURIError
+ raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
+ end
+ throw :open_uri_redirect, loc_uri
+ else
+ raise OpenURI::HTTPError.new(io.status.join(' '), io)
+ end
+ end
+
+ # Raised on HTTP session failure
class HTTPError < StandardError
- def initialize(message, io)
+ def initialize(message, io) # :nodoc:
super(message)
@io = io
end
+ # StringIO having the received data
attr_reader :io
end
- class Buffer # :nodoc:
+ # Raised on redirection,
+ # only occurs when +redirect+ option for HTTP is +false+.
+ class HTTPRedirect < HTTPError
+ def initialize(message, io, uri) # :nodoc:
+ super(message, io)
+ @uri = uri
+ end
+ # URI to redirect
+ attr_reader :uri
+ end
+
+ # Raised on too many redirection,
+ class TooManyRedirects < HTTPError
+ end
+
+ class Buffer # :nodoc: all
def initialize
@io = StringIO.new
@size = 0
@@ -221,6 +438,7 @@ module OpenURI
if StringIO === @io && StringMax < @size
require 'tempfile'
io = Tempfile.new('open-uri')
+ io.binmode
Meta.init io, @io if Meta === @io
io << @io.string
@io = io
@@ -233,60 +451,101 @@ module OpenURI
end
end
+ # :stopdoc:
+ RE_LWS = /[\r\n\t ]+/n
+ RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
+ RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
+ RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
+ # :startdoc:
+
# Mixin for holding meta-information.
module Meta
def Meta.init(obj, src=nil) # :nodoc:
obj.extend Meta
obj.instance_eval {
@base_uri = nil
- @meta = {}
+ @meta = {} # name to string. legacy.
+ @metas = {} # name to array of strings.
}
if src
obj.status = src.status
obj.base_uri = src.base_uri
- src.meta.each {|name, value|
- obj.meta_add_field(name, value)
+ src.metas.each {|name, values|
+ obj.meta_add_field2(name, values)
}
end
end
- # returns an Array which consits status code and message.
+ # returns an Array that consists of status code and message.
attr_accessor :status
- # returns a URI which is base of relative URIs in the data.
- # It may differ from the URI supplied by a user because redirection.
+ # returns a URI that is the base of relative URIs in the data.
+ # It may differ from the URI supplied by a user due to redirection.
attr_accessor :base_uri
- # returns a Hash which represents header fields.
+ # returns a Hash that represents header fields.
# The Hash keys are downcased for canonicalization.
+ # The Hash values are a field body.
+ # If there are multiple field with same field name,
+ # the field values are concatenated with a comma.
attr_reader :meta
+ # returns a Hash that represents header fields.
+ # The Hash keys are downcased for canonicalization.
+ # The Hash value are an array of field values.
+ attr_reader :metas
+
+ def meta_setup_encoding # :nodoc:
+ charset = self.charset
+ enc = nil
+ if charset
+ begin
+ enc = Encoding.find(charset)
+ rescue ArgumentError
+ end
+ end
+ enc = Encoding::ASCII_8BIT unless enc
+ if self.respond_to? :force_encoding
+ self.force_encoding(enc)
+ elsif self.respond_to? :string
+ self.string.force_encoding(enc)
+ else # Tempfile
+ self.set_encoding enc
+ end
+ end
+
+ def meta_add_field2(name, values) # :nodoc:
+ name = name.downcase
+ @metas[name] = values
+ @meta[name] = values.join(', ')
+ meta_setup_encoding if name == 'content-type'
+ end
+
def meta_add_field(name, value) # :nodoc:
- @meta[name.downcase] = value
+ meta_add_field2(name, [value])
end
- # returns a Time which represents Last-Modified field.
+ # returns a Time that represents the Last-Modified field.
def last_modified
- if v = @meta['last-modified']
+ if vs = @metas['last-modified']
+ v = vs.join(', ')
Time.httpdate(v)
else
nil
end
end
- RE_LWS = /[\r\n\t ]+/n
- RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
- RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])"}n
- RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
-
def content_type_parse # :nodoc:
- v = @meta['content-type']
- if v && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})\z}o =~ v
+ vs = @metas['content-type']
+ # The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
+ if vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ')
type = $1.downcase
subtype = $2.downcase
parameters = []
- $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/o) {|att, val, qval|
- val = qval.gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/) { $1 ? $1[1,1] : $& } if qval
+ $3.scan(/;#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?=#{RE_LWS}?(?:(#{RE_TOKEN})|(#{RE_QUOTED_STRING}))/no) {|att, val, qval|
+ if qval
+ val = qval[1...-1].gsub(/[\r\n\t !#-\[\]-~\x80-\xff]+|(\\[\x00-\x7f])/n) { $1 ? $1[1,1] : $& }
+ end
parameters << [att.downcase, val]
}
["#{type}/#{subtype}", *parameters]
@@ -299,7 +558,7 @@ module OpenURI
# It is downcased for canonicalization.
# Content-Type parameters are stripped.
def content_type
- type, *parameters = content_type_parse
+ type, *_ = content_type_parse
type || 'application/octet-stream'
end
@@ -311,28 +570,28 @@ module OpenURI
# It can be used to guess charset.
#
# If charset parameter and block is not given,
- # nil is returned except text type in HTTP.
- # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1.
+ # nil is returned except text type.
+ # In that case, "utf-8" is returned as defined by RFC6838 4.2.1
def charset
type, *parameters = content_type_parse
if pair = parameters.assoc('charset')
pair.last.downcase
elsif block_given?
yield
- elsif type && %r{\Atext/} =~ type &&
- @base_uri && /\Ahttp\z/i =~ @base_uri.scheme
- "iso-8859-1" # RFC2616 3.7.1
+ elsif type && %r{\Atext/} =~ type
+ "utf-8" # RFC6838 4.2.1
else
nil
end
end
- # returns a list of encodings in Content-Encoding field
- # as an Array of String.
+ # Returns a list of encodings in Content-Encoding field as an array of
+ # strings.
+ #
# The encodings are downcased for canonicalization.
def content_encoding
- v = @meta['content-encoding']
- if v && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ v
+ vs = @metas['content-encoding']
+ if vs && %r{\A#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?(?:,#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?)*}o =~ (v = vs.join(', '))
v.scan(RE_TOKEN).map {|content_coding| content_coding.downcase}
else
[]
@@ -345,87 +604,220 @@ module OpenURI
# OpenURI::OpenRead#open provides `open' for URI::HTTP and URI::FTP.
#
# OpenURI::OpenRead#open takes optional 3 arguments as:
- # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
#
- # `mode', `perm' is same as Kernel#open.
+ # OpenURI::OpenRead#open([mode [, perm]] [, options]) [{|io| ... }]
#
- # However, `mode' must be read mode because OpenURI::OpenRead#open doesn't
- # support write mode (yet).
- # Also `perm' is just ignored because it is meaningful only for file
- # creation.
- #
- # `options' must be a hash.
- #
- # Each pairs which key is a string in the hash specify a extra header
- # field for HTTP.
- # I.e. it is ignored for FTP without HTTP proxy.
- #
- # The hash may include other option which key is a symbol:
- #
- # :proxy => "http://proxy.foo.com:8000/"
- # :proxy => URI.parse("http://proxy.foo.com:8000/")
- # :proxy => true
- # :proxy => false
- # :proxy => nil
- #
- # If :proxy option is specified, the value should be String, URI,
- # boolean or nil.
- # When String or URI is given, it is treated as proxy URI.
- # When true is given or the option itself is not specified,
- # environment variable `scheme_proxy'(or `SCHEME_PROXY') is examined.
- # `scheme' is replaced by `http' or `ftp'.
- # When false or nil is given, the environment variables are ignored and
- # connection will be made to a server directly.
- #
- # :content_length_proc => lambda {|content_length| ... }
- #
- # If :content_length_proc option is specified, the option value procedure
- # is called before actual transfer is started.
- # It takes one argument which is expected content length in bytes.
- #
- # If two or more transfer is done by HTTP redirection, the procedure
- # is called only one for a last transfer.
- #
- # When expected content length is unknown, the procedure is called with
- # nil.
- # It is happen when HTTP response has no Content-Length header.
- #
- # :progress_proc => lambda {|size| ...}
- #
- # If :progress_proc option is specified, the proc is called with one
- # argument each time when `open' gets content fragment from network.
- # The argument `size' `size' is a accumulated transfered size in bytes.
- #
- # If two or more transfer is done by HTTP redirection, the procedure
- # is called only one for a last transfer.
- #
- # :progress_proc and :content_length_proc are intended to be used for
- # progress bar.
- # For example, it can be implemented as follows using Ruby/ProgressBar.
- #
- # pbar = nil
- # open("http://...",
- # :content_length_proc => lambda {|t|
- # if t && 0 < t
- # pbar = ProgressBar.new("...", t)
- # pbar.file_transfer_mode
- # end
- # },
- # :progress_proc => lambda {|s|
- # pbar.set s if pbar
- # }) {|f| ... }
- #
- # OpenURI::OpenRead#open returns an IO like object if block is not given.
+ # OpenURI::OpenRead#open returns an IO-like object if block is not given.
# Otherwise it yields the IO object and return the value of the block.
# The IO object is extended with OpenURI::Meta.
+ #
+ # +mode+ and +perm+ are the same as Kernel#open.
+ #
+ # However, +mode+ must be read mode because OpenURI::OpenRead#open doesn't
+ # support write mode (yet).
+ # Also +perm+ is ignored because it is meaningful only for file creation.
+ #
+ # +options+ must be a hash.
+ #
+ # Each option with a string key specifies an extra header field for HTTP.
+ # I.e., it is ignored for FTP without HTTP proxy.
+ #
+ # The hash may include other options, where keys are symbols:
+ #
+ # [:proxy]
+ # Synopsis:
+ # :proxy => "http://proxy.foo.com:8000/"
+ # :proxy => URI.parse("http://proxy.foo.com:8000/")
+ # :proxy => true
+ # :proxy => false
+ # :proxy => nil
+ #
+ # If :proxy option is specified, the value should be String, URI,
+ # boolean or nil.
+ #
+ # When String or URI is given, it is treated as proxy URI.
+ #
+ # When true is given or the option itself is not specified,
+ # environment variable `scheme_proxy' is examined.
+ # `scheme' is replaced by `http', `https' or `ftp'.
+ #
+ # When false or nil is given, the environment variables are ignored and
+ # connection will be made to a server directly.
+ #
+ # [:proxy_http_basic_authentication]
+ # Synopsis:
+ # :proxy_http_basic_authentication =>
+ # ["http://proxy.foo.com:8000/", "proxy-user", "proxy-password"]
+ # :proxy_http_basic_authentication =>
+ # [URI.parse("http://proxy.foo.com:8000/"),
+ # "proxy-user", "proxy-password"]
+ #
+ # If :proxy option is specified, the value should be an Array with 3
+ # elements. It should contain a proxy URI, a proxy user name and a proxy
+ # password. The proxy URI should be a String, an URI or nil. The proxy
+ # user name and password should be a String.
+ #
+ # If nil is given for the proxy URI, this option is just ignored.
+ #
+ # If :proxy and :proxy_http_basic_authentication is specified,
+ # ArgumentError is raised.
+ #
+ # [:http_basic_authentication]
+ # Synopsis:
+ # :http_basic_authentication=>[user, password]
+ #
+ # If :http_basic_authentication is specified,
+ # the value should be an array which contains 2 strings:
+ # username and password.
+ # It is used for HTTP Basic authentication defined by RFC 2617.
+ #
+ # [:content_length_proc]
+ # Synopsis:
+ # :content_length_proc => lambda {|content_length| ... }
+ #
+ # If :content_length_proc option is specified, the option value procedure
+ # is called before actual transfer is started.
+ # It takes one argument, which is expected content length in bytes.
+ #
+ # If two or more transfers are performed by HTTP redirection, the
+ # procedure is called only once for the last transfer.
+ #
+ # When expected content length is unknown, the procedure is called with
+ # nil. This happens when the HTTP response has no Content-Length header.
+ #
+ # [:progress_proc]
+ # Synopsis:
+ # :progress_proc => lambda {|size| ...}
+ #
+ # If :progress_proc option is specified, the proc is called with one
+ # argument each time when `open' gets content fragment from network.
+ # The argument +size+ is the accumulated transferred size in bytes.
+ #
+ # If two or more transfer is done by HTTP redirection, the procedure
+ # is called only one for a last transfer.
+ #
+ # :progress_proc and :content_length_proc are intended to be used for
+ # progress bar.
+ # For example, it can be implemented as follows using Ruby/ProgressBar.
+ #
+ # pbar = nil
+ # open("http://...",
+ # :content_length_proc => lambda {|t|
+ # if t && 0 < t
+ # pbar = ProgressBar.new("...", t)
+ # pbar.file_transfer_mode
+ # end
+ # },
+ # :progress_proc => lambda {|s|
+ # pbar.set s if pbar
+ # }) {|f| ... }
+ #
+ # [:read_timeout]
+ # Synopsis:
+ # :read_timeout=>nil (no timeout)
+ # :read_timeout=>10 (10 second)
+ #
+ # :read_timeout option specifies a timeout of read for http connections.
+ #
+ # [:open_timeout]
+ # Synopsis:
+ # :open_timeout=>nil (no timeout)
+ # :open_timeout=>10 (10 second)
+ #
+ # :open_timeout option specifies a timeout of open for http connections.
+ #
+ # [:ssl_ca_cert]
+ # Synopsis:
+ # :ssl_ca_cert=>filename or an Array of filenames
+ #
+ # :ssl_ca_cert is used to specify CA certificate for SSL.
+ # If it is given, default certificates are not used.
+ #
+ # [:ssl_verify_mode]
+ # Synopsis:
+ # :ssl_verify_mode=>mode
+ #
+ # :ssl_verify_mode is used to specify openssl verify mode.
+ #
+ # [:ssl_min_version]
+ # Synopsis:
+ # :ssl_min_version=>:TLS1_2
+ #
+ # :ssl_min_version option specifies the minimum allowed SSL/TLS protocol
+ # version. See also OpenSSL::SSL::SSLContext#min_version=.
+ #
+ # [:ssl_max_version]
+ # Synopsis:
+ # :ssl_max_version=>:TLS1_2
+ #
+ # :ssl_max_version option specifies the maximum allowed SSL/TLS protocol
+ # version. See also OpenSSL::SSL::SSLContext#max_version=.
+ #
+ # [:ftp_active_mode]
+ # Synopsis:
+ # :ftp_active_mode=>bool
+ #
+ # <tt>:ftp_active_mode => true</tt> is used to make ftp active mode.
+ # Ruby 1.9 uses passive mode by default.
+ # Note that the active mode is default in Ruby 1.8 or prior.
+ #
+ # [:redirect]
+ # Synopsis:
+ # :redirect=>bool
+ #
+ # +:redirect+ is true by default. <tt>:redirect => false</tt> is used to
+ # disable all HTTP redirects.
+ #
+ # OpenURI::HTTPRedirect exception raised on redirection.
+ # Using +true+ also means that redirections between http and ftp are
+ # permitted.
+ #
+ # [:max_redirects]
+ # Synopsis:
+ # :max_redirects=>int
+ #
+ # Number of HTTP redirects allowed before OpenURI::TooManyRedirects is raised.
+ # The default is 64.
+ #
+ # [:request_specific_fields]
+ # Synopsis:
+ # :request_specific_fields => {}
+ # :request_specific_fields => lambda {|url| ...}
+ #
+ # :request_specific_fields option allows specifying custom header fields that
+ # are sent with the HTTP request. It can be passed as a Hash or a Proc that
+ # gets evaluated on each request and returns a Hash of header fields.
+ #
+ # If a Hash is provided, it specifies the headers only for the initial
+ # request and these headers will not be sent on redirects.
+ #
+ # If a Proc is provided, it will be executed for each request including
+ # redirects, allowing dynamic header customization based on the request URL.
+ # It is important that the Proc returns a Hash. And this Hash specifies the
+ # headers to be sent with the request.
+ #
+ # For Example with Hash
+ # URI.open("http://...",
+ # request_specific_fields: {"Authorization" => "token dummy"}) {|f| ... }
+ #
+ # For Example with Proc:
+ # URI.open("http://...",
+ # request_specific_fields: lambda { |uri|
+ # if uri.host == "example.com"
+ # {"Authorization" => "token dummy"}
+ # else
+ # {}
+ # end
+ # }) {|f| ... }
+ #
def open(*rest, &block)
OpenURI.open_uri(self, *rest, &block)
end
- # OpenURI::OpenRead#read([options]) reads a content referenced by self and
+ # OpenURI::OpenRead#read([ options ]) reads a content referenced by self and
# returns the content as string.
# The string is extended with OpenURI::Meta.
- # The argument `options' is same as OpenURI::OpenRead#open.
+ # The argument +options+ is same as OpenURI::OpenRead#open.
def read(options={})
self.open(options) {|f|
str = f.read
@@ -437,131 +829,69 @@ module OpenURI
end
module URI
- class Generic
- # returns a proxy URI.
- # The proxy URI is obtained from environment variables such as http_proxy,
- # ftp_proxy, no_proxy, etc.
- # If there is no proper proxy, nil is returned.
- #
- # Note that capitalized variables (HTTP_PROXY, FTP_PROXY, NO_PROXY, etc.)
- # are examined too.
- #
- # But http_proxy and HTTP_PROXY is treated specially under CGI environment.
- # It's because HTTP_PROXY may be set by Proxy: header.
- # So HTTP_PROXY is not used.
- # http_proxy is not used too if the variable is case insentitive.
- # CGI_HTTP_PROXY can be used instead.
- def find_proxy
- name = self.scheme.downcase + '_proxy'
- proxy_uri = nil
- if name == 'http_proxy' && ENV.include?('REQUEST_METHOD') # CGI?
- # HTTP_PROXY conflicts with *_proxy for proxy settings and
- # HTTP_* for header informatin in CGI.
- # So it should be careful to use it.
- pairs = ENV.reject {|k, v| /\Ahttp_proxy\z/i !~ k }
- case pairs.length
- when 0 # no proxy setting anyway.
- proxy_uri = nil
- when 1
- k, v = pairs.shift
- if k == 'http_proxy' && ENV[k.upcase] == nil
- # http_proxy is safe to use becase ENV is case sensitive.
- proxy_uri = ENV[name]
- else
- proxy_uri = nil
- end
- else # http_proxy is safe to use becase ENV is case sensitive.
- proxy_uri = ENV[name]
- end
- if !proxy_uri
- # Use CGI_HTTP_PROXY. cf. libwww-perl.
- proxy_uri = ENV["CGI_#{name.upcase}"]
- end
- else
- proxy_uri = ENV[name] || ENV[name.upcase]
- end
-
- if proxy_uri
- proxy_uri = URI.parse(proxy_uri)
- name = 'no_proxy'
- if no_proxy = ENV[name] || ENV[name.upcase]
- no_proxy.scan(/([^:,]*)(?::(\d+))?/) {|host, port|
- if /(\A|\.)#{Regexp.quote host}\z/i =~ self.host &&
- (!port || self.port == port.to_i)
- proxy_uri = nil
- break
- end
- }
- end
- proxy_uri
- else
- nil
- end
- end
- end
-
class HTTP
- def direct_open(buf, options) # :nodoc:
- proxy_open(buf, request_uri, options)
- end
-
- def proxy_open(buf, uri, options) # :nodoc:
- header = {}
- options.each {|k, v| header[k] = v if String === k }
-
- require 'net/http'
- resp = nil
- Net::HTTP.start(self.host, self.port) {|http|
- http.request_get(uri.to_s, header) {|response|
- resp = response
- if options[:content_length_proc] && Net::HTTPSuccess === resp
- if resp.key?('Content-Length')
- options[:content_length_proc].call(resp['Content-Length'].to_i)
- else
- options[:content_length_proc].call(nil)
- end
- end
- resp.read_body {|str|
- buf << str
- if options[:progress_proc] && Net::HTTPSuccess === resp
- options[:progress_proc].call(buf.size)
- end
- }
- }
- }
- io = buf.io
- io.rewind
- io.status = [resp.code, resp.message]
- resp.each {|name,value| buf.io.meta_add_field name, value }
- case resp
- when Net::HTTPSuccess
- when Net::HTTPMovedPermanently, # 301
- Net::HTTPFound, # 302
- Net::HTTPSeeOther, # 303
- Net::HTTPTemporaryRedirect # 307
- throw :open_uri_redirect, URI.parse(resp['location'])
- else
- raise OpenURI::HTTPError.new(io.status.join(' '), io)
- end
+ def buffer_open(buf, proxy, options) # :nodoc:
+ OpenURI.open_http(buf, self, proxy, options)
end
include OpenURI::OpenRead
end
class FTP
- def direct_open(buf, options) # :nodoc:
- require 'net/ftp'
+ def buffer_open(buf, proxy, options) # :nodoc:
+ if proxy
+ OpenURI.open_http(buf, self, proxy, options)
+ return
+ end
+
+ begin
+ require 'net/ftp'
+ rescue LoadError
+ abort "net/ftp is not found. You may need to `gem install net-ftp` to install net/ftp."
+ end
+
+ path = self.path
+ path = path.sub(%r{\A/}, '%2F') # re-encode the beginning slash because uri library decodes it.
+ directories = path.split(%r{/}, -1)
+ directories.each {|d|
+ d.gsub!(/%([0-9A-Fa-f][0-9A-Fa-f])/) { [$1].pack("H2") }
+ }
+ unless filename = directories.pop
+ raise ArgumentError, "no filename: #{self.inspect}"
+ end
+ directories.each {|d|
+ if /[\r\n]/ =~ d
+ raise ArgumentError, "invalid directory: #{d.inspect}"
+ end
+ }
+ if /[\r\n]/ =~ filename
+ raise ArgumentError, "invalid filename: #{filename.inspect}"
+ end
+ typecode = self.typecode
+ if typecode && /\A[aid]\z/ !~ typecode
+ raise ArgumentError, "invalid typecode: #{typecode.inspect}"
+ end
+
+ # The access sequence is defined by RFC 1738
+ ftp = Net::FTP.new
+ ftp.connect(self.hostname, self.port)
+ ftp.passive = !options[:ftp_active_mode]
# todo: extract user/passwd from .netrc.
user = 'anonymous'
passwd = nil
user, passwd = self.userinfo.split(/:/) if self.userinfo
-
- ftp = Net::FTP.open(self.host)
ftp.login(user, passwd)
+ directories.each {|cwd|
+ ftp.voidcmd("CWD #{cwd}")
+ }
+ if typecode
+ # xxx: typecode D is not handled.
+ ftp.voidcmd("TYPE #{typecode.upcase}")
+ end
if options[:content_length_proc]
- options[:content_length_proc].call(ftp.size(self.path))
+ options[:content_length_proc].call(ftp.size(filename))
end
- ftp.getbinaryfile(self.path, '/dev/null', Net::FTP::DEFAULT_BLOCKSIZE) {|str|
+ ftp.retrbinary("RETR #{filename}", 4096) { |str|
buf << str
options[:progress_proc].call(buf.size) if options[:progress_proc]
}