From b1a0509b5465ce77f52e0384159237889a8d60ec Mon Sep 17 00:00:00 2001 From: drbrain Date: Thu, 19 Jul 2012 22:43:38 +0000 Subject: * lib/net/http/response.rb: Automatically inflate gzip and deflate-encoded response bodies. [Feature #6492] * lib/net/http/generic_request.rb: Automatically accept gzip and deflate content-encoding for requests. [Feature #6494] * lib/net/http/request.rb: Updated documentation for #6494. * lib/net/http.rb: Updated documentation for #6492 and #6494, removed Content-Encoding handling now present in Net::HTTPResponse. * test/net/http/test_httpresponse.rb: Tests for #6492 * test/net/http/test_http_request.rb: Tests for #6494 * test/open-uri/test_open-uri.rb (test_content_encoding): Updated test for automatic content-encoding handling. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36473 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/net/http.rb | 29 +++----- lib/net/http/generic_request.rb | 12 ++++ lib/net/http/request.rb | 7 +- lib/net/http/response.rb | 147 +++++++++++++++++++++++++++++++++++----- 4 files changed, 156 insertions(+), 39 deletions(-) (limited to 'lib/net') diff --git a/lib/net/http.rb b/lib/net/http.rb index 7efea79cdd..977daabe1c 100644 --- a/lib/net/http.rb +++ b/lib/net/http.rb @@ -283,6 +283,14 @@ module Net #:nodoc: # See Net::HTTP::Proxy for further details and examples such as proxies that # require a username and password. # + # === Compression + # + # Net::HTTP automatically adds Accept-Encoding for compression of response + # bodies and automatically decompresses gzip and deflate responses unless a + # Range header was sent. + # + # Compression can be disabled through the Accept-Encoding: identity header. + # # == HTTP Request Classes # # Here is the HTTP request class hierarchy. 
@@ -602,7 +610,6 @@ module Net #:nodoc: @use_ssl = false @ssl_context = nil @enable_post_connection_check = true - @compression = nil @sspi_enabled = false SSL_IVNAMES.each do |ivname| instance_variable_set ivname, nil @@ -1052,28 +1059,10 @@ module Net #:nodoc: initheader = initheader.merge({ "accept-encoding" => "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" }) - @compression = true end end request(Get.new(path, initheader)) {|r| - if r.key?("content-encoding") and @compression - @compression = nil # Clear it till next set. - the_body = r.read_body dest, &block - case r["content-encoding"] - when "gzip" - r.body= Zlib::GzipReader.new(StringIO.new(the_body), encoding: "ASCII-8BIT").read - r.delete("content-encoding") - when "deflate" - r.body= Zlib::Inflate.inflate(the_body); - r.delete("content-encoding") - when "identity" - ; # nothing needed - else - ; # Don't do anything dramatic, unless we need to later - end - else - r.read_body dest, &block - end + r.read_body dest, &block res = r } res diff --git a/lib/net/http/generic_request.rb b/lib/net/http/generic_request.rb index ca4e48f277..bcf87d35be 100644 --- a/lib/net/http/generic_request.rb +++ b/lib/net/http/generic_request.rb @@ -14,6 +14,18 @@ class Net::HTTPGenericRequest raise ArgumentError, "no HTTP request path given" unless path raise ArgumentError, "HTTP request path is empty" if path.empty? @path = path + + if @response_has_body and Net::HTTP::HAVE_ZLIB then + if !initheader || + !initheader.keys.any? { |k| + %w[accept-encoding range].include? k.downcase + } then + initheader = initheader ? 
initheader.dup : {} + initheader["accept-encoding"] = + "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" + end + end + initialize_http_header initheader self['Accept'] ||= '*/*' self['User-Agent'] ||= 'Ruby' diff --git a/lib/net/http/request.rb b/lib/net/http/request.rb index c5a6c102af..e8b0f48fcc 100644 --- a/lib/net/http/request.rb +++ b/lib/net/http/request.rb @@ -4,7 +4,12 @@ # subclasses: Net::HTTP::Get, Net::HTTP::Post, Net::HTTP::Head. # class Net::HTTPRequest < Net::HTTPGenericRequest - # Creates HTTP request object. + # Creates an HTTP request object for +path+. + # + # +initheader+ are the default headers to use. Net::HTTP adds + # Accept-Encoding to enable compression of the response body unless + # Accept-Encoding or Range are supplied in +initheader+. + def initialize(path, initheader = nil) super self.class::METHOD, self.class::REQUEST_HAS_BODY, diff --git a/lib/net/http/response.rb b/lib/net/http/response.rb index dde5ae308e..69c84bfe28 100644 --- a/lib/net/http/response.rb +++ b/lib/net/http/response.rb @@ -222,25 +222,70 @@ class Net::HTTPResponse private - def read_body_0(dest) - if chunked? - read_chunked dest - return - end - clen = content_length() - if clen - @socket.read clen, dest, true # ignore EOF - return + ## + # Checks for a supported Content-Encoding header and yields an Inflate + # wrapper for this response's socket when zlib is present. If the + # Content-Encoding is unsupported or zlib is missing the plain socket is + # yielded. + # + # If a Content-Range header is present a plain socket is yielded as the + # bytes in the range may not be a complete deflate block. 
+ + def inflater # :nodoc: + return yield @socket unless Net::HTTP::HAVE_ZLIB + return yield @socket if self['content-range'] + + case self['content-encoding'] + when 'deflate', 'gzip', 'x-gzip' then + self.delete 'content-encoding' + + inflate_body_io = Inflater.new(@socket) + + begin + yield inflate_body_io + ensure + inflate_body_io.finish + end + when 'none', 'identity' then + self.delete 'content-encoding' + + yield @socket + else + yield @socket end - clen = range_length() - if clen - @socket.read clen, dest - return + end + + def read_body_0(dest) + inflater do |inflate_body_io| + if chunked? + read_chunked dest, inflate_body_io + return + end + + @socket = inflate_body_io + + clen = content_length() + if clen + @socket.read clen, dest, true # ignore EOF + return + end + clen = range_length() + if clen + @socket.read clen, dest + return + end + @socket.read_all dest end - @socket.read_all dest end - def read_chunked(dest) + ## + # read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF, + # etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip + # encoded. + # + # See RFC 2616 section 3.6.1 for definitions + + def read_chunked(dest, chunk_data_io) # :nodoc: len = nil total = 0 while true @@ -250,7 +295,7 @@ class Net::HTTPResponse len = hexlen.hex break if len == 0 begin - @socket.read len, dest + chunk_data_io.read len, dest ensure total += len @socket.read 2 # \r\n @@ -266,8 +311,8 @@ class Net::HTTPResponse end def procdest(dest, block) - raise ArgumentError, 'both arg and block given for HTTP method' \ - if dest and block + raise ArgumentError, 'both arg and block given for HTTP method' if + dest and block if block Net::ReadAdapter.new(block) else @@ -275,5 +320,71 @@ class Net::HTTPResponse end end + ## + # Inflater is a wrapper around Net::BufferedIO that transparently inflates + # zlib and gzip streams. 
+ + class Inflater # :nodoc: + + ## + # Creates a new Inflater wrapping +socket+ + + def initialize socket + @socket = socket + # zlib with automatic gzip detection + @inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS) + end + + ## + # Finishes the inflate stream. + + def finish + @inflate.finish + end + + ## + # Returns a Net::ReadAdapter that inflates each read chunk into +dest+. + # + # This allows a large response body to be inflated without storing the + # entire body in memory. + + def inflate_adapter(dest) + block = proc do |compressed_chunk| + @inflate.inflate(compressed_chunk) do |chunk| + dest << chunk + end + end + + Net::ReadAdapter.new(block) + end + + ## + # Reads +clen+ bytes from the socket, inflates them, then writes them to + # +dest+. +ignore_eof+ is passed down to Net::BufferedIO#read + # + # Unlike Net::BufferedIO#read, this method returns more than +clen+ bytes. + # At this time there is no way for a user of Net::HTTPResponse to read a + # specific number of bytes from the HTTP response body, so this internal + # API does not return the same number of bytes as were requested. + # + # See https://bugs.ruby-lang.org/issues/6492 for further discussion. + + def read clen, dest, ignore_eof = false + temp_dest = inflate_adapter(dest) + + data = @socket.read clen, temp_dest, ignore_eof + end + + ## + # Reads the rest of the socket, inflates it, then writes it to +dest+. + + def read_all dest + temp_dest = inflate_adapter(dest) + + @socket.read_all temp_dest + end + + end + end -- cgit v1.2.3