webrick/httpproxy: stream request and response bodies

Reading entire request or response bodies into memory can lead to trivial denial-of-service attacks. Introduce Fibers in both cases to allow streaming. WEBrick::HTTPRequest gains a new body_reader method to prepare itself as a source for IO.copy_stream. This allows the WEBrick::HTTPRequest object to be used as the Net::HTTPGenericRequest#body_stream= arg for Net::HTTP. For HTTP proxy response bodies, we also use a Fiber to make the HTTP request and read the response body. * lib/webrick/httprequest.rb (body_reader): new method (readpartial): ditto * lib/webrick/httpproxy.rb (perform_proxy_request): use Fiber to stream response body (do_GET, do_HEAD): adjust call (do_POST): adjust call and supply body_reader * test/webrick/test_httprequest.rb (test_chunked): test for IO.copy_stream compatibility * test/webrick/test_httpproxy.rb (test_big_bodies): new test git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62966 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-28 08:06:55 +0000
committer: normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-28 08:06:55 +0000
commit: 706c028909df2f9526c1cde1c2baa6bc0b4d318a (patch)
tree: 0d949ef750c32079b9220f0c8264e945535b75ad /lib
parent: 32e277acbf35de454befc1573aff1063a55403cf (diff)
2 files changed, 69 insertions, 31 deletions
diff --git a/lib/webrick/httpproxy.rb b/lib/webrick/httpproxy.rb
index be5531fec0..d180ff4831 100644
--- a/lib/webrick/httpproxy.rb
+++ b/lib/webrick/httpproxy.rb
@@ -211,21 +211,15 @@ module WEBrick
     end
 
     def do_GET(req, res)
-      perform_proxy_request(req, res) do |http, path, header|
-        http.get(path, header)
-      end
+      perform_proxy_request(req, res, Net::HTTP::Get)
     end
 
     def do_HEAD(req, res)
-      perform_proxy_request(req, res) do |http, path, header|
-        http.head(path, header)
-      end
+      perform_proxy_request(req, res, Net::HTTP::Head)
     end
 
     def do_POST(req, res)
-      perform_proxy_request(req, res) do |http, path, header|
-        http.post(path, req.body || "", header)
-      end
+      perform_proxy_request(req, res, Net::HTTP::Post, req.body_reader)
     end
 
     def do_OPTIONS(req, res)
@@ -301,38 +295,56 @@ module WEBrick
       return FakeProxyURI
     end
 
-    def perform_proxy_request(req, res)
+    def perform_proxy_request(req, res, req_class, body_stream = nil)
       uri = req.request_uri
       path = uri.path.dup
       path << "?" << uri.query if uri.query
       header = setup_proxy_header(req, res)
       upstream = setup_upstream_proxy_authentication(req, res, header)
-      response = nil
 
+      body_tmp = []
       http = Net::HTTP.new(uri.host, uri.port, upstream.host, upstream.port)
-      http.start do
-        if @config[:ProxyTimeout]
-          ##################################   these issues are
-          http.open_timeout = 30   # secs  #   necessary (maybe because
-          http.read_timeout = 60   # secs  #   Ruby's bug, but why?)
-          ##################################
+      req_fib = Fiber.new do
+        http.start do
+          if @config[:ProxyTimeout]
+            ##################################   these issues are
+            http.open_timeout = 30   # secs  #   necessary (maybe because
+            http.read_timeout = 60   # secs  #   Ruby's bug, but why?)
+            ##################################
+          end
+          if body_stream && req['transfer-encoding'] =~ /\bchunked\b/i
+            header['Transfer-Encoding'] = 'chunked'
+          end
+          http_req = req_class.new(path, header)
+          http_req.body_stream = body_stream if body_stream
+          http.request(http_req) do |response|
+            # Persistent connection requirements are mysterious for me.
+            # So I will close the connection in every response.
+            res['proxy-connection'] = "close"
+            res['connection'] = "close"
+
+            # stream Net::HTTP::HTTPResponse to WEBrick::HTTPResponse
+            res.status = response.code.to_i
+            res.chunked = response.chunked?
+            choose_header(response, res)
+            set_cookie(response, res)
+            set_via(res)
+            response.read_body do |buf|
+              body_tmp << buf
+              Fiber.yield # wait for res.body Proc#call
+            end
+          end # http.request
+        end
+      end
+      req_fib.resume # read HTTP response headers and first chunk of the body
+      res.body = ->(socket) do
+        while buf = body_tmp.shift
+          socket.write(buf)
+          buf.clear
+          req_fib.resume # continue response.read_body
         end
-        response = yield(http, path, header)
       end
-
-      # Persistent connection requirements are mysterious for me.
-      # So I will close the connection in every response.
-      res['proxy-connection'] = "close"
-      res['connection'] = "close"
-
-      # Convert Net::HTTP::HTTPResponse to WEBrick::HTTPResponse
-      res.status = response.code.to_i
-      choose_header(response, res)
-      set_cookie(response, res)
-      set_via(res)
-      res.body = response.body
     end
-
     # :stopdoc:
   end
 end
diff --git a/lib/webrick/httprequest.rb b/lib/webrick/httprequest.rb
index b40bcb0d57..c40f7c16e4 100644
--- a/lib/webrick/httprequest.rb
+++ b/lib/webrick/httprequest.rb
@@ -258,6 +258,32 @@ module WEBrick
     end
 
     ##
+    # Prepares the HTTPRequest object for use as the
+    # source for IO.copy_stream
+
+    def body_reader
+      @body_tmp = []
+      @body_rd = Fiber.new do
+        body do |buf|
+          @body_tmp << buf
+          Fiber.yield
+        end
+      end
+      @body_rd.resume # grab the first chunk and yield
+      self
+    end
+
+    # for IO.copy_stream.  Note: we may return a larger string than +size+
+    # here; but IO.copy_stream does not care.
+    def readpartial(size, buf = ''.b) # :nodoc
+      res = @body_tmp.shift or raise EOFError, 'end of file reached'
+      buf.replace(res)
+      res.clear
+      @body_rd.resume # get more chunks
+      buf
+    end
+
+    ##
     # Request query as a Hash
 
     def query
author	normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2018-03-28 08:06:55 +0000
committer	normal <normal@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2018-03-28 08:06:55 +0000
commit	706c028909df2f9526c1cde1c2baa6bc0b4d318a (patch)
tree	0d949ef750c32079b9220f0c8264e945535b75ad /lib
parent	32e277acbf35de454befc1573aff1063a55403cf (diff)