bpo-44022: Fix http client infinite line reading (DoS) after a HTTP 100 Continue...

author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>

Thu, 6 May 2021 08:52:26 +0000 (01:52 -0700)

committer GitHub <noreply@github.com>

Thu, 6 May 2021 08:52:26 +0000 (10:52 +0200)
author Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Thu, 6 May 2021 08:52:26 +0000 (01:52 -0700)
committer GitHub <noreply@github.com>
Thu, 6 May 2021 08:52:26 +0000 (10:52 +0200)
diff --git a/Lib/http/client.py b/Lib/http/client.py

index 20a25de855625f950e50e2f0c7f12e3cfd510013..a3c4d978b1d1c13f8df500419fa9324bc7b4405a 100644 (file)
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -201,15 +201,11 @@ class HTTPMessage(email.message.Message):
                  lst.append(line)
          return lst
  
-def parse_headers(fp, _class=HTTPMessage):
-    """Parses only RFC2822 headers from a file pointer.
-
-    email Parser wants to see strings rather than bytes.
-    But a TextIOWrapper around self.rfile would buffer too many bytes
-    from the stream, bytes which we later need to read as bytes.
-    So we read the correct bytes here, as bytes, for email Parser
-    to parse.
+def _read_headers(fp):
+    """Reads potential header lines into a list from a file pointer.
  
+    Length of line is limited by _MAXLINE, and number of
+    headers is limited by _MAXHEADERS.
      """
      headers = []
      while True:
@@ -221,6 +217,19 @@ def parse_headers(fp, _class=HTTPMessage):
              raise HTTPException("got more than %d headers" % _MAXHEADERS)
          if line in (b'\r\n', b'\n', b''):
              break
+    return headers
+
+def parse_headers(fp, _class=HTTPMessage):
+    """Parses only RFC2822 headers from a file pointer.
+
+    email Parser wants to see strings rather than bytes.
+    But a TextIOWrapper around self.rfile would buffer too many bytes
+    from the stream, bytes which we later need to read as bytes.
+    So we read the correct bytes here, as bytes, for email Parser
+    to parse.
+
+    """
+    headers = _read_headers(fp)
      hstring = b''.join(headers).decode('iso-8859-1')
      return email.parser.Parser(_class=_class).parsestr(hstring)
  
@@ -308,15 +317,10 @@ class HTTPResponse(io.BufferedIOBase):
              if status != CONTINUE:
                  break
              # skip the header from the 100 response
-            while True:
-                skip = self.fp.readline(_MAXLINE + 1)
-                if len(skip) > _MAXLINE:
-                    raise LineTooLong("header line")
-                skip = skip.strip()
-                if not skip:
-                    break
-                if self.debuglevel > 0:
-                    print("header:", skip)
+            skipped_headers = _read_headers(self.fp)
+            if self.debuglevel > 0:
+                print("headers:", skipped_headers)
+            del skipped_headers
  
          self.code = self.status = status
          self.reason = reason.strip()
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py

index 3e423fd4e80bc2af818a87094a482c0ff1e0cb88..862a0970315d8a151f93d4e141d8d48f9bb475f9 100644 (file)
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1003,6 +1003,14 @@ class BasicTest(TestCase):
          resp = client.HTTPResponse(FakeSocket(body))
          self.assertRaises(client.LineTooLong, resp.begin)
  
+    def test_overflowing_header_limit_after_100(self):
+        body = (
+            'HTTP/1.1 100 OK\r\n'
+            'r\n' * 32768
+        )
+        resp = client.HTTPResponse(FakeSocket(body))
+        self.assertRaises(client.HTTPException, resp.begin)
+
      def test_overflowing_chunked_line(self):
          body = (
              'HTTP/1.1 200 OK\r\n'
@@ -1404,7 +1412,7 @@ class Readliner:
  class OfflineTest(TestCase):
      def test_all(self):
          # Documented objects defined in the module should be in __all__
-        expected = {"responses"}  # White-list documented dict() object
+        expected = {"responses"}  # Allowlist documented dict() object
          # HTTPMessage, parse_headers(), and the HTTP status code constants are
          # intentionally omitted for simplicity
          blacklist = {"HTTPMessage", "parse_headers"}
diff --git a/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst

new file mode 100644 (file)

index 0000000..cf6b63e
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst
@@ -0,0 +1,2 @@
+:mod:`http.client` now avoids infinitely reading potential HTTP headers after a
+``100 Continue`` status response from the server.
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
	Thu, 6 May 2021 08:52:26 +0000 (01:52 -0700)
committer	GitHub <noreply@github.com>
	Thu, 6 May 2021 08:52:26 +0000 (10:52 +0200)
Lib/http/client.py		patch \| blob \| blame \| history
Lib/test/test_httplib.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst	[new file with mode: 0644]	patch \| blob