httputil: Process the Host header more strictly (3487/head)
author     Ben Darnell <ben@bendarnell.com>
           Fri, 25 Apr 2025 18:08:18 +0000 (14:08 -0400)
committer  Ben Darnell <ben@bendarnell.com>
           Fri, 25 Apr 2025 18:08:18 +0000 (14:08 -0400)
- It is now an error to have multiple Host headers
- The Host header is now mandatory except in HTTP/1.0 mode
- Host headers containing characters that are disallowed by RFC 3986
  are now rejected

Fixes #3468
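
A minimal sketch (not part of the commit) of the new behavior, constructing
HTTPServerRequest directly against the patched tornado.httputil:

    from tornado.httputil import HTTPHeaders, HTTPInputError, HTTPServerRequest

    # Requests other than HTTP/1.0 must now carry a Host header.
    try:
        HTTPServerRequest(uri="/", version="HTTP/1.1", headers=HTTPHeaders())
    except HTTPInputError as e:
        print(e)  # Missing Host header

    # Host values containing characters disallowed by RFC 3986 are rejected.
    try:
        HTTPServerRequest(
            uri="/",
            version="HTTP/1.1",
            headers=HTTPHeaders({"Host": "exa mple.com"}),
        )
    except HTTPInputError as e:
        print(e)  # Invalid Host header: 'exa mple.com'

    # HTTP/1.0 requests may still omit Host; the old default is preserved.
    req = HTTPServerRequest(uri="/", version="HTTP/1.0", headers=HTTPHeaders())
    print(req.host)  # 127.0.0.1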

tornado/httputil.py
tornado/test/httpserver_test.py
tornado/test/web_test.py

diff --git a/tornado/httputil.py b/tornado/httputil.py
index b2b934634d7cca2eaad7fb1d005fdf1601496dd0..0f443cddc4c58ccb75b1d6025fa24e17cbc67585 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -81,6 +81,19 @@ class _ABNF:
     cannot be alphabetized as they are in the RFCs because of dependencies.
     """
 
+    # RFC 3986 (URI)
+    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
+    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
+    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
+    # position, not only for their use in IPv6 literals.
+    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
+    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
+    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
+    uri_host = re.compile(
+        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
+    )
+    uri_port = re.compile(r"[0-9]*")
+
     # RFC 5234 (ABNF)
     VCHAR = re.compile(r"[\x21-\x7E]")
 
@@ -91,6 +104,7 @@ class _ABNF:
     token = re.compile(rf"{tchar.pattern}+")
     field_name = token
     method = token
+    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")
 
     # RFC 9112 (HTTP/1.1)
     HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
@@ -421,7 +435,7 @@ class HTTPServerRequest:
         version: str = "HTTP/1.0",
         headers: Optional[HTTPHeaders] = None,
         body: Optional[bytes] = None,
-        host: Optional[str] = None,
+        host: Optional[str] = None,
         files: Optional[Dict[str, List["HTTPFile"]]] = None,
         connection: Optional["HTTPConnection"] = None,
         start_line: Optional["RequestStartLine"] = None,
@@ -440,7 +454,34 @@
         self.remote_ip = getattr(context, "remote_ip", None)
         self.protocol = getattr(context, "protocol", "http")
 
-        self.host = host or self.headers.get("Host") or "127.0.0.1"
+        try:
+            self.host = self.headers["Host"]
+        except KeyError:
+            if version == "HTTP/1.0":
+                # HTTP/1.0 does not require the Host header.
+                self.host = "127.0.0.1"
+            else:
+                raise HTTPInputError("Missing Host header")
+        if not _ABNF.host.fullmatch(self.host):
+            raise HTTPInputError("Invalid Host header: %r" % self.host)
+        if "," in self.host:
+            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
+            # Server MUST respond with 400 Bad Request if multiple
+            # Host headers are present.
+            #
+            # We test for the presence of a comma instead of the number of
+            # headers received because a proxy may have converted
+            # multiple headers into a single comma-separated value
+            # (per RFC 9110 section 5.3).
+            #
+            # This is technically a departure from the RFC since the ABNF
+            # does not forbid commas in the host header. However, since
+            # commas are not allowed in DNS names, it is appropriate to
+            # disallow them. (The same argument could be made for other special
+            # characters, but commas are the most problematic since they could
+            # be used to exploit differences between proxies when multiple headers
+            # are supplied).
+            raise HTTPInputError("Multiple host headers not allowed: %r" % self.host)
         self.host_name = split_host_and_port(self.host.lower())[0]
         self.files = files or {}
         self.connection = connection
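
For experimentation outside tornado, here is a hedged standalone re-creation of
the simplified host grammar added to _ABNF above (names are local to this sketch):

    import re

    uri_unreserved = r"[A-Za-z0-9\-._~]"
    uri_sub_delims = r"[!$&'()*+,;=]"
    uri_pct_encoded = r"%[0-9A-Fa-f]{2}"
    uri_host = rf"(?:[\[\]:]|{uri_unreserved}|{uri_sub_delims}|{uri_pct_encoded})*"
    host = re.compile(rf"(?:{uri_host})(?::[0-9]*)?")

    # Plausible Host values are accepted, including IPv6 literals with ports.
    for value in ["example.com", "example.com:8080", "192.168.0.1", "[::1]:8080"]:
        assert host.fullmatch(value)

    # Disallowed punctuation is rejected.
    for value in ["exa mple.com", "example.com/path", "host\\name"]:
        assert host.fullmatch(value) is None

    # Commas pass the grammar (they are RFC 3986 sub-delims), which is why
    # HTTPServerRequest adds the explicit comma check above.
    assert host.fullmatch("a.example,b.example")
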
diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py
index 69ff065c508d8a7c3befcc642fc092151fa0a1d4..570cb64ca63c4b36c3a9024afa1a6bdd78cae705 100644
--- a/tornado/test/httpserver_test.py
+++ b/tornado/test/httpserver_test.py
@@ -267,6 +267,7 @@ class HTTPConnectionTest(AsyncHTTPTestCase):
             b"\r\n".join(
                 [
                     b"POST /hello HTTP/1.1",
+                    b"Host: 127.0.0.1",
                     b"Content-Length: 1024",
                     b"Expect: 100-continue",
                     b"Connection: close",
@@ -467,6 +468,7 @@ class HTTPServerRawTest(AsyncHTTPTestCase):
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: chunked
 Content-Type: application/x-www-form-urlencoded
 
@@ -491,6 +493,7 @@ bar
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: Chunked
 Content-Type: application/x-www-form-urlencoded
 
@@ -515,6 +518,7 @@ bar
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: chunked
 
 1_a
@@ -537,6 +541,7 @@ Transfer-Encoding: chunked
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: chunked
 Transfer-encoding: chunked
 
@@ -561,6 +566,7 @@ ok
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: gzip, chunked
 
 2
@@ -582,6 +588,7 @@ ok
         self.stream.write(
             b"""\
 POST /echo HTTP/1.1
+Host: 127.0.0.1
 Transfer-Encoding: chunked
 Content-Length: 2
 
@@ -624,6 +631,7 @@ ok
                             textwrap.dedent(
                                 f"""\
                             POST /echo HTTP/1.1
+                            Host: 127.0.0.1
                             Content-Length: {value}
                             Connection: close
 
@@ -659,7 +667,7 @@ ok
                 expect_log,
             ):
                 yield stream.connect(("127.0.0.1", self.get_http_port()))
-                stream.write(utf8(f"{method} /echo HTTP/1.1\r\n\r\n"))
+                stream.write(utf8(f"{method} /echo HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"))
                 resp = yield stream.read_until(b"\r\n\r\n")
                 self.assertTrue(
                     resp.startswith(b"HTTP/1.1 %d" % code),
@@ -968,16 +976,18 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_two_requests(self):
         yield self.connect()
-        self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
+        self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_response()
-        self.stream.write(b"GET / HTTP/1.1\r\n\r\n")
+        self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_response()
         self.close()
 
     @gen_test
     def test_request_close(self):
         yield self.connect()
-        self.stream.write(b"GET / HTTP/1.1\r\nConnection: close\r\n\r\n")
+        self.stream.write(
+            b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\nConnection: close\r\n\r\n"
+        )
         yield self.read_response()
         data = yield self.stream.read_until_close()
         self.assertTrue(not data)
@@ -1023,7 +1033,9 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_pipelined_requests(self):
         yield self.connect()
-        self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
+        self.stream.write(
+            b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
+        )
         yield self.read_response()
         yield self.read_response()
         self.close()
@@ -1031,7 +1043,9 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_pipelined_cancel(self):
         yield self.connect()
-        self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n")
+        self.stream.write(
+            b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n"
+        )
         # only read once
         yield self.read_response()
         self.close()
@@ -1039,7 +1053,7 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_cancel_during_download(self):
         yield self.connect()
-        self.stream.write(b"GET /large HTTP/1.1\r\n\r\n")
+        self.stream.write(b"GET /large HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_headers()
         yield self.stream.read_bytes(1024)
         self.close()
@@ -1047,7 +1061,7 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_finish_while_closed(self):
         yield self.connect()
-        self.stream.write(b"GET /finish_on_close HTTP/1.1\r\n\r\n")
+        self.stream.write(b"GET /finish_on_close HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_headers()
         self.close()
         # Let the hanging coroutine clean up after itself
@@ -1075,10 +1089,10 @@ class KeepAliveTest(AsyncHTTPTestCase):
     @gen_test
     def test_keepalive_chunked_head_no_body(self):
         yield self.connect()
-        self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
+        self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_headers()
 
-        self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n")
+        self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")
         yield self.read_headers()
         self.close()
 
@@ -1337,7 +1351,7 @@ class IdleTimeoutTest(AsyncHTTPTestCase):
 
         # Use the connection twice to make sure keep-alives are working
         for i in range(2):
-            stream.write(b"GET / HTTP/1.1\r\n\r\n")
+            stream.write(b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n")
             yield stream.read_until(b"\r\n\r\n")
             data = yield stream.read_bytes(11)
             self.assertEqual(data, b"Hello world")
@@ -1459,6 +1473,7 @@ class BodyLimitsTest(AsyncHTTPTestCase):
             # Use a raw stream so we can make sure it's all on one connection.
             stream.write(
                 b"PUT /streaming?expected_size=10240 HTTP/1.1\r\n"
+                b"Host: 127.0.0.1\r\n"
                 b"Content-Length: 10240\r\n\r\n"
             )
             stream.write(b"a" * 10240)
@@ -1466,7 +1481,9 @@ class BodyLimitsTest(AsyncHTTPTestCase):
             self.assertEqual(response, b"10240")
             # Without the ?expected_size parameter, we get the old default value
             stream.write(
-                b"PUT /streaming HTTP/1.1\r\n" b"Content-Length: 10240\r\n\r\n"
+                b"PUT /streaming HTTP/1.1\r\n"
+                b"Host: 127.0.0.1\r\n"
+                b"Content-Length: 10240\r\n\r\n"
             )
             with ExpectLog(gen_log, ".*Content-Length too long", level=logging.INFO):
                 data = yield stream.read_until_close()
diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py
index 6a04059dbcb870111e1077b1386fa85b2729b22f..a514e2a4ca97c8275bed704f551f0d51518f7718 100644
--- a/tornado/test/web_test.py
+++ b/tornado/test/web_test.py
@@ -2362,7 +2362,7 @@ class StreamingRequestBodyTest(WebTestCase):
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
         s.connect(("127.0.0.1", self.get_http_port()))
         stream = IOStream(s)
-        stream.write(b"GET " + url + b" HTTP/1.1\r\n")
+        stream.write(b"GET " + url + b" HTTP/1.1\r\nHost: 127.0.0.1\r\n")
         if connection_close:
             stream.write(b"Connection: close\r\n")
         stream.write(b"Transfer-Encoding: chunked\r\n\r\n")