From 4ce700affdd23631a0514d1a0460c0854b0687fe Mon Sep 17 00:00:00 2001
From: Ben Darnell
Date: Fri, 25 Apr 2025 14:08:18 -0400
Subject: [PATCH] httputil: Process the Host header more strictly

- It is now an error to have multiple Host headers
- The Host header is now mandatory except in HTTP/1.0 mode
- Host headers containing characters that are disallowed by RFC 3986 are now
  rejected

Fixes #3468
---
 tornado/httputil.py             | 46 +++++++++++++++++++++++++++++++--
 tornado/test/httpserver_test.py | 41 ++++++++++++++++++++---------
 tornado/test/web_test.py        |  2 +-
 3 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/tornado/httputil.py b/tornado/httputil.py
index b2b93463..0f443cdd 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -81,6 +81,19 @@ class _ABNF:
     cannot be alphabetized as they are in the RFCs because of dependencies.
     """
 
+    # RFC 3986 (URI)
+    # The URI hostname ABNF is both complex (including detailed validation of IPv4 and IPv6
+    # literals) and not strict enough (a lot of punctuation is allowed by the ABNF even though
+    # it is not allowed by DNS). We simplify it by allowing square brackets and colons in any
+    # position, not only for their use in IPv6 literals.
+    uri_unreserved = re.compile(r"[A-Za-z0-9\-._~]")
+    uri_sub_delims = re.compile(r"[!$&'()*+,;=]")
+    uri_pct_encoded = re.compile(r"%[0-9A-Fa-f]{2}")
+    uri_host = re.compile(
+        rf"(?:[\[\]:]|{uri_unreserved.pattern}|{uri_sub_delims.pattern}|{uri_pct_encoded.pattern})*"
+    )
+    uri_port = re.compile(r"[0-9]*")
+
     # RFC 5234 (ABNF)
     VCHAR = re.compile(r"[\x21-\x7E]")
 
@@ -91,6 +104,7 @@ class _ABNF:
     token = re.compile(rf"{tchar.pattern}+")
     field_name = token
     method = token
+    host = re.compile(rf"(?:{uri_host.pattern})(?::{uri_port.pattern})?")
 
     # RFC 9112 (HTTP/1.1)
     HTTP_version = re.compile(r"HTTP/[0-9]\.[0-9]")
@@ -440,7 +454,37 @@ class HTTPServerRequest:
         self.remote_ip = getattr(context, "remote_ip", None)
         self.protocol = getattr(context, "protocol", "http")
 
-        self.host = host or self.headers.get("Host") or "127.0.0.1"
+        if host is not None:
+            # An explicitly supplied host (kept for backward compatibility)
+            # takes precedence over the Host header, as before.
+            self.host = host
+        else:
+            try:
+                self.host = self.headers["Host"]
+            except KeyError:
+                if version == "HTTP/1.0":
+                    # HTTP/1.0 does not require the Host header.
+                    self.host = "127.0.0.1"
+                else:
+                    raise HTTPInputError("Missing Host header")
+        if not _ABNF.host.fullmatch(self.host):
+            raise HTTPInputError("Invalid Host header: %r" % self.host)
+        if "," in self.host:
+            # https://www.rfc-editor.org/rfc/rfc9112.html#name-request-target
+            # Server MUST respond with 400 Bad Request if multiple
+            # Host headers are present.
+            #
+            # We test for the presence of a comma instead of the number of
+            # headers received because a proxy may have converted
+            # multiple headers into a single comma-separated value
+            # (per RFC 9110 section 5.3).
+ # + # This is technically a departure from the RFC since the ABNF + # does not forbid commas in the host header. However, since + # commas are not allowed in DNS names, it is appropriate to + # disallow them. (The same argument could be made for other special + # characters, but commas are the most problematic since they could + # be used to exploit differences between proxies when multiple headers + # are supplied). + raise HTTPInputError("Multiple host headers not allowed: %r" % self.host) self.host_name = split_host_and_port(self.host.lower())[0] self.files = files or {} self.connection = connection diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index 69ff065c..570cb64c 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -267,6 +267,7 @@ class HTTPConnectionTest(AsyncHTTPTestCase): b"\r\n".join( [ b"POST /hello HTTP/1.1", + b"Host: 127.0.0.1", b"Content-Length: 1024", b"Expect: 100-continue", b"Connection: close", @@ -467,6 +468,7 @@ class HTTPServerRawTest(AsyncHTTPTestCase): self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: chunked Content-Type: application/x-www-form-urlencoded @@ -491,6 +493,7 @@ bar self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: Chunked Content-Type: application/x-www-form-urlencoded @@ -515,6 +518,7 @@ bar self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: chunked 1_a @@ -537,6 +541,7 @@ Transfer-Encoding: chunked self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: chunked Transfer-encoding: chunked @@ -561,6 +566,7 @@ ok self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: gzip, chunked 2 @@ -582,6 +588,7 @@ ok self.stream.write( b"""\ POST /echo HTTP/1.1 +Host: 127.0.0.1 Transfer-Encoding: chunked Content-Length: 2 @@ -624,6 +631,7 @@ ok textwrap.dedent( f"""\ POST /echo HTTP/1.1 + Host: 127.0.0.1 Content-Length: {value} 
Connection: close @@ -659,7 +667,7 @@ ok expect_log, ): yield stream.connect(("127.0.0.1", self.get_http_port())) - stream.write(utf8(f"{method} /echo HTTP/1.1\r\n\r\n")) + stream.write(utf8(f"{method} /echo HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n")) resp = yield stream.read_until(b"\r\n\r\n") self.assertTrue( resp.startswith(b"HTTP/1.1 %d" % code), @@ -968,16 +976,18 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_two_requests(self): yield self.connect() - self.stream.write(b"GET / HTTP/1.1\r\n\r\n") + self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_response() - self.stream.write(b"GET / HTTP/1.1\r\n\r\n") + self.stream.write(b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_response() self.close() @gen_test def test_request_close(self): yield self.connect() - self.stream.write(b"GET / HTTP/1.1\r\nConnection: close\r\n\r\n") + self.stream.write( + b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\nConnection: close\r\n\r\n" + ) yield self.read_response() data = yield self.stream.read_until_close() self.assertTrue(not data) @@ -1023,7 +1033,9 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_pipelined_requests(self): yield self.connect() - self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n") + self.stream.write( + b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n" + ) yield self.read_response() yield self.read_response() self.close() @@ -1031,7 +1043,9 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_pipelined_cancel(self): yield self.connect() - self.stream.write(b"GET / HTTP/1.1\r\n\r\nGET / HTTP/1.1\r\n\r\n") + self.stream.write( + b"GET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\nGET / HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n" + ) # only read once yield self.read_response() self.close() @@ -1039,7 +1053,7 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_cancel_during_download(self): yield self.connect() - self.stream.write(b"GET /large HTTP/1.1\r\n\r\n") 
+ self.stream.write(b"GET /large HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_headers() yield self.stream.read_bytes(1024) self.close() @@ -1047,7 +1061,7 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_finish_while_closed(self): yield self.connect() - self.stream.write(b"GET /finish_on_close HTTP/1.1\r\n\r\n") + self.stream.write(b"GET /finish_on_close HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_headers() self.close() # Let the hanging coroutine clean up after itself @@ -1075,10 +1089,10 @@ class KeepAliveTest(AsyncHTTPTestCase): @gen_test def test_keepalive_chunked_head_no_body(self): yield self.connect() - self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n") + self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_headers() - self.stream.write(b"HEAD /chunked HTTP/1.1\r\n\r\n") + self.stream.write(b"HEAD /chunked HTTP/1.1\r\nHost:127.0.0.1\r\n\r\n") yield self.read_headers() self.close() @@ -1337,7 +1351,7 @@ class IdleTimeoutTest(AsyncHTTPTestCase): # Use the connection twice to make sure keep-alives are working for i in range(2): - stream.write(b"GET / HTTP/1.1\r\n\r\n") + stream.write(b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\n\r\n") yield stream.read_until(b"\r\n\r\n") data = yield stream.read_bytes(11) self.assertEqual(data, b"Hello world") @@ -1459,6 +1473,7 @@ class BodyLimitsTest(AsyncHTTPTestCase): # Use a raw stream so we can make sure it's all on one connection. 
stream.write( b"PUT /streaming?expected_size=10240 HTTP/1.1\r\n" + b"Host: 127.0.0.1\r\n" b"Content-Length: 10240\r\n\r\n" ) stream.write(b"a" * 10240) @@ -1466,7 +1481,9 @@ class BodyLimitsTest(AsyncHTTPTestCase): self.assertEqual(response, b"10240") # Without the ?expected_size parameter, we get the old default value stream.write( - b"PUT /streaming HTTP/1.1\r\n" b"Content-Length: 10240\r\n\r\n" + b"PUT /streaming HTTP/1.1\r\n" + b"Host: 127.0.0.1\r\n" + b"Content-Length: 10240\r\n\r\n" ) with ExpectLog(gen_log, ".*Content-Length too long", level=logging.INFO): data = yield stream.read_until_close() diff --git a/tornado/test/web_test.py b/tornado/test/web_test.py index 6a04059d..a514e2a4 100644 --- a/tornado/test/web_test.py +++ b/tornado/test/web_test.py @@ -2362,7 +2362,7 @@ class StreamingRequestBodyTest(WebTestCase): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0) s.connect(("127.0.0.1", self.get_http_port())) stream = IOStream(s) - stream.write(b"GET " + url + b" HTTP/1.1\r\n") + stream.write(b"GET " + url + b" HTTP/1.1\r\nHost: 127.0.0.1\r\n") if connection_close: stream.write(b"Connection: close\r\n") stream.write(b"Transfer-Encoding: chunked\r\n\r\n") -- 2.47.2