From: Ben Darnell Date: Wed, 19 Feb 2025 19:06:22 +0000 (-0500) Subject: httputil: Improve handling of trailing whitespace in headers X-Git-Tag: v6.5.0b1~10^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F3477%2Fhead;p=thirdparty%2Ftornado.git httputil: Improve handling of trailing whitespace in headers HTTPHeaders had undocumented assumptions about trailing whitespace, leading to an unintentional regression in Tornado 6.4.1 in which passing the arguments of an AsyncHTTPClient header_callback to HTTPHeaders.parse_line would result in errors. This commit moves newline parsing from parse to parse_line. It also strips trailing whitespace from continuation lines (trailing whitespace is not allowed in HTTP headers, but we didn't reject it in continuation lines). This commit also deprecates continuation lines and the legacy handling of LF without CR. Fixes #3321 --- diff --git a/tornado/httputil.py b/tornado/httputil.py index 2f1a9ba8..b2b93463 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -207,18 +207,39 @@ class HTTPHeaders(StrMutableMapping): yield (name, value) def parse_line(self, line: str) -> None: - """Updates the dictionary with a single header line. + r"""Updates the dictionary with a single header line. >>> h = HTTPHeaders() >>> h.parse_line("Content-Type: text/html") >>> h.get('content-type') 'text/html' + >>> h.parse_line("Content-Length: 42\r\n") + >>> h.get('content-type') + 'text/html' + + .. versionchanged:: 6.5 + Now supports lines with or without the trailing CRLF, making it possible + to pass lines from AsyncHTTPClient's header_callback directly to this method. + + .. deprecated:: 6.5 + In Tornado 7.0, certain deprecated features of HTTP will become errors. + Specifically, line folding and the use of LF (with CR) as a line separator + will be removed. """ + if m := re.search(r"\r?\n$", line): + # RFC 9112 section 2.2: a recipient MAY recognize a single LF as a line + # terminator and ignore any preceding CR. + # TODO(7.0): Remove this support for LF-only line endings. + line = line[: m.start()] + if not line: + # Empty line, or the final CRLF of a header block. + return if line[0].isspace(): # continuation of a multi-line header + # TODO(7.0): Remove support for line folding. if self._last_key is None: raise HTTPInputError("first header line cannot start with whitespace") - new_part = " " + line.lstrip(HTTP_WHITESPACE) + new_part = " " + line.strip(HTTP_WHITESPACE) self._as_list[self._last_key][-1] += new_part self._dict[self._last_key] += new_part else: @@ -243,13 +264,16 @@ class HTTPHeaders(StrMutableMapping): """ h = cls() - # RFC 7230 section 3.5: a recipient MAY recognize a single LF as a line - # terminator and ignore any preceding CR. - for line in headers.split("\n"): - if line.endswith("\r"): - line = line[:-1] - if line: - h.parse_line(line) + + start = 0 + while True: + lf = headers.find("\n", start) + if lf == -1: + h.parse_line(headers[start:]) + break + line = headers[start : lf + 1] + start = lf + 1 + h.parse_line(line) return h # MutableMapping abstract method implementations. diff --git a/tornado/test/httpclient_test.py b/tornado/test/httpclient_test.py index f6e7dbbe..08c809a2 100644 --- a/tornado/test/httpclient_test.py +++ b/tornado/test/httpclient_test.py @@ -482,6 +482,23 @@ Transfer-Encoding: chunked self.assertRegex(first_line[0], "HTTP/[0-9]\\.[0-9] 200.*\r\n") self.assertEqual(chunks, [b"asdf", b"qwer"]) + def test_header_callback_to_parse_line(self): + # Make a request with header_callback and feed the headers to HTTPHeaders.parse_line. + # (Instead of HTTPHeaders.parse which is used in normal cases). Ensure that the resulting + # headers are as expected, and in particular do not have trailing whitespace added + # due to the final CRLF line. + headers = HTTPHeaders() + + def header_callback(line): + if line.startswith("HTTP/"): + # Ignore the first status line + return + headers.parse_line(line) + + self.fetch("/hello", header_callback=header_callback) + for k, v in headers.get_all(): + self.assertTrue(v == v.strip(), (k, v)) + @gen_test def test_configure_defaults(self): defaults = dict(user_agent="TestDefaultUserAgent", allow_ipv6=False)