From: Ben Darnell Date: Wed, 5 Jun 2024 20:50:37 +0000 (-0400) Subject: httputil: Only strip tabs and spaces from header values X-Git-Tag: v6.4.1~1^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8d721a877dd5c2bc0693d9c4d3954eb11fbd404b;p=thirdparty%2Ftornado.git httputil: Only strip tabs and spaces from header values The RFC specifies that only tabs and spaces should be stripped. Removing additional whitespace characters can lead to framing errors with certain proxies. --- diff --git a/tornado/httputil.py b/tornado/httputil.py index b21d8046..9ce992d8 100644 --- a/tornado/httputil.py +++ b/tornado/httputil.py @@ -62,6 +62,9 @@ if typing.TYPE_CHECKING: from asyncio import Future # noqa: F401 import unittest # noqa: F401 +# To be used with str.strip() and related methods. +HTTP_WHITESPACE = " \t" + @lru_cache(1000) def _normalize_header(name: str) -> str: @@ -171,7 +174,7 @@ class HTTPHeaders(collections.abc.MutableMapping): # continuation of a multi-line header if self._last_key is None: raise HTTPInputError("first header line cannot start with whitespace") - new_part = " " + line.lstrip() + new_part = " " + line.lstrip(HTTP_WHITESPACE) self._as_list[self._last_key][-1] += new_part self._dict[self._last_key] += new_part else: @@ -179,7 +182,7 @@ class HTTPHeaders(collections.abc.MutableMapping): name, value = line.split(":", 1) except ValueError: raise HTTPInputError("no colon in header line") - self.add(name, value.strip()) + self.add(name, value.strip(HTTP_WHITESPACE)) @classmethod def parse(cls, headers: str) -> "HTTPHeaders": diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py index aa9b6ee2..6d618839 100644 --- a/tornado/test/httputil_test.py +++ b/tornado/test/httputil_test.py @@ -334,6 +334,25 @@ Foo: even gen_log.warning("failed while trying %r in %s", newline, encoding) raise + def test_unicode_whitespace(self): + # Only tabs and spaces are to be stripped according to the HTTP standard. 
+ # Other unicode whitespace is to be left as-is. In the context of headers, + # this specifically means the whitespace characters falling within the + # latin1 charset. + whitespace = [ + (" ", True), # SPACE + ("\t", True), # TAB + ("\u00a0", False), # NON-BREAKING SPACE + ("\u0085", False), # NEXT LINE + ] + for c, stripped in whitespace: + headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c) + if stripped: + expected = [("Transfer-Encoding", "chunked")] + else: + expected = [("Transfer-Encoding", "%schunked" % c)] + self.assertEqual(expected, list(headers.get_all())) + def test_optional_cr(self): # Both CRLF and LF should be accepted as separators. CR should not be # part of the data when followed by LF, but it is a normal char