From: Ben Darnell <ben@bendarnell.com>
Date: Wed, 5 Jun 2024 20:50:37 +0000 (-0400)
Subject: httputil: Only strip tabs and spaces from header values
X-Git-Tag: v6.4.1~1^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8d721a877dd5c2bc0693d9c4d3954eb11fbd404b;p=thirdparty%2Ftornado.git

httputil: Only strip tabs and spaces from header values

The RFC specifies that only tabs and spaces should be stripped.
Removing additonal whitespace characters can lead to framing
errors with certain proxies.
---

diff --git a/tornado/httputil.py b/tornado/httputil.py
index b21d8046..9ce992d8 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -62,6 +62,9 @@ if typing.TYPE_CHECKING:
     from asyncio import Future  # noqa: F401
     import unittest  # noqa: F401
 
+# To be used with str.strip() and related methods.
+HTTP_WHITESPACE = " \t"
+
 
 @lru_cache(1000)
 def _normalize_header(name: str) -> str:
@@ -171,7 +174,7 @@ class HTTPHeaders(collections.abc.MutableMapping):
             # continuation of a multi-line header
             if self._last_key is None:
                 raise HTTPInputError("first header line cannot start with whitespace")
-            new_part = " " + line.lstrip()
+            new_part = " " + line.lstrip(HTTP_WHITESPACE)
             self._as_list[self._last_key][-1] += new_part
             self._dict[self._last_key] += new_part
         else:
@@ -179,7 +182,7 @@ class HTTPHeaders(collections.abc.MutableMapping):
                 name, value = line.split(":", 1)
             except ValueError:
                 raise HTTPInputError("no colon in header line")
-            self.add(name, value.strip())
+            self.add(name, value.strip(HTTP_WHITESPACE))
 
     @classmethod
     def parse(cls, headers: str) -> "HTTPHeaders":
diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
index aa9b6ee2..6d618839 100644
--- a/tornado/test/httputil_test.py
+++ b/tornado/test/httputil_test.py
@@ -334,6 +334,25 @@ Foo: even
                     gen_log.warning("failed while trying %r in %s", newline, encoding)
                     raise
 
+    def test_unicode_whitespace(self):
+        # Only tabs and spaces are to be stripped according to the HTTP standard.
+        # Other unicode whitespace is to be left as-is. In the context of headers,
+        # this specifically means the whitespace characters falling within the
+        # latin1 charset.
+        whitespace = [
+            (" ", True),  # SPACE
+            ("\t", True),  # TAB
+            ("\u00a0", False),  # NON-BREAKING SPACE
+            ("\u0085", False),  # NEXT LINE
+        ]
+        for c, stripped in whitespace:
+            headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c)
+            if stripped:
+                expected = [("Transfer-Encoding", "chunked")]
+            else:
+                expected = [("Transfer-Encoding", "%schunked" % c)]
+            self.assertEqual(expected, list(headers.get_all()))
+
     def test_optional_cr(self):
         # Both CRLF and LF should be accepted as separators. CR should not be
         # part of the data when followed by LF, but it is a normal char