httputil: Only strip tabs and spaces from header values

author Ben Darnell <ben@bendarnell.com>

Wed, 5 Jun 2024 20:50:37 +0000 (16:50 -0400)

committer Ben Darnell <ben@bendarnell.com>

Thu, 6 Jun 2024 17:34:27 +0000 (13:34 -0400)
author Ben Darnell <ben@bendarnell.com>
Wed, 5 Jun 2024 20:50:37 +0000 (16:50 -0400)
committer Ben Darnell <ben@bendarnell.com>
Thu, 6 Jun 2024 17:34:27 +0000 (13:34 -0400)
diff --git a/tornado/httputil.py b/tornado/httputil.py

index b21d8046c429d254b05fefb6dc1134fc4db40def..9ce992d82b3ea9a2239a99ab4c4c8edc2b001ac1 100644 (file)
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -62,6 +62,9 @@ if typing.TYPE_CHECKING:
      from asyncio import Future  # noqa: F401
      import unittest  # noqa: F401
  
+# HTTP whitespace (space and tab only); pass to str.strip() and related methods.
+HTTP_WHITESPACE = " \t"
+
  
  @lru_cache(1000)
  def _normalize_header(name: str) -> str:
@@ -171,7 +174,7 @@ class HTTPHeaders(collections.abc.MutableMapping):
              # continuation of a multi-line header
              if self._last_key is None:
                  raise HTTPInputError("first header line cannot start with whitespace")
-            new_part = " " + line.lstrip()
+            new_part = " " + line.lstrip(HTTP_WHITESPACE)
              self._as_list[self._last_key][-1] += new_part
              self._dict[self._last_key] += new_part
          else:
@@ -179,7 +182,7 @@ class HTTPHeaders(collections.abc.MutableMapping):
                  name, value = line.split(":", 1)
              except ValueError:
                  raise HTTPInputError("no colon in header line")
-            self.add(name, value.strip())
+            self.add(name, value.strip(HTTP_WHITESPACE))
  
      @classmethod
      def parse(cls, headers: str) -> "HTTPHeaders":
diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py

index aa9b6ee25380e14d0b4a822d23888fcdaa0ae552..6d618839e07f820d41b07aafdf330707cd0c35f2 100644 (file)
--- a/tornado/test/httputil_test.py
+++ b/tornado/test/httputil_test.py
@@ -334,6 +334,25 @@ Foo: even
                      gen_log.warning("failed while trying %r in %s", newline, encoding)
                      raise
  
+    def test_unicode_whitespace(self):
+        # Per the HTTP standard, only tabs and spaces are stripped from header
+        # values; all other unicode whitespace must be left as-is. For headers,
+        # the characters that matter are the whitespace characters that fall
+        # within the latin1 charset.
+        whitespace = [
+            (" ", True),  # SPACE
+            ("\t", True),  # TAB
+            ("\u00a0", False),  # NON-BREAKING SPACE
+            ("\u0085", False),  # NEXT LINE
+        ]
+        for c, stripped in whitespace:
+            headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c)
+            if stripped:
+                expected = [("Transfer-Encoding", "chunked")]
+            else:
+                expected = [("Transfer-Encoding", "%schunked" % c)]
+            self.assertEqual(expected, list(headers.get_all()))
+
      def test_optional_cr(self):
          # Both CRLF and LF should be accepted as separators. CR should not be
          # part of the data when followed by LF, but it is a normal char
author	Ben Darnell <ben@bendarnell.com>
	Wed, 5 Jun 2024 20:50:37 +0000 (16:50 -0400)
committer	Ben Darnell <ben@bendarnell.com>
	Thu, 6 Jun 2024 17:34:27 +0000 (13:34 -0400)
tornado/httputil.py		patch \| blob \| blame \| history
tornado/test/httputil_test.py		patch \| blob \| blame \| history