from asyncio import Future # noqa: F401
import unittest # noqa: F401
+# HTTP whitespace characters (space and tab only), for use with str.strip() and related methods.
+HTTP_WHITESPACE = " \t"
+
@lru_cache(1000)
def _normalize_header(name: str) -> str:
# continuation of a multi-line header
if self._last_key is None:
raise HTTPInputError("first header line cannot start with whitespace")
- new_part = " " + line.lstrip()
+ new_part = " " + line.lstrip(HTTP_WHITESPACE)
self._as_list[self._last_key][-1] += new_part
self._dict[self._last_key] += new_part
else:
name, value = line.split(":", 1)
except ValueError:
raise HTTPInputError("no colon in header line")
- self.add(name, value.strip())
+ self.add(name, value.strip(HTTP_WHITESPACE))
@classmethod
def parse(cls, headers: str) -> "HTTPHeaders":
gen_log.warning("failed while trying %r in %s", newline, encoding)
raise
+ def test_unicode_whitespace(self):
+ # Per the HTTP standard, only spaces and tabs are stripped from header values;
+ # any other Unicode whitespace must be preserved as-is. For headers, that
+ # means the whitespace characters within the latin1 charset. Each entry below
+ # pairs a character with whether parsing is expected to strip it.
+ whitespace = [
+ (" ", True), # SPACE
+ ("\t", True), # TAB
+ ("\u00a0", False), # NON-BREAKING SPACE (U+00A0)
+ ("\u0085", False), # NEXT LINE (U+0085)
+ ]
+ for c, stripped in whitespace:
+ headers = HTTPHeaders.parse("Transfer-Encoding: %schunked" % c)
+ if stripped:
+ expected = [("Transfer-Encoding", "chunked")]
+ else:
+ expected = [("Transfer-Encoding", "%schunked" % c)]
+ self.assertEqual(expected, list(headers.get_all()))
+
def test_optional_cr(self):
# Both CRLF and LF should be accepted as separators. CR should not be
# part of the data when followed by LF, but it is a normal char