From: Tom Christie Date: Wed, 1 May 2019 11:32:31 +0000 (+0100) Subject: Media types with 'text/' should default to iso-8859-1 X-Git-Tag: 0.3.0~52 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d7cf8bbf36dbfe72e06242cd3382646ec955a852;p=thirdparty%2Fhttpx.git Media types with 'text/' should default to iso-8859-1 --- diff --git a/httpcore/models.py b/httpcore/models.py index 33a9eda3..1bdd0fe1 100644 --- a/httpcore/models.py +++ b/httpcore/models.py @@ -68,18 +68,6 @@ class URL: def authority(self) -> str: return self.components.authority or "" - @property - def path(self) -> str: - return self.components.path or "/" - - @property - def query(self) -> str: - return self.components.query or "" - - @property - def fragment(self) -> str: - return self.components.fragment or "" - @property def host(self) -> str: return self.components.host or "" @@ -91,6 +79,14 @@ class URL: return {"https": 443, "http": 80}[self.scheme] return int(port) + @property + def path(self) -> str: + return self.components.path or "/" + + @property + def query(self) -> str: + return self.components.query or "" + @property def full_path(self) -> str: path = self.path @@ -98,6 +94,10 @@ class URL: path += "?" + self.query return path + @property + def fragment(self) -> str: + return self.components.fragment or "" + @property def is_ssl(self) -> bool: return self.components.scheme == "https" @@ -509,8 +509,13 @@ class Response: if content_type is None: return None - parsed = cgi.parse_header(content_type)[-1] - return parsed.get("charset") + # RFC 2616 specifies that 'iso-8859-1' should be used as the default + # for 'text/*' media types, if no charset is provided. + # See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + parsed = cgi.parse_header(content_type) + media_type, info = parsed[0], parsed[-1] + default = "iso-8859-1" if media_type.startswith("text/") else None + return info.get("charset", default) @property def apparent_encoding(self) -> typing.Optional[str]: diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index ce786563..bfe8c113 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -47,6 +47,19 @@ def test_response_fallback_to_autodetect(): assert response.encoding == "EUC-JP" +def test_response(): + """ + A media type of 'text/*' with no charset should default to ISO-8859-1. + See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + """ + content = b"Hello, world!" + headers = {"Content-Type": "text/plain"} + response = httpcore.Response(200, content=content, headers=headers) + assert response.status_code == 200 + assert response.encoding == "iso-8859-1" + assert response.text == "Hello, world!" + + def test_response_default_encoding(): """ Default to utf-8 if all else fails.