git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Media types with 'text/' should default to iso-8859-1
author: Tom Christie <tom@tomchristie.com>
Wed, 1 May 2019 11:32:31 +0000 (12:32 +0100)
committer: Tom Christie <tom@tomchristie.com>
Wed, 1 May 2019 11:32:31 +0000 (12:32 +0100)
httpcore/models.py
tests/models/test_responses.py

index 33a9eda3cbd47c2ccc35f07a8c9b06905b77e6b8..1bdd0fe1a7505b10835ecb817868e76f9785d98c 100644 (file)
@@ -68,18 +68,6 @@ class URL:
     def authority(self) -> str:
         return self.components.authority or ""
 
-    @property
-    def path(self) -> str:
-        return self.components.path or "/"
-
-    @property
-    def query(self) -> str:
-        return self.components.query or ""
-
-    @property
-    def fragment(self) -> str:
-        return self.components.fragment or ""
-
     @property
     def host(self) -> str:
         return self.components.host or ""
@@ -91,6 +79,14 @@ class URL:
             return {"https": 443, "http": 80}[self.scheme]
         return int(port)
 
+    @property
+    def path(self) -> str:
+        return self.components.path or "/"
+
+    @property
+    def query(self) -> str:
+        return self.components.query or ""
+
     @property
     def full_path(self) -> str:
         path = self.path
@@ -98,6 +94,10 @@ class URL:
             path += "?" + self.query
         return path
 
+    @property
+    def fragment(self) -> str:
+        return self.components.fragment or ""
+
     @property
     def is_ssl(self) -> bool:
         return self.components.scheme == "https"
@@ -509,8 +509,13 @@ class Response:
         if content_type is None:
             return None
 
-        parsed = cgi.parse_header(content_type)[-1]
-        return parsed.get("charset")
+        # RFC 2616 specifies that 'iso-8859-1' should be used as the default
+        # for 'text/*' media types, if no charset is provided.
+        # See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
+        parsed = cgi.parse_header(content_type)
+        media_type, info = parsed[0], parsed[-1]
+        default = "iso-8859-1" if media_type.startswith("text/") else None
+        return info.get("charset", default)
 
     @property
     def apparent_encoding(self) -> typing.Optional[str]:
index ce78656327b35515d0371eaed1c2676b4ee51bc4..bfe8c113d77d8bcfb6cd574cec927ac680652f2d 100644 (file)
@@ -47,6 +47,19 @@ def test_response_fallback_to_autodetect():
     assert response.encoding == "EUC-JP"
 
 
+def test_response():
+    """
+    A media type of 'text/*' with no charset should default to ISO-8859-1.
+    See: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
+    """
+    content = b"Hello, world!"
+    headers = {"Content-Type": "text/plain"}
+    response = httpcore.Response(200, content=content, headers=headers)
+    assert response.status_code == 200
+    assert response.encoding == "iso-8859-1"
+    assert response.text == "Hello, world!"
+
+
 def test_response_default_encoding():
     """
     Default to utf-8 if all else fails.