git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Add response.text and response.encoding
author Tom Christie <tom@tomchristie.com>
Tue, 30 Apr 2019 15:35:25 +0000 (16:35 +0100)
committer Tom Christie <tom@tomchristie.com>
Tue, 30 Apr 2019 15:35:25 +0000 (16:35 +0100)
httpcore/models.py
httpcore/utils.py
requirements.txt
setup.py
tests/models/test_responses.py
tests/test_client.py

index afc2d1ebc503c6bee28d1a9444e6116c62e574be..77031b68366267bf2bae79a17c2c705de44363be 100644 (file)
@@ -1,6 +1,9 @@
+import cgi
 import typing
 from urllib.parse import urlsplit
 
+import chardet
+
 from .config import SSLConfig, TimeoutConfig
 from .decoders import (
     ACCEPT_ENCODING,
@@ -11,7 +14,12 @@ from .decoders import (
 )
 from .exceptions import ResponseClosed, ResponseNotRead, StreamConsumed
 from .status_codes import codes
-from .utils import get_reason_phrase, normalize_header_key, normalize_header_value
+from .utils import (
+    get_reason_phrase,
+    is_known_encoding,
+    normalize_header_key,
+    normalize_header_value,
+)
 
 URLTypes = typing.Union["URL", str]
 
@@ -200,15 +208,8 @@ class Headers(typing.MutableMapping[str, str]):
     def getlist(self, key: str, split_commas: bool = False) -> typing.List[str]:
         """
         Return multiple header values.
-
-        If there are header values that include commas, then we default to
-        spliting them into multiple results, except for Set-Cookie.
-
-        See: https://tools.ietf.org/html/rfc7230#section-3.2.2
         """
         get_header_key = key.lower().encode(self.encoding)
-        if split_commas is None:
-            split_commas = get_header_key != b"set-cookie"
 
         values = [
             item_value.decode(self.encoding)
@@ -424,13 +425,58 @@ class Response:
     def content(self) -> bytes:
         if not hasattr(self, "_content"):
             if hasattr(self, "_raw_content"):
-                self._content = (
-                    self.decoder.decode(self._raw_content) + self.decoder.flush()
-                )
+                content = self.decoder.decode(self._raw_content)
+                content += self.decoder.flush()
+                self._content = content
             else:
                 raise ResponseNotRead()
         return self._content
 
+    @property
+    def text(self) -> str:
+        if not hasattr(self, "_text"):
+            content = self.content
+            if not content:
+                self._text = ""
+            else:
+                encoding = self.encoding
+                self._text = content.decode(encoding, errors="replace")
+        return self._text
+
+    @property
+    def encoding(self) -> str:
+        if not hasattr(self, "_encoding"):
+            encoding = self.charset_encoding
+            if encoding is None or not is_known_encoding(encoding):
+                encoding = self.apparent_encoding
+                if encoding is None or not is_known_encoding(encoding):
+                    encoding = "utf-8"
+            self._encoding = encoding
+        return self._encoding
+
+    @encoding.setter
+    def encoding(self, value: str) -> None:
+        self._encoding = value
+
+    @property
+    def charset_encoding(self) -> typing.Optional[str]:
+        """
+        Return the encoding, as specified by the Content-Type header.
+        """
+        content_type = self.headers.get("Content-Type")
+        if content_type is None:
+            return None
+
+        parsed = cgi.parse_header(content_type)[-1]
+        return parsed.get("charset")
+
+    @property
+    def apparent_encoding(self) -> typing.Optional[str]:
+        """
+        Return the encoding, as it appears to autodetection.
+        """
+        return chardet.detect(self.content)["encoding"]
+
     @property
     def decoder(self) -> Decoder:
         """
index 4ab49fdb763e449ac312ab86fb8840426f000236..33c0d3c1cc7add08242da601d78bef83e952d696 100644 (file)
@@ -1,3 +1,4 @@
+import codecs
 import http
 import typing
 from urllib.parse import quote
@@ -80,3 +81,11 @@ def get_reason_phrase(status_code: int) -> str:
         return http.HTTPStatus(status_code).phrase
     except ValueError as exc:
         return ""
+
+
+def is_known_encoding(encoding: str) -> bool:
+    try:
+        codecs.lookup(encoding)
+    except LookupError:
+        return False
+    return True
index 18f9c5fe4c5970c0fa84775876b6df9352fb1c22..dd8ea66f3209b1a6411e17328bb2becb0a1ed07c 100644 (file)
@@ -1,4 +1,5 @@
 certifi
+chardet
 h11
 h2
 
index 93d02ad562e8b443a11705467f91fbb6adda6bb8..6be5ebaf6a663d15af66659ae1d663a883be1c12 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ setup(
     author_email="tom@tomchristie.com",
     packages=get_packages("httpcore"),
     data_files=[("", ["LICENSE.md"])],
-    install_requires=["h11", "h2", "certifi"],
+    install_requires=["h11", "h2", "certifi", "chardet"],
     classifiers=[
         "Development Status :: 3 - Alpha",
         "Environment :: Web Environment",
index 4cc340b694a591515cd2898e01b21c5daaf3fc74..a1376ee101a7a2d9c264c43bceeab6fdaa8d2f9f 100644 (file)
@@ -12,8 +12,37 @@ def test_response():
     response = httpcore.Response(200, content=b"Hello, world!")
     assert response.status_code == 200
     assert response.reason_phrase == "OK"
-    assert response.content == b"Hello, world!"
-    assert response.is_closed
+    assert response.text == "Hello, world!"
+
+
+def test_response_content_type_encoding():
+    headers = {"Content-Type": "text-plain; charset=latin-1"}
+    response = httpcore.Response(
+        200, content="Latin 1: ÿ".encode("latin-1"), headers=headers
+    )
+    assert response.text == "Latin 1: ÿ"
+    assert response.encoding == "latin-1"
+
+
+def test_response_autodetect_encoding():
+    response = httpcore.Response(200, content="Snowmen: ☃☃☃".encode("utf-8"))
+    assert response.text == "Snowmen: ☃☃☃"
+    assert response.encoding == "utf-8"
+
+
+def test_response_default_encoding():
+    response = httpcore.Response(200, content=b"")
+    assert response.text == ""
+    assert response.encoding == "utf-8"
+
+
+def test_response_force_encoding():
+    response = httpcore.Response(200, content="Snowman: ☃".encode("utf-8"))
+    response.encoding = "iso-8859-1"
+    assert response.status_code == 200
+    assert response.reason_phrase == "OK"
+    assert response.text == "Snowman: â\x98\x83"
+    assert response.encoding == "iso-8859-1"
 
 
 @pytest.mark.asyncio
@@ -21,7 +50,8 @@ async def test_read_response():
     response = httpcore.Response(200, content=b"Hello, world!")
 
     assert response.status_code == 200
-    assert response.content == b"Hello, world!"
+    assert response.text == "Hello, world!"
+    assert response.encoding == "ascii"
     assert response.is_closed
 
     content = await response.read()
@@ -71,3 +101,4 @@ def test_unknown_status_code():
     response = httpcore.Response(600)
     assert response.status_code == 600
     assert response.reason_phrase == ""
+    assert response.text == ""
index 4ec5744d9fb9c8dc730f1067c6fff5dd4e1fe068..43a33bef7aa2e8f4979e0ac7fc5f0ae979e3d49b 100644 (file)
@@ -9,7 +9,7 @@ async def test_get(server):
     async with httpcore.Client() as client:
         response = await client.get(url)
     assert response.status_code == 200
-    assert response.content == b"Hello, world!"
+    assert response.text == "Hello, world!"
 
 
 @pytest.mark.asyncio