From: Tom Christie Date: Tue, 30 Apr 2019 15:35:25 +0000 (+0100) Subject: Add response.text and response.encoding X-Git-Tag: 0.3.0~58 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d8df61b44b9de52c9639cf2da828fc5ed364b004;p=thirdparty%2Fhttpx.git Add response.text and response.encoding --- diff --git a/httpcore/models.py b/httpcore/models.py index afc2d1eb..77031b68 100644 --- a/httpcore/models.py +++ b/httpcore/models.py @@ -1,6 +1,9 @@ +import cgi import typing from urllib.parse import urlsplit +import chardet + from .config import SSLConfig, TimeoutConfig from .decoders import ( ACCEPT_ENCODING, @@ -11,7 +14,12 @@ from .decoders import ( ) from .exceptions import ResponseClosed, ResponseNotRead, StreamConsumed from .status_codes import codes -from .utils import get_reason_phrase, normalize_header_key, normalize_header_value +from .utils import ( + get_reason_phrase, + is_known_encoding, + normalize_header_key, + normalize_header_value, +) URLTypes = typing.Union["URL", str] @@ -200,15 +208,8 @@ class Headers(typing.MutableMapping[str, str]): def getlist(self, key: str, split_commas: bool = False) -> typing.List[str]: """ Return multiple header values. - - If there are header values that include commas, then we default to - spliting them into multiple results, except for Set-Cookie. - - See: https://tools.ietf.org/html/rfc7230#section-3.2.2 """ get_header_key = key.lower().encode(self.encoding) - if split_commas is None: - split_commas = get_header_key != b"set-cookie" values = [ item_value.decode(self.encoding) @@ -424,13 +425,58 @@ class Response: def content(self) -> bytes: if not hasattr(self, "_content"): if hasattr(self, "_raw_content"): - self._content = ( - self.decoder.decode(self._raw_content) + self.decoder.flush() - ) + content = self.decoder.decode(self._raw_content) + content += self.decoder.flush() + self._content = content else: raise ResponseNotRead() return self._content + @property + def text(self) -> str: + if not hasattr(self, "_text"): + content = self.content + if not content: + self._text = "" + else: + encoding = self.encoding + self._text = content.decode(encoding, errors="replace") + return self._text + + @property + def encoding(self) -> str: + if not hasattr(self, "_encoding"): + encoding = self.charset_encoding + if encoding is None or not is_known_encoding(encoding): + encoding = self.apparent_encoding + if encoding is None or not is_known_encoding(encoding): + encoding = "utf-8" + self._encoding = encoding + return self._encoding + + @encoding.setter + def encoding(self, value: str) -> None: + self._encoding = value + + @property + def charset_encoding(self) -> typing.Optional[str]: + """ + Return the encoding, as specified by the Content-Type header. + """ + content_type = self.headers.get("Content-Type") + if content_type is None: + return None + + parsed = cgi.parse_header(content_type)[-1] + return parsed.get("charset") + + @property + def apparent_encoding(self) -> typing.Optional[str]: + """ + Return the encoding, as it appears to autodetection. + """ + return chardet.detect(self.content)["encoding"] + @property def decoder(self) -> Decoder: """ diff --git a/httpcore/utils.py b/httpcore/utils.py index 4ab49fdb..33c0d3c1 100644 --- a/httpcore/utils.py +++ b/httpcore/utils.py @@ -1,3 +1,4 @@ +import codecs import http import typing from urllib.parse import quote @@ -80,3 +81,11 @@ def get_reason_phrase(status_code: int) -> str: return http.HTTPStatus(status_code).phrase except ValueError as exc: return "" + + +def is_known_encoding(encoding: str) -> bool: + try: + codecs.lookup(encoding) + except LookupError: + return False + return True diff --git a/requirements.txt b/requirements.txt index 18f9c5fe..dd8ea66f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ certifi +chardet h11 h2 diff --git a/setup.py b/setup.py index 93d02ad5..6be5ebaf 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ setup( author_email="tom@tomchristie.com", packages=get_packages("httpcore"), data_files=[("", ["LICENSE.md"])], - install_requires=["h11", "h2", "certifi"], + install_requires=["h11", "h2", "certifi", "chardet"], classifiers=[ "Development Status :: 3 - Alpha", "Environment :: Web Environment", diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index 4cc340b6..a1376ee1 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -12,8 +12,37 @@ def test_response(): response = httpcore.Response(200, content=b"Hello, world!") assert response.status_code == 200 assert response.reason_phrase == "OK" - assert response.content == b"Hello, world!" - assert response.is_closed + assert response.text == "Hello, world!" + + +def test_response_content_type_encoding(): + headers = {"Content-Type": "text-plain; charset=latin-1"} + response = httpcore.Response( + 200, content="Latin 1: ÿ".encode("latin-1"), headers=headers + ) + assert response.text == "Latin 1: ÿ" + assert response.encoding == "latin-1" + + +def test_response_autodetect_encoding(): + response = httpcore.Response(200, content="Snowmen: ☃☃☃".encode("utf-8")) + assert response.text == "Snowmen: ☃☃☃" + assert response.encoding == "utf-8" + + +def test_response_default_encoding(): + response = httpcore.Response(200, content=b"") + assert response.text == "" + assert response.encoding == "utf-8" + + +def test_response_force_encoding(): + response = httpcore.Response(200, content="Snowman: ☃".encode("utf-8")) + response.encoding = "iso-8859-1" + assert response.status_code == 200 + assert response.reason_phrase == "OK" + assert response.text == "Snowman: â\x98\x83" + assert response.encoding == "iso-8859-1" @pytest.mark.asyncio @@ -21,7 +50,8 @@ async def test_read_response(): response = httpcore.Response(200, content=b"Hello, world!") assert response.status_code == 200 - assert response.content == b"Hello, world!" + assert response.text == "Hello, world!" + assert response.encoding == "ascii" assert response.is_closed content = await response.read() @@ -71,3 +101,4 @@ def test_unknown_status_code(): response = httpcore.Response(600) assert response.status_code == 600 assert response.reason_phrase == "" + assert response.text == "" diff --git a/tests/test_client.py b/tests/test_client.py index 4ec5744d..43a33bef 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -9,7 +9,7 @@ async def test_get(server): async with httpcore.Client() as client: response = await client.get(url) assert response.status_code == 200 - assert response.content == b"Hello, world!" + assert response.text == "Hello, world!" @pytest.mark.asyncio