From: Tom Christie Date: Wed, 1 May 2019 10:32:03 +0000 (+0100) Subject: Carefulness with encoding everywhere X-Git-Tag: 0.3.0~54 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e1ecb360d9839d31d2c5645ee4021af7dbb97800;p=thirdparty%2Fhttpx.git Carefulness with encoding everywhere --- diff --git a/httpcore/dispatch/http11.py b/httpcore/dispatch/http11.py index f574c256..fc3a0334 100644 --- a/httpcore/dispatch/http11.py +++ b/httpcore/dispatch/http11.py @@ -54,8 +54,8 @@ class HTTP11Connection(Adapter): assert timeout is None or isinstance(timeout, TimeoutConfig) #  Start sending the request. - method = request.method.encode() - target = request.url.full_path + method = request.method.encode('ascii') + target = request.url.full_path.encode('ascii') headers = request.headers.raw event = h11.Request(method=method, target=target, headers=headers) await self._send_event(event, timeout) @@ -75,7 +75,7 @@ class HTTP11Connection(Adapter): event = await self._receive_event(timeout) assert isinstance(event, h11.Response) - reason_phrase = event.reason.decode("latin1") + reason_phrase = event.reason.decode("ascii", errors="ignore") status_code = event.status_code headers = event.headers content = self._body_iter(timeout) diff --git a/httpcore/dispatch/http2.py b/httpcore/dispatch/http2.py index 787f40e5..6d0a8f04 100644 --- a/httpcore/dispatch/http2.py +++ b/httpcore/dispatch/http2.py @@ -62,7 +62,7 @@ class HTTP2Connection(Adapter): headers = [] for k, v in event.headers: if k == b":status": - status_code = int(v.decode()) + status_code = int(v.decode('ascii', errors='ignore')) elif not k.startswith(b":"): headers.append((k, v)) @@ -98,10 +98,10 @@ class HTTP2Connection(Adapter): async def send_headers(self, request: Request, timeout: OptionalTimeout) -> int: stream_id = self.h2_state.get_next_available_stream_id() headers = [ - (b":method", request.method.encode()), - (b":authority", request.url.host.encode()), - (b":scheme", request.url.scheme.encode()), - (b":path", request.url.full_path.encode()), + (b":method", request.method.encode('ascii')), + (b":authority", request.url.authority.encode('ascii')), + (b":scheme", request.url.scheme.encode('ascii')), + (b":path", request.url.full_path.encode('ascii')), ] + request.headers.raw self.h2_state.send_headers(stream_id, headers) data_to_send = self.h2_state.data_to_send() diff --git a/httpcore/models.py b/httpcore/models.py index 6e170ef8..33a9eda3 100644 --- a/httpcore/models.py +++ b/httpcore/models.py @@ -89,18 +89,17 @@ class URL: port = self.components.port if port is None: return {"https": 443, "http": 80}[self.scheme] - return port + return int(port) @property def full_path(self) -> str: - path = self.path or "/" - query = self.query - if query: - return path + "?" + query + path = self.path + if self.query: + path += "?" + self.query return path @property - def is_secure(self) -> bool: + def is_ssl(self) -> bool: return self.components.scheme == "https" @property @@ -155,7 +154,7 @@ class Origin: def __init__(self, url: URLTypes) -> None: if not isinstance(url, URL): url = URL(url) - self.is_ssl = url.scheme == "https" + self.is_ssl = url.is_ssl self.host = url.host self.port = url.port diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index 16c2af4a..ce786563 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -16,21 +16,41 @@ def test_response(): def test_response_content_type_encoding(): + """ + Use the charset encoding in the Content-Type header if possible. + """ headers = {"Content-Type": "text-plain; charset=latin-1"} - response = httpcore.Response( - 200, content="Latin 1: ÿ".encode("latin-1"), headers=headers - ) + content = "Latin 1: ÿ".encode("latin-1") + response = httpcore.Response(200, content=content, headers=headers) assert response.text == "Latin 1: ÿ" assert response.encoding == "latin-1" def test_response_autodetect_encoding(): - response = httpcore.Response(200, content="Snowmen: ☃☃☃".encode("utf-8")) - assert response.text == "Snowmen: ☃☃☃" - assert response.encoding == "utf-8" + """ + Autodetect encoding if there is no charset info in a Content-Type header. + """ + content = "おはようございます。".encode("EUC-JP") + response = httpcore.Response(200, content=content) + assert response.text == "おはようございます。" + assert response.encoding == "EUC-JP" + + +def test_response_fallback_to_autodetect(): + """ + Fallback to autodetection if we get an invalid charset in the Content-Type header. + """ + headers = {"Content-Type": "text-plain; charset=invalid-codec-name"} + content = "おはようございます。".encode("EUC-JP") + response = httpcore.Response(200, content=content, headers=headers) + assert response.text == "おはようございます。" + assert response.encoding == "EUC-JP" def test_response_default_encoding(): + """ + Default to utf-8 if all else fails. + """ response = httpcore.Response(200, content=b"") assert response.text == "" assert response.encoding == "utf-8" diff --git a/tests/models/test_url.py b/tests/models/test_url.py index ecb4d8d7..6de64395 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -5,3 +5,19 @@ def test_idna_url(): url = URL("http://中国.icom.museum:80/") assert url == URL("http://xn--fiqs8s.icom.museum:80/") assert url.host == "xn--fiqs8s.icom.museum" + + +def test_url(): + url = URL("https://example.org:123/path/to/somewhere?abc=123#anchor") + assert url.scheme == "https" + assert url.host == "example.org" + assert url.port == 123 + assert url.authority == "example.org:123" + assert url.path == "/path/to/somewhere" + assert url.query == "abc=123" + assert url.fragment == "anchor" + assert repr(url) == "URL('https://example.org:123/path/to/somewhere?abc=123#anchor')" + + new = url.copy_with(scheme="http") + assert new == URL("http://example.org:123/path/to/somewhere?abc=123#anchor") + assert new.scheme == "http"