From: Tom Christie <tom@tomchristie.com>
Date: Tue, 30 Apr 2019 12:52:37 +0000 (+0100)
Subject: Rejig test and tighten up models API
X-Git-Tag: 0.3.0~61
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dea4d42c6d9bb092fac705b712039de7c27d57f7;p=thirdparty%2Fhttpx.git

Rejig test and tighten up models API
---

diff --git a/httpcore/exceptions.py b/httpcore/exceptions.py
index 6c7fc605..0b6efeec 100644
--- a/httpcore/exceptions.py
+++ b/httpcore/exceptions.py
@@ -1,3 +1,6 @@
+# Timeout exceptions...
+
+
 class Timeout(Exception):
     """
     A base class for all timeouts.
@@ -28,6 +31,18 @@ class PoolTimeout(Timeout):
     """
 
 
+# HTTP exceptions...
+
+
+class ProtocolError(Exception):
+    """
+    Malformed HTTP.
+    """
+
+
+# Redirect exceptions...
+
+
 class RedirectError(Exception):
     """
     Base class for HTTP redirect errors.
@@ -53,10 +68,7 @@ class RedirectLoop(RedirectError):
     """
 
 
-class ProtocolError(Exception):
-    """
-    Malformed HTTP.
-    """
+# Response exceptions...
 
 
 class StreamConsumed(Exception):
diff --git a/httpcore/models.py b/httpcore/models.py
index 2e01dab5..5325b16b 100644
--- a/httpcore/models.py
+++ b/httpcore/models.py
@@ -125,34 +125,69 @@ class Headers(typing.MutableMapping[str, str]):
     A case-insensitive multidict.
     """
 
-    def __init__(self, headers: HeaderTypes = None) -> None:
+    def __init__(self, headers: HeaderTypes = None, encoding: str = None) -> None:
         if headers is None:
             self._list = []  # type: typing.List[typing.Tuple[bytes, bytes]]
         elif isinstance(headers, Headers):
             self._list = list(headers.raw)
         elif isinstance(headers, dict):
             self._list = [
-                (normalize_header_key(k), normalize_header_value(v))
+                (normalize_header_key(k, encoding), normalize_header_value(v, encoding))
                 for k, v in headers.items()
             ]
         else:
             self._list = [
-                (normalize_header_key(k), normalize_header_value(v)) for k, v in headers
+                (normalize_header_key(k, encoding), normalize_header_value(v, encoding))
+                for k, v in headers
             ]
+        self._encoding = encoding
+
+    @property
+    def encoding(self) -> str:
+        """
+        Header encoding is mandated as ascii, but utf-8 or iso-8859-1 may be
+        seen in the wild.
+        """
+        if self._encoding is None:
+            for encoding in ["ascii", "utf-8"]:
+                for key, value in self.raw:
+                    try:
+                        key.decode(encoding)
+                        value.decode(encoding)
+                    except UnicodeDecodeError:
+                        break
+                else:
+                    # The else block runs if 'break' did not occur, meaning
+                    # all values fitted the encoding.
+                    self._encoding = encoding
+                    break
+            else:
+                # The ISO-8859-1 encoding covers all 256 code points in a byte,
+                # so will never raise decode errors.
+                self._encoding = "iso-8859-1"
+        return self._encoding
+
+    @encoding.setter
+    def encoding(self, value: str) -> None:
+        self._encoding = value
 
     @property
     def raw(self) -> typing.List[typing.Tuple[bytes, bytes]]:
+        """
+        Returns a list of the raw header items, as byte pairs.
+        May be mutated in-place.
+        """
         return self._list
 
     def keys(self) -> typing.List[str]:  # type: ignore
-        return [key.decode("latin-1") for key, value in self._list]
+        return [key.decode(self.encoding) for key, value in self._list]
 
     def values(self) -> typing.List[str]:  # type: ignore
-        return [value.decode("latin-1") for key, value in self._list]
+        return [value.decode(self.encoding) for key, value in self._list]
 
     def items(self) -> typing.List[typing.Tuple[str, str]]:  # type: ignore
         return [
-            (key.decode("latin-1"), value.decode("latin-1"))
+            (key.decode(self.encoding), value.decode(self.encoding))
             for key, value in self._list
         ]
 
@@ -162,19 +197,53 @@ class Headers(typing.MutableMapping[str, str]):
         except KeyError:
             return default
 
-    def getlist(self, key: str) -> typing.List[str]:
-        get_header_key = key.lower().encode("latin-1")
-        return [
-            item_value.decode("latin-1")
+    def getlist(self, key: str, default: typing.Any = None, split_commas = None) -> typing.List[str]:
+        """
+        Return multiple header values.
+
+        If there are header values that include commas, then we default to
+        splitting them into multiple results, except for Set-Cookie.
+
+        See: https://tools.ietf.org/html/rfc7230#section-3.2.2
+        """
+        get_header_key = key.lower().encode(self.encoding)
+        if split_commas is None:
+            split_commas = get_header_key != b'set-cookie'
+
+        values = [
+            item_value.decode(self.encoding)
             for item_key, item_value in self._list
             if item_key == get_header_key
         ]
 
+        if not values:
+            return [] if default is None else default
+
+        if not split_commas:
+            return values
+
+        split_values = []
+        for value in values:
+            split_values.extend([item.strip() for item in value.split(",")])
+        return split_values
+
     def __getitem__(self, key: str) -> str:
-        get_header_key = key.lower().encode("latin-1")
+        """
+        Return a single header value.
+
+        If there are multiple headers with the same key, then we concatenate
+        them with commas. See: https://tools.ietf.org/html/rfc7230#section-3.2.2
+        """
+        normalized_key = key.lower().encode(self.encoding)
+
+        items = []
         for header_key, header_value in self._list:
-            if header_key == get_header_key:
-                return header_value.decode("latin-1")
+            if header_key == normalized_key:
+                items.append(header_value.decode(self.encoding))
+
+        if items:
+            return ", ".join(items)
+
         raise KeyError(key)
 
     def __setitem__(self, key: str, value: str) -> None:
@@ -182,8 +251,8 @@ class Headers(typing.MutableMapping[str, str]):
         Set the header `key` to `value`, removing any duplicate entries.
         Retains insertion order.
         """
-        set_key = key.lower().encode("latin-1")
-        set_value = value.encode("latin-1")
+        set_key = key.lower().encode(self.encoding)
+        set_value = value.encode(self.encoding)
 
         found_indexes = []
         for idx, (item_key, item_value) in enumerate(self._list):
@@ -203,7 +272,7 @@ class Headers(typing.MutableMapping[str, str]):
         """
         Remove the header `key`.
         """
-        del_key = key.lower().encode("latin-1")
+        del_key = key.lower().encode(self.encoding)
 
         pop_indexes = []
         for idx, (item_key, item_value) in enumerate(self._list):
@@ -214,7 +283,7 @@ class Headers(typing.MutableMapping[str, str]):
             del self._list[idx]
 
     def __contains__(self, key: typing.Any) -> bool:
-        get_header_key = key.lower().encode("latin-1")
+        get_header_key = key.lower().encode(self.encoding)
         for header_key, header_value in self._list:
             if header_key == get_header_key:
                 return True
@@ -233,10 +302,16 @@ class Headers(typing.MutableMapping[str, str]):
 
     def __repr__(self) -> str:
         class_name = self.__class__.__name__
+
+        encoding_str = ""
+        if self.encoding != "ascii":
+            encoding_str = f", encoding={self.encoding!r}"
+
         as_dict = dict(self.items())
         if len(as_dict) == len(self):
-            return f"{class_name}({as_dict!r})"
-        return f"{class_name}(raw={self.raw!r})"
+            return f"{class_name}({as_dict!r}{encoding_str})"
+        as_list = self.items()
+        return f"{class_name}({as_list!r}{encoding_str})"
 
 
 class Request:
@@ -351,10 +426,10 @@ class Response:
         """
         if not hasattr(self, "_decoder"):
             decoders = []  # type: typing.List[Decoder]
-            value = self.headers.get("content-encoding", "identity")
-            for part in value.split(","):
-                part = part.strip().lower()
-                decoder_cls = SUPPORTED_DECODERS[part]
+            values = self.headers.getlist("content-encoding", ["identity"])
+            for value in values:
+                value = value.strip().lower()
+                decoder_cls = SUPPORTED_DECODERS[value]
                 decoders.append(decoder_cls())
 
             if len(decoders) == 1:
diff --git a/httpcore/utils.py b/httpcore/utils.py
index aa5e14ee..4ab49fdb 100644
--- a/httpcore/utils.py
+++ b/httpcore/utils.py
@@ -54,22 +54,22 @@ def requote_uri(uri: str) -> str:
         return quote(uri, safe=safe_without_percent)
 
 
-def normalize_header_key(value: typing.AnyStr) -> bytes:
+def normalize_header_key(value: typing.AnyStr, encoding: str = None) -> bytes:
     """
     Coerce str/bytes into a strictly byte-wise HTTP header key.
     """
     if isinstance(value, bytes):
         return value.lower()
-    return value.encode("latin-1").lower()
+    return value.encode(encoding or "ascii").lower()
 
 
-def normalize_header_value(value: typing.AnyStr) -> bytes:
+def normalize_header_value(value: typing.AnyStr, encoding: str = None) -> bytes:
     """
     Coerce str/bytes into a strictly byte-wise HTTP header value.
     """
     if isinstance(value, bytes):
         return value
-    return value.encode("latin-1")
+    return value.encode(encoding or "ascii")
 
 
 def get_reason_phrase(status_code: int) -> str:
diff --git a/tests/test_connection_pools.py b/tests/dispatch/test_connection_pools.py
similarity index 100%
rename from tests/test_connection_pools.py
rename to tests/dispatch/test_connection_pools.py
diff --git a/tests/test_connections.py b/tests/dispatch/test_connections.py
similarity index 100%
rename from tests/test_connections.py
rename to tests/dispatch/test_connections.py
diff --git a/tests/test_http2.py b/tests/dispatch/test_http2.py
similarity index 100%
rename from tests/test_http2.py
rename to tests/dispatch/test_http2.py
diff --git a/tests/models/test_headers.py b/tests/models/test_headers.py
new file mode 100644
index 00000000..b7995e3d
--- /dev/null
+++ b/tests/models/test_headers.py
@@ -0,0 +1,154 @@
+import httpcore
+
+
+def test_headers():
+    h = httpcore.Headers([("a", "123"), ("a", "456"), ("b", "789")])
+    assert "a" in h
+    assert "A" in h
+    assert "b" in h
+    assert "B" in h
+    assert "c" not in h
+    assert h["a"] == "123, 456"
+    assert h.get("a") == "123, 456"
+    assert h.get("nope", default=None) is None
+    assert h.getlist("a") == ["123", "456"]
+    assert h.keys() == ["a", "a", "b"]
+    assert h.values() == ["123", "456", "789"]
+    assert h.items() == [("a", "123"), ("a", "456"), ("b", "789")]
+    assert list(h) == ["a", "a", "b"]
+    assert dict(h) == {"a": "123, 456", "b": "789"}
+    assert repr(h) == "Headers([('a', '123'), ('a', '456'), ('b', '789')])"
+    assert h == httpcore.Headers([("a", "123"), ("b", "789"), ("a", "456")])
+    assert h != [("a", "123"), ("A", "456"), ("b", "789")]
+
+    h = httpcore.Headers({"a": "123", "b": "789"})
+    assert h["A"] == "123"
+    assert h["B"] == "789"
+    assert h.raw == [(b"a", b"123"), (b"b", b"789")]
+    assert repr(h) == "Headers({'a': '123', 'b': '789'})"
+
+
+def test_header_mutations():
+    h = httpcore.Headers()
+    assert dict(h) == {}
+    h["a"] = "1"
+    assert dict(h) == {"a": "1"}
+    h["a"] = "2"
+    assert dict(h) == {"a": "2"}
+    h.setdefault("a", "3")
+    assert dict(h) == {"a": "2"}
+    h.setdefault("b", "4")
+    assert dict(h) == {"a": "2", "b": "4"}
+    del h["a"]
+    assert dict(h) == {"b": "4"}
+    assert h.raw == [(b"b", b"4")]
+
+
+def test_copy_headers():
+    headers = httpcore.Headers({"custom": "example"})
+    headers_copy = httpcore.Headers(headers)
+    assert headers == headers_copy
+
+
+def test_headers_insert_retains_ordering():
+    headers = httpcore.Headers({"a": "a", "b": "b", "c": "c"})
+    headers["b"] = "123"
+    assert list(headers.values()) == ["a", "123", "c"]
+
+
+def test_headers_insert_appends_if_new():
+    headers = httpcore.Headers({"a": "a", "b": "b", "c": "c"})
+    headers["d"] = "123"
+    assert list(headers.values()) == ["a", "b", "c", "123"]
+
+
+def test_headers_insert_removes_all_existing():
+    headers = httpcore.Headers([("a", "123"), ("a", "456")])
+    headers["a"] = "789"
+    assert dict(headers) == {"a": "789"}
+
+
+def test_headers_delete_removes_all_existing():
+    headers = httpcore.Headers([("a", "123"), ("a", "456")])
+    del headers["a"]
+    assert dict(headers) == {}
+
+
+def test_headers_dict_repr():
+    """
+    Headers should display with a dict repr by default.
+    """
+    headers = httpcore.Headers({"custom": "example"})
+    assert repr(headers) == "Headers({'custom': 'example'})"
+
+
+def test_headers_encoding_in_repr():
+    """
+    Headers should display an encoding in the repr if required.
+    """
+    headers = httpcore.Headers({b"custom": "example ☃".encode("utf-8")})
+    assert repr(headers) == "Headers({'custom': 'example ☃'}, encoding='utf-8')"
+
+
+def test_headers_list_repr():
+    """
+    Headers should display with a list repr if they include multiple identical keys.
+    """
+    headers = httpcore.Headers([("custom", "example 1"), ("custom", "example 2")])
+    assert (
+        repr(headers) == "Headers([('custom', 'example 1'), ('custom', 'example 2')])"
+    )
+
+
+def test_headers_decode_ascii():
+    """
+    Headers should decode as ascii by default.
+    """
+    raw_headers = [(b"Custom", b"Example")]
+    headers = httpcore.Headers(raw_headers)
+    assert dict(headers) == {"custom": "Example"}
+    assert headers.encoding == "ascii"
+
+
+def test_headers_decode_utf_8():
+    """
+    Headers containing non-ascii codepoints should default to decoding as utf-8.
+    """
+    raw_headers = [(b"Custom", "Code point: ☃".encode("utf-8"))]
+    headers = httpcore.Headers(raw_headers)
+    assert dict(headers) == {"custom": "Code point: ☃"}
+    assert headers.encoding == "utf-8"
+
+
+def test_headers_decode_iso_8859_1():
+    """
+    Headers containing non-UTF-8 codepoints should default to decoding as iso-8859-1.
+    """
+    raw_headers = [(b"Custom", "Code point: ÿ".encode("iso-8859-1"))]
+    headers = httpcore.Headers(raw_headers)
+    assert dict(headers) == {"custom": "Code point: ÿ"}
+    assert headers.encoding == "iso-8859-1"
+
+
+def test_headers_decode_explicit_encoding():
+    """
+    An explicit encoding may be set on headers in order to force a
+    particular decoding.
+    """
+    raw_headers = [(b"Custom", "Code point: ☃".encode("utf-8"))]
+    headers = httpcore.Headers(raw_headers)
+    headers.encoding = "iso-8859-1"
+    print(headers)
+    assert dict(headers) == {"custom": "Code point: â\x98\x83"}
+    assert headers.encoding == "iso-8859-1"
+
+
+def test_multiple_headers():
+    """
+    Most headers should split by commas for `getlist`, except 'Set-Cookie'.
+    """
+    h = httpcore.Headers([('set-cookie', 'a, b'), ('set-cookie', 'c')])
+    assert h.getlist('Set-Cookie') == ['a, b', 'c']
+
+    h = httpcore.Headers([('vary', 'a, b'), ('vary', 'c')])
+    assert h.getlist('Vary') == ['a', 'b', 'c']
diff --git a/tests/test_requests.py b/tests/models/test_requests.py
similarity index 100%
rename from tests/test_requests.py
rename to tests/models/test_requests.py
diff --git a/tests/test_responses.py b/tests/models/test_responses.py
similarity index 100%
rename from tests/test_responses.py
rename to tests/models/test_responses.py
diff --git a/tests/test_api.py b/tests/test_client.py
similarity index 100%
rename from tests/test_api.py
rename to tests/test_client.py
diff --git a/tests/test_decoding.py b/tests/test_decoders.py
similarity index 100%
rename from tests/test_decoding.py
rename to tests/test_decoders.py