]> git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Add support for zstd decoding (#3139)
author Michiel W. Beijen <mb@x14.nl>
Thu, 21 Mar 2024 10:17:15 +0000 (11:17 +0100)
committer GitHub <noreply@github.com>
Thu, 21 Mar 2024 10:17:15 +0000 (10:17 +0000)
This adds support for zstd decoding using the python package zstandard.
This is similar to how it is implemented in urllib3. I also chose the
optional installation option httpx[zstd] to mimic the same option in
urllib3.

zstd decoding is similar to brotli, but in benchmarks it is supposed to
be even faster. The zstd compression is described in RFC 8878.

See https://github.com/encode/httpx/discussions/1986

Co-authored-by: Kamil Monicz <kamil@monicz.dev>
15 files changed:
CHANGELOG.md
README.md
docs/index.md
docs/quickstart.md
httpx/_compat.py
httpx/_decoders.py
httpx/_models.py
pyproject.toml
requirements.txt
tests/client/test_client.py
tests/client/test_event_hooks.py
tests/client/test_headers.py
tests/test_asgi.py
tests/test_decoders.py
tests/test_main.py

index 85d3bcec57e0daca9b40a1765222c6c7c04a231e..18ded9d27f17ba1669356288b41f692580a495a7 100644 (file)
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ## Unreleased
 
+### Added
+
+* Add support for `zstd` content decoding using the Python `zstandard` package. Installable using `httpx[zstd]`. (#3139)
+
 ### Fixed
 
 * Fix `app` type signature in `ASGITransport`. (#3109)
index 62fb295d17bc59a76a8f924e852c73b34b5ae064..bcba1bb76b464133081f7c2fdfda59a5863a201e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -138,6 +138,7 @@ As well as these optional installs:
 * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
 * `click` - Command line client support. *(Optional, with `httpx[cli]`)*
 * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
+* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
 
 A huge amount of credit is due to `requests` for the API layout that
 much of this work follows, as well as to `urllib3` for plenty of design
index 86b6d1cbaa1140840c1d3eea1a11357741a78c22..387e85047c0b4d596b15cc7690d92523b2f19d5c 100644 (file)
@@ -119,6 +119,7 @@ As well as these optional installs:
 * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)*
 * `click` - Command line client support. *(Optional, with `httpx[cli]`)*
 * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)*
+* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)*
 
 A huge amount of credit is due to `requests` for the API layout that
 much of this work follows, as well as to `urllib3` for plenty of design
@@ -138,10 +139,10 @@ Or, to include the optional HTTP/2 support, use:
 $ pip install httpx[http2]
 ```
 
-To include the optional brotli decoder support, use:
+To include the optional brotli and zstandard decoder support, use:
 
 ```shell
-$ pip install httpx[brotli]
+$ pip install httpx[brotli,zstd]
 ```
 
 HTTPX requires Python 3.8+
index 974119f72cfce2e7a38864cd15366eb604d7f3c0..aa203a8336f85ad865871a89aa7864c7bee82c56 100644 (file)
@@ -100,7 +100,8 @@ b'<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>...'
 
 Any `gzip` and `deflate` HTTP response encodings will automatically
 be decoded for you. If `brotlipy` is installed, then the `brotli` response
-encoding will also be supported.
+encoding will be supported. If `zstandard` is installed, then `zstd`
+response encodings will also be supported.
 
 For example, to create an image from binary data returned by a request, you can use the following code:
 
@@ -362,7 +363,8 @@ Or stream the text, on a line-by-line basis...
 
 HTTPX will use universal line endings, normalising all cases to `\n`.
 
-In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. In this case any content encoding that the web server has applied such as `gzip`, `deflate`, or `brotli` will not be automatically decoded.
+In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. In this case any content encoding that the web server has applied such as `gzip`, `deflate`, `brotli`, or `zstd` will
+not be automatically decoded.
 
 ```pycon
 >>> with httpx.stream("GET", "https://www.example.com") as r:
index 27ccc6827333cbf91584a4317e0b0c21c31a73d3..7d86dced46fda60999913267bf746edf5922d546 100644 (file)
@@ -3,8 +3,11 @@ The _compat module is used for code which requires branching between different
 Python environments. It is excluded from the code coverage checks.
 """
 
+import re
 import ssl
 import sys
+from types import ModuleType
+from typing import Optional
 
 # Brotli support is optional
 # The C bindings in `brotli` are recommended for CPython.
@@ -17,6 +20,24 @@ except ImportError:  # pragma: no cover
     except ImportError:
         brotli = None
 
+# Zstandard support is optional
+zstd: Optional[ModuleType] = None
+try:
+    import zstandard as zstd
+except (AttributeError, ImportError, ValueError):  # Defensive:
+    zstd = None
+else:
+    # The package 'zstandard' added the 'eof' property starting
+    # in v0.18.0 which we require to ensure a complete and
+    # valid zstd stream was fed into the ZstdDecoder.
+    # See: https://github.com/urllib3/urllib3/pull/2624
+    _zstd_version = tuple(
+        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
+    )
+    if _zstd_version < (0, 18):  # Defensive:
+        zstd = None
+
+
 if sys.version_info >= (3, 10) or ssl.OPENSSL_VERSION_INFO >= (1, 1, 0, 7):
 
     def set_minimum_tls_version_1_2(context: ssl.SSLContext) -> None:
index f9d3adbb5d5c3caa4eb88b4f1c3711fa84cfe40d..62f2c0b911a405fcc5510b31d51d8f5504cbeb0d 100644 (file)
@@ -11,7 +11,7 @@ import io
 import typing
 import zlib
 
-from ._compat import brotli
+from ._compat import brotli, zstd
 from ._exceptions import DecodingError
 
 
@@ -140,6 +140,44 @@ class BrotliDecoder(ContentDecoder):
             raise DecodingError(str(exc)) from exc
 
 
+class ZStandardDecoder(ContentDecoder):
+    """
+    Handle 'zstd' RFC 8878 decoding.
+
+    Requires `pip install zstandard`.
+    Can be installed as a dependency of httpx using `pip install httpx[zstd]`.
+    """
+
+    # inspired by the ZstdDecoder implementation in urllib3
+    def __init__(self) -> None:
+        if zstd is None:  # pragma: no cover
+            raise ImportError(
+                "Using 'ZStandardDecoder', ..."
+                "Make sure to install httpx using `pip install httpx[zstd]`."
+            ) from None
+
+        self.decompressor = zstd.ZstdDecompressor().decompressobj()
+
+    def decode(self, data: bytes) -> bytes:
+        assert zstd is not None
+        output = io.BytesIO()
+        try:
+            output.write(self.decompressor.decompress(data))
+            while self.decompressor.eof and self.decompressor.unused_data:
+                unused_data = self.decompressor.unused_data
+                self.decompressor = zstd.ZstdDecompressor().decompressobj()
+                output.write(self.decompressor.decompress(unused_data))
+        except zstd.ZstdError as exc:
+            raise DecodingError(str(exc)) from exc
+        return output.getvalue()
+
+    def flush(self) -> bytes:
+        ret = self.decompressor.flush()  # note: this is a no-op
+        if not self.decompressor.eof:
+            raise DecodingError("Zstandard data is incomplete")  # pragma: no cover
+        return bytes(ret)
+
+
 class MultiDecoder(ContentDecoder):
     """
     Handle the case where multiple encodings have been applied.
@@ -323,8 +361,11 @@ SUPPORTED_DECODERS = {
     "gzip": GZipDecoder,
     "deflate": DeflateDecoder,
     "br": BrotliDecoder,
+    "zstd": ZStandardDecoder,
 }
 
 
 if brotli is None:
     SUPPORTED_DECODERS.pop("br")  # pragma: no cover
+if zstd is None:
+    SUPPORTED_DECODERS.pop("zstd")  # pragma: no cover
index 92b393a233fbacc4f5499ed31658010a9b40500a..01d9583bc5edd0e16fa1e8eadddd32e6039d762f 100644 (file)
@@ -818,7 +818,7 @@ class Response:
     def iter_bytes(self, chunk_size: int | None = None) -> typing.Iterator[bytes]:
         """
         A byte-iterator over the decoded response content.
-        This allows us to handle gzip, deflate, and brotli encoded responses.
+        This allows us to handle gzip, deflate, brotli, and zstd encoded responses.
         """
         if hasattr(self, "_content"):
             chunk_size = len(self._content) if chunk_size is None else chunk_size
@@ -918,7 +918,7 @@ class Response:
     ) -> typing.AsyncIterator[bytes]:
         """
         A byte-iterator over the decoded response content.
-        This allows us to handle gzip, deflate, and brotli encoded responses.
+        This allows us to handle gzip, deflate, brotli, and zstd encoded responses.
         """
         if hasattr(self, "_content"):
             chunk_size = len(self._content) if chunk_size is None else chunk_size
index 9e6464c23a87a7dc1b925dea4842ef7254371ca4..c4c188052e61c216972258bbe51126c97348785d 100644 (file)
@@ -52,6 +52,9 @@ http2 = [
 socks = [
     "socksio==1.*",
 ]
+zstd = [
+  "zstandard>=0.18.0",
+]
 
 [project.scripts]
 httpx = "httpx:main"
index b9c9588d153ca2262ab47ffa7a3afee06824c175..3e73fbdbd3ca4e22e7c5c363c73765e1166ac651 100644 (file)
@@ -2,7 +2,7 @@
 # On the other hand, we're not pinning package dependencies, because our tests
 # needs to pass with the latest version of the packages.
 # Reference: https://github.com/encode/httpx/pull/1721#discussion_r661241588
--e .[brotli,cli,http2,socks]
+-e .[brotli,cli,http2,socks,zstd]
 
 # Optional charset auto-detection
 # Used in our test cases
index 2951e01b8a5259e634ccd7291dbdaa750e828b9b..657839018ab3ded203937f970eeeb23f26561775 100644 (file)
@@ -357,7 +357,7 @@ def test_raw_client_header():
     assert response.json() == [
         ["Host", "example.org"],
         ["Accept", "*/*"],
-        ["Accept-Encoding", "gzip, deflate, br"],
+        ["Accept-Encoding", "gzip, deflate, br, zstd"],
         ["Connection", "keep-alive"],
         ["User-Agent", f"python-httpx/{httpx.__version__}"],
         ["Example-Header", "example-value"],
index 6604dd31a36b4e800816436ddcbb1081692db82f..78fb0484e67783fd3844c059d01203a8e579eefd 100644 (file)
@@ -36,7 +36,7 @@ def test_event_hooks():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
@@ -87,7 +87,7 @@ async def test_async_event_hooks():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
@@ -144,7 +144,7 @@ def test_event_hooks_with_redirect():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
@@ -159,7 +159,7 @@ def test_event_hooks_with_redirect():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
@@ -201,7 +201,7 @@ async def test_async_event_hooks_with_redirect():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
@@ -216,7 +216,7 @@ async def test_async_event_hooks_with_redirect():
                 "host": "127.0.0.1:8000",
                 "user-agent": f"python-httpx/{httpx.__version__}",
                 "accept": "*/*",
-                "accept-encoding": "gzip, deflate, br",
+                "accept-encoding": "gzip, deflate, br, zstd",
                 "connection": "keep-alive",
                 "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=",
             },
index 264ca0bd67004541b85545d9aac19c0895a4f332..c51e40c3356103708706ea1cc90d9fb13df964d7 100755 (executable)
@@ -34,7 +34,7 @@ def test_client_header():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "example-header": "example-value",
             "host": "example.org",
@@ -56,7 +56,7 @@ def test_header_merge():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org",
             "user-agent": "python-myclient/0.2.1",
@@ -78,7 +78,7 @@ def test_header_merge_conflicting_headers():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org",
             "user-agent": f"python-httpx/{httpx.__version__}",
@@ -100,7 +100,7 @@ def test_header_update():
     assert first_response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org",
             "user-agent": f"python-httpx/{httpx.__version__}",
@@ -111,7 +111,7 @@ def test_header_update():
     assert second_response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "another-header": "AThing",
             "connection": "keep-alive",
             "host": "example.org",
@@ -164,7 +164,7 @@ def test_remove_default_header():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org",
         }
@@ -192,7 +192,7 @@ def test_host_with_auth_and_port_in_url():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org",
             "user-agent": f"python-httpx/{httpx.__version__}",
@@ -215,7 +215,7 @@ def test_host_with_non_default_port_in_url():
     assert response.json() == {
         "headers": {
             "accept": "*/*",
-            "accept-encoding": "gzip, deflate, br",
+            "accept-encoding": "gzip, deflate, br, zstd",
             "connection": "keep-alive",
             "host": "example.org:123",
             "user-agent": f"python-httpx/{httpx.__version__}",
index ccc55266787e9b608293f8a4b913769e5947f9ea..8b817891e438a46c2f323bc307d6c3fe0b4dcc26 100644 (file)
@@ -157,7 +157,7 @@ async def test_asgi_headers():
         "headers": [
             ["host", "www.example.org"],
             ["accept", "*/*"],
-            ["accept-encoding", "gzip, deflate, br"],
+            ["accept-encoding", "gzip, deflate, br, zstd"],
             ["connection", "keep-alive"],
             ["user-agent", f"python-httpx/{httpx.__version__}"],
         ]
index 73644e04e658efbb1718fc1c44d994de2cb22564..bcbb18bb0ebc74dba17cde25058268ec5419769e 100644 (file)
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
+import io
 import typing
 import zlib
 
 import chardet
 import pytest
+import zstandard as zstd
 
 import httpx
 
@@ -73,6 +75,53 @@ def test_brotli():
     assert response.content == body
 
 
+def test_zstd():
+    body = b"test 123"
+    compressed_body = zstd.compress(body)
+
+    headers = [(b"Content-Encoding", b"zstd")]
+    response = httpx.Response(
+        200,
+        headers=headers,
+        content=compressed_body,
+    )
+    assert response.content == body
+
+
+def test_zstd_decoding_error():
+    compressed_body = "this_is_not_zstd_compressed_data"
+
+    headers = [(b"Content-Encoding", b"zstd")]
+    with pytest.raises(httpx.DecodingError):
+        httpx.Response(
+            200,
+            headers=headers,
+            content=compressed_body,
+        )
+
+
+def test_zstd_multiframe():
+    # test inspired by urllib3 test suite
+    data = (
+        # Zstandard frame
+        zstd.compress(b"foo")
+        # skippable frame (must be ignored)
+        + bytes.fromhex(
+            "50 2A 4D 18"  # Magic_Number (little-endian)
+            "07 00 00 00"  # Frame_Size (little-endian)
+            "00 00 00 00 00 00 00"  # User_Data
+        )
+        # Zstandard frame
+        + zstd.compress(b"bar")
+    )
+    compressed_body = io.BytesIO(data)
+
+    headers = [(b"Content-Encoding", b"zstd")]
+    response = httpx.Response(200, headers=headers, content=compressed_body)
+    response.read()
+    assert response.content == b"foobar"
+
+
 def test_multi():
     body = b"test 123"
 
index 67eeb0d22849f73a903b0290bc8741092041ad7a..feb796e155ce1343b32632c8dfbe4d03ebafff3e 100644 (file)
@@ -129,7 +129,7 @@ def test_verbose(server):
         "GET / HTTP/1.1",
         f"Host: {server.url.netloc.decode('ascii')}",
         "Accept: */*",
-        "Accept-Encoding: gzip, deflate, br",
+        "Accept-Encoding: gzip, deflate, br, zstd",
         "Connection: keep-alive",
         f"User-Agent: python-httpx/{httpx.__version__}",
         "",
@@ -154,7 +154,7 @@ def test_auth(server):
         "GET / HTTP/1.1",
         f"Host: {server.url.netloc.decode('ascii')}",
         "Accept: */*",
-        "Accept-Encoding: gzip, deflate, br",
+        "Accept-Encoding: gzip, deflate, br, zstd",
         "Connection: keep-alive",
         f"User-Agent: python-httpx/{httpx.__version__}",
         "Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=",