From: Michiel W. Beijen Date: Thu, 21 Mar 2024 10:17:15 +0000 (+0100) Subject: Add support for zstd decoding (#3139) X-Git-Tag: 0.27.1~19 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=392dbe45f086d0877bd288c5d68abf860653b680;p=thirdparty%2Fhttpx.git Add support for zstd decoding (#3139) This adds support for zstd decoding using the python package zstandard. This is similar to how it is implemented in urllib3. I also chose the optional installation option httpx[zstd] to mimic the same option in urllib3. zstd decoding is similar to brotli, but in benchmarks it is supposed to be even faster. The zstd compression is described in RFC 8878. See https://github.com/encode/httpx/discussions/1986 Co-authored-by: Kamil Monicz --- diff --git a/CHANGELOG.md b/CHANGELOG.md index 85d3bcec..18ded9d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Unreleased +### Added + +* Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139) + ### Fixed * Fix `app` type signature in `ASGITransport`. (#3109) diff --git a/README.md b/README.md index 62fb295d..bcba1bb7 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ As well as these optional installs: * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)* * `click` - Command line client support. *(Optional, with `httpx[cli]`)* * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)* +* `zstandard` - Decoding for "zstd" compressed responses. 
*(Optional, with `httpx[zstd]`)* A huge amount of credit is due to `requests` for the API layout that much of this work follows, as well as to `urllib3` for plenty of design diff --git a/docs/index.md b/docs/index.md index 86b6d1cb..387e8504 100644 --- a/docs/index.md +++ b/docs/index.md @@ -119,6 +119,7 @@ As well as these optional installs: * `rich` - Rich terminal support. *(Optional, with `httpx[cli]`)* * `click` - Command line client support. *(Optional, with `httpx[cli]`)* * `brotli` or `brotlicffi` - Decoding for "brotli" compressed responses. *(Optional, with `httpx[brotli]`)* +* `zstandard` - Decoding for "zstd" compressed responses. *(Optional, with `httpx[zstd]`)* A huge amount of credit is due to `requests` for the API layout that much of this work follows, as well as to `urllib3` for plenty of design @@ -138,10 +139,10 @@ Or, to include the optional HTTP/2 support, use: $ pip install httpx[http2] ``` -To include the optional brotli decoder support, use: +To include the optional brotli and zstandard decoder support, use: ```shell -$ pip install httpx[brotli] +$ pip install httpx[brotli,zstd] ``` HTTPX requires Python 3.8+ diff --git a/docs/quickstart.md b/docs/quickstart.md index 974119f7..aa203a83 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -100,7 +100,8 @@ b'\n\n\nExample Domain...' Any `gzip` and `deflate` HTTP response encodings will automatically be decoded for you. If `brotlipy` is installed, then the `brotli` response -encoding will also be supported. +encoding will be supported. If `zstandard` is installed, then the `zstd` +response encoding will also be supported. For example, to create an image from binary data returned by a request, you can use the following code: @@ -362,7 +363,8 @@ Or stream the text, on a line-by-line basis... HTTPX will use universal line endings, normalising all cases to `\n`. -In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. 
In this case any content encoding that the web server has applied such as `gzip`, `deflate`, or `brotli` will not be automatically decoded. +In some cases you might want to access the raw bytes on the response without applying any HTTP content decoding. In this case any content encoding that the web server has applied such as `gzip`, `deflate`, `brotli`, or `zstd` will +not be automatically decoded. ```pycon >>> with httpx.stream("GET", "https://www.example.com") as r: diff --git a/httpx/_compat.py b/httpx/_compat.py index 27ccc682..7d86dced 100644 --- a/httpx/_compat.py +++ b/httpx/_compat.py @@ -3,8 +3,11 @@ The _compat module is used for code which requires branching between different Python environments. It is excluded from the code coverage checks. """ +import re import ssl import sys +from types import ModuleType +from typing import Optional # Brotli support is optional # The C bindings in `brotli` are recommended for CPython. @@ -17,6 +20,24 @@ except ImportError: # pragma: no cover except ImportError: brotli = None +# Zstandard support is optional +zstd: Optional[ModuleType] = None +try: + import zstandard as zstd +except (AttributeError, ImportError, ValueError): # Defensive: + zstd = None +else: + # The package 'zstandard' added the 'eof' property starting + # in v0.18.0 which we require to ensure a complete and + # valid zstd stream was fed into the ZstdDecoder. 
+ # See: https://github.com/urllib3/urllib3/pull/2624 + _zstd_version = tuple( + map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr] + ) + if _zstd_version < (0, 18): # Defensive: + zstd = None + + if sys.version_info >= (3, 10) or ssl.OPENSSL_VERSION_INFO >= (1, 1, 0, 7): def set_minimum_tls_version_1_2(context: ssl.SSLContext) -> None: diff --git a/httpx/_decoders.py b/httpx/_decoders.py index f9d3adbb..62f2c0b9 100644 --- a/httpx/_decoders.py +++ b/httpx/_decoders.py @@ -11,7 +11,7 @@ import io import typing import zlib -from ._compat import brotli +from ._compat import brotli, zstd from ._exceptions import DecodingError @@ -140,6 +140,44 @@ class BrotliDecoder(ContentDecoder): raise DecodingError(str(exc)) from exc +class ZStandardDecoder(ContentDecoder): + """ + Handle 'zstd' RFC 8878 decoding. + + Requires `pip install zstandard`. + Can be installed as a dependency of httpx using `pip install httpx[zstd]`. + """ + + # inspired by the ZstdDecoder implementation in urllib3 + def __init__(self) -> None: + if zstd is None: # pragma: no cover + raise ImportError( + "Using 'ZStandardDecoder', ..." + "Make sure to install httpx using `pip install httpx[zstd]`." 
+ ) from None + + self.decompressor = zstd.ZstdDecompressor().decompressobj() + + def decode(self, data: bytes) -> bytes: + assert zstd is not None + output = io.BytesIO() + try: + output.write(self.decompressor.decompress(data)) + while self.decompressor.eof and self.decompressor.unused_data: + unused_data = self.decompressor.unused_data + self.decompressor = zstd.ZstdDecompressor().decompressobj() + output.write(self.decompressor.decompress(unused_data)) + except zstd.ZstdError as exc: + raise DecodingError(str(exc)) from exc + return output.getvalue() + + def flush(self) -> bytes: + ret = self.decompressor.flush() # note: this is a no-op + if not self.decompressor.eof: + raise DecodingError("Zstandard data is incomplete") # pragma: no cover + return bytes(ret) + + class MultiDecoder(ContentDecoder): """ Handle the case where multiple encodings have been applied. @@ -323,8 +361,11 @@ SUPPORTED_DECODERS = { "gzip": GZipDecoder, "deflate": DeflateDecoder, "br": BrotliDecoder, + "zstd": ZStandardDecoder, } if brotli is None: SUPPORTED_DECODERS.pop("br") # pragma: no cover +if zstd is None: + SUPPORTED_DECODERS.pop("zstd") # pragma: no cover diff --git a/httpx/_models.py b/httpx/_models.py index 92b393a2..01d9583b 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -818,7 +818,7 @@ class Response: def iter_bytes(self, chunk_size: int | None = None) -> typing.Iterator[bytes]: """ A byte-iterator over the decoded response content. - This allows us to handle gzip, deflate, and brotli encoded responses. + This allows us to handle gzip, deflate, brotli, and zstd encoded responses. """ if hasattr(self, "_content"): chunk_size = len(self._content) if chunk_size is None else chunk_size @@ -918,7 +918,7 @@ class Response: ) -> typing.AsyncIterator[bytes]: """ A byte-iterator over the decoded response content. - This allows us to handle gzip, deflate, and brotli encoded responses. + This allows us to handle gzip, deflate, brotli, and zstd encoded responses. 
""" if hasattr(self, "_content"): chunk_size = len(self._content) if chunk_size is None else chunk_size diff --git a/pyproject.toml b/pyproject.toml index 9e6464c2..c4c18805 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,9 @@ http2 = [ socks = [ "socksio==1.*", ] +zstd = [ + "zstandard>=0.18.0", +] [project.scripts] httpx = "httpx:main" diff --git a/requirements.txt b/requirements.txt index b9c9588d..3e73fbdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # On the other hand, we're not pinning package dependencies, because our tests # needs to pass with the latest version of the packages. # Reference: https://github.com/encode/httpx/pull/1721#discussion_r661241588 --e .[brotli,cli,http2,socks] +-e .[brotli,cli,http2,socks,zstd] # Optional charset auto-detection # Used in our test cases diff --git a/tests/client/test_client.py b/tests/client/test_client.py index 2951e01b..65783901 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -357,7 +357,7 @@ def test_raw_client_header(): assert response.json() == [ ["Host", "example.org"], ["Accept", "*/*"], - ["Accept-Encoding", "gzip, deflate, br"], + ["Accept-Encoding", "gzip, deflate, br, zstd"], ["Connection", "keep-alive"], ["User-Agent", f"python-httpx/{httpx.__version__}"], ["Example-Header", "example-value"], diff --git a/tests/client/test_event_hooks.py b/tests/client/test_event_hooks.py index 6604dd31..78fb0484 100644 --- a/tests/client/test_event_hooks.py +++ b/tests/client/test_event_hooks.py @@ -36,7 +36,7 @@ def test_event_hooks(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, @@ -87,7 +87,7 @@ async def test_async_event_hooks(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - 
"accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, @@ -144,7 +144,7 @@ def test_event_hooks_with_redirect(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, @@ -159,7 +159,7 @@ def test_event_hooks_with_redirect(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, @@ -201,7 +201,7 @@ async def test_async_event_hooks_with_redirect(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, @@ -216,7 +216,7 @@ async def test_async_event_hooks_with_redirect(): "host": "127.0.0.1:8000", "user-agent": f"python-httpx/{httpx.__version__}", "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "authorization": "Basic dXNlcm5hbWU6cGFzc3dvcmQ=", }, diff --git a/tests/client/test_headers.py b/tests/client/test_headers.py index 264ca0bd..c51e40c3 100755 --- a/tests/client/test_headers.py +++ b/tests/client/test_headers.py @@ -34,7 +34,7 @@ def test_client_header(): assert response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "example-header": "example-value", "host": "example.org", @@ -56,7 +56,7 @@ def test_header_merge(): assert 
response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org", "user-agent": "python-myclient/0.2.1", @@ -78,7 +78,7 @@ def test_header_merge_conflicting_headers(): assert response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org", "user-agent": f"python-httpx/{httpx.__version__}", @@ -100,7 +100,7 @@ def test_header_update(): assert first_response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org", "user-agent": f"python-httpx/{httpx.__version__}", @@ -111,7 +111,7 @@ def test_header_update(): assert second_response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "another-header": "AThing", "connection": "keep-alive", "host": "example.org", @@ -164,7 +164,7 @@ def test_remove_default_header(): assert response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org", } @@ -192,7 +192,7 @@ def test_host_with_auth_and_port_in_url(): assert response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org", "user-agent": f"python-httpx/{httpx.__version__}", @@ -215,7 +215,7 @@ def test_host_with_non_default_port_in_url(): assert response.json() == { "headers": { "accept": "*/*", - "accept-encoding": "gzip, deflate, br", + "accept-encoding": "gzip, deflate, br, zstd", "connection": "keep-alive", "host": "example.org:123", "user-agent": 
f"python-httpx/{httpx.__version__}", diff --git a/tests/test_asgi.py b/tests/test_asgi.py index ccc55266..8b817891 100644 --- a/tests/test_asgi.py +++ b/tests/test_asgi.py @@ -157,7 +157,7 @@ async def test_asgi_headers(): "headers": [ ["host", "www.example.org"], ["accept", "*/*"], - ["accept-encoding", "gzip, deflate, br"], + ["accept-encoding", "gzip, deflate, br, zstd"], ["connection", "keep-alive"], ["user-agent", f"python-httpx/{httpx.__version__}"], ] diff --git a/tests/test_decoders.py b/tests/test_decoders.py index 73644e04..bcbb18bb 100644 --- a/tests/test_decoders.py +++ b/tests/test_decoders.py @@ -1,10 +1,12 @@ from __future__ import annotations +import io import typing import zlib import chardet import pytest +import zstandard as zstd import httpx @@ -73,6 +75,53 @@ def test_brotli(): assert response.content == body +def test_zstd(): + body = b"test 123" + compressed_body = zstd.compress(body) + + headers = [(b"Content-Encoding", b"zstd")] + response = httpx.Response( + 200, + headers=headers, + content=compressed_body, + ) + assert response.content == body + + +def test_zstd_decoding_error(): + compressed_body = "this_is_not_zstd_compressed_data" + + headers = [(b"Content-Encoding", b"zstd")] + with pytest.raises(httpx.DecodingError): + httpx.Response( + 200, + headers=headers, + content=compressed_body, + ) + + +def test_zstd_multiframe(): + # test inspired by urllib3 test suite + data = ( + # Zstandard frame + zstd.compress(b"foo") + # skippable frame (must be ignored) + + bytes.fromhex( + "50 2A 4D 18" # Magic_Number (little-endian) + "07 00 00 00" # Frame_Size (little-endian) + "00 00 00 00 00 00 00" # User_Data + ) + # Zstandard frame + + zstd.compress(b"bar") + ) + compressed_body = io.BytesIO(data) + + headers = [(b"Content-Encoding", b"zstd")] + response = httpx.Response(200, headers=headers, content=compressed_body) + response.read() + assert response.content == b"foobar" + + def test_multi(): body = b"test 123" diff --git 
a/tests/test_main.py b/tests/test_main.py index 67eeb0d2..feb796e1 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -129,7 +129,7 @@ def test_verbose(server): "GET / HTTP/1.1", f"Host: {server.url.netloc.decode('ascii')}", "Accept: */*", - "Accept-Encoding: gzip, deflate, br", + "Accept-Encoding: gzip, deflate, br, zstd", "Connection: keep-alive", f"User-Agent: python-httpx/{httpx.__version__}", "", @@ -154,7 +154,7 @@ def test_auth(server): "GET / HTTP/1.1", f"Host: {server.url.netloc.decode('ascii')}", "Accept: */*", - "Accept-Encoding: gzip, deflate, br", + "Accept-Encoding: gzip, deflate, br, zstd", "Connection: keep-alive", f"User-Agent: python-httpx/{httpx.__version__}", "Authorization: Basic dXNlcm5hbWU6cGFzc3dvcmQ=",