From: cdeler Date: Thu, 10 Sep 2020 09:16:00 +0000 (+0300) Subject: Add progress to streaming download (#1268) X-Git-Tag: 0.15.0~26 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed16eb3a3def7c9a4759fec5f4ada537bd5d1cad;p=thirdparty%2Fhttpx.git Add progress to streaming download (#1268) * Added last_raw_chunk_size to the Response object (#1208) * Added example with progress bar (#1208) Co-authored-by: Florimond Manca * Apply suggestions from code review Co-authored-by: Florimond Manca * PR review Changed last_raw_chunk_size to num_bytes_downloaded; Edited the example according to documentation * Update docs/advanced.md Co-authored-by: Florimond Manca * Update docs/advanced.md Co-authored-by: Florimond Manca * Update docs/advanced.md * Update docs/advanced.md Co-authored-by: Florimond Manca Co-authored-by: Tom Christie --- diff --git a/docs/advanced.md b/docs/advanced.md index 0f0b2ddf..d9a43354 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -221,6 +221,34 @@ with httpx.Client(headers=headers) as client: ... ``` +## Monitoring download progress + +If you need to monitor download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property. + +This interface is required for properly determining download progress, because the total number of bytes returned by `response.content` or `response.iter_bytes()` will not always correspond with the raw content length of the response if HTTP response compression is being used. 
+ +For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this… + +```python +import tempfile + +import httpx +from tqdm import tqdm + +with tempfile.NamedTemporaryFile() as download_file: + url = "https://speed.hetzner.de/100MB.bin" + with httpx.stream("GET", url) as response: + total = int(response.headers["Content-Length"]) + + with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress: + num_bytes_downloaded = response.num_bytes_downloaded + for chunk in response.iter_bytes(): + download_file.write(chunk) + progress.update(response.num_bytes_downloaded - num_bytes_downloaded) + num_bytes_downloaded = response.num_bytes_downloaded + print(f"The total download size is {response.num_bytes_downloaded} bytes") +``` + ## .netrc Support HTTPX supports .netrc file. In `trust_env=True` cases, if auth parameter is diff --git a/httpx/_models.py b/httpx/_models.py index 713281e6..65db9ae8 100644 --- a/httpx/_models.py +++ b/httpx/_models.py @@ -697,6 +697,8 @@ class Response: self._raw_stream = ByteStream(body=content or b"") self.read() + self._num_bytes_downloaded = 0 + @property def elapsed(self) -> datetime.timedelta: """ @@ -885,6 +887,10 @@ class Response: ldict[key] = link return ldict + @property + def num_bytes_downloaded(self) -> int: + return self._num_bytes_downloaded + def __repr__(self) -> str: return f"" @@ -951,8 +957,10 @@ class Response: raise ResponseClosed() self.is_stream_consumed = True + self._num_bytes_downloaded = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): for part in self._raw_stream: + self._num_bytes_downloaded += len(part) yield part self.close() @@ -1032,8 +1040,10 @@ class Response: raise ResponseClosed() self.is_stream_consumed = True + self._num_bytes_downloaded = 0 with map_exceptions(HTTPCORE_EXC_MAP, request=self._request): async for part in self._raw_stream: + self._num_bytes_downloaded += len(part) 
yield part await self.aclose() diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py index 2b07a270..30d60008 100644 --- a/tests/models/test_responses.py +++ b/tests/models/test_responses.py @@ -227,6 +227,20 @@ def test_iter_raw(): assert raw == b"Hello, world!" +def test_iter_raw_increments_updates_counter(): + stream = IteratorStream(iterator=streaming_body()) + + response = httpx.Response( + 200, + stream=stream, + ) + + num_downloaded = response.num_bytes_downloaded + for part in response.iter_raw(): + assert len(part) == (response.num_bytes_downloaded - num_downloaded) + num_downloaded = response.num_bytes_downloaded + + @pytest.mark.asyncio async def test_aiter_raw(): stream = AsyncIteratorStream(aiterator=async_streaming_body()) @@ -241,6 +255,21 @@ async def test_aiter_raw(): assert raw == b"Hello, world!" +@pytest.mark.asyncio +async def test_aiter_raw_increments_updates_counter(): + stream = AsyncIteratorStream(aiterator=async_streaming_body()) + + response = httpx.Response( + 200, + stream=stream, + ) + + num_downloaded = response.num_bytes_downloaded + async for part in response.aiter_raw(): + assert len(part) == (response.num_bytes_downloaded - num_downloaded) + num_downloaded = response.num_bytes_downloaded + + def test_iter_bytes(): response = httpx.Response( 200,