git.ipfire.org Git - thirdparty/httpx.git/commitdiff
Add progress to streaming download (#1268)
author: cdeler <serj.krotov@gmail.com>
Thu, 10 Sep 2020 09:16:00 +0000 (12:16 +0300)
committer: GitHub <noreply@github.com>
Thu, 10 Sep 2020 09:16:00 +0000 (12:16 +0300)
* Added last_raw_chunk_size to the Response object (#1208)

* Added example with progress bar (#1208)

Co-authored-by: Florimond Manca <florimond.manca@gmail.com>
* Apply suggestions from code review

Co-authored-by: Florimond Manca <florimond.manca@gmail.com>
* PR review
Changed last_raw_chunk_size to num_bytes_downloaded;
Edited the example according to documentation

* Update docs/advanced.md

Co-authored-by: Florimond Manca <florimond.manca@gmail.com>
* Update docs/advanced.md

Co-authored-by: Florimond Manca <florimond.manca@gmail.com>
* Update docs/advanced.md

* Update docs/advanced.md

Co-authored-by: Florimond Manca <florimond.manca@gmail.com>
Co-authored-by: Tom Christie <tom@tomchristie.com>
docs/advanced.md
httpx/_models.py
tests/models/test_responses.py

index 0f0b2ddf72a531c9d3ad974c88eea0ac3249b677..d9a43354fa760d0e4123b2b032e6f58a61796ab5 100644 (file)
@@ -221,6 +221,34 @@ with httpx.Client(headers=headers) as client:
     ...
 ```
 
+## Monitoring download progress
+
+If you need to monitor download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property.
+
+This interface is required for properly determining download progress, because the total number of bytes returned by `response.content` or `response.iter_bytes()` will not always correspond with the raw content length of the response if HTTP response compression is being used.
+
+For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this…
+
+```python
+import tempfile
+
+import httpx
+from tqdm import tqdm
+
+with tempfile.NamedTemporaryFile() as download_file:
+    url = "https://speed.hetzner.de/100MB.bin"
+    with httpx.stream("GET", url) as response:
+        total = int(response.headers["Content-Length"])
+
+        with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress:
+            num_bytes_downloaded = response.num_bytes_downloaded
+            for chunk in response.iter_bytes():
+                download_file.write(chunk)
+                progress.update(response.num_bytes_downloaded - num_bytes_downloaded)
+                num_bytes_downloaded = response.num_bytes_downloaded
+        print(f"The total download size is {response.num_bytes_downloaded} bytes")
+```
+
 ## .netrc Support
 
 HTTPX supports .netrc file. In `trust_env=True` cases, if auth parameter is
index 713281e662de0682b5eddc5a5400598a0a7556d8..65db9ae8b8629d270e3f8d455b8ec7654c47773b 100644 (file)
@@ -697,6 +697,8 @@ class Response:
             self._raw_stream = ByteStream(body=content or b"")
             self.read()
 
+        self._num_bytes_downloaded = 0
+
     @property
     def elapsed(self) -> datetime.timedelta:
         """
@@ -885,6 +887,10 @@ class Response:
                 ldict[key] = link
         return ldict
 
+    @property
+    def num_bytes_downloaded(self) -> int:
+        return self._num_bytes_downloaded
+
     def __repr__(self) -> str:
         return f"<Response [{self.status_code} {self.reason_phrase}]>"
 
@@ -951,8 +957,10 @@ class Response:
             raise ResponseClosed()
 
         self.is_stream_consumed = True
+        self._num_bytes_downloaded = 0
         with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
             for part in self._raw_stream:
+                self._num_bytes_downloaded += len(part)
                 yield part
         self.close()
 
@@ -1032,8 +1040,10 @@ class Response:
             raise ResponseClosed()
 
         self.is_stream_consumed = True
+        self._num_bytes_downloaded = 0
         with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
             async for part in self._raw_stream:
+                self._num_bytes_downloaded += len(part)
                 yield part
         await self.aclose()
 
index 2b07a2704025ce70679cb409fb048a98b7856607..30d600086ac86419ba61d5f02647d2bd4e99e05b 100644 (file)
@@ -227,6 +227,20 @@ def test_iter_raw():
     assert raw == b"Hello, world!"
 
 
+def test_iter_raw_increments_updates_counter():
+    stream = IteratorStream(iterator=streaming_body())
+
+    response = httpx.Response(
+        200,
+        stream=stream,
+    )
+
+    num_downloaded = response.num_bytes_downloaded
+    for part in response.iter_raw():
+        assert len(part) == (response.num_bytes_downloaded - num_downloaded)
+        num_downloaded = response.num_bytes_downloaded
+
+
 @pytest.mark.asyncio
 async def test_aiter_raw():
     stream = AsyncIteratorStream(aiterator=async_streaming_body())
@@ -241,6 +255,21 @@ async def test_aiter_raw():
     assert raw == b"Hello, world!"
 
 
+@pytest.mark.asyncio
+async def test_aiter_raw_increments_updates_counter():
+    stream = AsyncIteratorStream(aiterator=async_streaming_body())
+
+    response = httpx.Response(
+        200,
+        stream=stream,
+    )
+
+    num_downloaded = response.num_bytes_downloaded
+    async for part in response.aiter_raw():
+        assert len(part) == (response.num_bytes_downloaded - num_downloaded)
+        num_downloaded = response.num_bytes_downloaded
+
+
 def test_iter_bytes():
     response = httpx.Response(
         200,