git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-150599: Prevent bz2 decompressor reuse after errors (GH-150600)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 8 Jun 2026 09:55:32 +0000 (11:55 +0200)
committer: GitHub <noreply@github.com>
Mon, 8 Jun 2026 09:55:32 +0000 (10:55 +0100)
(cherry picked from commit 157a5df8cb5d82b33f918a7489e72ce95ceb12b6)

Co-authored-by: Stan Ulbrych <stan@python.org>
Lib/test/test_bz2.py
Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst [new file with mode: 0644]
Modules/_bz2module.c

index b5cd202a613725e90a6f2d2d3398ea01369a8961..442296ab17266418aa903afb567aa3c1c1c69768 100644 (file)
@@ -1032,6 +1032,21 @@ class BZ2DecompressorTest(BaseTest):
         # Previously, a second call could crash due to internal inconsistency
         self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
 
+    def test_decompress_after_data_error(self):
+        data = bytes.fromhex(
+            "425a6839314159265359000000000000007fffff000000000000000000000000"
+            "00000000000000000000000000000000000000e0370000000000000000000000"
+            "000000000000000000000000000000000000000000000000000083f3"
+        )
+        bzd = BZ2Decompressor()
+        with self.assertRaisesRegex(OSError, "Invalid data stream"):
+            bzd.decompress(data)
+        # Previously, a second call could crash due to internal inconsistency
+        self.assertFalse(bzd.needs_input)
+        self.assertFalse(bzd.eof)
+        with self.assertRaisesRegex(ValueError, "previous error"):
+            bzd.decompress(b'\x00' * 18)
+
     @support.refcount_test
     def test_refleaks_in___init__(self):
         gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
diff --git a/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst b/Misc/NEWS.d/next/Security/2026-05-30-09-36-20.gh-issue-150599.nlHqU-.rst
new file mode 100644 (file)
index 0000000..a37d86c
--- /dev/null
@@ -0,0 +1,3 @@
+Fix a possible stack buffer overflow in :mod:`bz2` when a
+:class:`bz2.BZ2Decompressor` is reused after a decompression error.
+The decompressor now becomes unusable after libbz2 reports an error.
index ddf2f1dceeadcf1e7718fa01a09c0317d120bf8b..dc536835acd01b66aebb29b09636179272ca05ed 100644 (file)
@@ -116,6 +116,7 @@ typedef struct {
 typedef struct {
     PyObject_HEAD
     bz_stream bzs;
+    int bzerror;
     char eof;           /* Py_T_BOOL expects a char */
     PyObject *unused_data;
     char needs_input;
@@ -455,8 +456,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
 
         d->bzs_avail_in_real += bzs->avail_in;
 
-        if (catch_bz2_error(bzret))
+        if (catch_bz2_error(bzret)) {
+            d->bzerror = bzret;
+            d->needs_input = 0;
             goto error;
+        }
         if (bzret == BZ_STREAM_END) {
             d->eof = 1;
             break;
@@ -625,10 +629,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
     PyObject *result = NULL;
 
     ACQUIRE_LOCK(self);
-    if (self->eof)
+    if (self->eof) {
         PyErr_SetString(PyExc_EOFError, "End of stream already reached");
-    else
+    }
+    else if (self->bzerror) {
+        // Re-entering BZ2_bzDecompress() after an error can write out of bounds.
+        PyErr_SetString(PyExc_ValueError,
+                        "Decompressor is unusable after a previous error");
+    }
+    else {
         result = decompress(self, data->buf, data->len, max_length);
+    }
     RELEASE_LOCK(self);
     return result;
 }
@@ -662,6 +673,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type)
         return NULL;
     }
 
+    self->bzerror = 0;
     self->needs_input = 1;
     self->bzs_avail_in_real = 0;
     self->input_buffer = NULL;