git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-142831: Fix use-after-free in json encoder during re-entrant mutation ...
author Shamil <ashm.tech@proton.me>
Wed, 20 May 2026 03:33:22 +0000 (06:33 +0300)
committer GitHub <noreply@github.com>
Wed, 20 May 2026 03:33:22 +0000 (20:33 -0700)
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)

User callbacks invoked during JSON encoding (e.g. the `default` callback or
a custom string encoder) can mutate or clear the dict or sequence being
encoded, invalidating borrowed references to items, keys, and values. Hold
strong references unconditionally while iterating.

(cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047)

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Lib/test/test_json/test_speedups.py
Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst [new file with mode: 0644]
Modules/_json.c

index 682014cfd5b344e8524521d4ddd23dff7d8727be..7c2789f8d1f235b7350c72a6b1563ef42d3d137a 100644 (file)
@@ -1,4 +1,5 @@
 from test.test_json import CTest
+from test.support import gc_collect
 
 
 class BadBool:
@@ -80,3 +81,63 @@ class TestEncode(CTest):
     def test_unsortable_keys(self):
         with self.assertRaises(TypeError):
             self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'})
+
+    def test_mutate_dict_items_during_encode(self):
+        # gh-142831: Clearing the items list via a re-entrant key encoder
+        # must not cause a use-after-free.  BadDict.items() returns a
+        # mutable list; encode_str clears it while iterating.
+        items = None
+
+        class BadDict(dict):
+            def items(self):
+                nonlocal items
+                items = [("boom", object())]
+                return items
+
+        cleared = False
+        def encode_str(obj):
+            nonlocal items, cleared
+            if items is not None:
+                items.clear()
+                items = None
+                cleared = True
+                gc_collect()
+            return '"x"'
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, lambda o: "null",
+            encode_str, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(BadDict(real=1), 0)
+        self.assertTrue(cleared)
+
+    def test_mutate_list_during_encode(self):
+        # gh-142831: Clearing a list mid-iteration via the default
+        # callback must not cause a use-after-free.
+        call_count = 0
+        lst = [object() for _ in range(10)]
+
+        def default(obj):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 3:
+                lst.clear()
+                gc_collect()
+            return None
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, default,
+            self.json.encoder.c_encode_basestring, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(lst, 0)
+        # Verify the mutation path was actually hit and the loop
+        # stopped iterating after the list was cleared.
+        self.assertEqual(call_count, 3)
diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst
new file mode 100644 (file)
index 0000000..5fa3cd2
--- /dev/null
@@ -0,0 +1,2 @@
+Fix a crash in the :mod:`json` module where a use-after-free could occur if
+the object being encoded is modified during serialization.
index afefc71bfbdd9a1b8b5ff2e2a0bfaa63ffc2c318..25cbd9b9bb89b474b5a5e6395ec084a176cb66e4 100644 (file)
@@ -1602,9 +1602,13 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
 
         for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
             PyObject *item = PyList_GET_ITEM(items, i);
+            // gh-142831: encoder_encode_key_value() can invoke user code
+            // that mutates the items list, invalidating this borrowed ref.
+            Py_INCREF(item);
 
             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
                 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+                Py_DECREF(item);
                 goto bail;
             }
 
@@ -1612,18 +1616,30 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
             value = PyTuple_GET_ITEM(item, 1);
             if (encoder_encode_key_value(s, writer, &first, key, value,
                                          new_newline_indent,
-                                         current_item_separator) < 0)
+                                         current_item_separator) < 0) {
+                Py_DECREF(item);
                 goto bail;
+            }
+            Py_DECREF(item);
         }
         Py_CLEAR(items);
 
     } else {
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
+            // gh-142831: encoder_encode_key_value() can invoke user code
+            // that mutates the dict, invalidating these borrowed refs.
+            Py_INCREF(key);
+            Py_INCREF(value);
             if (encoder_encode_key_value(s, writer, &first, key, value,
                                          new_newline_indent,
-                                         current_item_separator) < 0)
+                                         current_item_separator) < 0) {
+                Py_DECREF(key);
+                Py_DECREF(value);
                 goto bail;
+            }
+            Py_DECREF(key);
+            Py_DECREF(value);
         }
     }
 
@@ -1712,12 +1728,20 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
     }
     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
+        // gh-142831: encoder_listencode_obj() can invoke user code
+        // that mutates the sequence, invalidating this borrowed ref.
+        Py_INCREF(obj);
         if (i) {
-            if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
+            if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) {
+                Py_DECREF(obj);
                 goto bail;
+            }
         }
-        if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
+        if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
+            Py_DECREF(obj);
             goto bail;
+        }
+        Py_DECREF(obj);
     }
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))