]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)
authorShamil <ashm.tech@proton.me>
Sun, 12 Apr 2026 00:14:50 +0000 (03:14 +0300)
committerGitHub <noreply@github.com>
Sun, 12 Apr 2026 00:14:50 +0000 (00:14 +0000)
Hold strong references to borrowed items unconditionally (not only in
free-threading builds) in _encoder_iterate_mapping_lock_held and
_encoder_iterate_fast_seq_lock_held.  User callbacks invoked during
encoding can mutate or clear the underlying container, invalidating
borrowed references.

The dict iteration path was already fixed by gh-145244.

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Lib/test/test_json/test_speedups.py
Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst [new file with mode: 0644]
Modules/_json.c

index 4c0aa5f993b30fd4c29daf3052d7cf6914fdc31f..0b22a0bf4b953879a85bf8db24de360acccbe5bf 100644 (file)
@@ -1,4 +1,5 @@
 from test.test_json import CTest
+from test.support import gc_collect
 
 
 class BadBool:
@@ -111,3 +112,63 @@ class TestEncode(CTest):
         self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
         self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
         self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
+
+    def test_mutate_dict_items_during_encode(self):
+        # gh-142831: Clearing the items list via a re-entrant key encoder
+        # must not cause a use-after-free.  BadDict.items() returns a
+        # mutable list; encode_str clears it while iterating.
+        items = None
+
+        class BadDict(dict):
+            def items(self):
+                nonlocal items
+                items = [("boom", object())]
+                return items
+
+        cleared = False
+        def encode_str(obj):
+            nonlocal items, cleared
+            if items is not None:
+                items.clear()
+                items = None
+                cleared = True
+                gc_collect()
+            return '"x"'
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, lambda o: "null",
+            encode_str, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(BadDict(real=1), 0)
+        self.assertTrue(cleared)
+
+    def test_mutate_list_during_encode(self):
+        # gh-142831: Clearing a list mid-iteration via the default
+        # callback must not cause a use-after-free.
+        call_count = 0
+        lst = [object() for _ in range(10)]
+
+        def default(obj):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 3:
+                lst.clear()
+                gc_collect()
+            return None
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, default,
+            self.json.encoder.c_encode_basestring, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(lst, 0)
+        # Verify the mutation path was actually hit and the loop
+        # stopped iterating after the list was cleared.
+        self.assertEqual(call_count, 3)
diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst
new file mode 100644 (file)
index 0000000..5fa3cd2
--- /dev/null
@@ -0,0 +1,2 @@
+Fix a crash in the :mod:`json` module where a use-after-free could occur if
+the object being encoded is modified during serialization.
index e36e69b09b2030dff3e588f1a68707270709638f..1f454768355cc0c123d70128ea97801ba32602e8 100644 (file)
@@ -1745,15 +1745,12 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
     PyObject *key, *value;
     for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
         PyObject *item = PyList_GET_ITEM(items, i);
-#ifdef Py_GIL_DISABLED
-        // gh-119438: in the free-threading build the critical section on items can get suspended
+        // gh-142831: encoder_encode_key_value() can invoke user code
+        // that mutates the items list, invalidating this borrowed ref.
         Py_INCREF(item);
-#endif
         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
             PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
-#ifdef Py_GIL_DISABLED
             Py_DECREF(item);
-#endif
             return -1;
         }
 
@@ -1762,14 +1759,10 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
         if (encoder_encode_key_value(s, writer, first, dct, key, value,
                                      indent_level, indent_cache,
                                      separator) < 0) {
-#ifdef Py_GIL_DISABLED
             Py_DECREF(item);
-#endif
             return -1;
         }
-#ifdef Py_GIL_DISABLED
         Py_DECREF(item);
-#endif
     }
 
     return 0;
@@ -1784,10 +1777,8 @@ _encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
     PyObject *key, *value;
     Py_ssize_t pos = 0;
     while (PyDict_Next(dct, &pos, &key, &value)) {
-        // gh-119438, gh-145244: key and value are borrowed refs from
-        // PyDict_Next(). encoder_encode_key_value() may invoke user
-        // Python code (the 'default' callback) that can mutate or
-        // clear the dict, so we must hold strong references.
+        // gh-145244: encoder_encode_key_value() can invoke user code
+        // that mutates the dict, invalidating these borrowed refs.
         Py_INCREF(key);
         Py_INCREF(value);
         if (encoder_encode_key_value(s, writer, first, dct, key, value,
@@ -1902,28 +1893,21 @@ _encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
 {
     for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
-#ifdef Py_GIL_DISABLED
-        // gh-119438: in the free-threading build the critical section on s_fast can get suspended
+        // gh-142831: encoder_listencode_obj() can invoke user code
+        // that mutates the sequence, invalidating this borrowed ref.
         Py_INCREF(obj);
-#endif
         if (i) {
             if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
-#ifdef Py_GIL_DISABLED
                 Py_DECREF(obj);
-#endif
                 return -1;
             }
         }
         if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
             _PyErr_FormatNote("when serializing %T item %zd", seq, i);
-#ifdef Py_GIL_DISABLED
             Py_DECREF(obj);
-#endif
             return -1;
         }
-#ifdef Py_GIL_DISABLED
         Py_DECREF(obj);
-#endif
     }
     return 0;
 }