]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.14] gh-142831: Fix use-after-free in json encoder during re-entrant mutation ...
authorShamil <ashm.tech@proton.me>
Wed, 20 May 2026 03:31:43 +0000 (06:31 +0300)
committerGitHub <noreply@github.com>
Wed, 20 May 2026 03:31:43 +0000 (20:31 -0700)
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)

User callbacks invoked during JSON encoding (e.g. the `default` callback or
a custom string encoder) can mutate or clear the dict or sequence being
encoded, invalidating borrowed references to items, keys, and values. Hold
strong references unconditionally while iterating.

(cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047)

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
Lib/test/test_json/test_speedups.py
Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst [new file with mode: 0644]
Modules/_json.c

index 4c0aa5f993b30fd4c29daf3052d7cf6914fdc31f..0b22a0bf4b953879a85bf8db24de360acccbe5bf 100644 (file)
@@ -1,4 +1,5 @@
 from test.test_json import CTest
+from test.support import gc_collect
 
 
 class BadBool:
@@ -111,3 +112,63 @@ class TestEncode(CTest):
         self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
         self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
         self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
+
+    def test_mutate_dict_items_during_encode(self):
+        # gh-142831: Clearing the items list via a re-entrant key encoder
+        # must not cause a use-after-free.  BadDict.items() returns a
+        # mutable list; encode_str clears it while iterating.
+        items = None
+
+        class BadDict(dict):
+            def items(self):
+                nonlocal items
+                items = [("boom", object())]
+                return items
+
+        cleared = False
+        def encode_str(obj):
+            nonlocal items, cleared
+            if items is not None:
+                items.clear()
+                items = None
+                cleared = True
+                gc_collect()
+            return '"x"'
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, lambda o: "null",
+            encode_str, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(BadDict(real=1), 0)
+        self.assertTrue(cleared)
+
+    def test_mutate_list_during_encode(self):
+        # gh-142831: Clearing a list mid-iteration via the default
+        # callback must not cause a use-after-free.
+        call_count = 0
+        lst = [object() for _ in range(10)]
+
+        def default(obj):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 3:
+                lst.clear()
+                gc_collect()
+            return None
+
+        encoder = self.json.encoder.c_make_encoder(
+            None, default,
+            self.json.encoder.c_encode_basestring, None,
+            ": ", ", ", False,
+            False, True
+        )
+
+        # Must not crash (use-after-free under ASan before fix)
+        encoder(lst, 0)
+        # Verify the mutation path was actually hit and the loop
+        # stopped iterating after the list was cleared.
+        self.assertEqual(call_count, 3)
diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst
new file mode 100644 (file)
index 0000000..5fa3cd2
--- /dev/null
@@ -0,0 +1,2 @@
+Fix a crash in the :mod:`json` module where a use-after-free could occur if
+the object being encoded is modified during serialization.
index 39ec9a969cf04d477a45fe4f17030c81fd6be77c..39cdb9fd4f40c841770daecf383720d8f5506f1d 100644 (file)
@@ -1702,9 +1702,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
 
         for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
             PyObject *item = PyList_GET_ITEM(items, i);
+            // gh-142831: encoder_encode_key_value() can invoke user code
+            // that mutates the items list, invalidating this borrowed ref.
+            Py_INCREF(item);
 
             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
                 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+                Py_DECREF(item);
                 goto bail;
             }
 
@@ -1712,18 +1716,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
             value = PyTuple_GET_ITEM(item, 1);
             if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          indent_level, indent_cache,
-                                         separator) < 0)
+                                         separator) < 0) {
+                Py_DECREF(item);
                 goto bail;
+            }
+            Py_DECREF(item);
         }
         Py_CLEAR(items);
 
     } else {
         Py_ssize_t pos = 0;
         while (PyDict_Next(dct, &pos, &key, &value)) {
+            // gh-142831: encoder_encode_key_value() can invoke user code
+            // that mutates the dict, invalidating these borrowed refs.
+            Py_INCREF(key);
+            Py_INCREF(value);
             if (encoder_encode_key_value(s, writer, &first, dct, key, value,
                                          indent_level, indent_cache,
-                                         separator) < 0)
+                                         separator) < 0) {
+                Py_DECREF(key);
+                Py_DECREF(value);
                 goto bail;
+            }
+            Py_DECREF(key);
+            Py_DECREF(value);
         }
     }
 
@@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
     }
     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
+        // gh-142831: encoder_listencode_obj() can invoke user code
+        // that mutates the sequence, invalidating this borrowed ref.
+        Py_INCREF(obj);
         if (i) {
-            if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
+            if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
+                Py_DECREF(obj);
                 goto bail;
+            }
         }
         if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
             _PyErr_FormatNote("when serializing %T item %zd", seq, i);
+            Py_DECREF(obj);
             goto bail;
         }
+        Py_DECREF(obj);
     }
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))