From: Shamil Date: Wed, 20 May 2026 03:31:43 +0000 (+0300) Subject: [3.14] gh-142831: Fix use-after-free in json encoder during re-entrant mutation ... X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=bba6c1d9d5721e763df88af187bc0a531e1708b6;p=thirdparty%2FPython%2Fcpython.git [3.14] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) (#150078) gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. (cherry picked from commit 235fa7244a0474c492ae98ee444529c7ba2a9047) Co-authored-by: Kumar Aditya Co-authored-by: Gregory P. Smith --- diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py index 4c0aa5f993b3..0b22a0bf4b95 100644 --- a/Lib/test/test_json/test_speedups.py +++ b/Lib/test/test_json/test_speedups.py @@ -1,4 +1,5 @@ from test.test_json import CTest +from test.support import gc_collect class BadBool: @@ -111,3 +112,63 @@ class TestEncode(CTest): self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0) self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}]) + + def test_mutate_dict_items_during_encode(self): + # gh-142831: Clearing the items list via a re-entrant key encoder + # must not cause a use-after-free. BadDict.items() returns a + # mutable list; encode_str clears it while iterating. + items = None + + class BadDict(dict): + def items(self): + nonlocal items + items = [("boom", object())] + return items + + cleared = False + def encode_str(obj): + nonlocal items, cleared + if items is not None: + items.clear() + items = None + cleared = True + gc_collect() + return '"x"' + + encoder = self.json.encoder.c_make_encoder( + None, lambda o: "null", + encode_str, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(BadDict(real=1), 0) + self.assertTrue(cleared) + + def test_mutate_list_during_encode(self): + # gh-142831: Clearing a list mid-iteration via the default + # callback must not cause a use-after-free. + call_count = 0 + lst = [object() for _ in range(10)] + + def default(obj): + nonlocal call_count + call_count += 1 + if call_count == 3: + lst.clear() + gc_collect() + return None + + encoder = self.json.encoder.c_make_encoder( + None, default, + self.json.encoder.c_encode_basestring, None, + ": ", ", ", False, + False, True + ) + + # Must not crash (use-after-free under ASan before fix) + encoder(lst, 0) + # Verify the mutation path was actually hit and the loop + # stopped iterating after the list was cleared. + self.assertEqual(call_count, 3) diff --git a/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst new file mode 100644 index 000000000000..5fa3cd2727a9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-17-04-10-35.gh-issue-142831.ee3t4L.rst @@ -0,0 +1,2 @@ +Fix a crash in the :mod:`json` module where a use-after-free could occur if +the object being encoded is modified during serialization. diff --git a/Modules/_json.c b/Modules/_json.c index 39ec9a969cf0..39cdb9fd4f40 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1702,9 +1702,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) { PyObject *item = PyList_GET_ITEM(items, i); + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the items list, invalidating this borrowed ref. + Py_INCREF(item); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); + Py_DECREF(item); goto bail; } @@ -1712,18 +1716,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, value = PyTuple_GET_ITEM(item, 1); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(item); goto bail; + } + Py_DECREF(item); } Py_CLEAR(items); } else { Py_ssize_t pos = 0; while (PyDict_Next(dct, &pos, &key, &value)) { + // gh-142831: encoder_encode_key_value() can invoke user code + // that mutates the dict, invalidating these borrowed refs. + Py_INCREF(key); + Py_INCREF(value); if (encoder_encode_key_value(s, writer, &first, dct, key, value, indent_level, indent_cache, - separator) < 0) + separator) < 0) { + Py_DECREF(key); + Py_DECREF(value); goto bail; + } + Py_DECREF(key); + Py_DECREF(value); } } @@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, } for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); + // gh-142831: encoder_listencode_obj() can invoke user code + // that mutates the sequence, invalidating this borrowed ref. + Py_INCREF(obj); if (i) { - if (PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) { + Py_DECREF(obj); goto bail; + } } if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) { _PyErr_FormatNote("when serializing %T item %zd", seq, i); + Py_DECREF(obj); goto bail; } + Py_DECREF(obj); } if (ident != NULL) { if (PyDict_DelItem(s->markers, ident))