]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-116738: Make `_json` module thread-safe in the free-threading (#119438)
authorPieter Eendebak <pieter.eendebak@gmail.com>
Sun, 31 Aug 2025 04:12:45 +0000 (06:12 +0200)
committerGitHub <noreply@github.com>
Sun, 31 Aug 2025 04:12:45 +0000 (09:42 +0530)
Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Lib/test/test_free_threading/test_json.py [new file with mode: 0644]
Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst [new file with mode: 0644]
Modules/_json.c

diff --git a/Lib/test/test_free_threading/test_json.py b/Lib/test/test_free_threading/test_json.py
new file mode 100644 (file)
index 0000000..8a541e9
--- /dev/null
@@ -0,0 +1,79 @@
+from threading import Barrier, Thread
+from test.test_json import CTest
+from test.support import threading_helper
+
+
+def encode_json_helper(
+    json, worker, data, number_of_threads=12, number_of_json_encodings=100
+):
+    worker_threads = []
+    barrier = Barrier(number_of_threads)
+    for index in range(number_of_threads):
+        worker_threads.append(
+            Thread(target=worker, args=[barrier, data, index])
+        )
+    for t in worker_threads:
+        t.start()
+    for ii in range(number_of_json_encodings):
+        json.dumps(data)
+    data.clear()
+    for t in worker_threads:
+        t.join()
+
+
+class MyMapping(dict):
+    def __init__(self):
+        self.mapping = []
+
+    def items(self):
+        return self.mapping
+
+
+@threading_helper.reap_threads
+@threading_helper.requires_working_threading()
+class TestJsonEncoding(CTest):
+    # Test encoding json with concurrent threads modifying the data cannot
+    # corrupt the interpreter
+
+    def test_json_mutating_list(self):
+        def worker(barrier, data, index):
+            barrier.wait()
+            while data:
+                for d in data:
+                    if len(d) > 5:
+                        d.clear()
+                    else:
+                        d.append(index)
+
+        data = [[], []]
+        encode_json_helper(self.json, worker, data)
+
+    def test_json_mutating_exact_dict(self):
+        def worker(barrier, data, index):
+            barrier.wait()
+            while data:
+                for d in data:
+                    if len(d) > 5:
+                        try:
+                            key = list(d)[0]
+                            d.pop()
+                        except (KeyError, IndexError):
+                            pass
+                    else:
+                        d[index] = index
+
+        data = [{}, {}]
+        encode_json_helper(self.json, worker, data)
+
+    def test_json_mutating_mapping(self):
+        def worker(barrier, data, index):
+            barrier.wait()
+            while data:
+                for d in data:
+                    if len(d.mapping) > 3:
+                        d.mapping.clear()
+                    else:
+                        d.mapping.append((index, index))
+
+        data = [MyMapping(), MyMapping()]
+        encode_json_helper(self.json, worker, data)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-20-26-21.gh-issue-116738.q_hPYq.rst
new file mode 100644 (file)
index 0000000..fe6a722
--- /dev/null
@@ -0,0 +1 @@
+Make the module :mod:`json` safe to use under the free-threading build.
index e1d6042cb78ab5eb07ad6079362d226d59be3cb6..9a1fc3aba3611672c4b2f6f9427e6ca0eaa4db03 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "Python.h"
 #include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
+#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST()
 #include "pycore_global_strings.h" // _Py_ID()
 #include "pycore_pyerrors.h"      // _PyErr_FormatNote
 #include "pycore_runtime.h"       // _PyRuntime
@@ -1456,7 +1457,7 @@ write_newline_indent(PyUnicodeWriter *writer,
 static PyObject *
 encoder_call(PyObject *op, PyObject *args, PyObject *kwds)
 {
-    /* Python callable interface to encode_listencode_obj */
+    /* Python callable interface to encoder_listencode_obj */
     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
     PyObject *obj;
     Py_ssize_t indent_level;
@@ -1743,15 +1744,84 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
     return 0;
 }
 
+static inline int
+_encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
+                            bool *first, PyObject *dct, PyObject *items,
+                            Py_ssize_t indent_level, PyObject *indent_cache,
+                            PyObject *separator)
+{
+    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(items);
+    PyObject *key, *value;
+    for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
+        PyObject *item = PyList_GET_ITEM(items, i);
+#ifdef Py_GIL_DISABLED
+        // gh-119438: in the free-threading build the critical section on items can get suspended
+        Py_INCREF(item);
+#endif
+        if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
+            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+#ifdef Py_GIL_DISABLED
+            Py_DECREF(item);
+#endif
+            return -1;
+        }
+
+        key = PyTuple_GET_ITEM(item, 0);
+        value = PyTuple_GET_ITEM(item, 1);
+        if (encoder_encode_key_value(s, writer, first, dct, key, value,
+                                     indent_level, indent_cache,
+                                     separator) < 0) {
+#ifdef Py_GIL_DISABLED
+            Py_DECREF(item);
+#endif
+            return -1;
+        }
+#ifdef Py_GIL_DISABLED
+        Py_DECREF(item);
+#endif
+    }
+
+    return 0;
+}
+
+static inline int
+_encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
+                         bool *first, PyObject *dct, Py_ssize_t indent_level,
+                         PyObject *indent_cache, PyObject *separator)
+{
+    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(dct);
+    PyObject *key, *value;
+    Py_ssize_t pos = 0;
+    while (PyDict_Next(dct, &pos, &key, &value)) {
+#ifdef Py_GIL_DISABLED
+        // gh-119438: in the free-threading build the critical section on dct can get suspended
+        Py_INCREF(key);
+        Py_INCREF(value);
+#endif
+        if (encoder_encode_key_value(s, writer, first, dct, key, value,
+                                    indent_level, indent_cache,
+                                    separator) < 0) {
+#ifdef Py_GIL_DISABLED
+            Py_DECREF(key);
+            Py_DECREF(value);
+#endif
+            return -1;
+        }
+#ifdef Py_GIL_DISABLED
+        Py_DECREF(key);
+        Py_DECREF(value);
+#endif
+    }
+    return 0;
+}
+
 static int
 encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
-                        PyObject *dct,
+                       PyObject *dct,
                        Py_ssize_t indent_level, PyObject *indent_cache)
 {
     /* Encode Python dict dct a JSON term */
     PyObject *ident = NULL;
-    PyObject *items = NULL;
-    PyObject *key, *value;
     bool first = true;
 
     if (PyDict_GET_SIZE(dct) == 0) {
@@ -1788,34 +1858,30 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
     }
 
     if (s->sort_keys || !PyDict_CheckExact(dct)) {
-        items = PyMapping_Items(dct);
-        if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
+        PyObject *items = PyMapping_Items(dct);
+        if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0)) {
+            Py_XDECREF(items);
             goto bail;
+        }
 
-        for (Py_ssize_t  i = 0; i < PyList_GET_SIZE(items); i++) {
-            PyObject *item = PyList_GET_ITEM(items, i);
-
-            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
-                PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
-                goto bail;
-            }
-
-            key = PyTuple_GET_ITEM(item, 0);
-            value = PyTuple_GET_ITEM(item, 1);
-            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
-                                         indent_level, indent_cache,
-                                         separator) < 0)
-                goto bail;
+        int result;
+        Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(items);
+        result = _encoder_iterate_mapping_lock_held(s, writer, &first, dct,
+                    items, indent_level, indent_cache, separator);
+        Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
+        Py_DECREF(items);
+        if (result < 0) {
+            goto bail;
         }
-        Py_CLEAR(items);
 
     } else {
-        Py_ssize_t pos = 0;
-        while (PyDict_Next(dct, &pos, &key, &value)) {
-            if (encoder_encode_key_value(s, writer, &first, dct, key, value,
-                                         indent_level, indent_cache,
-                                         separator) < 0)
-                goto bail;
+        int result;
+        Py_BEGIN_CRITICAL_SECTION(dct);
+        result = _encoder_iterate_dict_lock_held(s, writer, &first, dct,
+                            indent_level, indent_cache, separator);
+        Py_END_CRITICAL_SECTION();
+        if (result < 0) {
+            goto bail;
         }
     }
 
@@ -1837,11 +1903,43 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
     return 0;
 
 bail:
-    Py_XDECREF(items);
     Py_XDECREF(ident);
     return -1;
 }
 
+static inline int
+_encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
+    PyObject *seq, PyObject *s_fast,
+    Py_ssize_t indent_level, PyObject *indent_cache, PyObject *separator)
+{
+    for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
+        PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
+#ifdef Py_GIL_DISABLED
+        // gh-119438: in the free-threading build the critical section on s_fast can get suspended
+        Py_INCREF(obj);
+#endif
+        if (i) {
+            if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
+#ifdef Py_GIL_DISABLED
+                Py_DECREF(obj);
+#endif
+                return -1;
+            }
+        }
+        if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
+            _PyErr_FormatNote("when serializing %T item %zd", seq, i);
+#ifdef Py_GIL_DISABLED
+            Py_DECREF(obj);
+#endif
+            return -1;
+        }
+#ifdef Py_GIL_DISABLED
+        Py_DECREF(obj);
+#endif
+    }
+    return 0;
+}
+
 static int
 encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
                         PyObject *seq,
@@ -1849,10 +1947,8 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
 {
     PyObject *ident = NULL;
     PyObject *s_fast = NULL;
-    Py_ssize_t i;
 
-    ident = NULL;
-    s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
+    s_fast = PySequence_Fast(seq, "encoder_listencode_list needs a sequence");
     if (s_fast == NULL)
         return -1;
     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
@@ -1890,16 +1986,13 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
             goto bail;
         }
     }
-    for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
-        PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
-        if (i) {
-            if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
-                goto bail;
-        }
-        if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
-            _PyErr_FormatNote("when serializing %T item %zd", seq, i);
-            goto bail;
-        }
+    int result;
+    Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq);
+    result = _encoder_iterate_fast_seq_lock_held(s, writer, seq, s_fast,
+                     indent_level, indent_cache, separator);
+    Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
+    if (result < 0) {
+        goto bail;
     }
     if (ident != NULL) {
         if (PyDict_DelItem(s->markers, ident))