git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-144513: Skip critical section locking during stop-the-world (gh-144524)
author Sam Gross <colesbury@gmail.com>
Fri, 6 Feb 2026 15:14:08 +0000 (10:14 -0500)
committer GitHub <noreply@github.com>
Fri, 6 Feb 2026 15:14:08 +0000 (15:14 +0000)
When the interpreter is in a stop-the-world pause, critical sections
don't need to acquire locks since no other threads can be running.
This avoids a potential deadlock where lock fairness hands off ownership
to a thread that has already suspended for stop-the-world.

Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst [new file with mode: 0644]
Modules/_testinternalcapi/test_critical_sections.c
Python/critical_section.c

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-05-13-30-00.gh-issue-144513.IjSTd7.rst
new file mode 100644 (file)
index 0000000..f971601
--- /dev/null
@@ -0,0 +1,2 @@
+Fix potential deadlock when using critical sections during stop-the-world
+pauses in the free-threaded build.
index e3b2fe716d48d3178be437c2f8503d51b1599833..72a1fa2cdc7224d7dd45467957e09c5dcf829845 100644 (file)
@@ -4,6 +4,8 @@
 
 #include "parts.h"
 #include "pycore_critical_section.h"
+#include "pycore_pystate.h"
+#include "pycore_pythread.h"
 
 #ifdef MS_WINDOWS
 #  include <windows.h>            // Sleep()
@@ -381,6 +383,87 @@ test_critical_section2_reacquisition(PyObject *self, PyObject *Py_UNUSED(args))
 
 #endif // Py_GIL_DISABLED
 
+#ifdef Py_CAN_START_THREADS
+
+// gh-144513: Test that critical sections don't deadlock with stop-the-world.
+// This test is designed to deadlock (timeout) on builds without the fix.
+struct test_data_stw {
+    PyObject *obj;           // object whose critical section the workers and the STW requester enter
+    Py_ssize_t num_threads;  // number of worker threads expected to check in
+    Py_ssize_t started;      // check-in counter; updated with _Py_atomic_add_ssize in thread_stw
+    PyEvent ready;           // notified by thread_stw; waited on by test_critical_sections_stw
+};
+
+static void
+thread_stw(void *arg)  // worker thread entry point; arg is the shared struct test_data_stw
+{
+    struct test_data_stw *test_data = arg;
+    PyGILState_STATE gil = PyGILState_Ensure();
+    // Check in: the worker whose atomic add compares equal to num_threads - 1 signals `ready`.
+    if (_Py_atomic_add_ssize(&test_data->started, 1) == test_data->num_threads - 1) {
+        _PyEvent_Notify(&test_data->ready);  // wakes the PyEvent_Wait in test_critical_sections_stw
+    }
+
+    // All threads: acquire critical section and hold it long enough to
+    // trigger TIME_TO_BE_FAIR_NS (1 ms), which causes direct handoff on unlock.
+    Py_BEGIN_CRITICAL_SECTION(test_data->obj);
+    pysleep(10);  // 10 ms = 10 x TIME_TO_BE_FAIR_NS
+    Py_END_CRITICAL_SECTION();
+
+    PyGILState_Release(gil);  // pairs with the PyGILState_Ensure at the top of the function
+}
+
+static PyObject *
+test_critical_sections_stw(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // gh-144513: Test that critical sections don't deadlock during STW.
+    //
+    // The deadlock occurs when lock ownership is handed off (due to fairness
+    // after TIME_TO_BE_FAIR_NS) to a thread that has already suspended for
+    // stop-the-world. The STW requester then cannot acquire the lock.
+    //
+    // With the fix, the STW requester detects world_stopped and skips locking.
+
+    #define STW_NUM_THREADS 2
+    struct test_data_stw test_data = {
+        .obj = PyDict_New(),  // may be NULL; checked immediately below
+        .num_threads = STW_NUM_THREADS,
+    };
+    if (test_data.obj == NULL) {
+        return NULL;  // PyDict_New() failed; no threads have been started yet
+    }
+
+    PyThread_handle_t handles[STW_NUM_THREADS];
+    PyThread_ident_t idents[STW_NUM_THREADS];
+    for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
+        PyThread_start_joinable_thread(&thread_stw, &test_data,  // NOTE(review): the call's result is discarded here;
+                                       &idents[i], &handles[i]);  // confirm whether a failed start needs handling before the wait/join below
+    }
+
+    // Wait for threads to start, then let them compete for the lock
+    PyEvent_Wait(&test_data.ready);
+    pysleep(5);  // presumably 5 ms (thread_stw annotates pysleep(10) as 10 ms), i.e. shorter than a worker's hold time
+
+    // Request stop-the-world and try to acquire the critical section.
+    // Without the fix, this may deadlock.
+    PyInterpreterState *interp = PyInterpreterState_Get();
+    _PyEval_StopTheWorld(interp);
+
+    Py_BEGIN_CRITICAL_SECTION(test_data.obj);  // empty section: only entering and leaving it is exercised
+    Py_END_CRITICAL_SECTION();
+
+    _PyEval_StartTheWorld(interp);  // resume the world before joining the workers
+
+    for (Py_ssize_t i = 0; i < STW_NUM_THREADS; i++) {
+        PyThread_join_thread(handles[i]);  // join each worker before test_data (a local) goes out of scope
+    }
+    #undef STW_NUM_THREADS
+    Py_DECREF(test_data.obj);  // drop the reference obtained from PyDict_New()
+    Py_RETURN_NONE;
+}
+
+#endif // Py_CAN_START_THREADS
+
 static PyMethodDef test_methods[] = {
     {"test_critical_sections", test_critical_sections, METH_NOARGS},
     {"test_critical_sections_nest", test_critical_sections_nest, METH_NOARGS},
@@ -392,6 +475,7 @@ static PyMethodDef test_methods[] = {
 #ifdef Py_CAN_START_THREADS
     {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
     {"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
+    {"test_critical_sections_stw", test_critical_sections_stw, METH_NOARGS},
 #endif
     {NULL, NULL} /* sentinel */
 };
index 2c2152f5de4716e4f81b26192891af786e9667a5..98e23eda7cdd77e22c21e1a88f8e18bf427cf293 100644 (file)
@@ -1,7 +1,8 @@
 #include "Python.h"
 
-#include "pycore_lock.h"
 #include "pycore_critical_section.h"
+#include "pycore_interp.h"
+#include "pycore_lock.h"
 
 #ifdef Py_GIL_DISABLED
 static_assert(_Alignof(PyCriticalSection) >= 4,
@@ -42,6 +43,15 @@ _PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMute
             }
         }
     }
+    // If the world is stopped, we don't need to acquire the lock because
+    // there are no other threads that could be accessing the object.
+    // Without this check, acquiring a critical section while the world is
+    // stopped could lead to a deadlock.
+    if (tstate->interp->stoptheworld.world_stopped) {
+        c->_cs_mutex = NULL;
+        c->_cs_prev = 0;
+        return;
+    }
     c->_cs_mutex = NULL;
     c->_cs_prev = (uintptr_t)tstate->critical_section;
     tstate->critical_section = (uintptr_t)c;
@@ -56,6 +66,12 @@ _PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMu
                               int is_m1_locked)
 {
 #ifdef Py_GIL_DISABLED
+    if (tstate->interp->stoptheworld.world_stopped) {
+        c->_cs_base._cs_mutex = NULL;
+        c->_cs_mutex2 = NULL;
+        c->_cs_base._cs_prev = 0;
+        return;
+    }
     c->_cs_base._cs_mutex = NULL;
     c->_cs_mutex2 = NULL;
     c->_cs_base._cs_prev = tstate->critical_section;