]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-118332: Fix deadlock involving stop the world (#118412)
authorSam Gross <colesbury@gmail.com>
Tue, 30 Apr 2024 19:01:28 +0000 (15:01 -0400)
committerGitHub <noreply@github.com>
Tue, 30 Apr 2024 19:01:28 +0000 (15:01 -0400)
Avoid detaching thread state when stopping the world. When re-attaching
the thread state, the thread would attempt to resume the top-most
critical section, which might now be held by a thread paused for our
stop-the-world request.

Include/internal/pycore_lock.h
Modules/_testinternalcapi/test_critical_sections.c
Modules/_threadmodule.c
Python/lock.c
Python/pystate.c

index f993c95ecbf75aea77b8b3a2184203ea0426d4b4..a5b28e4bd4744e80ef1af61645e5185042b73d32 100644 (file)
@@ -150,8 +150,10 @@ PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
 
 // Wait for the event to be set, or until the timeout expires. If the event is
 // already set, then this returns immediately. Returns 1 if the event was set,
-// and 0 if the timeout expired or thread was interrupted.
-PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);
+// and 0 if the timeout expired or thread was interrupted. If `detach` is
+// true, then the thread will detach/release the GIL while waiting.
+PyAPI_FUNC(int)
+PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach);
 
 // _PyRawMutex implements a word-sized mutex that that does not depend on the
 // parking lot API, and therefore can be used in the parking lot
index 94da0468fcf149a72d1e03cef09ecc8503a1efda..cdf8a70fc79ff3f8ccf94f465d39258865cde1fa 100644 (file)
@@ -204,6 +204,90 @@ test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args))
     Py_DECREF(test_data.obj1);
     Py_RETURN_NONE;
 }
+
+static void
+pysleep(int ms)
+{
+#ifdef MS_WINDOWS
+    Sleep(ms);
+#else
+    usleep(ms * 1000);
+#endif
+}
+
+struct test_data_gc {
+    PyObject *obj;
+    Py_ssize_t num_threads;
+    Py_ssize_t id;
+    Py_ssize_t countdown;
+    PyEvent done_event;
+    PyEvent ready;
+};
+
+static void
+thread_gc(void *arg)
+{
+    struct test_data_gc *test_data = arg;
+    PyGILState_STATE gil = PyGILState_Ensure();
+
+    Py_ssize_t id = _Py_atomic_add_ssize(&test_data->id, 1);
+    if (id == test_data->num_threads - 1) {
+        _PyEvent_Notify(&test_data->ready);
+    }
+    else {
+        // wait for all test threads to more reliably reproduce the issue.
+        PyEvent_Wait(&test_data->ready);
+    }
+
+    if (id == 0) {
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj);
+        // pause long enough that the lock would be handed off directly to
+        // a waiting thread.
+        pysleep(5);
+        PyGC_Collect();
+        Py_END_CRITICAL_SECTION();
+    }
+    else if (id == 1) {
+        pysleep(1);
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj);
+        pysleep(1);
+        Py_END_CRITICAL_SECTION();
+    }
+    else if (id == 2) {
+        // sleep long enough so that thread 0 is waiting to stop the world
+        pysleep(6);
+        Py_BEGIN_CRITICAL_SECTION(test_data->obj);
+        pysleep(1);
+        Py_END_CRITICAL_SECTION();
+    }
+
+    PyGILState_Release(gil);
+    if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) {
+        // last thread to finish sets done_event
+        _PyEvent_Notify(&test_data->done_event);
+    }
+}
+
+static PyObject *
+test_critical_sections_gc(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // gh-118332: Contended critical sections should not deadlock with GC
+    const Py_ssize_t NUM_THREADS = 3;
+    struct test_data_gc test_data = {
+        .obj = PyDict_New(),
+        .countdown = NUM_THREADS,
+        .num_threads = NUM_THREADS,
+    };
+    assert(test_data.obj != NULL);
+
+    for (int i = 0; i < NUM_THREADS; i++) {
+        PyThread_start_new_thread(&thread_gc, &test_data);
+    }
+    PyEvent_Wait(&test_data.done_event);
+    Py_DECREF(test_data.obj);
+    Py_RETURN_NONE;
+}
+
 #endif
 
 static PyMethodDef test_methods[] = {
@@ -212,6 +296,7 @@ static PyMethodDef test_methods[] = {
     {"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS},
 #ifdef Py_CAN_START_THREADS
     {"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
+    {"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
 #endif
     {NULL, NULL} /* sentinel */
 };
index 5aa719c3834e614a630fa17538c1bf25406bf4f3..f5e3b42600675e46dbcb3e3ac7309fab4c111d01 100644 (file)
@@ -501,7 +501,8 @@ ThreadHandle_join(ThreadHandle *self, PyTime_t timeout_ns)
 
     // Wait until the deadline for the thread to exit.
     PyTime_t deadline = timeout_ns != -1 ? _PyDeadline_Init(timeout_ns) : 0;
-    while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns)) {
+    int detach = 1;
+    while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns, detach)) {
         if (deadline) {
             // _PyDeadline_Get will return a negative value if the deadline has
             // been exceeded.
index 91c66df8fd909353b982317f2430f64a901beee5..5ed95fcaf4188c385bfef5abab34f24c7f0c002b 100644 (file)
@@ -277,12 +277,12 @@ _PyEvent_Notify(PyEvent *evt)
 void
 PyEvent_Wait(PyEvent *evt)
 {
-    while (!PyEvent_WaitTimed(evt, -1))
+    while (!PyEvent_WaitTimed(evt, -1, /*detach=*/1))
         ;
 }
 
 int
-PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
+PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
 {
     for (;;) {
         uint8_t v = _Py_atomic_load_uint8(&evt->v);
@@ -298,7 +298,7 @@ PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
 
         uint8_t expected = _Py_HAS_PARKED;
         (void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
-                                  timeout_ns, NULL, 1);
+                                  timeout_ns, NULL, detach);
 
         return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
     }
index 78b39c9a577404661f02800622a3d0fef6c8ac00..9d7b73b3a071e124e555f2f798a4d9caf4f026e8 100644 (file)
@@ -2238,7 +2238,8 @@ stop_the_world(struct _stoptheworld_state *stw)
         }
 
         PyTime_t wait_ns = 1000*1000;  // 1ms (arbitrary, may need tuning)
-        if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) {
+        int detach = 0;
+        if (PyEvent_WaitTimed(&stw->stop_event, wait_ns, detach)) {
             assert(stw->thread_countdown == 0);
             break;
         }