]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735)
authorSam Gross <colesbury@gmail.com>
Tue, 16 Sep 2025 08:21:58 +0000 (09:21 +0100)
committerGitHub <noreply@github.com>
Tue, 16 Sep 2025 08:21:58 +0000 (09:21 +0100)
There was a deadlock originally seen by Memray when a daemon thread
enabled or disabled profiling while the interpreter was shutting down.
I think this could also happen with garbage collection, but I haven't
seen that in practice.

The daemon thread could be hung while trying acquire the global rwmutex
that prevents overlapping global and per-interpreter stop-the-world events.
Since it already held the main interpreter's stop-the-world lock, it
also deadlocked the main thread, which is trying to perform interpreter
finalization.

Swap the order of lock acquisition to prevent this deadlock.
Additionally, refactor `_PyParkingLot_Park` so that the global buckets
hashtable is left in a clean state if the thread is hung in
`PyEval_AcquireThread`.

Include/internal/pycore_semaphore.h
Lib/test/test_threading.py
Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst [new file with mode: 0644]
Python/lock.c
Python/parking_lot.c
Python/pystate.c

index 269538384606ce1b031a75f928038e253c5efce4..66b4939dcacf051ca298273ef92eec56fa599608 100644 (file)
@@ -46,10 +46,8 @@ typedef struct _PySemaphore {
 } _PySemaphore;
 
 // Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
-// If `detach` is true, then the thread will detach/release the GIL while
-// sleeping.
 PyAPI_FUNC(int)
-_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns, int detach);
+_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns);
 
 // Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
 // can be called before _PySemaphore_Wait().
index 6e612b06281ca21d3becf4ffe21562b5eb84bac7..774059c21d72464bce82b80d1ab7336654e3a34d 100644 (file)
@@ -1430,6 +1430,33 @@ class ThreadTests(BaseTestCase):
         self.assertEqual(len(native_ids), 2)
         self.assertNotEqual(native_ids[0], native_ids[1])
 
+    def test_stop_the_world_during_finalization(self):
+        # gh-137433: Test functions that trigger a stop-the-world in the free
+        # threading build concurrent with interpreter finalization.
+        script = """if True:
+            import gc
+            import sys
+            import threading
+            NUM_THREADS = 5
+            b = threading.Barrier(NUM_THREADS + 1)
+            def run_in_bg():
+                b.wait()
+                while True:
+                    sys.setprofile(None)
+                    gc.collect()
+
+            for _ in range(NUM_THREADS):
+                t = threading.Thread(target=run_in_bg, daemon=True)
+                t.start()
+
+            b.wait()
+            print("Exiting...")
+        """
+        rc, out, err = assert_python_ok('-c', script)
+        self.assertEqual(rc, 0)
+        self.assertEqual(err, b"")
+        self.assertEqual(out.strip(), b"Exiting...")
+
 class ThreadJoinOnShutdown(BaseTestCase):
 
     def _run_and_join(self, script):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-13-13-39-02.gh-issue-137433.g6Atfz.rst
new file mode 100644 (file)
index 0000000..0389cb0
--- /dev/null
@@ -0,0 +1,3 @@
+Fix a potential deadlock in the :term:`free threading` build when daemon
+threads enable or disable profiling or tracing while the main thread is
+shutting down the interpreter.
index a49d587a1686d953a5cb113a5dd051db1f3f4beb..98f3f89c201fef220c370483417c336a154cfb2a 100644 (file)
@@ -227,7 +227,7 @@ _PyRawMutex_LockSlow(_PyRawMutex *m)
 
         // Wait for us to be woken up. Note that we still have to lock the
         // mutex ourselves: it is NOT handed off to us.
-        _PySemaphore_Wait(&waiter.sema, -1, /*detach=*/0);
+        _PySemaphore_Wait(&waiter.sema, -1);
     }
 
     _PySemaphore_Destroy(&waiter.sema);
index e896dea02712f2c33568dbc2911f55f1a6af4dda..99c1ad848be795d7b564ea4f8fd89674e0e3a06c 100644 (file)
@@ -91,8 +91,8 @@ _PySemaphore_Destroy(_PySemaphore *sema)
 #endif
 }
 
-static int
-_PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
+int
+_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout)
 {
     int res;
 #if defined(MS_WINDOWS)
@@ -225,27 +225,6 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
     return res;
 }
 
-int
-_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout, int detach)
-{
-    PyThreadState *tstate = NULL;
-    if (detach) {
-        tstate = _PyThreadState_GET();
-        if (tstate && _PyThreadState_IsAttached(tstate)) {
-            // Only detach if we are attached
-            PyEval_ReleaseThread(tstate);
-        }
-        else {
-            tstate = NULL;
-        }
-    }
-    int res = _PySemaphore_PlatformWait(sema, timeout);
-    if (tstate) {
-        PyEval_AcquireThread(tstate);
-    }
-    return res;
-}
-
 void
 _PySemaphore_Wakeup(_PySemaphore *sema)
 {
@@ -342,7 +321,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
     enqueue(bucket, addr, &wait);
     _PyRawMutex_Unlock(&bucket->mutex);
 
-    int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
+    PyThreadState *tstate = NULL;
+    if (detach) {
+        tstate = _PyThreadState_GET();
+        if (tstate && _PyThreadState_IsAttached(tstate)) {
+            // Only detach if we are attached
+            PyEval_ReleaseThread(tstate);
+        }
+        else {
+            tstate = NULL;
+        }
+    }
+
+    int res = _PySemaphore_Wait(&wait.sema, timeout_ns);
     if (res == Py_PARK_OK) {
         goto done;
     }
@@ -354,7 +345,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
         // Another thread has started to unpark us. Wait until we process the
         // wakeup signal.
         do {
-            res = _PySemaphore_Wait(&wait.sema, -1, detach);
+            res = _PySemaphore_Wait(&wait.sema, -1);
         } while (res != Py_PARK_OK);
         goto done;
     }
@@ -366,6 +357,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
 
 done:
     _PySemaphore_Destroy(&wait.sema);
+    if (tstate) {
+        PyEval_AcquireThread(tstate);
+    }
     return res;
 
 }
index 2465d8667472dc85ca4a9e72227a8bd0b7085cd7..a39369efc077725af68478fd71fef6105c1acf4f 100644 (file)
@@ -2253,13 +2253,15 @@ stop_the_world(struct _stoptheworld_state *stw)
 {
     _PyRuntimeState *runtime = &_PyRuntime;
 
-    PyMutex_Lock(&stw->mutex);
+    // gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
+    // threads that may hang when blocked on lock acquisition.
     if (stw->is_global) {
         _PyRWMutex_Lock(&runtime->stoptheworld_mutex);
     }
     else {
         _PyRWMutex_RLock(&runtime->stoptheworld_mutex);
     }
+    PyMutex_Lock(&stw->mutex);
 
     HEAD_LOCK(runtime);
     stw->requested = 1;
@@ -2325,13 +2327,13 @@ start_the_world(struct _stoptheworld_state *stw)
     }
     stw->requester = NULL;
     HEAD_UNLOCK(runtime);
+    PyMutex_Unlock(&stw->mutex);
     if (stw->is_global) {
         _PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
     }
     else {
         _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
     }
-    PyMutex_Unlock(&stw->mutex);
 }
 #endif  // Py_GIL_DISABLED