]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-106883 Fix deadlock in threaded application (#117332)
authorDiego Russo <diego.russo@arm.com>
Tue, 11 Mar 2025 15:31:03 +0000 (15:31 +0000)
committerGitHub <noreply@github.com>
Tue, 11 Mar 2025 15:31:03 +0000 (15:31 +0000)
When using threaded applications, there is a high risk of a deadlock in
the interpreter. It's a lock ordering deadlock with HEAD_LOCK(&_PyRuntime); and the GIL.

By disabling the GC during the _PyThread_CurrentFrames() and
_PyThread_CurrentExceptions() calls fixes the issue.

Lib/test/test_sys.py
Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst [new file with mode: 0644]
Python/pystate.c

index 86cf1a794f973c8384bd0582b85850412dbecb26..28931039f3792e30c185ebd3f7995ffa15543d29 100644 (file)
@@ -14,6 +14,7 @@ from test.support import os_helper
 from test.support.script_helper import assert_python_ok, assert_python_failure
 from test.support import threading_helper
 from test.support import import_helper
+from test.support import skip_if_sanitizer
 import textwrap
 import unittest
 import warnings
@@ -471,6 +472,79 @@ class SysModuleTest(unittest.TestCase):
         leave_g.set()
         t.join()
 
+    @skip_if_sanitizer(memory=True, address=True, reason= "Test too slow "
+                       "when the address sanitizer is enabled.")
+    @threading_helper.reap_threads
+    @threading_helper.requires_working_threading()
+    @support.requires_fork()
+    def test_current_frames_exceptions_deadlock(self):
+        """
+        Reproduce the bug raised in GH-106883 and GH-116969.
+        """
+        import threading
+        import time
+        import signal
+
+        class MockObject:
+            def __init__(self):
+                # Create some garbage
+                self._list = list(range(10000))
+                # Call the functions under test
+                self._trace = sys._current_frames()
+                self._exceptions = sys._current_exceptions()
+
+            def __del__(self):
+                # The presence of the __del__ method causes the deadlock when
+                # there is one thread executing the _current_frames or
+                # _current_exceptions functions and the other thread is
+                # running the GC:
+                # thread 1 has the interpreter lock and it is trying to
+                # acquire the GIL; thread 2 holds the GIL but is trying to
+                # acquire the interpreter lock.
+                # When the GC is running and it finds that an
+                # object has the __del__ method, it needs to execute the
+                # Python code in it and it requires the GIL to execute it
+                # (which will never happen because it is held by another thread
+                # blocked on the acquisition of the interpreter lock)
+                pass
+
+        def thread_function(num_objects):
+            obj = None
+            for _ in range(num_objects):
+                obj = MockObject()
+
+        # The number of objects should be big enough to increase the
+        # chances to call the GC.
+        NUM_OBJECTS = 1000
+        NUM_THREADS = 10
+
+        # 40 seconds should be enough for the test to be executed: if it
+        # is more than 40 seconds it means that the process is in deadlock
+        # hence the test fails
+        TIMEOUT = 40
+
+        # Test the sys._current_frames and sys._current_exceptions calls
+        pid = os.fork()
+        if pid:  # parent process
+            try:
+                support.wait_process(pid, exitcode=0, timeout=TIMEOUT)
+            except KeyboardInterrupt:
+                # When pressing CTRL-C kill the deadlocked process
+                os.kill(pid, signal.SIGTERM)
+                raise
+        else:  # child process
+            # Run the actual test in the forked process.
+            threads = []
+            for i in range(NUM_THREADS):
+                thread = threading.Thread(
+                    target=thread_function, args=(NUM_OBJECTS,)
+                )
+                threads.append(thread)
+                thread.start()
+            for t in threads:
+                t.join()
+            os._exit(0)
+
     @threading_helper.reap_threads
     @threading_helper.requires_working_threading()
     def test_current_exceptions(self):
diff --git a/Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst b/Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst
new file mode 100644 (file)
index 0000000..01c7fb0
--- /dev/null
@@ -0,0 +1 @@
+Disable GC during the _PyThread_CurrentFrames() and _PyThread_CurrentExceptions() calls to avoid the interpreter to deadlock.
index db2ce878af64ec3321850d0e5b4666368e945689..a45116665fe1956ea5ee8f05ea894f0cb0a6461b 100644 (file)
@@ -1398,6 +1398,9 @@ _PyThread_CurrentFrames(void)
         return NULL;
     }
 
+    // gh-106883: Disable the GC as this can cause the interpreter to deadlock
+    int gc_was_enabled = PyGC_Disable();
+
     /* for i in all interpreters:
      *     for t in all of i's thread states:
      *          if t's frame isn't NULL, map t's id to its frame
@@ -1440,6 +1443,12 @@ fail:
 
 done:
     HEAD_UNLOCK(runtime);
+
+    // Once the runtime is released, the GC can be reenabled.
+    if (gc_was_enabled) {
+        PyGC_Enable();
+    }
+
     return result;
 }
 
@@ -1459,6 +1468,9 @@ _PyThread_CurrentExceptions(void)
         return NULL;
     }
 
+    // gh-106883: Disable the GC as this can cause the interpreter to deadlock
+    int gc_was_enabled = PyGC_Disable();
+
     /* for i in all interpreters:
      *     for t in all of i's thread states:
      *          if t's frame isn't NULL, map t's id to its frame
@@ -1499,6 +1511,12 @@ fail:
 
 done:
     HEAD_UNLOCK(runtime);
+
+    // Once the runtime is released, the GC can be reenabled.
+    if (gc_was_enabled) {
+        PyGC_Enable();
+    }
+
     return result;
 }