git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-124878: Fix race conditions during interpreter finalization (#130649)
author: Sam Gross <colesbury@gmail.com>
Thu, 6 Mar 2025 15:38:34 +0000 (10:38 -0500)
committer: GitHub <noreply@github.com>
Thu, 6 Mar 2025 15:38:34 +0000 (10:38 -0500)
The thread state gains a reference count field (`refcount` in
`_PyThreadStateImpl`) to avoid issues with `PyThreadState` being a
dangling pointer to freed memory.
The refcount starts with a value of two: one reference is owned by the
interpreter's linked list of thread states and one reference is owned by
the OS thread. The reference count is decremented when the thread state
is removed from the interpreter's linked list and before the OS thread
calls `PyThread_hang_thread()`. The thread that decrements it to zero
frees the `PyThreadState` memory.

The `holds_gil` field is moved out of the `_status` bit field, to avoid
a data race where one thread calls `PyThreadState_Clear()`, modifying the
`_status` bit field while the OS thread reads `holds_gil` when
attempting to acquire the GIL.

The `PyThreadState.state` field now has `_Py_THREAD_SHUTTING_DOWN` as a
possible value. This corresponds to the `_PyThreadState_MustExit()`
check. This avoids race conditions in the free threading build when
checking `_PyThreadState_MustExit()`.

13 files changed:
Include/cpython/pystate.h
Include/internal/pycore_pystate.h
Include/internal/pycore_runtime_init.h
Include/internal/pycore_tstate.h
Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst [new file with mode: 0644]
Modules/_threadmodule.c
Modules/posixmodule.c
Python/ceval_gil.c
Python/pylifecycle.c
Python/pystate.c
Python/qsbr.c
Tools/tsan/suppressions.txt
Tools/tsan/suppressions_free_threading.txt

index e0d2ac9bf93ad8764678dfef11a8e9580e6630e5..65bc11ca0f5ba9c05ded5585391518df8d008b0a 100644 (file)
@@ -83,8 +83,6 @@ struct _ts {
         unsigned int bound_gilstate:1;
         /* Currently in use (maybe holds the GIL). */
         unsigned int active:1;
-        /* Currently holds the GIL. */
-        unsigned int holds_gil:1;
 
         /* various stages of finalization */
         unsigned int finalizing:1;
@@ -92,7 +90,7 @@ struct _ts {
         unsigned int finalized:1;
 
         /* padding to align to 4 bytes */
-        unsigned int :23;
+        unsigned int :24;
     } _status;
 #ifdef Py_BUILD_CORE
 #  define _PyThreadState_WHENCE_NOTSET -1
@@ -103,6 +101,10 @@ struct _ts {
 #  define _PyThreadState_WHENCE_GILSTATE 4
 #  define _PyThreadState_WHENCE_EXEC 5
 #endif
+
+    /* Currently holds the GIL. Must be its own field to avoid data races */
+    int holds_gil;
+
     int _whence;
 
     /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).
index 9ec59e60f609ab585fb193c28ac7682d43effec8..f3667a8aa71c27aa6d016e1cd6b240cf94eb2b1e 100644 (file)
@@ -27,6 +27,10 @@ extern "C" {
 // "suspended" state. Only the thread performing a stop-the-world pause may
 // transition a thread from the "suspended" state back to the "detached" state.
 //
+// The "shutting down" state is used when the interpreter is being finalized.
+// Threads in this state can't do anything other than block the OS thread.
+// (See _PyThreadState_HangThread).
+//
 // State transition diagram:
 //
 //            (bound thread)        (stop-the-world thread)
@@ -37,9 +41,10 @@ extern "C" {
 //
 // The (bound thread) and (stop-the-world thread) labels indicate which thread
 // is allowed to perform the transition.
-#define _Py_THREAD_DETACHED     0
-#define _Py_THREAD_ATTACHED     1
-#define _Py_THREAD_SUSPENDED    2
+#define _Py_THREAD_DETACHED         0
+#define _Py_THREAD_ATTACHED         1
+#define _Py_THREAD_SUSPENDED        2
+#define _Py_THREAD_SHUTTING_DOWN    3
 
 
 /* Check if the current thread is the main thread.
@@ -118,7 +123,8 @@ extern _Py_thread_local PyThreadState *_Py_tss_tstate;
 extern int _PyThreadState_CheckConsistency(PyThreadState *tstate);
 #endif
 
-int _PyThreadState_MustExit(PyThreadState *tstate);
+extern int _PyThreadState_MustExit(PyThreadState *tstate);
+extern void _PyThreadState_HangThread(PyThreadState *tstate);
 
 // Export for most shared extensions, used via _PyThreadState_GET() static
 // inline function.
@@ -169,6 +175,11 @@ extern void _PyThreadState_Detach(PyThreadState *tstate);
 // to the "detached" state.
 extern void _PyThreadState_Suspend(PyThreadState *tstate);
 
+// Mark the thread state as "shutting down". This is used during interpreter
+// and runtime finalization. The thread may no longer attach to the
+// interpreter and will instead block via _PyThreadState_HangThread().
+extern void _PyThreadState_SetShuttingDown(PyThreadState *tstate);
+
 // Perform a stop-the-world pause for all threads in the all interpreters.
 //
 // Threads in the "attached" state are paused and transitioned to the "GC"
@@ -238,7 +249,7 @@ PyAPI_FUNC(PyThreadState *) _PyThreadState_NewBound(
     PyInterpreterState *interp,
     int whence);
 extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate);
-extern void _PyThreadState_DeleteList(PyThreadState *list);
+extern void _PyThreadState_DeleteList(PyThreadState *list, int is_after_fork);
 extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate);
 
 // Export for '_testinternalcapi' shared extension
index 1260b957ce94822c578b2e5ef09366a95171593a..2ec32b64adde0bf783db7bcdfae85e11b4adc8b6 100644 (file)
@@ -171,6 +171,8 @@ extern PyTypeObject _PyExc_MemoryError;
 #define _PyThreadStateImpl_INIT \
     { \
         .base = _PyThreadState_INIT, \
+        /* The thread and the interpreter's linked list hold a reference */ \
+        .refcount = 2, \
     }
 
 #define _PyThreadState_INIT \
index 624b29e32ed463bbf3061a060a57be27f8123cfd..6f50bb2f26307aff70cf5bf0601c856f85413ee1 100644 (file)
@@ -21,6 +21,10 @@ typedef struct _PyThreadStateImpl {
     // semi-public fields are in PyThreadState.
     PyThreadState base;
 
+    // The reference count field is used to synchronize deallocation of the
+    // thread state during runtime finalization.
+    Py_ssize_t refcount;
+
     // These are addresses, but we need to convert to ints to avoid UB.
     uintptr_t c_stack_top;
     uintptr_t c_stack_soft_limit;
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-27-18-48-42.gh-issue-124878.DS0MIL.rst
new file mode 100644 (file)
index 0000000..8425664
--- /dev/null
@@ -0,0 +1,2 @@
+Fix race conditions during runtime finalization that could lead to accessing
+freed memory.
index dab6b395cb2e465c3b72cf58178b90c2e772ba7a..d423854b8ebff35d178ca99f7d39463b76a74d56 100644 (file)
@@ -333,9 +333,6 @@ thread_run(void *boot_raw)
     // _PyRuntimeState_SetFinalizing() is called. At this point, all Python
     // threads must exit, except of the thread calling Py_Finalize() which
     // holds the GIL and must not exit.
-    //
-    // At this stage, tstate can be a dangling pointer (point to freed memory),
-    // it's ok to call _PyThreadState_MustExit() with a dangling pointer.
     if (_PyThreadState_MustExit(tstate)) {
         // Don't call PyThreadState_Clear() nor _PyThreadState_DeleteCurrent().
         // These functions are called on tstate indirectly by Py_Finalize()
index bf8ad19416304f72e408fa3bd5fd99320f921fb2..25ebc98435c5da5d41fbf1af7c141b484505390c 100644 (file)
@@ -715,7 +715,7 @@ PyOS_AfterFork_Child(void)
     // may call destructors.
     PyThreadState *list = _PyThreadState_RemoveExcept(tstate);
     _PyEval_StartTheWorldAll(&_PyRuntime);
-    _PyThreadState_DeleteList(list);
+    _PyThreadState_DeleteList(list, /*is_after_fork=*/1);
 
     _PyImport_ReInitLock(tstate->interp);
     _PyImport_ReleaseLock(tstate->interp);
index 7a3cd8d80447394f46c32d355d735054ce4ee18b..2c1cc17b2ffa13b84626dfa2947a75f07cd2e826 100644 (file)
@@ -6,8 +6,8 @@
 #include "pycore_pyerrors.h"      // _PyErr_GetRaisedException()
 #include "pycore_pylifecycle.h"   // _PyErr_Print()
 #include "pycore_pymem.h"         // _PyMem_IsPtrFreed()
+#include "pycore_pystate.h"       // PyThread_hang_thread()
 #include "pycore_pystats.h"       // _Py_PrintSpecializationStats()
-#include "pycore_pythread.h"      // PyThread_hang_thread()
 
 /*
    Notes about the implementation:
@@ -206,7 +206,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
     _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
     _Py_atomic_store_int_relaxed(&gil->locked, 0);
     if (tstate != NULL) {
-        tstate->_status.holds_gil = 0;
+        tstate->holds_gil = 0;
     }
     COND_SIGNAL(gil->cond);
     MUTEX_UNLOCK(gil->mutex);
@@ -231,7 +231,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release)
     // Check if we have the GIL before dropping it. tstate will be NULL if
     // take_gil() detected that this thread has been destroyed, in which case
     // we know we have the GIL.
-    if (tstate != NULL && !tstate->_status.holds_gil) {
+    if (tstate != NULL && !tstate->holds_gil) {
         return;
     }
 #endif
@@ -296,15 +296,14 @@ take_gil(PyThreadState *tstate)
            thread which called Py_Finalize(), this thread cannot continue.
 
            This code path can be reached by a daemon thread after Py_Finalize()
-           completes. In this case, tstate is a dangling pointer: points to
-           PyThreadState freed memory.
+           completes.
 
            This used to call a *thread_exit API, but that was not safe as it
            lacks stack unwinding and local variable destruction important to
            C++. gh-87135: The best that can be done is to hang the thread as
            the public APIs calling this have no error reporting mechanism (!).
          */
-        PyThread_hang_thread();
+        _PyThreadState_HangThread(tstate);
     }
 
     assert(_PyThreadState_CheckConsistency(tstate));
@@ -353,7 +352,7 @@ take_gil(PyThreadState *tstate)
                 }
                 // gh-87135: hang the thread as *thread_exit() is not a safe
                 // API. It lacks stack unwind and local variable destruction.
-                PyThread_hang_thread();
+                _PyThreadState_HangThread(tstate);
             }
             assert(_PyThreadState_CheckConsistency(tstate));
 
@@ -404,11 +403,11 @@ take_gil(PyThreadState *tstate)
         /* tstate could be a dangling pointer, so don't pass it to
            drop_gil(). */
         drop_gil(interp, NULL, 1);
-        PyThread_hang_thread();
+        _PyThreadState_HangThread(tstate);
     }
     assert(_PyThreadState_CheckConsistency(tstate));
 
-    tstate->_status.holds_gil = 1;
+    tstate->holds_gil = 1;
     _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
     update_eval_breaker_for_thread(interp, tstate);
 
@@ -460,7 +459,7 @@ PyEval_ThreadsInitialized(void)
 static inline int
 current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate)
 {
-    int holds_gil = tstate->_status.holds_gil;
+    int holds_gil = tstate->holds_gil;
 
     // holds_gil is the source of truth; check that last_holder and gil->locked
     // are consistent with it.
index 1a7f312a6046142d318a9c9e6ce90e0e2afe47df..4007e43c98d640ef43aebf30c5772ae0dd4f93a7 100644 (file)
@@ -2036,18 +2036,23 @@ _Py_Finalize(_PyRuntimeState *runtime)
 
     // XXX Call something like _PyImport_Disable() here?
 
-    /* Destroy the state of all threads of the interpreter, except of the
+    /* Remove the state of all threads of the interpreter, except for the
        current thread. In practice, only daemon threads should still be alive,
        except if wait_for_thread_shutdown() has been cancelled by CTRL+C.
-       Clear frames of other threads to call objects destructors. Destructors
-       will be called in the current Python thread. Since
-       _PyRuntimeState_SetFinalizing() has been called, no other Python thread
-       can take the GIL at this point: if they try, they will exit
-       immediately. We start the world once we are the only thread state left,
+       We start the world once we are the only thread state left,
        before we call destructors. */
     PyThreadState *list = _PyThreadState_RemoveExcept(tstate);
+    for (PyThreadState *p = list; p != NULL; p = p->next) {
+        _PyThreadState_SetShuttingDown(p);
+    }
     _PyEval_StartTheWorldAll(runtime);
-    _PyThreadState_DeleteList(list);
+
+    /* Clear frames of other threads to call objects destructors. Destructors
+       will be called in the current Python thread. Since
+       _PyRuntimeState_SetFinalizing() has been called, no other Python thread
+       can take the GIL at this point: if they try, they will hang in
+       _PyThreadState_HangThread. */
+    _PyThreadState_DeleteList(list, /*is_after_fork=*/0);
 
     /* At this point no Python code should be running at all.
        The only thread state left should be the main thread of the main
index 4b01942f40e1239073974d28b9594dbcb7c6ee0e..fcd12d1b9333605a737b4aa6c1d603a94304450f 100644 (file)
@@ -1474,6 +1474,15 @@ free_threadstate(_PyThreadStateImpl *tstate)
     }
 }
 
+static void
+decref_threadstate(_PyThreadStateImpl *tstate)
+{
+    if (_Py_atomic_add_ssize(&tstate->refcount, -1) == 1) {
+        // The last reference to the thread state is gone.
+        free_threadstate(tstate);
+    }
+}
+
 /* Get the thread state to a minimal consistent state.
    Further init happens in pylifecycle.c before it can be used.
    All fields not initialized here are expected to be zeroed out,
@@ -1938,8 +1947,12 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate)
 // Deletes the thread states in the linked list `list`.
 //
 // This is intended to be used in conjunction with _PyThreadState_RemoveExcept.
+//
+// If `is_after_fork` is true, the thread states are immediately freed.
+// Otherwise, they are decref'd because they may still be referenced by an
+// OS thread.
 void
-_PyThreadState_DeleteList(PyThreadState *list)
+_PyThreadState_DeleteList(PyThreadState *list, int is_after_fork)
 {
     // The world can't be stopped because we PyThreadState_Clear() can
     // call destructors.
@@ -1949,7 +1962,12 @@ _PyThreadState_DeleteList(PyThreadState *list)
     for (p = list; p; p = next) {
         next = p->next;
         PyThreadState_Clear(p);
-        free_threadstate((_PyThreadStateImpl *)p);
+        if (is_after_fork) {
+            free_threadstate((_PyThreadStateImpl *)p);
+        }
+        else {
+            decref_threadstate((_PyThreadStateImpl *)p);
+        }
     }
 }
 
@@ -2082,12 +2100,19 @@ static void
 tstate_wait_attach(PyThreadState *tstate)
 {
     do {
-        int expected = _Py_THREAD_SUSPENDED;
-
-        // Wait until we're switched out of SUSPENDED to DETACHED.
-        _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state),
-                           /*timeout=*/-1, NULL, /*detach=*/0);
-
+        int state = _Py_atomic_load_int_relaxed(&tstate->state);
+        if (state == _Py_THREAD_SUSPENDED) {
+            // Wait until we're switched out of SUSPENDED to DETACHED.
+            _PyParkingLot_Park(&tstate->state, &state, sizeof(tstate->state),
+                               /*timeout=*/-1, NULL, /*detach=*/0);
+        }
+        else if (state == _Py_THREAD_SHUTTING_DOWN) {
+            // We're shutting down, so we can't attach.
+            _PyThreadState_HangThread(tstate);
+        }
+        else {
+            assert(state == _Py_THREAD_DETACHED);
+        }
         // Once we're back in DETACHED we can re-attach
     } while (!tstate_try_attach(tstate));
 }
@@ -2118,7 +2143,7 @@ _PyThreadState_Attach(PyThreadState *tstate)
         tstate_activate(tstate);
 
 #ifdef Py_GIL_DISABLED
-        if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) {
+        if (_PyEval_IsGILEnabled(tstate) && !tstate->holds_gil) {
             // The GIL was enabled between our call to _PyEval_AcquireLock()
             // and when we attached (the GIL can't go from enabled to disabled
             // here because only a thread holding the GIL can disable
@@ -2201,6 +2226,15 @@ _PyThreadState_Suspend(PyThreadState *tstate)
     HEAD_UNLOCK(runtime);
 }
 
+void
+_PyThreadState_SetShuttingDown(PyThreadState *tstate)
+{
+    _Py_atomic_store_int(&tstate->state, _Py_THREAD_SHUTTING_DOWN);
+#ifdef Py_GIL_DISABLED
+    _PyParkingLot_UnparkAll(&tstate->state);
+#endif
+}
+
 // Decrease stop-the-world counter of remaining number of threads that need to
 // pause. If we are the final thread to pause, notify the requesting thread.
 static void
@@ -3001,43 +3035,27 @@ _PyThreadState_CheckConsistency(PyThreadState *tstate)
 #endif
 
 
-// Check if a Python thread must exit immediately, rather than taking the GIL
-// if Py_Finalize() has been called.
+// Check if a Python thread must call _PyThreadState_HangThread(), rather than
+// taking the GIL or attaching to the interpreter if Py_Finalize() has been
+// called.
 //
 // When this function is called by a daemon thread after Py_Finalize() has been
-// called, the GIL does no longer exist.
-//
-// tstate can be a dangling pointer (point to freed memory): only tstate value
-// is used, the pointer is not deferenced.
+// called, the GIL may no longer exist.
 //
 // tstate must be non-NULL.
 int
 _PyThreadState_MustExit(PyThreadState *tstate)
 {
-    /* bpo-39877: Access _PyRuntime directly rather than using
-       tstate->interp->runtime to support calls from Python daemon threads.
-       After Py_Finalize() has been called, tstate can be a dangling pointer:
-       point to PyThreadState freed memory. */
-    unsigned long finalizing_id = _PyRuntimeState_GetFinalizingID(&_PyRuntime);
-    PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime);
-    if (finalizing == NULL) {
-        // XXX This isn't completely safe from daemon thraeds,
-        // since tstate might be a dangling pointer.
-        finalizing = _PyInterpreterState_GetFinalizing(tstate->interp);
-        finalizing_id = _PyInterpreterState_GetFinalizingID(tstate->interp);
-    }
-    // XXX else check &_PyRuntime._main_interpreter._initial_thread
-    if (finalizing == NULL) {
-        return 0;
-    }
-    else if (finalizing == tstate) {
-        return 0;
-    }
-    else if (finalizing_id == PyThread_get_thread_ident()) {
-        /* gh-109793: we must have switched interpreters. */
-        return 0;
-    }
-    return 1;
+    int state = _Py_atomic_load_int_relaxed(&tstate->state);
+    return state == _Py_THREAD_SHUTTING_DOWN;
+}
+
+void
+_PyThreadState_HangThread(PyThreadState *tstate)
+{
+    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+    decref_threadstate(tstate_impl);
+    PyThread_hang_thread();
 }
 
 /********************/
index 386a8451dc40c0dabe5b474bb738017910e8c936..862b2e59eb77d5411b62b8145aa201e56b6d74e9 100644 (file)
@@ -241,7 +241,7 @@ _Py_qsbr_unregister(PyThreadState *tstate)
     // gh-119369: GIL must be released (if held) to prevent deadlocks, because
     // we might not have an active tstate, which means that blocking on PyMutex
     // locks will not implicitly release the GIL.
-    assert(!tstate->_status.holds_gil);
+    assert(!tstate->holds_gil);
 
     PyMutex_Lock(&shared->mutex);
     // NOTE: we must load (or reload) the thread state's qbsr inside the mutex
index c70b0ddca059c2f4d0a58f3a5a92f19e89fd1457..6bda5ecd5708893ff2db593f11cd3001c5730474 100644 (file)
@@ -1,8 +1,5 @@
 # This file contains suppressions for the default (with GIL) build.
 # reference: https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions
 
-# gh-124878: race condition when interpreter finalized while daemon thread runs
-race:free_threadstate
-
 # https://gist.github.com/mpage/daaf32b39180c1989572957b943eb665
 thread:pthread_create
index c32c43db19cb961e676553fb7bdcf9bd75096c30..fd47c85af1adb166f87aca62811105aa3e1f57a3 100644 (file)
 # These entries are for warnings that trigger in a library function, as called
 # by a CPython function.
 
-# gh-124878: race condition when interpreter finalized while daemon thread runs
-race:free_threadstate
-
 # These warnings trigger directly in a CPython function.
 
 race_top:assign_version_tag
 race_top:_multiprocessing_SemLock_acquire_impl
 race_top:_Py_slot_tp_getattr_hook
-race_top:add_threadstate
 race_top:dump_traceback
 race_top:fatal_error
 race_top:_multiprocessing_SemLock_release_impl
 race_top:_PyFrame_GetCode
 race_top:_PyFrame_Initialize
-race_top:PyInterpreterState_ThreadHead
 race_top:_PyObject_TryGetInstanceAttribute
-race_top:PyThreadState_Next
 race_top:PyUnstable_InterpreterFrame_GetLine
-race_top:tstate_delete_common
-race_top:tstate_is_freed
 race_top:type_modified_unlocked
 race_top:write_thread_id
-race_top:PyThreadState_Clear
 
 # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading)
 race_top:rangeiter_next