From: Gregory P. Smith <68491+gpshead@users.noreply.github.com> Date: Sat, 11 Apr 2026 21:54:23 +0000 (-0700) Subject: gh-146302: make Py_IsInitialized() thread-safe and reflect true init completion ... X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=64afa947f454b0295dd08de1029da8c1d882a99f;p=thirdparty%2FPython%2Fcpython.git gh-146302: make Py_IsInitialized() thread-safe and reflect true init completion (GH-146303) ## Summary - Move the `runtime->initialized = 1` store from before `site.py` import to the end of `init_interp_main()`, so `Py_IsInitialized()` only returns true after initialization has fully completed - Access `initialized` and `core_initialized` through new inline accessors that use atomic loads and stores (`_Py_atomic_load_int()` / `_Py_atomic_store_int()`), which also removes the data-race undefined behavior of concurrent plain reads - `PySys_AddAuditHook()` now uses the accessor, so with the flag move it correctly skips audit hook invocation during all init phases (matching the documented "after runtime initialization" behavior) ... We could argue that running these earlier would be good even if the intent was never explicitly expressed, but that'd be its own issue. ## Motivation `Py_IsInitialized()` returned 1 while `Py_InitializeEx()` was still running — specifically, before `site.py` had been imported. See https://github.com/PyO3/pyo3/issues/5900 where a second thread could acquire the GIL and start executing Python with an incomplete `sys.path` because `site.py` hadn't finished. The flag was also a plain `int` with no atomic operations, making concurrent reads a C-standard data race, though unlikely to manifest. ## Regression test: Without this change, the added test fails on `main` as expected, with `ERROR: Py_IsInitialized() was true during site import`. 
--- Co-Authored-By: Claude Opus 4.6 (1M context) --- diff --git a/Doc/c-api/interp-lifecycle.rst b/Doc/c-api/interp-lifecycle.rst index 189d8e424f68..186ab4370bcb 100644 --- a/Doc/c-api/interp-lifecycle.rst +++ b/Doc/c-api/interp-lifecycle.rst @@ -410,6 +410,11 @@ Initializing and finalizing the interpreter (zero) if not. After :c:func:`Py_FinalizeEx` is called, this returns false until :c:func:`Py_Initialize` is called again. + .. versionchanged:: next + This function no longer returns true until initialization has fully + completed, including import of the :mod:`site` module. Previously it + could return true while :c:func:`Py_Initialize` was still running. + .. c:function:: int Py_IsFinalizing() diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 7fc7f343fe60..fcd2ae9b1d1f 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -56,6 +56,29 @@ _PyRuntimeState_SetFinalizing(_PyRuntimeState *runtime, PyThreadState *tstate) { } } +// Atomic so a thread that reads initialized=1 observes all writes +// from the initialization sequence (gh-146302). 
+ +static inline int +_PyRuntimeState_GetCoreInitialized(_PyRuntimeState *runtime) { + return _Py_atomic_load_int(&runtime->core_initialized); +} + +static inline void +_PyRuntimeState_SetCoreInitialized(_PyRuntimeState *runtime, int initialized) { + _Py_atomic_store_int(&runtime->core_initialized, initialized); +} + +static inline int +_PyRuntimeState_GetInitialized(_PyRuntimeState *runtime) { + return _Py_atomic_load_int(&runtime->initialized); +} + +static inline void +_PyRuntimeState_SetInitialized(_PyRuntimeState *runtime, int initialized) { + _Py_atomic_store_int(&runtime->initialized, initialized); +} + #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 05369ef9f009..145e66de9984 100644 --- a/Include/internal/pycore_runtime_structs.h +++ b/Include/internal/pycore_runtime_structs.h @@ -158,10 +158,18 @@ struct pyruntimestate { /* Is Python preinitialized? Set to 1 by Py_PreInitialize() */ int preinitialized; - /* Is Python core initialized? Set to 1 by _Py_InitializeCore() */ + /* Is Python core initialized? Set to 1 by _Py_InitializeCore(). + + Use _PyRuntimeState_GetCoreInitialized() and + _PyRuntimeState_SetCoreInitialized() to access it, + don't access it directly. */ int core_initialized; - /* Is Python fully initialized? Set to 1 by Py_Initialize() */ + /* Is Python fully initialized? Set to 1 by Py_Initialize(). + + Use _PyRuntimeState_GetInitialized() and + _PyRuntimeState_SetInitialized() to access it, + don't access it directly. */ int initialized; /* Set by Py_FinalizeEx(). 
Only reset to NULL if Py_Initialize() diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index a2de58c29264..1087cbd0836f 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1930,6 +1930,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): out, err = self.run_embedded_interpreter("test_init_in_background_thread") self.assertEqual(err, "") + def test_isinitialized_false_during_site_import(self): + # gh-146302: Py_IsInitialized() must not return true during site import. + out, err = self.run_embedded_interpreter( + "test_isinitialized_false_during_site_import") + self.assertEqual(err, "") + class AuditingTests(EmbeddingTestsMixin, unittest.TestCase): def test_open_code_hook(self): diff --git a/Misc/NEWS.d/next/C_API/2026-03-22-00-00-00.gh-issue-146302.PyIsInit.rst b/Misc/NEWS.d/next/C_API/2026-03-22-00-00-00.gh-issue-146302.PyIsInit.rst new file mode 100644 index 000000000000..e194e2bb2c37 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2026-03-22-00-00-00.gh-issue-146302.PyIsInit.rst @@ -0,0 +1,3 @@ +:c:func:`Py_IsInitialized` no longer returns true until initialization has +fully completed, including import of the :mod:`site` module. The underlying +runtime flags now use atomic operations. diff --git a/Programs/_testembed.c b/Programs/_testembed.c index d4d2a7131ccb..285f4f091b2f 100644 --- a/Programs/_testembed.c +++ b/Programs/_testembed.c @@ -2000,6 +2000,8 @@ static int test_init_main(void) config._init_main = 0; init_from_config_clear(&config); + assert(Py_IsInitialized() == 0); + /* sys.stdout don't exist yet: it is created by _Py_InitializeMain() */ int res = PyRun_SimpleString( "import sys; " @@ -2203,6 +2205,52 @@ static int test_init_in_background_thread(void) return PyThread_join_thread(handle); } +/* gh-146302: Py_IsInitialized() must not return true during site import. 
*/ +static int _initialized_during_site_import = -1; /* -1 = not observed */ + +static int hook_check_initialized_on_site_import( + const char *event, PyObject *args, void *userData) +{ + if (strcmp(event, "import") == 0 && args != NULL) { + PyObject *name = PyTuple_GetItem(args, 0); + if (name != NULL && PyUnicode_Check(name) + && PyUnicode_CompareWithASCIIString(name, "site") == 0 + && _initialized_during_site_import == -1) + { + _initialized_during_site_import = Py_IsInitialized(); + } + } + return 0; +} + +static int test_isinitialized_false_during_site_import(void) +{ + _initialized_during_site_import = -1; + + /* Register audit hook before initialization */ + PySys_AddAuditHook(hook_check_initialized_on_site_import, NULL); + + _testembed_initialize(); + + if (_initialized_during_site_import == -1) { + error("audit hook never observed site import"); + Py_Finalize(); + return 1; + } + if (_initialized_during_site_import != 0) { + error("Py_IsInitialized() was true during site import"); + Py_Finalize(); + return 1; + } + if (!Py_IsInitialized()) { + error("Py_IsInitialized() was false after Py_Initialize()"); + return 1; + } + + Py_Finalize(); + return 0; +} + #ifndef MS_WINDOWS #include "test_frozenmain.h" // M_test_frozenmain @@ -2693,6 +2741,7 @@ static struct TestCase TestCases[] = { {"test_init_use_frozen_modules", test_init_use_frozen_modules}, {"test_init_main_interpreter_settings", test_init_main_interpreter_settings}, {"test_init_in_background_thread", test_init_in_background_thread}, + {"test_isinitialized_false_during_site_import", test_isinitialized_false_during_site_import}, // Audit {"test_open_code_hook", test_open_code_hook}, diff --git a/Python/preconfig.c b/Python/preconfig.c index 0fdc0a873177..2c8c18284c1d 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -928,7 +928,7 @@ _PyPreConfig_Write(const PyPreConfig *src_config) return status; } - if (_PyRuntime.core_initialized) { + if (_Py_IsCoreInitialized()) { /* bpo-34008: Calling this 
functions after Py_Initialize() ignores the new configuration. */ return _PyStatus_OK(); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8be9e6d73738..d9fc28475a48 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -170,13 +170,13 @@ int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \ int _Py_IsCoreInitialized(void) { - return _PyRuntime.core_initialized; + return _PyRuntimeState_GetCoreInitialized(&_PyRuntime); } int Py_IsInitialized(void) { - return _PyRuntime.initialized; + return _PyRuntimeState_GetInitialized(&_PyRuntime); } @@ -530,7 +530,7 @@ static PyStatus pycore_init_runtime(_PyRuntimeState *runtime, const PyConfig *config) { - if (runtime->initialized) { + if (_PyRuntimeState_GetInitialized(runtime)) { return _PyStatus_ERR("main interpreter already initialized"); } @@ -1032,7 +1032,7 @@ pyinit_config(_PyRuntimeState *runtime, } /* Only when we get here is the runtime core fully initialized */ - runtime->core_initialized = 1; + _PyRuntimeState_SetCoreInitialized(runtime, 1); return _PyStatus_OK(); } @@ -1359,7 +1359,7 @@ init_interp_main(PyThreadState *tstate) * or pure Python code in the standard library won't work. 
*/ if (is_main_interp) { - interp->runtime->initialized = 1; + _PyRuntimeState_SetInitialized(interp->runtime, 1); } return _PyStatus_OK(); } @@ -1471,8 +1471,6 @@ init_interp_main(PyThreadState *tstate) Py_XDECREF(warnings_module); } Py_XDECREF(warnoptions); - - interp->runtime->initialized = 1; } if (config->site_import) { @@ -1568,6 +1566,10 @@ init_interp_main(PyThreadState *tstate) assert(!_PyErr_Occurred(tstate)); + if (is_main_interp) { + _PyRuntimeState_SetInitialized(interp->runtime, 1); + } + return _PyStatus_OK(); } @@ -1587,11 +1589,11 @@ static PyStatus pyinit_main(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; - if (!interp->runtime->core_initialized) { + if (!_PyRuntimeState_GetCoreInitialized(interp->runtime)) { return _PyStatus_ERR("runtime core not initialized"); } - if (interp->runtime->initialized) { + if (_PyRuntimeState_GetInitialized(interp->runtime)) { return pyinit_main_reconfigure(tstate); } @@ -1645,9 +1647,8 @@ Py_InitializeEx(int install_sigs) if (_PyStatus_EXCEPTION(status)) { Py_ExitStatusException(status); } - _PyRuntimeState *runtime = &_PyRuntime; - if (runtime->initialized) { + if (Py_IsInitialized()) { /* bpo-33932: Calling Py_Initialize() twice does nothing. */ return; } @@ -2352,7 +2353,7 @@ _Py_Finalize(_PyRuntimeState *runtime) int status = 0; /* Bail out early if already finalized (or never initialized). */ - if (!runtime->initialized) { + if (!_PyRuntimeState_GetInitialized(runtime)) { return status; } @@ -2387,8 +2388,8 @@ _Py_Finalize(_PyRuntimeState *runtime) when they attempt to take the GIL (ex: PyEval_RestoreThread()). */ _PyInterpreterState_SetFinalizing(tstate->interp, tstate); _PyRuntimeState_SetFinalizing(runtime, tstate); - runtime->initialized = 0; - runtime->core_initialized = 0; + _PyRuntimeState_SetInitialized(runtime, 0); + _PyRuntimeState_SetCoreInitialized(runtime, 0); // XXX Call something like _PyImport_Disable() here? 
@@ -2614,7 +2615,7 @@ new_interpreter(PyThreadState **tstate_p, } _PyRuntimeState *runtime = &_PyRuntime; - if (!runtime->initialized) { + if (!_PyRuntimeState_GetInitialized(runtime)) { return _PyStatus_ERR("Py_Initialize must be called first"); } @@ -3454,10 +3455,10 @@ fatal_error_dump_runtime(int fd, _PyRuntimeState *runtime) _Py_DumpHexadecimal(fd, (uintptr_t)finalizing, sizeof(finalizing) * 2); PUTS(fd, ")"); } - else if (runtime->initialized) { + else if (_PyRuntimeState_GetInitialized(runtime)) { PUTS(fd, "initialized"); } - else if (runtime->core_initialized) { + else if (_PyRuntimeState_GetCoreInitialized(runtime)) { PUTS(fd, "core initialized"); } else if (runtime->preinitialized) { diff --git a/Python/pystate.c b/Python/pystate.c index 143175da0f45..3f539a4c2551 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -330,8 +330,8 @@ init_runtime(_PyRuntimeState *runtime, { assert(!runtime->preinitializing); assert(!runtime->preinitialized); - assert(!runtime->core_initialized); - assert(!runtime->initialized); + assert(!_PyRuntimeState_GetCoreInitialized(runtime)); + assert(!_PyRuntimeState_GetInitialized(runtime)); assert(!runtime->_initialized); runtime->open_code_hook = open_code_hook; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 408d04684a91..1ee0b3bec684 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -34,6 +34,7 @@ Data members: #include "pycore_pymem.h" // _PyMem_DefaultRawFree() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_pystats.h" // _Py_PrintSpecializationStats() +#include "pycore_runtime.h" // _PyRuntimeState_Get*() #include "pycore_structseq.h" // _PyStructSequence_InitBuiltinWithFlags() #include "pycore_sysmodule.h" // export _PySys_GetSizeOf() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() @@ -471,7 +472,7 @@ PySys_AddAuditHook(Py_AuditHookFunction hook, void *userData) PySys_AddAuditHook() can be called before Python is initialized. 
*/ _PyRuntimeState *runtime = &_PyRuntime; PyThreadState *tstate; - if (runtime->initialized) { + if (_PyRuntimeState_GetInitialized(runtime)) { tstate = _PyThreadState_GET(); } else {