From: Gregory P. Smith <68491+gpshead@users.noreply.github.com> Date: Tue, 28 Apr 2026 08:06:23 +0000 (-0700) Subject: GH-83065: Fix import deadlock by implementing hierarchical module locking (GH-137196) X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=9a57179d74c1a20e3188779696c60c8dd812e6fb;p=thirdparty%2FPython%2Fcpython.git GH-83065: Fix import deadlock by implementing hierarchical module locking (GH-137196) Make _find_and_load() acquire the module locks for the full dotted-name chain (parent before child) when loading a nested module, so both threads contend on the same first lock and serialise instead of deadlocking. When acquiring a parent's lock would itself deadlock with another thread that is loading that parent (cross-package circular imports), the parent's lock is skipped and the partially-initialised parent is accepted -- the same policy _lock_unlock_module() already applies on the existing code path -- so concurrent circular imports that worked before continue to work. --- diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index ee49d043de26..6f0f7021de8e 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -677,6 +677,13 @@ Other language changes the existing support for unary minus. (Contributed by Bartosz Sławecki in :gh:`145239`.) +* The import system now acquires per-module locks in hierarchical order + (parent packages before their submodules). This fixes a long-standing + deadlock where one thread importing ``pkg.sub`` and another importing + ``pkg.sub.mod`` could each block the other when ``pkg/sub/__init__.py`` + imports ``pkg.sub.mod``. + (Contributed by Gregory P. Smith in :gh:`83065`.) + New modules =========== diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 45beb51659f5..06dc45d71d7c 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -424,6 +424,64 @@ class _ModuleLockManager: self._lock.release() +def _get_module_chain(name): + """Return the chain of dotted-name prefixes from root to leaf. + + For example: 'a.b.c' -> ['a', 'a.b', 'a.b.c'] + """ + parts = name.split('.') + return ['.'.join(parts[:i+1]) for i in range(len(parts))] + + +class _HierarchicalLockManager: + """Manages acquisition of multiple module locks in hierarchical order. + + This prevents deadlocks by ensuring all threads acquire locks in the + same order (parent modules before child modules). + """ + + def __init__(self, name): + self._name = name + self._module_chain = _get_module_chain(name) + self._locks = [] + + def __enter__(self): + try: + for module_name in self._module_chain: + # Only acquire lock if module is not already fully loaded + module = sys.modules.get(module_name) + if (module is None or + getattr(getattr(module, "__spec__", None), + "_initializing", False)): + lock = _get_module_lock(module_name) + try: + lock.acquire() + except _DeadlockError: + if module_name == self._name: + raise + # The parent is being initialised by a thread that + # is (transitively) waiting on a lock we hold. + # Apply the same policy as _lock_unlock_module(): + # accept a partially-initialised parent for circular + # imports rather than failing the whole chain. + continue + self._locks.append((module_name, lock)) + except: + # __exit__ is not called when __enter__ raises (e.g. _DeadlockError + # on the leaf lock, or KeyboardInterrupt), so release whatever we + # already hold to avoid permanently leaking held module locks. + for module_name, lock in reversed(self._locks): + lock.release() + self._locks.clear() + raise + return self + + def __exit__(self, *args, **kwargs): + for module_name, lock in reversed(self._locks): + lock.release() + self._locks.clear() + + # The following two functions are for consumption by Python/import.c. def _get_module_lock(name): @@ -1276,7 +1334,13 @@ def _find_and_load(name, import_): module = sys.modules.get(name, _NEEDS_LOADING) if (module is _NEEDS_LOADING or getattr(getattr(module, "__spec__", None), "_initializing", False)): - with _ModuleLockManager(name): + + if '.' in name: + lock_manager = _HierarchicalLockManager(name) + else: + lock_manager = _ModuleLockManager(name) + + with lock_manager: module = sys.modules.get(name, _NEEDS_LOADING) if module is _NEEDS_LOADING: return _find_and_load_unlocked(name, import_) diff --git a/Lib/test/test_importlib/test_threaded_import.py b/Lib/test/test_importlib/test_threaded_import.py index 8b793ebf29bc..6875fdca9c85 100644 --- a/Lib/test/test_importlib/test_threaded_import.py +++ b/Lib/test/test_importlib/test_threaded_import.py @@ -324,6 +324,143 @@ raise RuntimeError("Intentional import failure") # Neither thread should have errors about stale modules self.assertEqual(errors, [], f"Race condition detected: {errors}") + def test_hierarchical_import_deadlock(self): + # Regression test for bpo-38884 / gh-83065 + # Tests that concurrent imports at different hierarchy levels + # don't deadlock when parent imports child in __init__.py + + # Create package structure: + # package/__init__.py: from package import subpackage + # package/subpackage/__init__.py: from package.subpackage.module import * + # package/subpackage/module.py: class SomeClass: pass + + pkg_dir = os.path.join(TESTFN, 'hier_deadlock_pkg') + os.makedirs(pkg_dir) + self.addCleanup(shutil.rmtree, TESTFN) + + subpkg_dir = os.path.join(pkg_dir, 'subpackage') + os.makedirs(subpkg_dir) + + with open(os.path.join(pkg_dir, "__init__.py"), "w") as f: + f.write("from hier_deadlock_pkg import subpackage\n") + + with open(os.path.join(subpkg_dir, "__init__.py"), "w") as f: + f.write("from hier_deadlock_pkg.subpackage.module import *\n") + + with open(os.path.join(subpkg_dir, "module.py"), "w") as f: + f.write("class SomeClass:\n pass\n") + + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + self.addCleanup(forget, 'hier_deadlock_pkg') + self.addCleanup(forget, 'hier_deadlock_pkg.subpackage') + self.addCleanup(forget, 'hier_deadlock_pkg.subpackage.module') + + importlib.invalidate_caches() + + errors = [] + results = [] + barrier = threading.Barrier(2) + + def t1(): + barrier.wait() + try: + import hier_deadlock_pkg.subpackage + results.append('t1_success') + except Exception as e: + errors.append(('t1', type(e).__name__, str(e))) + + def t2(): + barrier.wait() + try: + import hier_deadlock_pkg.subpackage.module + results.append('t2_success') + except Exception as e: + errors.append(('t2', type(e).__name__, str(e))) + + # Run multiple times to increase chance of hitting race condition + for i in range(10): + for mod in ['hier_deadlock_pkg', 'hier_deadlock_pkg.subpackage', + 'hier_deadlock_pkg.subpackage.module']: + sys.modules.pop(mod, None) + + errors.clear() + results.clear() + barrier.reset() + + thread1 = threading.Thread(target=t1) + thread2 = threading.Thread(target=t2) + + thread1.start() + thread2.start() + + thread1.join(timeout=5) + thread2.join(timeout=5) + + if thread1.is_alive() or thread2.is_alive(): + self.fail(f"Threads deadlocked on iteration {i}") + + self.assertEqual( + errors, [], + f"Import(s) failed on iteration {i}: {errors}") + self.assertEqual( + sorted(results), ['t1_success', 't2_success'], + f"Not all imports succeeded on iteration {i}: {results}") + + def test_cross_package_circular_import(self): + # Two packages whose __init__.py each import a submodule of the + # other. Concurrent imports of submodules of each must not raise + # _DeadlockError; the import system accepts a partially-initialised + # parent in this case (see _lock_unlock_module). + os.makedirs(os.path.join(TESTFN, "circ_a")) + os.makedirs(os.path.join(TESTFN, "circ_b")) + self.addCleanup(shutil.rmtree, TESTFN) + with open(os.path.join(TESTFN, "circ_a", "__init__.py"), "w") as f: + f.write("import time; time.sleep(0.03)\nimport circ_b.other\n") + with open(os.path.join(TESTFN, "circ_b", "__init__.py"), "w") as f: + f.write("import time; time.sleep(0.03)\nimport circ_a.other\n") + for pkg in ("circ_a", "circ_b"): + for mod in ("sub.py", "other.py"): + with open(os.path.join(TESTFN, pkg, mod), "w") as f: + f.write("X = 1\n") + + sys.path.insert(0, TESTFN) + self.addCleanup(sys.path.remove, TESTFN) + for mod in ("circ_a", "circ_a.sub", "circ_a.other", + "circ_b", "circ_b.sub", "circ_b.other"): + self.addCleanup(forget, mod) + importlib.invalidate_caches() + + errors = [] + barrier = threading.Barrier(2) + + def do_import(name): + barrier.wait() + try: + importlib.import_module(name) + except Exception as e: + errors.append((name, type(e).__name__, str(e))) + + for i in range(10): + for mod in ("circ_a", "circ_a.sub", "circ_a.other", + "circ_b", "circ_b.sub", "circ_b.other"): + sys.modules.pop(mod, None) + errors.clear() + barrier.reset() + + thread1 = threading.Thread(target=do_import, args=("circ_a.sub",)) + thread2 = threading.Thread(target=do_import, args=("circ_b.sub",)) + thread1.start() + thread2.start() + thread1.join(timeout=5) + thread2.join(timeout=5) + + if thread1.is_alive() or thread2.is_alive(): + self.fail(f"Threads deadlocked on iteration {i}") + self.assertEqual( + errors, [], + f"Import(s) failed on iteration {i}: {errors}") + def setUpModule(): thread_info = threading_helper.threading_setup() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst new file mode 100644 index 000000000000..81bfa45c069f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst @@ -0,0 +1,7 @@ +Fix a deadlock that could occur when one thread is importing a submodule +(for example ``import pkg.sub.mod``) while another thread is importing one +of its parent packages (for example ``import pkg.sub``) and that parent's +``__init__.py`` itself imports the submodule. The import system now +acquires module locks in hierarchical (parent-before-child) order so the +two threads serialise instead of raising +``_DeadlockError``.