]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-83065: Fix import deadlock by implementing hierarchical module locking (GH-137196)
authorGregory P. Smith <68491+gpshead@users.noreply.github.com>
Tue, 28 Apr 2026 08:06:23 +0000 (01:06 -0700)
committerGitHub <noreply@github.com>
Tue, 28 Apr 2026 08:06:23 +0000 (01:06 -0700)
Make _find_and_load() acquire the module locks for the full
dotted-name chain (parent before child) when loading a nested module, so
both threads contend on the same first lock and serialise instead of
deadlocking.

When acquiring a parent's lock would itself deadlock with another thread
that is loading that parent (cross-package circular imports), the parent's
lock is skipped and the partially-initialised parent is accepted -- the
same policy _lock_unlock_module() already applies on the existing code
path -- so concurrent circular imports that worked before continue to work.

Doc/whatsnew/3.15.rst
Lib/importlib/_bootstrap.py
Lib/test/test_importlib/test_threaded_import.py
Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst [new file with mode: 0644]

index ee49d043de264195be2cfc188afd82ef267fd281..6f0f7021de8e7ad5c6bb4249f6a5da33e5b9db99 100644 (file)
@@ -677,6 +677,13 @@ Other language changes
   the existing support for unary minus.
   (Contributed by Bartosz SÅ‚awecki in :gh:`145239`.)
 
+* The import system now acquires per-module locks in hierarchical order
+  (parent packages before their submodules). This fixes a long-standing
+  deadlock where one thread importing ``pkg.sub`` and another importing
+  ``pkg.sub.mod`` could each block the other when ``pkg/sub/__init__.py``
+  imports ``pkg.sub.mod``.
+  (Contributed by Gregory P. Smith in :gh:`83065`.)
+
 
 New modules
 ===========
index 45beb51659f5b70f6d6de7f1afd82af4dbfd5c7b..06dc45d71d7ca4ab678923df1da91d295682cd31 100644 (file)
@@ -424,6 +424,64 @@ class _ModuleLockManager:
         self._lock.release()
 
 
+def _get_module_chain(name):
+    """Return the chain of dotted-name prefixes from root to leaf.
+
+    For example: 'a.b.c' -> ['a', 'a.b', 'a.b.c']
+    """
+    parts = name.split('.')
+    return ['.'.join(parts[:i+1]) for i in range(len(parts))]
+
+
+class _HierarchicalLockManager:
+    """Manages acquisition of multiple module locks in hierarchical order.
+
+    This prevents deadlocks by ensuring all threads acquire locks in the
+    same order (parent modules before child modules).
+    """
+
+    def __init__(self, name):
+        self._name = name
+        self._module_chain = _get_module_chain(name)
+        self._locks = []
+
+    def __enter__(self):
+        try:
+            for module_name in self._module_chain:
+                # Only acquire lock if module is not already fully loaded
+                module = sys.modules.get(module_name)
+                if (module is None or
+                    getattr(getattr(module, "__spec__", None),
+                            "_initializing", False)):
+                    lock = _get_module_lock(module_name)
+                    try:
+                        lock.acquire()
+                    except _DeadlockError:
+                        if module_name == self._name:
+                            raise
+                        # The parent is being initialised by a thread that
+                        # is (transitively) waiting on a lock we hold.
+                        # Apply the same policy as _lock_unlock_module():
+                        # accept a partially-initialised parent for circular
+                        # imports rather than failing the whole chain.
+                        continue
+                    self._locks.append((module_name, lock))
+        except:
+            # __exit__ is not called when __enter__ raises (e.g. _DeadlockError
+            # on the leaf lock, or KeyboardInterrupt), so release whatever we
+            # already hold to avoid permanently leaking held module locks.
+            for module_name, lock in reversed(self._locks):
+                lock.release()
+            self._locks.clear()
+            raise
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        for module_name, lock in reversed(self._locks):
+            lock.release()
+        self._locks.clear()
+
+
 # The following two functions are for consumption by Python/import.c.
 
 def _get_module_lock(name):
@@ -1276,7 +1334,13 @@ def _find_and_load(name, import_):
     module = sys.modules.get(name, _NEEDS_LOADING)
     if (module is _NEEDS_LOADING or
         getattr(getattr(module, "__spec__", None), "_initializing", False)):
-        with _ModuleLockManager(name):
+
+        if '.' in name:
+            lock_manager = _HierarchicalLockManager(name)
+        else:
+            lock_manager = _ModuleLockManager(name)
+
+        with lock_manager:
             module = sys.modules.get(name, _NEEDS_LOADING)
             if module is _NEEDS_LOADING:
                 return _find_and_load_unlocked(name, import_)
index 8b793ebf29bcae4fe88a71287850619dff267e82..6875fdca9c8528d173d10bbb57524bb45a5c9bb9 100644 (file)
@@ -324,6 +324,143 @@ raise RuntimeError("Intentional import failure")
             # Neither thread should have errors about stale modules
             self.assertEqual(errors, [], f"Race condition detected: {errors}")
 
+    def test_hierarchical_import_deadlock(self):
+        # Regression test for bpo-38884 / gh-83065
+        # Tests that concurrent imports at different hierarchy levels
+        # don't deadlock when parent imports child in __init__.py
+
+        # Create package structure:
+        # package/__init__.py: from package import subpackage
+        # package/subpackage/__init__.py: from package.subpackage.module import *
+        # package/subpackage/module.py: class SomeClass: pass
+
+        pkg_dir = os.path.join(TESTFN, 'hier_deadlock_pkg')
+        os.makedirs(pkg_dir)
+        self.addCleanup(shutil.rmtree, TESTFN)
+
+        subpkg_dir = os.path.join(pkg_dir, 'subpackage')
+        os.makedirs(subpkg_dir)
+
+        with open(os.path.join(pkg_dir, "__init__.py"), "w") as f:
+            f.write("from hier_deadlock_pkg import subpackage\n")
+
+        with open(os.path.join(subpkg_dir, "__init__.py"), "w") as f:
+            f.write("from hier_deadlock_pkg.subpackage.module import *\n")
+
+        with open(os.path.join(subpkg_dir, "module.py"), "w") as f:
+            f.write("class SomeClass:\n    pass\n")
+
+        sys.path.insert(0, TESTFN)
+        self.addCleanup(sys.path.remove, TESTFN)
+        self.addCleanup(forget, 'hier_deadlock_pkg')
+        self.addCleanup(forget, 'hier_deadlock_pkg.subpackage')
+        self.addCleanup(forget, 'hier_deadlock_pkg.subpackage.module')
+
+        importlib.invalidate_caches()
+
+        errors = []
+        results = []
+        barrier = threading.Barrier(2)
+
+        def t1():
+            barrier.wait()
+            try:
+                import hier_deadlock_pkg.subpackage
+                results.append('t1_success')
+            except Exception as e:
+                errors.append(('t1', type(e).__name__, str(e)))
+
+        def t2():
+            barrier.wait()
+            try:
+                import hier_deadlock_pkg.subpackage.module
+                results.append('t2_success')
+            except Exception as e:
+                errors.append(('t2', type(e).__name__, str(e)))
+
+        # Run multiple times to increase chance of hitting race condition
+        for i in range(10):
+            for mod in ['hier_deadlock_pkg', 'hier_deadlock_pkg.subpackage',
+                       'hier_deadlock_pkg.subpackage.module']:
+                sys.modules.pop(mod, None)
+
+            errors.clear()
+            results.clear()
+            barrier.reset()
+
+            thread1 = threading.Thread(target=t1)
+            thread2 = threading.Thread(target=t2)
+
+            thread1.start()
+            thread2.start()
+
+            thread1.join(timeout=5)
+            thread2.join(timeout=5)
+
+            if thread1.is_alive() or thread2.is_alive():
+                self.fail(f"Threads deadlocked on iteration {i}")
+
+            self.assertEqual(
+                errors, [],
+                f"Import(s) failed on iteration {i}: {errors}")
+            self.assertEqual(
+                sorted(results), ['t1_success', 't2_success'],
+                f"Not all imports succeeded on iteration {i}: {results}")
+
+    def test_cross_package_circular_import(self):
+        # Two packages whose __init__.py each import a submodule of the
+        # other. Concurrent imports of submodules of each must not raise
+        # _DeadlockError; the import system accepts a partially-initialised
+        # parent in this case (see _lock_unlock_module).
+        os.makedirs(os.path.join(TESTFN, "circ_a"))
+        os.makedirs(os.path.join(TESTFN, "circ_b"))
+        self.addCleanup(shutil.rmtree, TESTFN)
+        with open(os.path.join(TESTFN, "circ_a", "__init__.py"), "w") as f:
+            f.write("import time; time.sleep(0.03)\nimport circ_b.other\n")
+        with open(os.path.join(TESTFN, "circ_b", "__init__.py"), "w") as f:
+            f.write("import time; time.sleep(0.03)\nimport circ_a.other\n")
+        for pkg in ("circ_a", "circ_b"):
+            for mod in ("sub.py", "other.py"):
+                with open(os.path.join(TESTFN, pkg, mod), "w") as f:
+                    f.write("X = 1\n")
+
+        sys.path.insert(0, TESTFN)
+        self.addCleanup(sys.path.remove, TESTFN)
+        for mod in ("circ_a", "circ_a.sub", "circ_a.other",
+                    "circ_b", "circ_b.sub", "circ_b.other"):
+            self.addCleanup(forget, mod)
+        importlib.invalidate_caches()
+
+        errors = []
+        barrier = threading.Barrier(2)
+
+        def do_import(name):
+            barrier.wait()
+            try:
+                importlib.import_module(name)
+            except Exception as e:
+                errors.append((name, type(e).__name__, str(e)))
+
+        for i in range(10):
+            for mod in ("circ_a", "circ_a.sub", "circ_a.other",
+                        "circ_b", "circ_b.sub", "circ_b.other"):
+                sys.modules.pop(mod, None)
+            errors.clear()
+            barrier.reset()
+
+            thread1 = threading.Thread(target=do_import, args=("circ_a.sub",))
+            thread2 = threading.Thread(target=do_import, args=("circ_b.sub",))
+            thread1.start()
+            thread2.start()
+            thread1.join(timeout=5)
+            thread2.join(timeout=5)
+
+            if thread1.is_alive() or thread2.is_alive():
+                self.fail(f"Threads deadlocked on iteration {i}")
+            self.assertEqual(
+                errors, [],
+                f"Import(s) failed on iteration {i}: {errors}")
+
 
 def setUpModule():
     thread_info = threading_helper.threading_setup()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-28-05-59-17.gh-issue-83065.f0UPNE.rst
new file mode 100644 (file)
index 0000000..81bfa45
--- /dev/null
@@ -0,0 +1,7 @@
+Fix a deadlock that could occur when one thread is importing a submodule
+(for example ``import pkg.sub.mod``) while another thread is importing one
+of its parent packages (for example ``import pkg.sub``) and that parent's
+``__init__.py`` itself imports the submodule. The import system now
+acquires module locks in hierarchical (parent-before-child) order so the
+two threads serialise instead of raising
+``_DeadlockError``.