]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] gh-114763: Protect lazy loading modules from attribute access races (GH-114781...
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Mon, 26 Feb 2024 20:43:44 +0000 (21:43 +0100)
committerGitHub <noreply@github.com>
Mon, 26 Feb 2024 20:43:44 +0000 (20:43 +0000)
gh-114763: Protect lazy loading modules from attribute access races (GH-114781)

Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock.
(cherry picked from commit 200271c61db44d90759f8a8934949aefd72d5724)

Co-authored-by: Chris Markiewicz <effigies@gmail.com>
Lib/importlib/util.py
Lib/test/test_importlib/test_lazy.py
Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst [new file with mode: 0644]

index f4d6e82331516f93dda069900162dab6a370da14..7a2311ff121ecb13becaa20ab0d542c75261b2a3 100644 (file)
@@ -13,6 +13,7 @@ from ._bootstrap_external import spec_from_file_location
 
 import _imp
 import sys
+import threading
 import types
 
 
@@ -171,36 +172,54 @@ class _LazyModule(types.ModuleType):
 
     def __getattribute__(self, attr):
         """Trigger the load of the module and return the attribute."""
-        # All module metadata must be garnered from __spec__ in order to avoid
-        # using mutated values.
-        # Stop triggering this method.
-        self.__class__ = types.ModuleType
-        # Get the original name to make sure no object substitution occurred
-        # in sys.modules.
-        original_name = self.__spec__.name
-        # Figure out exactly what attributes were mutated between the creation
-        # of the module and now.
-        attrs_then = self.__spec__.loader_state['__dict__']
-        attrs_now = self.__dict__
-        attrs_updated = {}
-        for key, value in attrs_now.items():
-            # Code that set the attribute may have kept a reference to the
-            # assigned object, making identity more important than equality.
-            if key not in attrs_then:
-                attrs_updated[key] = value
-            elif id(attrs_now[key]) != id(attrs_then[key]):
-                attrs_updated[key] = value
-        self.__spec__.loader.exec_module(self)
-        # If exec_module() was used directly there is no guarantee the module
-        # object was put into sys.modules.
-        if original_name in sys.modules:
-            if id(self) != id(sys.modules[original_name]):
-                raise ValueError(f"module object for {original_name!r} "
-                                  "substituted in sys.modules during a lazy "
-                                  "load")
-        # Update after loading since that's what would happen in an eager
-        # loading situation.
-        self.__dict__.update(attrs_updated)
+        __spec__ = object.__getattribute__(self, '__spec__')
+        loader_state = __spec__.loader_state
+        with loader_state['lock']:
+            # Only the first thread to get the lock should trigger the load
+            # and reset the module's class. The rest can now getattr().
+            if object.__getattribute__(self, '__class__') is _LazyModule:
+                # The first thread comes here multiple times as it descends the
+                # call stack. The first time, it sets is_loading and triggers
+                # exec_module(), which will access module.__dict__, module.__name__,
+                # and/or module.__spec__, reentering this method. These accesses
+                # need to be allowed to proceed without triggering the load again.
+                if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
+                    return object.__getattribute__(self, attr)
+                loader_state['is_loading'] = True
+
+                __dict__ = object.__getattribute__(self, '__dict__')
+
+                # All module metadata must be gathered from __spec__ in order to avoid
+                # using mutated values.
+                # Get the original name to make sure no object substitution occurred
+                # in sys.modules.
+                original_name = __spec__.name
+                # Figure out exactly what attributes were mutated between the creation
+                # of the module and now.
+                attrs_then = loader_state['__dict__']
+                attrs_now = __dict__
+                attrs_updated = {}
+                for key, value in attrs_now.items():
+                    # Code that set an attribute may have kept a reference to the
+                    # assigned object, making identity more important than equality.
+                    if key not in attrs_then:
+                        attrs_updated[key] = value
+                    elif id(attrs_now[key]) != id(attrs_then[key]):
+                        attrs_updated[key] = value
+                __spec__.loader.exec_module(self)
+                # If exec_module() was used directly there is no guarantee the module
+                # object was put into sys.modules.
+                if original_name in sys.modules:
+                    if id(self) != id(sys.modules[original_name]):
+                        raise ValueError(f"module object for {original_name!r} "
+                                          "substituted in sys.modules during a lazy "
+                                          "load")
+                # Update after loading since that's what would happen in an eager
+                # loading situation.
+                __dict__.update(attrs_updated)
+                # Finally, stop triggering this method.
+                self.__class__ = types.ModuleType
+
         return getattr(self, attr)
 
     def __delattr__(self, attr):
@@ -244,5 +263,7 @@ class LazyLoader(Loader):
         loader_state = {}
         loader_state['__dict__'] = module.__dict__.copy()
         loader_state['__class__'] = module.__class__
+        loader_state['lock'] = threading.RLock()
+        loader_state['is_loading'] = False
         module.__spec__.loader_state = loader_state
         module.__class__ = _LazyModule
index cc993f333e355af364d4c2e06b653fca59e35f5b..38ab21907b58d9dcdd9fcb73b368656bdaf81220 100644 (file)
@@ -2,9 +2,12 @@ import importlib
 from importlib import abc
 from importlib import util
 import sys
+import time
+import threading
 import types
 import unittest
 
+from test.support import threading_helper
 from test.test_importlib import util as test_util
 
 
@@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
     module_name = 'lazy_loader_test'
     mutated_name = 'changed'
     loaded = None
+    load_count = 0
     source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
 
     def find_spec(self, name, path, target=None):
@@ -48,8 +52,10 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
         return util.spec_from_loader(name, util.LazyLoader(self))
 
     def exec_module(self, module):
+        time.sleep(0.01)  # Simulate a slow load.
         exec(self.source_code, module.__dict__)
         self.loaded = module
+        self.load_count += 1
 
 
 class LazyLoaderTests(unittest.TestCase):
@@ -59,8 +65,9 @@ class LazyLoaderTests(unittest.TestCase):
             # Classes that don't define exec_module() trigger TypeError.
             util.LazyLoader(object)
 
-    def new_module(self, source_code=None):
-        loader = TestingImporter()
+    def new_module(self, source_code=None, loader=None):
+        if loader is None:
+            loader = TestingImporter()
         if source_code is not None:
             loader.source_code = source_code
         spec = util.spec_from_loader(TestingImporter.module_name,
@@ -140,6 +147,37 @@ class LazyLoaderTests(unittest.TestCase):
             # Force the load; just care that no exception is raised.
             module.__name__
 
+    @threading_helper.requires_working_threading()
+    def test_module_load_race(self):
+        with test_util.uncache(TestingImporter.module_name):
+            loader = TestingImporter()
+            module = self.new_module(loader=loader)
+            self.assertEqual(loader.load_count, 0)
+
+            class RaisingThread(threading.Thread):
+                exc = None
+                def run(self):
+                    try:
+                        super().run()
+                    except Exception as exc:
+                        self.exc = exc
+
+            def access_module():
+                return module.attr
+
+            threads = []
+            for _ in range(2):
+                threads.append(thread := RaisingThread(target=access_module))
+                thread.start()
+
+            # Races could cause errors
+            for thread in threads:
+                thread.join()
+                self.assertIsNone(thread.exc)
+
+            # Or multiple load attempts
+            self.assertEqual(loader.load_count, 1)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst b/Misc/NEWS.d/next/Library/2024-01-30-23-28-29.gh-issue-114763.BRjKkg.rst
new file mode 100644 (file)
index 0000000..e8bdb83
--- /dev/null
@@ -0,0 +1,3 @@
+Protect modules loaded with :class:`importlib.util.LazyLoader` from race
+conditions when multiple threads try to access attributes before the loading
+is complete.