]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-75723: Fix double evaluation of .pth and .site files in venvs (#149583)
authorBarry Warsaw <barry@python.org>
Mon, 11 May 2026 00:25:39 +0000 (17:25 -0700)
committerGitHub <noreply@github.com>
Mon, 11 May 2026 00:25:39 +0000 (17:25 -0700)
* Idempotent `.pth` execution in `site.addsitedir`
* potentially fix monkeypatch leak?

* fix blind copy paste of recommendation

* Update 2026-03-31-16-15-15.gh-issue-75723.BZ4Rsn.rst

* fix implicit merge conflict with 24c4aecc1674414d3dc3238625802778c4ad29d2

* Add failing tests for gh-75723

Based on @asottle branch !asottle-gh-75723 but refactored for `main`.
This will need a different backport.

* Repair gh-75723

The original fix is here: https://github.com/python/cpython/pull/147951
but I'm refactoring a bit for `main`.

* Refactor _make_mod() so we can use it to create package modules too

* Add myself to CODEOWNERS for the site module

---------

Co-authored-by: anthony sottile <asottile@umich.edu>
Co-authored-by: Filipe LaĆ­ns <lains@riseup.net>
.github/CODEOWNERS
Lib/site.py
Lib/test/test_site.py
Misc/NEWS.d/next/Core_and_Builtins/2026-03-31-16-15-15.gh-issue-75723.BZ4Rsn.rst [new file with mode: 0644]

index f4ffa24edca4532bec9deae45951d79ea03a0d71..709b434b0679582d2d026af9cb504d78c762fe95 100644 (file)
@@ -573,9 +573,9 @@ Lib/shutil.py                 @giampaolo
 Lib/test/test_shutil.py       @giampaolo
 
 # Site
-Lib/site.py                   @FFY00
-Lib/test/test_site.py         @FFY00
-Doc/library/site.rst          @FFY00
+Lib/site.py                   @FFY00 @warsaw
+Lib/test/test_site.py         @FFY00 @warsaw
+Doc/library/site.rst          @FFY00 @warsaw
 
 # string.templatelib
 Doc/library/string.templatelib.rst        @lysnikolaou @AA-Turner
index 52dd9648734c3ecd7afe96eea07329f51f001881..cb1108dbaf1f8188ac2074423c1f731916eab311 100644 (file)
@@ -387,42 +387,48 @@ def addsitedir(sitedir, known_paths=None, *, defer_processing_start_files=False)
     else:
         reset = False
     sitedir, sitedircase = makepath(sitedir)
-    if not sitedircase in known_paths:
-        sys.path.append(sitedir)        # Add path component
+
+    # If the normcase'd new sitedir isn't already known, append it to
+    # sys.path, keep a record of it, and process all .pth and .start files
+    # found in that directory.  If the new sitedir is known, be sure not
+    # to process all of those twice!  gh-75723
+    if sitedircase not in known_paths:
+        sys.path.append(sitedir)
         known_paths.add(sitedircase)
-    try:
-        names = os.listdir(sitedir)
-    except OSError:
-        return
 
-    # The following phases are defined by PEP 829.
-    # Phases 1-3: Read .pth files, accumulating paths and import lines.
-    pth_names = sorted(
-        name for name in names
-        if name.endswith(".pth") and not name.startswith(".")
-    )
-    for name in pth_names:
-        _read_pth_file(sitedir, name, known_paths)
-
-    # Phases 6-7: Discover .start files and accumulate their entry points.
-    # Import lines from .pth files with a matching .start file are discarded
-    # at flush time by _exec_imports().
-    start_names = sorted(
-        name for name in names
-        if name.endswith(".start") and not name.startswith(".")
-    )
-    for name in start_names:
-        _read_start_file(sitedir, name)
+        try:
+            names = os.listdir(sitedir)
+        except OSError:
+            return
+
+        # The following phases are defined by PEP 829.
+        # Phases 1-3: Read .pth files, accumulating paths and import lines.
+        pth_names = sorted(
+            name for name in names
+            if name.endswith(".pth") and not name.startswith(".")
+        )
+        for name in pth_names:
+            _read_pth_file(sitedir, name, known_paths)
+
+        # Phases 6-7: Discover .start files and accumulate their entry points.
+        # Import lines from .pth files with a matching .start file are discarded
+        # at flush time by _exec_imports().
+        start_names = sorted(
+            name for name in names
+            if name.endswith(".start") and not name.startswith(".")
+        )
+        for name in start_names:
+            _read_start_file(sitedir, name)
 
-    # Generally, when addsitedir() is called explicitly, we'll want to process
-    # all the startup file data immediately.  However, when called through
-    # main(), we'll want to batch up all the startup file processing.  main()
-    # will set this flag to True to defer processing.
-    if not defer_processing_start_files:
-        process_startup_files()
+        # Generally, when addsitedir() is called explicitly, we'll want to process
+        # all the startup file data immediately.  However, when called through
+        # main(), we'll want to batch up all the startup file processing.  main()
+        # will set this flag to True to defer processing.
+        if not defer_processing_start_files:
+            process_startup_files()
 
     if reset:
-        known_paths = None
+        return None
 
     return known_paths
 
index ac69e2cbdbbe54761b3fe566709a2abda54a9108..9990b88548fc7ce37553633abc56b713b1411d1f 100644 (file)
@@ -196,8 +196,9 @@ class HelperFunctionsTests(unittest.TestCase):
         pth_file.cleanup(prep=True)
         with pth_file.create():
             # Pass defer_processing_start_files=True to prevent flushing.
-            site.addsitedir(pth_file.base_dir, set(),
-                            defer_processing_start_files=True)
+            site.addsitedir(
+                pth_file.base_dir, set(),
+                defer_processing_start_files=True)
             self.assertNotIn(pth_file.imported, sys.modules)
             site.process_startup_files()
             self.pth_file_tests(pth_file)
@@ -423,15 +424,14 @@ class PthFile:
 
         Used as a context manager: self.cleanup() is called on exit.
         """
-        FILE = open(self.file_path, 'w')
-        try:
-            print("#import @bad module name", file=FILE)
-            print("\n", file=FILE)
-            print("import %s" % self.imported, file=FILE)
-            print(self.good_dirname, file=FILE)
-            print(self.bad_dirname, file=FILE)
-        finally:
-            FILE.close()
+        with open(self.file_path, 'w') as fp:
+            print(f"""\
+#import @bad module name
+import {self.imported}
+{self.good_dirname}
+{self.bad_dirname}
+""", file=fp)
+
         os.mkdir(self.good_dir_path)
         try:
             yield self
@@ -944,6 +944,28 @@ class StartFileTests(unittest.TestCase):
             f.write(content)
         return basename
 
+    def _make_mod(self, contents, name='mod', *, package=False, on_path=False):
+        """Write an importable module (or package), returning its parent dir."""
+        extdir = os.path.join(self.sitedir, 'extdir')
+        os.makedirs(extdir, exist_ok=True)
+
+        # Put the code in a package's dunder-init or flat module.
+        if package:
+            pkgdir = os.path.join(extdir, name)
+            os.mkdir(pkgdir)
+            modpath = os.path.join(pkgdir, '__init__.py')
+        else:
+            modpath = os.path.join(extdir, f'{name}.py')
+
+        with open(modpath, 'w') as fp:
+            fp.write(contents)
+
+        self.addCleanup(sys.modules.pop, name, None)
+        if on_path:
+            # Don't worry, DirsOnSysPath() in setUp() will clean this up.
+            sys.path.insert(0, extdir)
+        return extdir
+
     def _all_entrypoints(self):
         """Flatten _pending_entrypoints dict into a list of (filename, entry) tuples."""
         result = []
@@ -1168,18 +1190,12 @@ class StartFileTests(unittest.TestCase):
 
     def test_execute_entrypoints_with_callable(self):
         # Entrypoint with callable is invoked.
-        mod_dir = os.path.join(self.sitedir, 'epmod')
-        os.mkdir(mod_dir)
-        init_file = os.path.join(mod_dir, '__init__.py')
-        with open(init_file, 'w') as f:
-            f.write("""\
+        self._make_mod("""\
 called = False
 def startup():
     global called
     called = True
-""")
-        sys.path.insert(0, self.sitedir)
-        self.addCleanup(sys.modules.pop, 'epmod', None)
+""", name='epmod', package=True, on_path=True)
         fullname = os.path.join(self.sitedir, 'epmod.start')
         site._pending_entrypoints[fullname] = ['epmod:startup']
         site._execute_start_entrypoints()
@@ -1218,16 +1234,10 @@ def startup():
 
     def test_execute_entrypoints_callable_error(self):
         # Callable that raises prints traceback but continues.
-        mod_dir = os.path.join(self.sitedir, 'badmod')
-        os.mkdir(mod_dir)
-        init_file = os.path.join(mod_dir, '__init__.py')
-        with open(init_file, 'w') as f:
-            f.write("""\
+        self._make_mod("""\
 def fail():
     raise RuntimeError("boom")
-""")
-        sys.path.insert(0, self.sitedir)
-        self.addCleanup(sys.modules.pop, 'badmod', None)
+""", name='badmod', package=True, on_path=True)
         fullname = os.path.join(self.sitedir, 'badmod.start')
         site._pending_entrypoints[fullname] = ['badmod:fail']
         with captured_stderr() as err:
@@ -1237,18 +1247,12 @@ def fail():
 
     def test_execute_entrypoints_duplicates_called_twice(self):
         # PEP 829: duplicate entry points execute multiple times.
-        mod_dir = os.path.join(self.sitedir, 'countmod')
-        os.mkdir(mod_dir)
-        init_file = os.path.join(mod_dir, '__init__.py')
-        with open(init_file, 'w') as f:
-            f.write("""\
+        self._make_mod("""\
 call_count = 0
 def bump():
     global call_count
     call_count += 1
-""")
-        sys.path.insert(0, self.sitedir)
-        self.addCleanup(sys.modules.pop, 'countmod', None)
+""", name='countmod', package=True, on_path=True)
         fullname = os.path.join(self.sitedir, 'countmod.start')
         site._pending_entrypoints[fullname] = [
             'countmod:bump', 'countmod:bump']
@@ -1279,18 +1283,12 @@ def bump():
     def test_exec_imports_suppressed_by_empty_matching_start(self):
         self._make_start("", name='foo')
         self._make_pth("import epmod; epmod.startup()", name='foo')
-        mod_dir = os.path.join(self.sitedir, 'epmod')
-        os.mkdir(mod_dir)
-        init_file = os.path.join(mod_dir, '__init__.py')
-        with open(init_file, 'w') as f:
-            f.write("""\
+        self._make_mod("""\
 called = False
 def startup():
     global called
     called = True
-""")
-        sys.path.insert(0, self.sitedir)
-        self.addCleanup(sys.modules.pop, 'epmod', None)
+""", name='epmod', package=True, on_path=True)
         site._read_pth_file(self.sitedir, 'foo.pth', set())
         site._read_start_file(self.sitedir, 'foo.start')
         site._exec_imports()
@@ -1420,18 +1418,12 @@ def startup():
         # point may live in a module reachable only via a .pth-extended
         # path.  If the flush phases were inverted, resolving the entry
         # point would fail with ModuleNotFoundError.
-        extdir = os.path.join(self.sitedir, 'extdir')
-        os.mkdir(extdir)
-        modpath = os.path.join(extdir, 'mod.py')
-        with open(modpath, 'w') as f:
-            f.write("""\
+        extdir = self._make_mod("""\
 called = False
 def hook():
     global called
     called = True
 """)
-        self.addCleanup(sys.modules.pop, 'mod', None)
-
         # extdir is not on sys.path; only the .pth file makes it so.
         self.assertNotIn(extdir, sys.path)
         self._make_pth("extdir\n", name='extlib')
@@ -1447,6 +1439,45 @@ def hook():
             "entry point did not run; .pth path was likely not applied "
             "before .start entry-point execution")
 
+    # --- bugs ---
+
+    # gh-75723
+    def test_addsitdir_idempotent_pth(self):
+        # Adding the same sitedir twice with a known_paths, should not
+        # process .pth files twice.
+        extdir = self._make_mod("""\
+_pth_count = 0
+""")
+        self._make_pth(f"""\
+{extdir}
+import mod; mod._pth_count += 1
+""")
+        dirs = set()
+        dirs = site.addsitedir(self.sitedir, dirs)
+        dirs = site.addsitedir(self.sitedir, dirs)
+        import mod
+        self.assertEqual(mod._pth_count, 1)
+
+    def test_addsitdir_idempotent_start(self):
+        # Adding the same sitedir twice with a known_paths, should not
+        # process .pth files twice.
+        extdir = self._make_mod("""\
+_pth_count = 0
+def increment():
+    global _pth_count
+    _pth_count += 1
+""")
+        self._make_pth(f"""\
+{extdir}
+""")
+        self._make_start("""\
+mod:increment
+""")
+        dirs = set()
+        dirs = site.addsitedir(self.sitedir, dirs)
+        dirs = site.addsitedir(self.sitedir, dirs)
+        import mod
+        self.assertEqual(mod._pth_count, 1)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-31-16-15-15.gh-issue-75723.BZ4Rsn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-31-16-15-15.gh-issue-75723.BZ4Rsn.rst
new file mode 100644 (file)
index 0000000..596ca89
--- /dev/null
@@ -0,0 +1 @@
+Avoid re-executing ``.pth`` files when :func:`site.addsitedir` is called for a known directory.