]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-125413: pathlib ABCs: use `scandir()` to speed up `glob()` (#126261)
authorBarney Gale <barney.gale@gmail.com>
Fri, 1 Nov 2024 17:48:58 +0000 (17:48 +0000)
committerGitHub <noreply@github.com>
Fri, 1 Nov 2024 17:48:58 +0000 (17:48 +0000)
Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly
reduces the number of `PathBase.stat()` calls needed when globbing.

There are no user-facing changes, because the pathlib ABCs are still
private and `Path.glob()` doesn't use the implementation in its superclass.

Lib/glob.py
Lib/pathlib/_abc.py
Lib/test/test_pathlib/test_pathlib_abc.py

index 574e5ad51b601dde974b089f3705266e21143ac2..ce9b3698888dd9694cc76058cd8adc6e17454e47 100644 (file)
@@ -364,12 +364,6 @@ class _GlobberBase:
         """
         raise NotImplementedError
 
-    @staticmethod
-    def parse_entry(entry):
-        """Returns the path of an entry yielded from scandir().
-        """
-        raise NotImplementedError
-
     # High-level methods
 
     def compile(self, pat):
@@ -438,6 +432,7 @@ class _GlobberBase:
             except OSError:
                 pass
             else:
+                prefix = self.add_slash(path)
                 for entry in entries:
                     if match is None or match(entry.name):
                         if dir_only:
@@ -446,7 +441,7 @@ class _GlobberBase:
                                     continue
                             except OSError:
                                 continue
-                        entry_path = self.parse_entry(entry)
+                        entry_path = self.concat_path(prefix, entry.name)
                         if dir_only:
                             yield from select_next(entry_path, exists=True)
                         else:
@@ -495,6 +490,7 @@ class _GlobberBase:
             except OSError:
                 pass
             else:
+                prefix = self.add_slash(path)
                 for entry in entries:
                     is_dir = False
                     try:
@@ -504,7 +500,7 @@ class _GlobberBase:
                         pass
 
                     if is_dir or not dir_only:
-                        entry_path = self.parse_entry(entry)
+                        entry_path = self.concat_path(prefix, entry.name)
                         if match is None or match(str(entry_path), match_pos):
                             if dir_only:
                                 yield from select_next(entry_path, exists=True)
@@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
     """
     lexists = staticmethod(os.path.lexists)
     scandir = staticmethod(os.scandir)
-    parse_entry = operator.attrgetter('path')
     concat_path = operator.add
 
     if os.name == 'nt':
index dfff8b460d1bf106104bf6eb65489e3acc19f8ae..cc7c1991d0e528a7352c0a9a72553bf3f39c2d69 100644 (file)
@@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):
 
     lexists = operator.methodcaller('exists', follow_symlinks=False)
     add_slash = operator.methodcaller('joinpath', '')
-
-    @staticmethod
-    def scandir(path):
-        """Emulates os.scandir(), which returns an object that can be used as
-        a context manager. This method is called by walk() and glob().
-        """
-        import contextlib
-        return contextlib.nullcontext(path.iterdir())
+    scandir = operator.methodcaller('scandir')
 
     @staticmethod
     def concat_path(path, text):
         """Appends text to the given path."""
         return path.with_segments(path._raw_path + text)
 
-    @staticmethod
-    def parse_entry(entry):
-        """Returns the path of an entry yielded from scandir()."""
-        return entry
-
 
 class PurePathBase:
     """Base class for pure path objects.
index 11e34f5d378a58cba5e417e498d3a5c71cc684c0..4596d0b0e26763026479760ff3e67cc69d8666b5 100644 (file)
@@ -1633,8 +1633,10 @@ class DummyPathTest(DummyPurePathTest):
             p.joinpath('linkA').symlink_to('fileA')
             p.joinpath('brokenLink').symlink_to('non-existing')
             p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
-            p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
-            p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
+            p.joinpath('dirA', 'linkC').symlink_to(
+                parser.join('..', 'dirB'), target_is_directory=True)
+            p.joinpath('dirB', 'linkD').symlink_to(
+                parser.join('..', 'dirB'), target_is_directory=True)
             p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
 
     def tearDown(self):
@@ -2479,7 +2481,7 @@ class DummyPathTest(DummyPurePathTest):
             if i % 2:
                 link.symlink_to(P(self.base, "dirE", "nonexistent"))
             else:
-                link.symlink_to(P(self.base, "dirC"))
+                link.symlink_to(P(self.base, "dirC"), target_is_directory=True)
 
         self.assertEqual(len(set(base.glob("*"))), 100)
         self.assertEqual(len(set(base.glob("*/"))), 50)