git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-115060: Speed up `pathlib.Path.glob()` by omitting initial `stat()` (#117831)
author: Barney Gale <barney.gale@gmail.com>
Sat, 13 Apr 2024 23:08:03 +0000 (00:08 +0100)
committer: GitHub <noreply@github.com>
Sat, 13 Apr 2024 23:08:03 +0000 (00:08 +0100)
Since 6258844c, paths that might not exist can be fed into pathlib's
globbing implementation, which will call `os.scandir()` / `os.lstat()` only
when strictly necessary. This allows us to drop an initial `self.is_dir()`
call, which saves a `stat()`.

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Doc/library/pathlib.rst
Lib/pathlib/__init__.py
Lib/pathlib/_abc.py
Lib/test/test_pathlib/test_pathlib.py
Lib/test/test_pathlib/test_pathlib_abc.py
Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst [new file with mode: 0644]

index f4ed479401f65cf4b3c3405011c928a713e1d76f..2e18e41869376e35ba79fc9143f0cfabecc5752b 100644 (file)
@@ -1004,10 +1004,6 @@ call fails (for example because the path doesn't exist).
    .. seealso::
       :ref:`pathlib-pattern-language` documentation.
 
-   This method calls :meth:`Path.is_dir` on the top-level directory and
-   propagates any :exc:`OSError` exception that is raised. Subsequent
-   :exc:`OSError` exceptions from scanning directories are suppressed.
-
    By default, or when the *case_sensitive* keyword-only argument is set to
    ``None``, this method matches paths using platform-specific casing rules:
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
@@ -1028,6 +1024,11 @@ call fails (for example because the path doesn't exist).
    .. versionchanged:: 3.13
       The *pattern* parameter accepts a :term:`path-like object`.
 
+   .. versionchanged:: 3.13
+      Any :exc:`OSError` exceptions raised from scanning the filesystem are
+      suppressed. In previous versions, such exceptions are suppressed in many
+      cases, but not all.
+
 
 .. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)
 
index 66eb08a45b1bb3660dce844577b0bfdba06f679d..a4721fbe81396228aaa8bf9acc447f8889bc4aaf 100644 (file)
@@ -607,11 +607,9 @@ class Path(_abc.PathBase, PurePath):
         if raw[-1] in (self.parser.sep, self.parser.altsep):
             # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
             parts.append('')
-        if not self.is_dir():
-            return iter([])
         select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
         root = str(self)
-        paths = select(root, exists=True)
+        paths = select(root)
 
         # Normalize results
         if root == '.':
index b51ad6f46d292abe3ab9167bed395c7c8b34b9d7..05698d5de24afb5cea1e309955241eecac0db7e7 100644 (file)
@@ -705,10 +705,8 @@ class PathBase(PurePathBase):
         anchor, parts = pattern._stack
         if anchor:
             raise NotImplementedError("Non-relative patterns are unsupported")
-        if not self.is_dir():
-            return iter([])
         select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
-        return select(self, exists=True)
+        return select(self)
 
     def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
         """Recursively yield all existing files (of any kind, including
index 651d66656cbd61dc2559a4a17fd5aebfbb675df0..5fd1a41cbee17b2415167f343e7dc97ce338abfb 100644 (file)
@@ -1263,6 +1263,13 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
             self.assertEqual(
                 set(P('.').glob('**/*/*')), {P("dirD/fileD")})
 
+    def test_glob_inaccessible(self):
+        P = self.cls
+        p = P(self.base, "mydir1", "mydir2")
+        p.mkdir(parents=True)
+        p.parent.chmod(0)
+        self.assertEqual(set(p.glob('*')), set())
+
     def test_rglob_pathlike(self):
         P = self.cls
         p = P(self.base, "dirC")
index 6656b032cde28e9fad574a60b22afa0d18809ff6..aadecbc142cca6f4f181fee211ce94fa7a7f4f83 100644 (file)
@@ -8,6 +8,7 @@ import unittest
 from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase
 import posixpath
 
+from test.support import is_wasi
 from test.support.os_helper import TESTFN
 
 
@@ -1920,6 +1921,8 @@ class DummyPathTest(DummyPurePathTest):
                   }
         self.assertEqual(given, {p / x for x in expect})
 
+    # See https://github.com/WebAssembly/wasi-filesystem/issues/26
+    @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX")
     def test_glob_dotdot(self):
         # ".." is not special in globs.
         P = self.cls
diff --git a/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst b/Misc/NEWS.d/next/Library/2024-04-13-01-45-15.gh-issue-115060.IxoM03.rst
new file mode 100644 (file)
index 0000000..50b374a
--- /dev/null
@@ -0,0 +1,3 @@
+Speed up :meth:`pathlib.Path.glob` by omitting an initial
+:meth:`~pathlib.Path.is_dir` call. As a result of this change,
+:meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`.