git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-77609: Add recurse_symlinks argument to `pathlib.Path.glob()` (#117311)
author Barney Gale <barney.gale@gmail.com>
Fri, 5 Apr 2024 18:51:54 +0000 (19:51 +0100)
committer GitHub <noreply@github.com>
Fri, 5 Apr 2024 18:51:54 +0000 (18:51 +0000)
Replace tri-state `follow_symlinks` with boolean `recurse_symlinks` argument. The new argument controls whether symlinks are followed when expanding recursive `**` wildcards. The possible argument values correspond as follows:

    follow_symlinks  recurse_symlinks
    ===============  ================
    False            N/A
    None             False
    True             True

We therefore drop support for not following symlinks when expanding non-recursive pattern parts; it wasn't requested in the original issue, and it's a feature not found in any shells.

This makes the API easier to grok by eliminating `None` as an option.

No news blurb as `follow_symlinks` was new in 3.13.

Doc/library/pathlib.rst
Doc/whatsnew/3.13.rst
Lib/pathlib/__init__.py
Lib/pathlib/_abc.py
Lib/test/test_pathlib/test_pathlib_abc.py

index 9122df7a476632dade968180db150539b475ef04..f4ed479401f65cf4b3c3405011c928a713e1d76f 100644 (file)
@@ -985,7 +985,7 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.5
 
 
-.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)
+.. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False)
 
    Glob the given relative *pattern* in the directory represented by this path,
    yielding all matching files (of any kind)::
@@ -1013,14 +1013,9 @@ call fails (for example because the path doesn't exist).
    typically, case-sensitive on POSIX, and case-insensitive on Windows.
    Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
 
-   By default, or when the *follow_symlinks* keyword-only argument is set to
-   ``None``, this method follows symlinks except when expanding "``**``"
-   wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
-   ``False`` to treat all symlinks as files.
-
-   .. tip::
-      Set *follow_symlinks* to ``True`` or ``False`` to improve performance
-      of recursive globbing.
+   By default, or when the *recurse_symlinks* keyword-only argument is set to
+   ``False``, this method follows symlinks except when expanding "``**``"
+   wildcards. Set *recurse_symlinks* to ``True`` to always follow symlinks.
 
    .. audit-event:: pathlib.Path.glob self,pattern pathlib.Path.glob
 
@@ -1028,13 +1023,13 @@ call fails (for example because the path doesn't exist).
       The *case_sensitive* parameter was added.
 
    .. versionchanged:: 3.13
-      The *follow_symlinks* parameter was added.
+      The *recurse_symlinks* parameter was added.
 
    .. versionchanged:: 3.13
       The *pattern* parameter accepts a :term:`path-like object`.
 
 
-.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)
+.. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)
 
    Glob the given relative *pattern* recursively.  This is like calling
    :func:`Path.glob` with "``**/``" added in front of the *pattern*.
@@ -1048,7 +1043,7 @@ call fails (for example because the path doesn't exist).
       The *case_sensitive* parameter was added.
 
    .. versionchanged:: 3.13
-      The *follow_symlinks* parameter was added.
+      The *recurse_symlinks* parameter was added.
 
    .. versionchanged:: 3.13
       The *pattern* parameter accepts a :term:`path-like object`.
@@ -1675,7 +1670,7 @@ The patterns accepted and results generated by :meth:`Path.glob` and
    passing ``recursive=True`` to :func:`glob.glob`.
 3. "``**``" pattern components do not follow symlinks by default in pathlib.
    This behaviour has no equivalent in :func:`glob.glob`, but you can pass
-   ``follow_symlinks=True`` to :meth:`Path.glob` for compatible behaviour.
+   ``recurse_symlinks=True`` to :meth:`Path.glob` for compatible behaviour.
 4. Like all :class:`PurePath` and :class:`Path` objects, the values returned
    from :meth:`Path.glob` and :meth:`Path.rglob` don't include trailing
    slashes.
index 99a9545dd4e58612c09db2c57f2dd552ba47ea76..e31f0c52d4c5f54a6f5c0d8bda691d1c08293f1c 100644 (file)
@@ -559,12 +559,15 @@ pathlib
   implementation of :mod:`os.path` used for low-level path parsing and
   joining: either ``posixpath`` or ``ntpath``.
 
-* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`,
-  :meth:`~pathlib.Path.rglob`, :meth:`~pathlib.Path.is_file`,
+* Add *recurse_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`
+  and :meth:`~pathlib.Path.rglob`.
+  (Contributed by Barney Gale in :gh:`77609`).
+
+* Add *follow_symlinks* keyword-only argument to :meth:`~pathlib.Path.is_file`,
   :meth:`~pathlib.Path.is_dir`, :meth:`~pathlib.Path.owner`,
   :meth:`~pathlib.Path.group`.
-  (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`, and
-  Kamil Turek in :gh:`107962`).
+  (Contributed by Barney Gale in :gh:`105793`, and Kamil Turek in
+  :gh:`107962`).
 
 * Return files and directories from :meth:`pathlib.Path.glob` and
   :meth:`~pathlib.Path.rglob` when given a pattern that ends with "``**``". In
index 6cccfb864e8206e4f1e560902e34243b2011a695..747000f1a43475c7117edaea2d6428340a704aa2 100644 (file)
@@ -619,7 +619,7 @@ class Path(_abc.PathBase, PurePath):
         path._tail_cached = tail + [name]
         return path
 
-    def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
+    def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
         """Iterate over this subtree and yield all existing files (of any
         kind, including directories) matching the given relative pattern.
         """
@@ -627,9 +627,9 @@ class Path(_abc.PathBase, PurePath):
         if not isinstance(pattern, PurePath):
             pattern = self.with_segments(pattern)
         return _abc.PathBase.glob(
-            self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
+            self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
 
-    def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
+    def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False):
         """Recursively yield all existing files (of any kind, including
         directories) matching the given relative pattern, anywhere in
         this subtree.
@@ -639,7 +639,7 @@ class Path(_abc.PathBase, PurePath):
             pattern = self.with_segments(pattern)
         pattern = '**' / pattern
         return _abc.PathBase.glob(
-            self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
+            self, pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
index 932020e6d0866c4b4cdc9678d739c5de258e9b07..ca38a51d072cfbdb24bfd4e48d0e7b8e3d70f2e5 100644 (file)
@@ -66,10 +66,8 @@ def _select_special(paths, part):
         yield path._make_child_relpath(part)
 
 
-def _select_children(parent_paths, dir_only, follow_symlinks, match):
+def _select_children(parent_paths, dir_only, match):
     """Yield direct children of given paths, filtering by name and type."""
-    if follow_symlinks is None:
-        follow_symlinks = True
     for parent_path in parent_paths:
         try:
             # We must close the scandir() object before proceeding to
@@ -82,7 +80,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
             for entry in entries:
                 if dir_only:
                     try:
-                        if not entry.is_dir(follow_symlinks=follow_symlinks):
+                        if not entry.is_dir():
                             continue
                     except OSError:
                         continue
@@ -96,8 +94,6 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
     """Yield given paths and all their children, recursively, filtering by
     string and type.
     """
-    if follow_symlinks is None:
-        follow_symlinks = False
     for parent_path in parent_paths:
         if match is not None:
             # If we're filtering paths through a regex, record the length of
@@ -789,7 +785,7 @@ class PathBase(PurePathBase):
     def _make_child_relpath(self, name):
         return self.joinpath(name)
 
-    def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
+    def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
         """Iterate over this subtree and yield all existing files (of any
         kind, including directories) matching the given relative pattern.
         """
@@ -818,7 +814,7 @@ class PathBase(PurePathBase):
                 # Consume following non-special components, provided we're
                 # treating symlinks consistently. Each component is joined
                 # onto 'part', which is used to generate an re.Pattern object.
-                if follow_symlinks is not None:
+                if recurse_symlinks:
                     while stack and stack[-1] not in specials:
                         part += sep + stack.pop()
 
@@ -827,7 +823,7 @@ class PathBase(PurePathBase):
                 match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None
 
                 # Recursively walk directories, filtering by type and regex.
-                paths = _select_recursive(paths, bool(stack), follow_symlinks, match)
+                paths = _select_recursive(paths, bool(stack), recurse_symlinks, match)
 
                 # De-duplicate if we've already seen a '**' component.
                 if deduplicate_paths:
@@ -843,10 +839,10 @@ class PathBase(PurePathBase):
                 match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None
 
                 # Iterate over directories' children filtering by type and regex.
-                paths = _select_children(paths, bool(stack), follow_symlinks, match)
+                paths = _select_children(paths, bool(stack), match)
         return paths
 
-    def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
+    def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
         """Recursively yield all existing files (of any kind, including
         directories) matching the given relative pattern, anywhere in
         this subtree.
@@ -854,7 +850,7 @@ class PathBase(PurePathBase):
         if not isinstance(pattern, PurePathBase):
             pattern = self.with_segments(pattern)
         pattern = '**' / pattern
-        return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
+        return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
index a7e35a3e1fc7dac7e6e43fe52af7560488d6b9f7..336115cf0fead2530e8358aedb4063d66c8e061d 100644 (file)
@@ -1776,9 +1776,9 @@ class DummyPathTest(DummyPurePathTest):
         _check(path, "dirb/file*", False, ["dirB/fileB"])
 
     @needs_symlinks
-    def test_glob_follow_symlinks_common(self):
+    def test_glob_recurse_symlinks_common(self):
         def _check(path, glob, expected):
-            actual = {path for path in path.glob(glob, follow_symlinks=True)
+            actual = {path for path in path.glob(glob, recurse_symlinks=True)
                       if path.parts.count("linkD") <= 1}  # exclude symlink loop.
             self.assertEqual(actual, { P(self.base, q) for q in expected })
         P = self.cls
@@ -1812,39 +1812,9 @@ class DummyPathTest(DummyPurePathTest):
         _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"])
         _check(p, "*/dirD/**/", ["dirC/dirD/"])
 
-    @needs_symlinks
-    def test_glob_no_follow_symlinks_common(self):
-        def _check(path, glob, expected):
-            actual = {path for path in path.glob(glob, follow_symlinks=False)}
-            self.assertEqual(actual, { P(self.base, q) for q in expected })
-        P = self.cls
-        p = P(self.base)
-        _check(p, "fileB", [])
-        _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
-        _check(p, "*A", ["dirA", "fileA", "linkA"])
-        _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
-        _check(p, "*/fileB", ["dirB/fileB"])
-        _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"])
-        _check(p, "dir*/*/..", ["dirC/dirD/.."])
-        _check(p, "dir*/**", [
-            "dirA/", "dirA/linkC",
-            "dirB/", "dirB/fileB", "dirB/linkD",
-            "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt",
-            "dirE/"])
-        _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"])
-        _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."])
-        _check(p, "dir*/*/**", ["dirC/dirD/", "dirC/dirD/fileD"])
-        _check(p, "dir*/*/**/", ["dirC/dirD/"])
-        _check(p, "dir*/*/**/..", ["dirC/dirD/.."])
-        _check(p, "dir*/**/fileC", ["dirC/fileC"])
-        _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD/", "dirC/dirD/../dirD/fileD"])
-        _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"])
-        _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"])
-        _check(p, "*/dirD/**/", ["dirC/dirD/"])
-
-    def test_rglob_follow_symlinks_none(self):
+    def test_rglob_recurse_symlinks_false(self):
         def _check(path, glob, expected):
-            actual = set(path.rglob(glob, follow_symlinks=None))
+            actual = set(path.rglob(glob, recurse_symlinks=False))
             self.assertEqual(actual, { P(self.base, q) for q in expected })
         P = self.cls
         p = P(self.base)
@@ -1901,9 +1871,9 @@ class DummyPathTest(DummyPurePathTest):
         self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})
 
     @needs_symlinks
-    def test_rglob_follow_symlinks_common(self):
+    def test_rglob_recurse_symlinks_common(self):
         def _check(path, glob, expected):
-            actual = {path for path in path.rglob(glob, follow_symlinks=True)
+            actual = {path for path in path.rglob(glob, recurse_symlinks=True)
                       if path.parts.count("linkD") <= 1}  # exclude symlink loop.
             self.assertEqual(actual, { P(self.base, q) for q in expected })
         P = self.cls
@@ -1932,37 +1902,12 @@ class DummyPathTest(DummyPurePathTest):
         _check(p, "*.txt", ["dirC/novel.txt"])
         _check(p, "*.*", ["dirC/novel.txt"])
 
-    @needs_symlinks
-    def test_rglob_no_follow_symlinks_common(self):
-        def _check(path, glob, expected):
-            actual = {path for path in path.rglob(glob, follow_symlinks=False)}
-            self.assertEqual(actual, { P(self.base, q) for q in expected })
-        P = self.cls
-        p = P(self.base)
-        _check(p, "fileB", ["dirB/fileB"])
-        _check(p, "*/fileA", [])
-        _check(p, "*/fileB", ["dirB/fileB"])
-        _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
-        _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"])
-        _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"])
-
-        p = P(self.base, "dirC")
-        _check(p, "*", ["dirC/fileC", "dirC/novel.txt",
-                        "dirC/dirD", "dirC/dirD/fileD"])
-        _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
-        _check(p, "*/*", ["dirC/dirD/fileD"])
-        _check(p, "*/", ["dirC/dirD/"])
-        _check(p, "", ["dirC/", "dirC/dirD/"])
-        # gh-91616, a re module regression
-        _check(p, "*.txt", ["dirC/novel.txt"])
-        _check(p, "*.*", ["dirC/novel.txt"])
-
     @needs_symlinks
     def test_rglob_symlink_loop(self):
         # Don't get fooled by symlink loops (Issue #26012).
         P = self.cls
         p = P(self.base)
-        given = set(p.rglob('*', follow_symlinks=None))
+        given = set(p.rglob('*', recurse_symlinks=False))
         expect = {'brokenLink',
                   'dirA', 'dirA/linkC',
                   'dirB', 'dirB/fileB', 'dirB/linkD',