git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-81079: Add case_sensitive argument to `pathlib.Path.glob()` (GH-102710)
author Barney Gale <barney.gale@gmail.com>
Thu, 4 May 2023 16:44:36 +0000 (17:44 +0100)
committer GitHub <noreply@github.com>
Thu, 4 May 2023 16:44:36 +0000 (16:44 +0000)
This argument allows case-sensitive matching to be enabled on Windows, and
case-insensitive matching to be enabled on Posix.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
Doc/library/pathlib.rst
Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst [new file with mode: 0644]

index 4847ac24c775134736b18a9d8cce7a20a97f43fb..14118127835bbe2c17b2c997f4d57d67c1f1c2e1 100644 (file)
@@ -855,7 +855,7 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.5
 
 
-.. method:: Path.glob(pattern)
+.. method:: Path.glob(pattern, *, case_sensitive=None)
 
    Glob the given relative *pattern* in the directory represented by this path,
    yielding all matching files (of any kind)::
@@ -876,6 +876,11 @@ call fails (for example because the path doesn't exist).
        PosixPath('setup.py'),
        PosixPath('test_pathlib.py')]
 
+   By default, or when the *case_sensitive* keyword-only argument is set to
+   ``None``, this method matches paths using platform-specific casing rules:
+   typically, case-sensitive on POSIX, and case-insensitive on Windows.
+   Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
+
    .. note::
       Using the "``**``" pattern in large directory trees may consume
       an inordinate amount of time.
@@ -886,6 +891,9 @@ call fails (for example because the path doesn't exist).
       Return only directories if *pattern* ends with a pathname components
       separator (:data:`~os.sep` or :data:`~os.altsep`).
 
+   .. versionadded:: 3.12
+      The *case_sensitive* argument.
+
 .. method:: Path.group()
 
    Return the name of the group owning the file.  :exc:`KeyError` is raised
@@ -1271,7 +1279,7 @@ call fails (for example because the path doesn't exist).
    .. versionadded:: 3.6
       The *strict* argument (pre-3.6 behavior is strict).
 
-.. method:: Path.rglob(pattern)
+.. method:: Path.rglob(pattern, *, case_sensitive=None)
 
    Glob the given relative *pattern* recursively.  This is like calling
    :func:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1284,12 +1292,20 @@ call fails (for example because the path doesn't exist).
        PosixPath('setup.py'),
        PosixPath('test_pathlib.py')]
 
+   By default, or when the *case_sensitive* keyword-only argument is set to
+   ``None``, this method matches paths using platform-specific casing rules:
+   typically, case-sensitive on POSIX, and case-insensitive on Windows.
+   Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
+
    .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
 
    .. versionchanged:: 3.11
       Return only directories if *pattern* ends with a pathname components
       separator (:data:`~os.sep` or :data:`~os.altsep`).
 
+   .. versionadded:: 3.12
+      The *case_sensitive* argument.
+
 .. method:: Path.rmdir()
 
    Remove this directory.  The directory must be empty.
index 8a1651c23d7f4deb8fa897b3d61445d871e119d1..f32e1e2d822834beb27ea67640777689fabbeacb 100644 (file)
@@ -62,7 +62,7 @@ def _is_case_sensitive(flavour):
 #
 
 @functools.lru_cache()
-def _make_selector(pattern_parts, flavour):
+def _make_selector(pattern_parts, flavour, case_sensitive):
     pat = pattern_parts[0]
     child_parts = pattern_parts[1:]
     if not pat:
@@ -75,17 +75,17 @@ def _make_selector(pattern_parts, flavour):
         raise ValueError("Invalid pattern: '**' can only be an entire path component")
     else:
         cls = _WildcardSelector
-    return cls(pat, child_parts, flavour)
+    return cls(pat, child_parts, flavour, case_sensitive)
 
 
 class _Selector:
     """A selector matches a specific glob pattern part against the children
     of a given path."""
 
-    def __init__(self, child_parts, flavour):
+    def __init__(self, child_parts, flavour, case_sensitive):
         self.child_parts = child_parts
         if child_parts:
-            self.successor = _make_selector(child_parts, flavour)
+            self.successor = _make_selector(child_parts, flavour, case_sensitive)
             self.dironly = True
         else:
             self.successor = _TerminatingSelector()
@@ -108,8 +108,9 @@ class _TerminatingSelector:
 
 
 class _ParentSelector(_Selector):
-    def __init__(self, name, child_parts, flavour):
-        _Selector.__init__(self, child_parts, flavour)
+
+    def __init__(self, name, child_parts, flavour, case_sensitive):
+        _Selector.__init__(self, child_parts, flavour, case_sensitive)
 
     def _select_from(self,  parent_path, scandir):
         path = parent_path._make_child_relpath('..')
@@ -119,10 +120,13 @@ class _ParentSelector(_Selector):
 
 class _WildcardSelector(_Selector):
 
-    def __init__(self, pat, child_parts, flavour):
-        flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
+    def __init__(self, pat, child_parts, flavour, case_sensitive):
+        _Selector.__init__(self, child_parts, flavour, case_sensitive)
+        if case_sensitive is None:
+            # TODO: evaluate case-sensitivity of each directory in _select_from()
+            case_sensitive = _is_case_sensitive(flavour)
+        flags = re.NOFLAG if case_sensitive else re.IGNORECASE
         self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
-        _Selector.__init__(self, child_parts, flavour)
 
     def _select_from(self, parent_path, scandir):
         try:
@@ -153,8 +157,8 @@ class _WildcardSelector(_Selector):
 
 class _RecursiveWildcardSelector(_Selector):
 
-    def __init__(self, pat, child_parts, flavour):
-        _Selector.__init__(self, child_parts, flavour)
+    def __init__(self, pat, child_parts, flavour, case_sensitive):
+        _Selector.__init__(self, child_parts, flavour, case_sensitive)
 
     def _iterate_directories(self, parent_path, scandir):
         yield parent_path
@@ -819,7 +823,7 @@ class Path(PurePath):
         # includes scandir(), which is used to implement glob().
         return os.scandir(self)
 
-    def glob(self, pattern):
+    def glob(self, pattern, *, case_sensitive=None):
         """Iterate over this subtree and yield all existing files (of any
         kind, including directories) matching the given relative pattern.
         """
@@ -831,11 +835,11 @@ class Path(PurePath):
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
-        selector = _make_selector(tuple(pattern_parts), self._flavour)
+        selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
         for p in selector.select_from(self):
             yield p
 
-    def rglob(self, pattern):
+    def rglob(self, pattern, *, case_sensitive=None):
         """Recursively yield all existing files (of any kind, including
         directories) matching the given relative pattern, anywhere in
         this subtree.
@@ -846,7 +850,7 @@ class Path(PurePath):
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
             pattern_parts.append('')
-        selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
+        selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
         for p in selector.select_from(self):
             yield p
 
index 424bb92a87d112c6c119eaa8743430b86cba0518..a932e03df4236d07f13a1cc9803809b9daad8298 100644 (file)
@@ -1816,6 +1816,18 @@ class _BasePathTest(object):
         else:
             _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
 
+    def test_glob_case_sensitive(self):
+        P = self.cls
+        def _check(path, pattern, case_sensitive, expected):
+            actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
+            expected = {str(P(BASE, q)) for q in expected}
+            self.assertEqual(actual, expected)
+        path = P(BASE)
+        _check(path, "DIRB/FILE*", True, [])
+        _check(path, "DIRB/FILE*", False, ["dirB/fileB"])
+        _check(path, "dirb/file*", True, [])
+        _check(path, "dirb/file*", False, ["dirB/fileB"])
+
     def test_rglob_common(self):
         def _check(glob, expected):
             self.assertEqual(set(glob), { P(BASE, q) for q in expected })
diff --git a/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst
new file mode 100644 (file)
index 0000000..ef56905
--- /dev/null
@@ -0,0 +1,2 @@
+Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
+:meth:`~pathlib.Path.rglob`.