git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-104104: Optimize `pathlib.Path.glob()` by avoiding repeated calls to `os.path.normcase()`
author: Barney Gale <barney.gale@gmail.com>
Tue, 2 May 2023 21:51:18 +0000 (22:51 +0100)
committer: GitHub <noreply@github.com>
Tue, 2 May 2023 21:51:18 +0000 (22:51 +0100)
Use `re.IGNORECASE` to implement case-insensitive matching. This
restores behaviour from before GH-31691.

Lib/pathlib.py
Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst [new file with mode: 0644]

index 8eb08949fa9b43108aa2b945c3a187ca6a92bc58..61e7f3e4430caef116bf5930290d3a9a7503fbb7 100644 (file)
@@ -59,6 +59,9 @@ def _is_wildcard_pattern(pat):
     # be looked up directly as a file.
     return "*" in pat or "?" in pat or "[" in pat
 
+def _is_case_sensitive(flavour):
+    return flavour.normcase('Aa') == 'Aa'
+
 #
 # Globbing helpers
 #
@@ -100,15 +103,14 @@ class _Selector:
         is_dir = path_cls.is_dir
         exists = path_cls.exists
         scandir = path_cls._scandir
-        normcase = path_cls._flavour.normcase
         if not is_dir(parent_path):
             return iter([])
-        return self._select_from(parent_path, is_dir, exists, scandir, normcase)
+        return self._select_from(parent_path, is_dir, exists, scandir)
 
 
 class _TerminatingSelector:
 
-    def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
+    def _select_from(self, parent_path, is_dir, exists, scandir):
         yield parent_path
 
 
@@ -118,11 +120,11 @@ class _PreciseSelector(_Selector):
         self.name = name
         _Selector.__init__(self, child_parts, flavour)
 
-    def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
+    def _select_from(self, parent_path, is_dir, exists, scandir):
         try:
             path = parent_path._make_child_relpath(self.name)
             if (is_dir if self.dironly else exists)(path):
-                for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
+                for p in self.successor._select_from(path, is_dir, exists, scandir):
                     yield p
         except PermissionError:
             return
@@ -131,10 +133,11 @@ class _PreciseSelector(_Selector):
 class _WildcardSelector(_Selector):
 
     def __init__(self, pat, child_parts, flavour):
-        self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
+        flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE
+        self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
         _Selector.__init__(self, child_parts, flavour)
 
-    def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
+    def _select_from(self, parent_path, is_dir, exists, scandir):
         try:
             # We must close the scandir() object before proceeding to
             # avoid exhausting file descriptors when globbing deep trees.
@@ -153,9 +156,9 @@ class _WildcardSelector(_Selector):
                             raise
                         continue
                 name = entry.name
-                if self.match(normcase(name)):
+                if self.match(name):
                     path = parent_path._make_child_relpath(name)
-                    for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
+                    for p in self.successor._select_from(path, is_dir, exists, scandir):
                         yield p
         except PermissionError:
             return
@@ -187,13 +190,13 @@ class _RecursiveWildcardSelector(_Selector):
         except PermissionError:
             return
 
-    def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
+    def _select_from(self, parent_path, is_dir, exists, scandir):
         try:
             yielded = set()
             try:
                 successor_select = self.successor._select_from
                 for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
-                    for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
+                    for p in successor_select(starting_point, is_dir, exists, scandir):
                         if p not in yielded:
                             yield p
                             yielded.add(p)
diff --git a/Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst b/Misc/NEWS.d/next/Library/2023-05-02-21-05-30.gh-issue-104104.9tjplT.rst
new file mode 100644 (file)
index 0000000..935a0e2
--- /dev/null
@@ -0,0 +1,2 @@
+Improve performance of :meth:`pathlib.Path.glob` by using
+:data:`re.IGNORECASE` to implement case-insensitive matching.