git.ipfire.org Git - thirdparty/babel.git/commitdiff
Improve extract performance via ignoring directories early during os.walk [refs: p694-redux, 968/head]
author: Aarni Koskela <akx@iki.fi>
Tue, 7 Feb 2023 14:42:25 +0000 (16:42 +0200)
committer: Aarni Koskela <akx@iki.fi>
Mon, 13 Jan 2025 12:15:31 +0000 (14:15 +0200)
Co-authored-by: Steven Kao <st.kao.05@gmail.com>
babel/messages/extract.py

index 94221df75c1b8358f94c22b3217b22f8e473c34d..5483f824ad6faf008304e7e0acc693f2e85180f9 100644 (file)
@@ -111,10 +111,25 @@ def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
     comments[:] = map(_strip, comments)
 
 
-def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
-    subdir = os.path.basename(dirpath)
-    # Legacy default behavior: ignore dot and underscore directories
-    return not (subdir.startswith('.') or subdir.startswith('_'))
+def make_default_directory_filter(
+    method_map: Iterable[tuple[str, str]],
+    root_dir: str | os.PathLike[str],
+):
+    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+        subdir = os.path.basename(dirpath)
+        # Legacy default behavior: ignore dot and underscore directories
+        if subdir.startswith('.') or subdir.startswith('_'):
+            return False
+
+        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
+
+        for pattern, method in method_map:
+            if method == "ignore" and pathmatch(pattern, dir_rel):
+                return False
+
+        return True
+
+    return directory_filter
 
 
 def extract_from_dir(
@@ -198,13 +213,19 @@ def extract_from_dir(
     """
     if dirname is None:
         dirname = os.getcwd()
+
     if options_map is None:
         options_map = {}
+
+    dirname = os.path.abspath(dirname)
+
     if directory_filter is None:
-        directory_filter = default_directory_filter
+        directory_filter = make_default_directory_filter(
+            method_map=method_map,
+            root_dir=dirname,
+        )
 
-    absname = os.path.abspath(dirname)
-    for root, dirnames, filenames in os.walk(absname):
+    for root, dirnames, filenames in os.walk(dirname):
         dirnames[:] = [
             subdir for subdir in dirnames
             if directory_filter(os.path.join(root, subdir))
@@ -222,7 +243,7 @@ def extract_from_dir(
                 keywords,
                 comment_tags,
                 strip_comment_tags,
-                dirpath=absname,
+                dirpath=dirname,
             )