git.ipfire.org Git - thirdparty/babel.git/commitdiff
Improve extract performance via ignoring directories early during os.walk (#968)
author: Aarni Koskela <akx@iki.fi>
Mon, 29 Dec 2025 11:40:10 +0000 (13:40 +0200)
committer: GitHub <noreply@github.com>
Mon, 29 Dec 2025 11:40:10 +0000 (13:40 +0200)
Co-authored-by: Steven Kao <st.kao.05@gmail.com>
babel/messages/extract.py
tests/messages/frontend/test_extract.py

index 07e13e34f14f12a4cf106179c244830ba3d09e21..a8f15469d84d6a52abf8d5bf345f01db7f755360 100644 (file)
@@ -23,6 +23,7 @@ import io
 import os
 import sys
 import tokenize
+import warnings
 from collections.abc import (
     Callable,
     Collection,
@@ -114,7 +115,35 @@ def _strip_comment_tags(comments: MutableSequence[str], tags: Iterable[str]):
     comments[:] = [_strip(c) for c in comments]
 
 
-def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+def _make_default_directory_filter(
+    method_map: Iterable[tuple[str, str]],
+    root_dir: str | os.PathLike[str],
+):
+    method_map = tuple(method_map)
+
+    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+        subdir = os.path.basename(dirpath)
+        # Legacy default behavior: ignore dot and underscore directories
+        if subdir.startswith('.') or subdir.startswith('_'):
+            return False
+
+        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
+
+        for pattern, method in method_map:
+            if method == "ignore" and pathmatch(pattern, dir_rel):
+                return False
+
+        return True
+
+    return directory_filter
+
+
+def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:  # pragma: no cover
+    warnings.warn(
+        "`default_directory_filter` is deprecated and will be removed in a future version of Babel.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     subdir = os.path.basename(dirpath)
     # Legacy default behavior: ignore dot and underscore directories
     return not (subdir.startswith('.') or subdir.startswith('_'))
@@ -201,13 +230,19 @@ def extract_from_dir(
     """
     if dirname is None:
         dirname = os.getcwd()
+
     if options_map is None:
         options_map = {}
+
+    dirname = os.path.abspath(dirname)
+
     if directory_filter is None:
-        directory_filter = default_directory_filter
+        directory_filter = _make_default_directory_filter(
+            method_map=method_map,
+            root_dir=dirname,
+        )
 
-    absname = os.path.abspath(dirname)
-    for root, dirnames, filenames in os.walk(absname):
+    for root, dirnames, filenames in os.walk(dirname):
         dirnames[:] = [
             subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir))
         ]
@@ -224,7 +259,7 @@ def extract_from_dir(
                 keywords,
                 comment_tags,
                 strip_comment_tags,
-                dirpath=absname,
+                dirpath=dirname,
             )
 
 
index 7980eddad54767bfa6bf00bbc0200d92277b4d00..1c4532f5fd5d6c69a5d0e6a1211a77beca8661ae 100644 (file)
@@ -202,10 +202,11 @@ msgstr[1] ""
 
 
 @freeze_time("1994-11-11")
-def test_extraction_with_mapping_dict(extract_cmd, pot_file):
+@pytest.mark.parametrize("ignore_pattern", ['**/ignored/**.*', 'ignored'])
+def test_extraction_with_mapping_dict(extract_cmd, pot_file, ignore_pattern):
     extract_cmd.distribution.message_extractors = {
         'project': [
-            ('**/ignored/**.*', 'ignore', None),
+            (ignore_pattern, 'ignore', None),
             ('**.py', 'python', None),
         ],
     }