git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-113225: Speed up `pathlib.Path.walk(top_down=False)` (#113693)
author: Barney Gale <barney.gale@gmail.com>
Sat, 20 Jan 2024 03:06:00 +0000 (03:06 +0000)
committer: GitHub <noreply@github.com>
Sat, 20 Jan 2024 03:06:00 +0000 (03:06 +0000)
Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve
path objects for directories to visit. This saves the allocation of one
path object per directory in user subclasses of `PathBase`, and avoids a
second loop.

This trick does not apply when walking top-down, because users can affect
the walk by modifying *dirnames* in-place.

A side effect of this change is that, in bottom-up mode, subdirectories of
each directory are visited in reverse order, and that this order doesn't
match that of the names in *dirnames*. I suspect this is fine as the
order is arbitrary anyway.

Lib/pathlib/_abc.py
Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst [new file with mode: 0644]

index e5eeb4afce2ea9e70e8107b0b82335672ff4ae5c..553e1a399061d3ddc7547fc6a03353d872ad95f7 100644 (file)
@@ -820,6 +820,8 @@ class PathBase(PurePathBase):
             with scandir_obj as scandir_it:
                 dirnames = []
                 filenames = []
+                if not top_down:
+                    paths.append((path, dirnames, filenames))
                 for entry in scandir_it:
                     try:
                         is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
@@ -828,16 +830,15 @@ class PathBase(PurePathBase):
                         is_dir = False
 
                     if is_dir:
+                        if not top_down:
+                            paths.append(path._make_child_entry(entry))
                         dirnames.append(entry.name)
                     else:
                         filenames.append(entry.name)
 
             if top_down:
                 yield path, dirnames, filenames
-            else:
-                paths.append((path, dirnames, filenames))
-
-            paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
+                paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
 
     def absolute(self):
         """Return an absolute version of this path
diff --git a/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst
new file mode 100644 (file)
index 0000000..0c07f42
--- /dev/null
@@ -0,0 +1,2 @@
+Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where
+possible.