]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)
authorStanislav Zmiev <szmiev2000@gmail.com>
Wed, 22 Mar 2023 14:45:25 +0000 (18:45 +0400)
committerGitHub <noreply@github.com>
Wed, 22 Mar 2023 14:45:25 +0000 (14:45 +0000)
Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst [new file with mode: 0644]

index 55c44f12e5a2fba601022348bd56a67ddeabaffb..a126bf2fe5570a7d1ccc66dd415e8eb29d68d581 100644 (file)
@@ -1197,45 +1197,47 @@ class Path(PurePath):
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
         sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
-        return self._walk(top_down, on_error, follow_symlinks)
-
-    def _walk(self, top_down, on_error, follow_symlinks):
-        # We may not have read permission for self, in which case we can't
-        # get a list of the files the directory contains. os.walk
-        # always suppressed the exception then, rather than blow up for a
-        # minor reason when (say) a thousand readable directories are still
-        # left to visit. That logic is copied here.
-        try:
-            scandir_it = self._scandir()
-        except OSError as error:
-            if on_error is not None:
-                on_error(error)
-            return
-
-        with scandir_it:
-            dirnames = []
-            filenames = []
-            for entry in scandir_it:
-                try:
-                    is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
-                except OSError:
-                    # Carried over from os.path.isdir().
-                    is_dir = False
-
-                if is_dir:
-                    dirnames.append(entry.name)
-                else:
-                    filenames.append(entry.name)
-
-        if top_down:
-            yield self, dirnames, filenames
-
-        for dirname in dirnames:
-            dirpath = self._make_child_relpath(dirname)
-            yield from dirpath._walk(top_down, on_error, follow_symlinks)
+        paths = [self]
+
+        while paths:
+            path = paths.pop()
+            if isinstance(path, tuple):
+                yield path
+                continue
+
+            # We may not have read permission for self, in which case we can't
+            # get a list of the files the directory contains. os.walk()
+            # always suppressed the exception in that instance, rather than
+            # blow up for a minor reason when (say) a thousand readable
+            # directories are still left to visit. That logic is copied here.
+            try:
+                scandir_it = path._scandir()
+            except OSError as error:
+                if on_error is not None:
+                    on_error(error)
+                continue
+
+            with scandir_it:
+                dirnames = []
+                filenames = []
+                for entry in scandir_it:
+                    try:
+                        is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
+                    except OSError:
+                        # Carried over from os.path.isdir().
+                        is_dir = False
+
+                    if is_dir:
+                        dirnames.append(entry.name)
+                    else:
+                        filenames.append(entry.name)
+
+            if top_down:
+                yield path, dirnames, filenames
+            else:
+                paths.append((path, dirnames, filenames))
 
-        if not top_down:
-            yield self, dirnames, filenames
+            paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
 
 
 class PosixPath(Path, PurePosixPath):
index f05dead5886743e7c6a09fc24f0cd91c56ea1138..3041630da678998a1dd738b3ee21f829b3ae86e2 100644 (file)
@@ -13,6 +13,7 @@ import unittest
 from unittest import mock
 
 from test.support import import_helper
+from test.support import set_recursion_limit
 from test.support import is_emscripten, is_wasi
 from test.support import os_helper
 from test.support.os_helper import TESTFN, FakePath
@@ -2793,6 +2794,18 @@ class WalkTests(unittest.TestCase):
                 self.assertEqual(next(it), expected)
             path = path / 'd'
 
+    def test_walk_above_recursion_limit(self):
+        recursion_limit = 40
+        # directory_depth > recursion_limit
+        directory_depth = recursion_limit + 10
+        base = pathlib.Path(os_helper.TESTFN, 'deep')
+        path = pathlib.Path(base, *(['d'] * directory_depth))
+        path.mkdir(parents=True)
+
+        with set_recursion_limit(recursion_limit):
+            list(base.walk())
+            list(base.walk(top_down=False))
+
 
 class PathTest(_BasePathTest, unittest.TestCase):
     cls = pathlib.Path
diff --git a/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst
new file mode 100644 (file)
index 0000000..f9ac147
--- /dev/null
@@ -0,0 +1 @@
+Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion.