]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-89727: Fix `os.fwalk()` recursion error on deep trees (#119638)
authorBarney Gale <barney.gale@gmail.com>
Thu, 30 May 2024 03:05:36 +0000 (04:05 +0100)
committerGitHub <noreply@github.com>
Thu, 30 May 2024 03:05:36 +0000 (03:05 +0000)
Implement `os.fwalk()` using a list as a stack to avoid emitting recursion
errors on deeply nested trees.

Lib/os.py
Lib/test/test_os.py
Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst [new file with mode: 0644]

index ae9e646361e82c86e2b36329ec3b7d9ebaa99d24..cef5d90a8c23dfc94ef12503330d457e950ce02c 100644 (file)
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -478,24 +478,52 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
         """
         sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
         top = fspath(top)
-        # Note: To guard against symlink races, we use the standard
-        # lstat()/open()/fstat() trick.
-        if not follow_symlinks:
-            orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
-        topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd)
-        try:
-            if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
-                                    path.samestat(orig_st, stat(topfd)))):
-                yield from _fwalk(topfd, top, isinstance(top, bytes),
-                                  topdown, onerror, follow_symlinks)
-        finally:
-            close(topfd)
-
-    def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
+        stack = [(_fwalk_walk, (True, dir_fd, top, top, None))]
+        isbytes = isinstance(top, bytes)
+        while stack:
+            yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks)
+
+    # Each item in the _fwalk() stack is a pair (action, args).
+    _fwalk_walk = 0  # args: (isroot, dirfd, toppath, topname, entry)
+    _fwalk_yield = 1  # args: (toppath, dirnames, filenames, topfd)
+    _fwalk_close = 2  # args: dirfd
+
+    def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks):
         # Note: This uses O(depth of the directory tree) file descriptors: if
         # necessary, it can be adapted to only require O(1) FDs, see issue
         # #13734.
 
+        action, value = stack.pop()
+        if action == _fwalk_close:
+            close(value)
+            return
+        elif action == _fwalk_yield:
+            yield value
+            return
+        assert action == _fwalk_walk
+        isroot, dirfd, toppath, topname, entry = value
+        try:
+            if not follow_symlinks:
+                # Note: To guard against symlink races, we use the standard
+                # lstat()/open()/fstat() trick.
+                if entry is None:
+                    orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd)
+                else:
+                    orig_st = entry.stat(follow_symlinks=False)
+            topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd)
+        except OSError as err:
+            if isroot:
+                raise
+            if onerror is not None:
+                onerror(err)
+            return
+        stack.append((_fwalk_close, topfd))
+        if not follow_symlinks:
+            if isroot and not st.S_ISDIR(orig_st.st_mode):
+                return
+            if not path.samestat(orig_st, stat(topfd)):
+                return
+
         scandir_it = scandir(topfd)
         dirs = []
         nondirs = []
@@ -521,31 +549,18 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
 
         if topdown:
             yield toppath, dirs, nondirs, topfd
+        else:
+            stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd)))
 
-        for name in dirs if entries is None else zip(dirs, entries):
-            try:
-                if not follow_symlinks:
-                    if topdown:
-                        orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
-                    else:
-                        assert entries is not None
-                        name, entry = name
-                        orig_st = entry.stat(follow_symlinks=False)
-                dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd)
-            except OSError as err:
-                if onerror is not None:
-                    onerror(err)
-                continue
-            try:
-                if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
-                    dirpath = path.join(toppath, name)
-                    yield from _fwalk(dirfd, dirpath, isbytes,
-                                      topdown, onerror, follow_symlinks)
-            finally:
-                close(dirfd)
-
-        if not topdown:
-            yield toppath, dirs, nondirs, topfd
+        toppath = path.join(toppath, toppath[:0])  # Add trailing slash.
+        if entries is None:
+            stack.extend(
+                (_fwalk_walk, (False, topfd, toppath + name, name, None))
+                for name in dirs[::-1])
+        else:
+            stack.extend(
+                (_fwalk_walk, (False, topfd, toppath + name, name, entry))
+                for name, entry in zip(dirs[::-1], entries[::-1]))
 
     __all__.append("fwalk")
 
index 941fa2b2c5c87f34ea266deff90585e5dc80498e..7dc5784820e8476185f33307f481b04e04f81db6 100644 (file)
@@ -1687,8 +1687,6 @@ class FwalkTests(WalkTests):
 
     # fwalk() keeps file descriptors open
     test_walk_many_open_files = None
-    # fwalk() still uses recursion
-    test_walk_above_recursion_limit = None
 
 
 class BytesWalkTests(WalkTests):
diff --git a/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst b/Misc/NEWS.d/next/Library/2024-05-28-00-56-59.gh-issue-89727._bxoL3.rst
new file mode 100644 (file)
index 0000000..92222bc
--- /dev/null
@@ -0,0 +1,3 @@
+Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on
+deep directory trees by adjusting the implementation to be iterative instead
+of recursive.