]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-89727: Fix `shutil.rmtree()` recursion error on deep trees (#119808)
authorBarney Gale <barney.gale@gmail.com>
Sat, 1 Jun 2024 18:49:12 +0000 (19:49 +0100)
committerGitHub <noreply@github.com>
Sat, 1 Jun 2024 18:49:12 +0000 (19:49 +0100)
Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees.

`shutil._rmtree_unsafe()` was fixed in a150679f90.

Lib/shutil.py
Lib/test/test_shutil.py
Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst [new file with mode: 0644]

index 03a9d7560304303824ad21857591ac0500053cc2..b0d49e98cfe5f9857c38c0dbe3dca1f909675af7 100644 (file)
@@ -635,81 +635,76 @@ def _rmtree_unsafe(path, onexc):
         onexc(os.rmdir, path, err)
 
 # Version using fd-based APIs to protect against races
-def _rmtree_safe_fd(topfd, path, onexc):
+def _rmtree_safe_fd(stack, onexc):
+    # Each stack item has four elements:
+    # * func: The first operation to perform: os.lstat, os.close or os.rmdir.
+    #   Walking a directory starts with an os.lstat() to detect symlinks; in
+    #   this case, func is updated before subsequent operations and passed to
+    #   onexc() if an error occurs.
+    # * dirfd: Open file descriptor, or None if we're processing the top-level
+    #   directory given to rmtree() and the user didn't supply dir_fd.
+    # * path: Path of file to operate upon. This is passed to onexc() if an
+    #   error occurs.
+    # * orig_entry: os.DirEntry, or None if we're processing the top-level
+    #   directory given to rmtree(). We used the cached stat() of the entry to
+    #   save a call to os.lstat() when walking subdirectories.
+    func, dirfd, path, orig_entry = stack.pop()
+    name = path if orig_entry is None else orig_entry.name
     try:
+        if func is os.close:
+            os.close(dirfd)
+            return
+        if func is os.rmdir:
+            os.rmdir(name, dir_fd=dirfd)
+            return
+
+        # Note: To guard against symlink races, we use the standard
+        # lstat()/open()/fstat() trick.
+        assert func is os.lstat
+        if orig_entry is None:
+            orig_st = os.lstat(name, dir_fd=dirfd)
+        else:
+            orig_st = orig_entry.stat(follow_symlinks=False)
+
+        func = os.open  # For error reporting.
+        topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd)
+
+        func = os.path.islink  # For error reporting.
+        try:
+            if not os.path.samestat(orig_st, os.fstat(topfd)):
+                # Symlinks to directories are forbidden, see GH-46010.
+                raise OSError("Cannot call rmtree on a symbolic link")
+            stack.append((os.rmdir, dirfd, path, orig_entry))
+        finally:
+            stack.append((os.close, topfd, path, orig_entry))
+
+        func = os.scandir  # For error reporting.
         with os.scandir(topfd) as scandir_it:
             entries = list(scandir_it)
-    except FileNotFoundError:
-        return
-    except OSError as err:
-        err.filename = path
-        onexc(os.scandir, path, err)
-        return
-    for entry in entries:
-        fullname = os.path.join(path, entry.name)
-        try:
-            is_dir = entry.is_dir(follow_symlinks=False)
-        except FileNotFoundError:
-            continue
-        except OSError:
-            is_dir = False
-        else:
-            if is_dir:
-                try:
-                    orig_st = entry.stat(follow_symlinks=False)
-                    is_dir = stat.S_ISDIR(orig_st.st_mode)
-                except FileNotFoundError:
-                    continue
-                except OSError as err:
-                    onexc(os.lstat, fullname, err)
-                    continue
-        if is_dir:
+        for entry in entries:
+            fullname = os.path.join(path, entry.name)
             try:
-                dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd)
-                dirfd_closed = False
+                if entry.is_dir(follow_symlinks=False):
+                    # Traverse into sub-directory.
+                    stack.append((os.lstat, topfd, fullname, entry))
+                    continue
             except FileNotFoundError:
                 continue
-            except OSError as err:
-                onexc(os.open, fullname, err)
-            else:
-                try:
-                    if os.path.samestat(orig_st, os.fstat(dirfd)):
-                        _rmtree_safe_fd(dirfd, fullname, onexc)
-                        try:
-                            os.close(dirfd)
-                        except OSError as err:
-                            # close() should not be retried after an error.
-                            dirfd_closed = True
-                            onexc(os.close, fullname, err)
-                        dirfd_closed = True
-                        try:
-                            os.rmdir(entry.name, dir_fd=topfd)
-                        except FileNotFoundError:
-                            continue
-                        except OSError as err:
-                            onexc(os.rmdir, fullname, err)
-                    else:
-                        try:
-                            # This can only happen if someone replaces
-                            # a directory with a symlink after the call to
-                            # os.scandir or stat.S_ISDIR above.
-                            raise OSError("Cannot call rmtree on a symbolic "
-                                          "link")
-                        except OSError as err:
-                            onexc(os.path.islink, fullname, err)
-                finally:
-                    if not dirfd_closed:
-                        try:
-                            os.close(dirfd)
-                        except OSError as err:
-                            onexc(os.close, fullname, err)
-        else:
+            except OSError:
+                pass
             try:
                 os.unlink(entry.name, dir_fd=topfd)
             except FileNotFoundError:
                 continue
             except OSError as err:
                 onexc(os.unlink, fullname, err)
+    except FileNotFoundError as err:
+        if orig_entry is None or func is os.close:
+            err.filename = path
+            onexc(func, path, err)
+    except OSError as err:
+        err.filename = path
+        onexc(func, path, err)
 
 _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                      os.supports_dir_fd and
@@ -762,41 +757,16 @@ def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None):
         # While the unsafe rmtree works fine on bytes, the fd based does not.
         if isinstance(path, bytes):
             path = os.fsdecode(path)
-        # Note: To guard against symlink races, we use the standard
-        # lstat()/open()/fstat() trick.
-        try:
-            orig_st = os.lstat(path, dir_fd=dir_fd)
-        except OSError as err:
-            onexc(os.lstat, path, err)
-            return
+        stack = [(os.lstat, dir_fd, path, None)]
         try:
-            fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd)
-            fd_closed = False
-        except OSError as err:
-            onexc(os.open, path, err)
-            return
-        try:
-            if os.path.samestat(orig_st, os.fstat(fd)):
-                _rmtree_safe_fd(fd, path, onexc)
-                try:
-                    os.close(fd)
-                except OSError as err:
-                    # close() should not be retried after an error.
-                    fd_closed = True
-                    onexc(os.close, path, err)
-                fd_closed = True
-                try:
-                    os.rmdir(path, dir_fd=dir_fd)
-                except OSError as err:
-                    onexc(os.rmdir, path, err)
-            else:
-                try:
-                    # symlinks to directories are forbidden, see bug #1669
-                    raise OSError("Cannot call rmtree on a symbolic link")
-                except OSError as err:
-                    onexc(os.path.islink, path, err)
+            while stack:
+                _rmtree_safe_fd(stack, onexc)
         finally:
-            if not fd_closed:
+            # Close any file descriptors still on the stack.
+            while stack:
+                func, fd, path, entry = stack.pop()
+                if func is not os.close:
+                    continue
                 try:
                     os.close(fd)
                 except OSError as err:
index 01f139073dcd97e66843872db26c77ae671d3168..bccb81e0737c5709a9bac5af2434e8acace5ce1e 100644 (file)
@@ -741,7 +741,6 @@ class TestRmTree(BaseTest, unittest.TestCase):
             shutil.rmtree(TESTFN)
             raise
 
-    @unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)")
     def test_rmtree_above_recursion_limit(self):
         recursion_limit = 40
         # directory_depth > recursion_limit
diff --git a/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst
new file mode 100644 (file)
index 0000000..854c566
--- /dev/null
@@ -0,0 +1,2 @@
+Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised
+on deep directory trees.