]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
discover-image: make sure we can remove mstacks
authorLennart Poettering <lennart@amutable.com>
Fri, 28 Nov 2025 17:26:56 +0000 (18:26 +0100)
committerLennart Poettering <lennart@amutable.com>
Thu, 19 Feb 2026 14:08:20 +0000 (15:08 +0100)
src/shared/discover-image.c

index 0575310a2c200db620653da68acc6bf24a02d028..4c0752c97912825d060bd19055232bda9c0ac8c0 100644 (file)
@@ -45,6 +45,7 @@
 #include "path-lookup.h"
 #include "path-util.h"
 #include "process-util.h"
+#include "recurse-dir.h"
 #include "rm-rf.h"
 #include "runtime-scope.h"
 #include "stat-util.h"
@@ -1275,11 +1276,95 @@ int image_discover(
         return 0;
 }
 
+static int unpriv_remove_cb(
+                RecurseDirEvent event,
+                const char *path,
+                int dir_fd,
+                int inode_fd,
+                const struct dirent *de,
+                const struct statx *sx,
+                void *userdata) {
+
+        int r, userns_fd = PTR_TO_FD(userdata);
+
+        assert(sx);
+
+        if (event == RECURSE_DIR_ENTER &&
+            S_ISDIR(sx->stx_mode) &&
+            uid_is_foreign(sx->stx_uid)) {
+
+                /* This is owned by the foreign UID range, and a dir, let's remove it via mountfsd userns
+                 * shenanigans. */
+
+                _cleanup_close_ int tree_fd = -EBADF;
+                r = mountfsd_mount_directory_fd(
+                                /* vl= */ NULL,
+                                inode_fd,
+                                userns_fd,
+                                DISSECT_IMAGE_FOREIGN_UID,
+                                &tree_fd);
+                if (r < 0)
+                        return r;
+
+                /* Fork off child that moves into userns and does the copying */
+                r = pidref_safe_fork_full(
+                                "rm-tree",
+                                /* stdio_fds= */ NULL,
+                                (int[]) { userns_fd, tree_fd, }, 2,
+                                FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
+                                /* ret= */ NULL);
+                if (r < 0)
+                        log_debug_errno(r, "Process that was supposed to remove subtree '%s' failed, ignoring: %m", empty_to_root(path));
+                else if (r == 0) {
+                        /* child */
+
+                        r = namespace_enter(
+                                        /* pidns_fd= */ -EBADF,
+                                        /* mntns_fd= */ -EBADF,
+                                        /* netns_fd= */ -EBADF,
+                                        userns_fd,
+                                        /* root_fd= */ -EBADF);
+                        if (r < 0) {
+                                log_debug_errno(r, "Failed to join user namespace: %m");
+                                _exit(EXIT_FAILURE);
+                        }
+
+                        _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
+                        if (dfd < 0) {
+                                log_error_errno(r, "Failed to reopen tree fd: %m");
+                                _exit(EXIT_FAILURE);
+                        }
+
+                        r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
+                        if (r < 0) {
+                                log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", empty_to_root(path));
+                                _exit(EXIT_FAILURE);
+                        }
+
+                        _exit(EXIT_SUCCESS);
+                }
+
+                /* Don't descent further into this one, and delete it immediately */
+                return RECURSE_DIR_UNLINK_GRACEFUL;
+        }
+
+        /* Everything else try to remove */
+        if (event == RECURSE_DIR_LEAVE)
+                return RECURSE_DIR_UNLINK_GRACEFUL;
+
+        return RECURSE_DIR_CONTINUE;
+}
+
 static int unprivileged_remove(Image *i) {
         int r;
 
         assert(i);
 
+        /* We want this to work in complex .mstack/ hierarchies, where the main directory (and maybe a .v/
+         * directory below or two) might be owned by the user themselves, but some subdirs might be owned by
+         * the foreign UID range. We deal with this by recursively descending down the tree, and removing
+         * foreign-owned ranges via userns shenanigans, and the rest just like that. */
+
         _cleanup_close_ int userns_fd = nsresource_allocate_userns(
                         /* vl= */ NULL,
                         /* name= */ NULL,
@@ -1287,52 +1372,16 @@ static int unprivileged_remove(Image *i) {
         if (userns_fd < 0)
                 return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");
 
-        _cleanup_close_ int tree_fd = -EBADF;
-        r = mountfsd_mount_directory(
-                        /* vl= */ NULL,
+        r = recurse_dir_at(
+                        AT_FDCWD,
                         i->path,
-                        userns_fd,
-                        DISSECT_IMAGE_FOREIGN_UID,
-                        &tree_fd);
+                        /* statx_mask= */ STATX_TYPE|STATX_UID,
+                        /* n_depth_max= */ UINT_MAX,
+                        RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE|RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL,
+                        unpriv_remove_cb,
+                        FD_TO_PTR(userns_fd));
         if (r < 0)
                 return r;
-        /* Fork off child that moves into userns and does the copying */
-        r = pidref_safe_fork_full(
-                        "rm-tree",
-                        /* stdio_fds= */ NULL,
-                        (int[]) { userns_fd, tree_fd, }, 2,
-                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
-                        /* ret= */ NULL);
-        if (r < 0)
-                return log_debug_errno(r, "Process that was supposed to remove tree failed: %m");
-        if (r == 0) {
-                /* child */
-
-                r = namespace_enter(
-                                /* pidns_fd= */ -EBADF,
-                                /* mntns_fd= */ -EBADF,
-                                /* netns_fd= */ -EBADF,
-                                userns_fd,
-                                /* root_fd= */ -EBADF);
-                if (r < 0) {
-                        log_debug_errno(r, "Failed to join user namespace: %m");
-                        _exit(EXIT_FAILURE);
-                }
-
-                _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
-                if (dfd < 0) {
-                        log_error_errno(r, "Failed to reopen tree fd: %m");
-                        _exit(EXIT_FAILURE);
-                }
-
-                r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
-                if (r < 0) {
-                        log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", i->path);
-                        _exit(EXIT_FAILURE);
-                }
-
-                _exit(EXIT_SUCCESS);
-        }
 
         return 0;
 }
@@ -1374,6 +1423,9 @@ int image_remove(Image *i, RuntimeScope scope) {
                 /* Allow deletion of read-only directories */
                 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
 
+                _fallthrough_;
+
+        case IMAGE_MSTACK:
                 /* If this is foreign owned, try an unprivileged remove first, but accept if that doesn't work, and do it directly either way, maybe it works */
                 if (i->foreign_uid_owned)
                         (void) unprivileged_remove(i);