From 968f3ebd20d049ecefdcd41e567150bbe449ed82 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 28 Nov 2025 18:26:56 +0100 Subject: [PATCH] discover-image: make sure we can remove mstacks --- src/shared/discover-image.c | 138 +++++++++++++++++++++++++----------- 1 file changed, 95 insertions(+), 43 deletions(-) diff --git a/src/shared/discover-image.c b/src/shared/discover-image.c index 0575310a2c2..4c0752c9791 100644 --- a/src/shared/discover-image.c +++ b/src/shared/discover-image.c @@ -45,6 +45,7 @@ #include "path-lookup.h" #include "path-util.h" #include "process-util.h" +#include "recurse-dir.h" #include "rm-rf.h" #include "runtime-scope.h" #include "stat-util.h" @@ -1275,11 +1276,95 @@ int image_discover( return 0; } +static int unpriv_remove_cb( + RecurseDirEvent event, + const char *path, + int dir_fd, + int inode_fd, + const struct dirent *de, + const struct statx *sx, + void *userdata) { + + int r, userns_fd = PTR_TO_FD(userdata); + + assert(sx); + + if (event == RECURSE_DIR_ENTER && + S_ISDIR(sx->stx_mode) && + uid_is_foreign(sx->stx_uid)) { + + /* This is owned by the foreign UID range, and a dir, let's remove it via mountfsd userns + * shenanigans. */ + + _cleanup_close_ int tree_fd = -EBADF; + r = mountfsd_mount_directory_fd( + /* vl= */ NULL, + inode_fd, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &tree_fd); + if (r < 0) + return r; + + /* Fork off child that moves into userns and does the copying */ + r = pidref_safe_fork_full( + "rm-tree", + /* stdio_fds= */ NULL, + (int[]) { userns_fd, tree_fd, }, 2, + FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG, + /* ret= */ NULL); + if (r < 0) + log_debug_errno(r, "Process that was supposed to remove subtree '%s' failed, ignoring: %m", empty_to_root(path)); + else if (r == 0) { + /* child */ + + r = namespace_enter( + /* pidns_fd= */ -EBADF, + /* mntns_fd= */ -EBADF, + /* netns_fd= */ -EBADF, + userns_fd, + /* root_fd= */ -EBADF); + if (r < 0) { + log_debug_errno(r, "Failed to join user namespace: %m"); + _exit(EXIT_FAILURE); + } + + _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC); + if (dfd < 0) { + log_error_errno(r, "Failed to reopen tree fd: %m"); + _exit(EXIT_FAILURE); + } + + r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL); + if (r < 0) { + log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", empty_to_root(path)); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + /* Don't descent further into this one, and delete it immediately */ + return RECURSE_DIR_UNLINK_GRACEFUL; + } + + /* Everything else try to remove */ + if (event == RECURSE_DIR_LEAVE) + return RECURSE_DIR_UNLINK_GRACEFUL; + + return RECURSE_DIR_CONTINUE; +} + static int unprivileged_remove(Image *i) { int r; assert(i); + /* We want this to work in complex .mstack/ hierarchies, where the main directory (and maybe a .v/ + * directory below or two) might be owned by the user themselves, but some subdirs might be owned by + * the foreign UID range. We deal with this by recursively descending down the tree, and removing + * foreign-owned ranges via userns shenanigans, and the rest just like that. */ + _cleanup_close_ int userns_fd = nsresource_allocate_userns( /* vl= */ NULL, /* name= */ NULL, @@ -1287,52 +1372,16 @@ static int unprivileged_remove(Image *i) { if (userns_fd < 0) return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m"); - _cleanup_close_ int tree_fd = -EBADF; - r = mountfsd_mount_directory( - /* vl= */ NULL, + r = recurse_dir_at( + AT_FDCWD, i->path, - userns_fd, - DISSECT_IMAGE_FOREIGN_UID, - &tree_fd); + /* statx_mask= */ STATX_TYPE|STATX_UID, + /* n_depth_max= */ UINT_MAX, + RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE|RECURSE_DIR_SAME_MOUNT|RECURSE_DIR_INODE_FD|RECURSE_DIR_TOPLEVEL, + unpriv_remove_cb, + FD_TO_PTR(userns_fd)); if (r < 0) return r; - /* Fork off child that moves into userns and does the copying */ - r = pidref_safe_fork_full( - "rm-tree", - /* stdio_fds= */ NULL, - (int[]) { userns_fd, tree_fd, }, 2, - FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG, - /* ret= */ NULL); - if (r < 0) - return log_debug_errno(r, "Process that was supposed to remove tree failed: %m"); - if (r == 0) { - /* child */ - - r = namespace_enter( - /* pidns_fd= */ -EBADF, - /* mntns_fd= */ -EBADF, - /* netns_fd= */ -EBADF, - userns_fd, - /* root_fd= */ -EBADF); - if (r < 0) { - log_debug_errno(r, "Failed to join user namespace: %m"); - _exit(EXIT_FAILURE); - } - - _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC); - if (dfd < 0) { - log_error_errno(r, "Failed to reopen tree fd: %m"); - _exit(EXIT_FAILURE); - } - - r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL); - if (r < 0) { - log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", i->path); - _exit(EXIT_FAILURE); - } - - _exit(EXIT_SUCCESS); - } return 0; } @@ -1374,6 +1423,9 @@ int image_remove(Image *i, RuntimeScope scope) { /* Allow deletion of read-only directories */ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); + _fallthrough_; + + case IMAGE_MSTACK: /* If this is foreign owned, try an unprivileged remove first, but accept if that doesn't work, and do it directly either way, maybe it works */ if (i->foreign_uid_owned) (void) unprivileged_remove(i); -- 2.47.3