From 9d5fc235f4bccf0bff1ecfb4f9c90f38bfde661c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 26 Aug 2025 17:30:13 +0200 Subject: [PATCH] discover-image: support clone + rm operation also unpriv --- src/shared/discover-image.c | 176 ++++++++++++++++++++++++++++++++++-- src/shared/discover-image.h | 5 +- 2 files changed, 169 insertions(+), 12 deletions(-) diff --git a/src/shared/discover-image.c b/src/shared/discover-image.c index f903e3a6bd2..f7552f3bcd6 100644 --- a/src/shared/discover-image.c +++ b/src/shared/discover-image.c @@ -36,10 +36,13 @@ #include "log.h" #include "loop-util.h" #include "mkdir.h" +#include "namespace-util.h" +#include "nsresource.h" #include "nulstr-util.h" #include "os-util.h" #include "path-lookup.h" #include "path-util.h" +#include "process-util.h" #include "rm-rf.h" #include "runtime-scope.h" #include "stat-util.h" @@ -47,6 +50,7 @@ #include "string-util.h" #include "strv.h" #include "time-util.h" +#include "uid-classification.h" #include "vpick.h" #include "xattr-util.h" @@ -480,6 +484,7 @@ static int image_make( if (r < 0) return r; + (*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid); (void) image_update_quota(*ret, fd); return 0; } @@ -505,6 +510,7 @@ static int image_make( if (r < 0) return r; + (*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid); return 0; } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) { @@ -1131,6 +1137,64 @@ int image_discover( return 0; } +static int unprivileged_remove(Image *i) { + int r; + + assert(i); + + _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K); + if (userns_fd < 0) + return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m"); + + _cleanup_close_ int tree_fd = -EBADF; + r = mountfsd_mount_directory( + i->path, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &tree_fd); + if (r < 0) + return r; + /* Fork off child that moves into userns and does the copying */ + r = safe_fork_full( + "rm-tree", + /* stdio_fds= */ NULL, + (int[]) { userns_fd, tree_fd, }, 2, + FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG, + /* ret_pid= */ NULL); + if (r < 0) + return log_debug_errno(r, "Process that was supposed to remove tree failed: %m"); + if (r == 0) { + /* child */ + + r = namespace_enter( + /* pidns_fd= */ -EBADF, + /* mntns_fd= */ -EBADF, + /* netns_fd= */ -EBADF, + userns_fd, + /* root_fd= */ -EBADF); + if (r < 0) { + log_debug_errno(r, "Failed to join user namespace: %m"); + _exit(EXIT_FAILURE); + } + + _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC); + if (dfd < 0) { + log_error_errno(r, "Failed to reopen tree fd: %m"); + _exit(EXIT_FAILURE); + } + + r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL); + if (r < 0) { + log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", i->path); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + return 0; +} + int image_remove(Image *i, RuntimeScope scope) { _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; _cleanup_strv_free_ char **settings = NULL; @@ -1172,6 +1236,11 @@ int image_remove(Image *i, RuntimeScope scope) { case IMAGE_DIRECTORY: /* Allow deletion of read-only directories */ (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL); + + /* If this is foreign owned, try an unprivileged remove first, but accept if that doesn't work, and do it directly either way, maybe it works */ + if (i->foreign_uid_owned) + (void) unprivileged_remove(i); + r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); if (r < 0) return r; @@ -1398,6 +1467,87 @@ static int get_pool_directory( return 0; } +static int unpriviled_clone(Image *i, const char *new_path) { + int r; + + assert(i); + assert(new_path); + + _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K); + if (userns_fd < 0) + return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m"); + + /* Map original image */ + _cleanup_close_ int tree_fd = -EBADF; + r = mountfsd_mount_directory( + i->path, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &tree_fd); + if (r < 0) + return r; + + /* Make new image */ + _cleanup_close_ int new_fd = -EBADF; + r = mountfsd_make_directory( + new_path, + /* flags= */ 0, + &new_fd); + if (r < 0) + return 0; + + /* Mount new image */ + _cleanup_close_ int target_fd = -EBADF; + r = mountfsd_mount_directory_fd( + new_fd, + userns_fd, + DISSECT_IMAGE_FOREIGN_UID, + &target_fd); + if (r < 0) + return r; + + /* Fork off child that moves into userns and does the copying */ + r = safe_fork_full( + "clone-tree", + /* stdio_fds= */ NULL, + (int[]) { userns_fd, tree_fd, target_fd }, 3, + FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG, + /* ret_pid= */ NULL); + if (r < 0) + return log_debug_errno(r, "Process that was supposed to clone tree failed: %m"); + if (r == 0) { + /* child */ + + r = namespace_enter( + /* pidns_fd= */ -EBADF, + /* mntns_fd= */ -EBADF, + /* netns_fd= */ -EBADF, + userns_fd, + /* root_fd= */ -EBADF); + if (r < 0) { + log_debug_errno(r, "Failed to join user namespace: %m"); + _exit(EXIT_FAILURE); + } + + r = copy_tree_at( + tree_fd, /* from= */ NULL, + target_fd, /* to= */ NULL, + /* override_uid= */ UID_INVALID, + /* override_gid= */ GID_INVALID, + COPY_REFLINK|COPY_HARDLINKS|COPY_MERGE_EMPTY|COPY_MERGE_APPLY_STAT|COPY_SAME_MOUNT|COPY_ALL_XATTRS, + /* denylist= */ NULL, + /* subvolumes= */ NULL); + if (r < 0) { + log_debug_errno(r, "Failed to copy clone tree: %m"); + _exit(EXIT_FAILURE); + } + + _exit(EXIT_SUCCESS); + } + + return 0; +} + int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope scope) { _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT; _cleanup_strv_free_ char **settings = NULL; @@ -1442,16 +1592,22 @@ int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope sco if (r < 0) return r; - r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path, - (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | - BTRFS_SNAPSHOT_FALLBACK_COPY | - BTRFS_SNAPSHOT_FALLBACK_DIRECTORY | - BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE | - BTRFS_SNAPSHOT_RECURSIVE | - BTRFS_SNAPSHOT_QUOTA); - if (r >= 0) - /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ - (void) btrfs_subvol_auto_qgroup(new_path, 0, true); + if (i->foreign_uid_owned) + r = unpriviled_clone(i, new_path); + else { + r = btrfs_subvol_snapshot_at( + AT_FDCWD, i->path, + AT_FDCWD, new_path, + (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | + BTRFS_SNAPSHOT_FALLBACK_COPY | + BTRFS_SNAPSHOT_FALLBACK_DIRECTORY | + BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE | + BTRFS_SNAPSHOT_RECURSIVE | + BTRFS_SNAPSHOT_QUOTA); + if (r >= 0) + /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */ + (void) btrfs_subvol_auto_qgroup(new_path, /* subvol_id= */ 0, /* create_intermediary_qgroup= */ true); + } break; } diff --git a/src/shared/discover-image.h b/src/shared/discover-image.h index e4bc73390a2..1322a26b25f 100644 --- a/src/shared/discover-image.h +++ b/src/shared/discover-image.h @@ -39,8 +39,9 @@ typedef struct Image { char **sysext_release; char **confext_release; - bool metadata_valid:1; - bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */ + bool metadata_valid:1; /* true if the above 6 metadata fields have been read from the image */ + bool discoverable:1; /* true if we know for sure that image_find() would find the image given just the short name */ + bool foreign_uid_owned:1; /* true if this is of type IMAGE_DIRECTORY/IMAGE_SUBVOLUME and owned by foreign UID range */ void *userdata; } Image; -- 2.47.3