]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
discover-image: support clone + rm operation also unpriv 39408/head
authorLennart Poettering <lennart@poettering.net>
Tue, 26 Aug 2025 15:30:13 +0000 (17:30 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 24 Oct 2025 15:51:00 +0000 (17:51 +0200)
src/shared/discover-image.c
src/shared/discover-image.h

index f903e3a6bd27e452a5c641a6f660bcc6adf7ed7d..f7552f3bcd649b15331fe58ab6104f7cc524d21c 100644 (file)
 #include "log.h"
 #include "loop-util.h"
 #include "mkdir.h"
+#include "namespace-util.h"
+#include "nsresource.h"
 #include "nulstr-util.h"
 #include "os-util.h"
 #include "path-lookup.h"
 #include "path-util.h"
+#include "process-util.h"
 #include "rm-rf.h"
 #include "runtime-scope.h"
 #include "stat-util.h"
@@ -47,6 +50,7 @@
 #include "string-util.h"
 #include "strv.h"
 #include "time-util.h"
+#include "uid-classification.h"
 #include "vpick.h"
 #include "xattr-util.h"
 
@@ -480,6 +484,7 @@ static int image_make(
                                 if (r < 0)
                                         return r;
 
+                                (*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid);
                                 (void) image_update_quota(*ret, fd);
                                 return 0;
                         }
@@ -505,6 +510,7 @@ static int image_make(
                 if (r < 0)
                         return r;
 
+                (*ret)->foreign_uid_owned = uid_is_foreign(st->st_uid);
                 return 0;
 
         } else if (S_ISREG(st->st_mode) && endswith(filename, ".raw")) {
@@ -1131,6 +1137,64 @@ int image_discover(
         return 0;
 }
 
+static int unprivileged_remove(Image *i) {
+        int r;
+
+        assert(i);
+
+        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
+        if (userns_fd < 0)
+                return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");
+
+        _cleanup_close_ int tree_fd = -EBADF;
+        r = mountfsd_mount_directory(
+                        i->path,
+                        userns_fd,
+                        DISSECT_IMAGE_FOREIGN_UID,
+                        &tree_fd);
+        if (r < 0)
+                return r;
+        /* Fork off child that moves into userns and does the copying */
+        r = safe_fork_full(
+                        "rm-tree",
+                        /* stdio_fds= */ NULL,
+                        (int[]) { userns_fd, tree_fd, }, 2,
+                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
+                        /* ret_pid= */ NULL);
+        if (r < 0)
+                return log_debug_errno(r, "Process that was supposed to remove tree failed: %m");
+        if (r == 0) {
+                /* child */
+
+                r = namespace_enter(
+                                /* pidns_fd= */ -EBADF,
+                                /* mntns_fd= */ -EBADF,
+                                /* netns_fd= */ -EBADF,
+                                userns_fd,
+                                /* root_fd= */ -EBADF);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to join user namespace: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _cleanup_close_ int dfd = fd_reopen(tree_fd, O_DIRECTORY|O_CLOEXEC);
+                if (dfd < 0) {
+                        log_error_errno(r, "Failed to reopen tree fd: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = rm_rf_children(dfd, REMOVE_PHYSICAL|REMOVE_SUBVOLUME|REMOVE_CHMOD, /* root_dev= */ NULL);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to empty '%s' directory in foreign UID mode: %m", i->path);
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        return 0;
+}
+
 int image_remove(Image *i, RuntimeScope scope) {
         _cleanup_(release_lock_file) LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
         _cleanup_strv_free_ char **settings = NULL;
@@ -1172,6 +1236,11 @@ int image_remove(Image *i, RuntimeScope scope) {
         case IMAGE_DIRECTORY:
                 /* Allow deletion of read-only directories */
                 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
+
+                /* If this is foreign owned, try an unprivileged remove first, but accept if that doesn't work, and do it directly either way, maybe it works */
+                if (i->foreign_uid_owned)
+                        (void) unprivileged_remove(i);
+
                 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
                 if (r < 0)
                         return r;
@@ -1398,6 +1467,87 @@ static int get_pool_directory(
         return 0;
 }
 
+static int unpriviled_clone(Image *i, const char *new_path) {
+        int r;
+
+        assert(i);
+        assert(new_path);
+
+        _cleanup_close_ int userns_fd = nsresource_allocate_userns(/* name= */ NULL, /* size= */ NSRESOURCE_UIDS_64K);
+        if (userns_fd < 0)
+                return log_debug_errno(userns_fd, "Failed to allocate transient user namespace: %m");
+
+        /* Map original image */
+        _cleanup_close_ int tree_fd = -EBADF;
+        r = mountfsd_mount_directory(
+                        i->path,
+                        userns_fd,
+                        DISSECT_IMAGE_FOREIGN_UID,
+                        &tree_fd);
+        if (r < 0)
+                return r;
+
+        /* Make new image */
+        _cleanup_close_ int new_fd = -EBADF;
+        r = mountfsd_make_directory(
+                        new_path,
+                        /* flags= */ 0,
+                        &new_fd);
+        if (r < 0)
+                return 0;
+
+        /* Mount new image */
+        _cleanup_close_ int target_fd = -EBADF;
+        r = mountfsd_mount_directory_fd(
+                        new_fd,
+                        userns_fd,
+                        DISSECT_IMAGE_FOREIGN_UID,
+                        &target_fd);
+        if (r < 0)
+                return r;
+
+        /* Fork off child that moves into userns and does the copying */
+        r = safe_fork_full(
+                        "clone-tree",
+                        /* stdio_fds= */ NULL,
+                        (int[]) { userns_fd, tree_fd, target_fd }, 3,
+                        FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGTERM|FORK_WAIT|FORK_REOPEN_LOG,
+                        /* ret_pid= */ NULL);
+        if (r < 0)
+                return log_debug_errno(r, "Process that was supposed to clone tree failed: %m");
+        if (r == 0) {
+                /* child */
+
+                r = namespace_enter(
+                                /* pidns_fd= */ -EBADF,
+                                /* mntns_fd= */ -EBADF,
+                                /* netns_fd= */ -EBADF,
+                                userns_fd,
+                                /* root_fd= */ -EBADF);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to join user namespace: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = copy_tree_at(
+                                tree_fd, /* from= */ NULL,
+                                target_fd, /* to= */ NULL,
+                                /* override_uid= */ UID_INVALID,
+                                /* override_gid= */ GID_INVALID,
+                                COPY_REFLINK|COPY_HARDLINKS|COPY_MERGE_EMPTY|COPY_MERGE_APPLY_STAT|COPY_SAME_MOUNT|COPY_ALL_XATTRS,
+                                /* denylist= */ NULL,
+                                /* subvolumes= */ NULL);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to copy clone tree: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        return 0;
+}
+
 int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope scope) {
         _cleanup_(release_lock_file) LockFile name_lock = LOCK_FILE_INIT;
         _cleanup_strv_free_ char **settings = NULL;
@@ -1442,16 +1592,22 @@ int image_clone(Image *i, const char *new_name, bool read_only, RuntimeScope sco
                 if (r < 0)
                         return r;
 
-                r = btrfs_subvol_snapshot_at(AT_FDCWD, i->path, AT_FDCWD, new_path,
-                                             (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
-                                             BTRFS_SNAPSHOT_FALLBACK_COPY |
-                                             BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
-                                             BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
-                                             BTRFS_SNAPSHOT_RECURSIVE |
-                                             BTRFS_SNAPSHOT_QUOTA);
-                if (r >= 0)
-                        /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
-                        (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
+                if (i->foreign_uid_owned)
+                        r = unpriviled_clone(i, new_path);
+                else {
+                        r = btrfs_subvol_snapshot_at(
+                                        AT_FDCWD, i->path,
+                                        AT_FDCWD, new_path,
+                                        (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+                                        BTRFS_SNAPSHOT_FALLBACK_COPY |
+                                        BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+                                        BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+                                        BTRFS_SNAPSHOT_RECURSIVE |
+                                        BTRFS_SNAPSHOT_QUOTA);
+                        if (r >= 0)
+                                /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
+                                (void) btrfs_subvol_auto_qgroup(new_path, /* subvol_id= */ 0, /* create_intermediary_qgroup= */ true);
+                }
 
                 break;
         }
index e4bc73390a2d036a0ba680b78520ea20bc596da1..1322a26b25f87e43b1cb953d5b10a616ef6909ea 100644 (file)
@@ -39,8 +39,9 @@ typedef struct Image {
         char **sysext_release;
         char **confext_release;
 
-        bool metadata_valid:1;
-        bool discoverable:1;  /* true if we know for sure that image_find() would find the image given just the short name */
+        bool metadata_valid:1;     /* true if the above 6 metadata fields have been read from the image */
+        bool discoverable:1;       /* true if we know for sure that image_find() would find the image given just the short name */
+        bool foreign_uid_owned:1;  /* true if this is of type IMAGE_DIRECTORY/IMAGE_SUBVOLUME and owned by foreign UID range */
 
         void *userdata;
 } Image;