git.ipfire.org Git - thirdparty/systemd.git/commitdiff
dissect-image: optionally allow mounting via new kernel mount API in two steps
author: Lennart Poettering <lennart@poettering.net>
Tue, 14 Mar 2023 10:09:46 +0000 (11:09 +0100)
committer: Luca Boccassi <bluca@debian.org>
Mon, 2 Oct 2023 13:02:32 +0000 (14:02 +0100)
This adds support for the new fsmount() logic of the kernel: we'll first
create an unattached fsmount fd, and then in a second step attach this
to some real file system inode – as opposed to attaching the file system
directly. The benefit of this is that we can pass the open fsmount fds
over some sockets if need be, to isolate the mounting code from the
attaching code.

src/core/namespace.c
src/dissect/dissect.c
src/nspawn/nspawn.c
src/portable/portable.c
src/shared/dissect-image.c
src/shared/dissect-image.h
src/sysext/sysext.c
src/test/test-loop-block.c

index 206453f30f9cf45c3f276a09fe14f04e1a4198ac..dc3fa8d4a279b7371ce142ef850ffb055a49fbfc 100644 (file)
@@ -2494,7 +2494,13 @@ int setup_namespace(
 
         if (root_image) {
                 /* A root image is specified, mount it to the right place */
-                r = dissected_image_mount(dissected_image, root, UID_INVALID, UID_INVALID, dissect_image_flags);
+                r = dissected_image_mount(
+                                dissected_image,
+                                root,
+                                /* uid_shift= */ UID_INVALID,
+                                /* uid_range= */ UID_INVALID,
+                                /* userns_fd= */ -EBADF,
+                                dissect_image_flags);
                 if (r < 0) {
                         log_debug_errno(r, "Failed to mount root image: %m");
                         goto finish;
index 291cb66375dd23bbed341ce62da37e5be2ec3774..0b4c5ba22fe5e6eaab7b791c18c5d8936a513ed1 100644 (file)
@@ -1063,7 +1063,13 @@ static int action_mount(DissectedImage *m, LoopDevice *d) {
         assert(d);
         assert(arg_action == ACTION_MOUNT);
 
-        r = dissected_image_mount_and_warn(m, arg_path, UID_INVALID, UID_INVALID, arg_flags);
+        r = dissected_image_mount_and_warn(
+                        m,
+                        arg_path,
+                        /* uid_shift= */ UID_INVALID,
+                        /* uid_range= */ UID_INVALID,
+                        /* userns_fd= */ -EBADF,
+                        arg_flags);
         if (r < 0)
                 return r;
 
@@ -1298,7 +1304,13 @@ static int action_list_or_mtree_or_copy(DissectedImage *m, LoopDevice *d) {
 
                 created_dir = TAKE_PTR(temp);
 
-                r = dissected_image_mount_and_warn(m, created_dir, UID_INVALID, UID_INVALID, arg_flags);
+                r = dissected_image_mount_and_warn(
+                                m,
+                                created_dir,
+                                /* uid_shift= */ UID_INVALID,
+                                /* uid_range= */ UID_INVALID,
+                                /* userns_fd= */ -EBADF,
+                                arg_flags);
                 if (r < 0)
                         return r;
 
@@ -1549,7 +1561,13 @@ static int action_with(DissectedImage *m, LoopDevice *d) {
 
         created_dir = TAKE_PTR(temp);
 
-        r = dissected_image_mount_and_warn(m, created_dir, UID_INVALID, UID_INVALID, arg_flags);
+        r = dissected_image_mount_and_warn(
+                        m,
+                        created_dir,
+                        /* uid_shift= */ UID_INVALID,
+                        /* uid_range= */ UID_INVALID,
+                        /* userns_fd= */ -EBADF,
+                        arg_flags);
         if (r < 0)
                 return r;
 
index 3674f6b023b64da931968e1b8490640955015af0..05d04dce1328ae6a2320e7ff2e8d7cdd473d5c1b 100644 (file)
@@ -3681,6 +3681,7 @@ static int outer_child(
                                 directory,
                                 arg_uid_shift,
                                 arg_uid_range,
+                                /* userns_fd= */ -EBADF,
                                 DISSECT_IMAGE_MOUNT_ROOT_ONLY|
                                 DISSECT_IMAGE_DISCARD_ON_LOOP|
                                 DISSECT_IMAGE_USR_NO_ROOT|
@@ -3845,6 +3846,7 @@ static int outer_child(
                                 directory,
                                 arg_uid_shift,
                                 arg_uid_range,
+                                /* userns_fd= */ -EBADF,
                                 DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY|
                                 DISSECT_IMAGE_DISCARD_ON_LOOP|
                                 DISSECT_IMAGE_USR_NO_ROOT|
index 52f38dc83ecf97da83bd30d3dbea753167be8752..a672bff3c5479e119d573adac52aa770eca87aa8 100644 (file)
@@ -424,7 +424,13 @@ static int portable_extract_by_path(
                         else
                                 flags |= DISSECT_IMAGE_VALIDATE_OS;
 
-                        r = dissected_image_mount(m, tmpdir, UID_INVALID, UID_INVALID, flags);
+                        r = dissected_image_mount(
+                                        m,
+                                        tmpdir,
+                                        /* uid_shift= */ UID_INVALID,
+                                        /* uid_range= */ UID_INVALID,
+                                        /* userns_fd= */ -EBADF,
+                                        flags);
                         if (r < 0) {
                                 log_debug_errno(r, "Failed to mount dissected image: %m");
                                 goto child_finish;
index be4a3d1738f6c4f8b6026435911e62bc3b62d4c7..2036b5adbaaddb2d73d074a985ef896827cb3aa3 100644 (file)
@@ -51,6 +51,7 @@
 #include "import-util.h"
 #include "io-util.h"
 #include "missing_mount.h"
+#include "missing_syscall.h"
 #include "mkdir-label.h"
 #include "mount-util.h"
 #include "mountpoint-util.h"
@@ -515,6 +516,7 @@ static void dissected_partition_done(DissectedPartition *p) {
         free(p->decrypted_node);
         free(p->mount_options);
         safe_close(p->mount_node_fd);
+        safe_close(p->fsmount_fd);
 
         *p = DISSECTED_PARTITION_NULL;
 }
@@ -836,6 +838,7 @@ static int dissect_image(
                                 .mount_node_fd = TAKE_FD(mount_node_fd),
                                 .offset = 0,
                                 .size = UINT64_MAX,
+                                .fsmount_fd = -EBADF,
                         };
 
                         return 0;
@@ -1252,6 +1255,7 @@ static int dissect_image(
                                         .offset = (uint64_t) start * 512,
                                         .size = (uint64_t) size * 512,
                                         .gpt_flags = pflags,
+                                        .fsmount_fd = -EBADF,
                                 };
                         }
 
@@ -1326,6 +1330,7 @@ static int dissect_image(
                                         .mount_node_fd = TAKE_FD(mount_node_fd),
                                         .offset = (uint64_t) start * 512,
                                         .size = (uint64_t) size * 512,
+                                        .fsmount_fd = -EBADF,
                                 };
 
                                 break;
@@ -1416,6 +1421,7 @@ static int dissect_image(
                                         .mount_node_fd = TAKE_FD(mount_node_fd),
                                         .offset = UINT64_MAX,
                                         .size = UINT64_MAX,
+                                        .fsmount_fd = -EBADF,
                                 };
                         }
                 }
@@ -1739,11 +1745,15 @@ static int run_fsck(int node_fd, const char *fstype) {
         return 0;
 }
 
-static int fs_grow(const char *node_path, const char *mount_path) {
-        _cleanup_close_ int mount_fd = -EBADF, node_fd = -EBADF;
+static int fs_grow(const char *node_path, int mount_fd, const char *mount_path) {
+        _cleanup_close_ int _mount_fd = -EBADF, node_fd = -EBADF;
         uint64_t size, newsize;
+        const char *id;
         int r;
 
+        assert(node_path);
+        assert(mount_fd >= 0 || mount_path);
+
         node_fd = open(node_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
         if (node_fd < 0)
                 return log_debug_errno(errno, "Failed to open node device %s: %m", node_path);
@@ -1751,22 +1761,34 @@ static int fs_grow(const char *node_path, const char *mount_path) {
         if (ioctl(node_fd, BLKGETSIZE64, &size) != 0)
                 return log_debug_errno(errno, "Failed to get block device size of %s: %m", node_path);
 
-        mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
-        if (mount_fd < 0)
-                return log_debug_errno(errno, "Failed to open mountd file system %s: %m", mount_path);
+        if (mount_fd < 0) {
+                assert(mount_path);
+
+                _mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+                if (_mount_fd < 0)
+                        return log_debug_errno(errno, "Failed to open mounted file system %s: %m", mount_path);
+
+                mount_fd = _mount_fd;
+        } else {
+                mount_fd = fd_reopen_condition(mount_fd, O_RDONLY|O_DIRECTORY|O_CLOEXEC, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &_mount_fd);
+                if (mount_fd < 0)
+                        return log_debug_errno(errno, "Failed to reopen mount node: %m");
+        }
+
+        id = mount_path ?: node_path;
 
-        log_debug("Resizing \"%s\" to %"PRIu64" bytes...", mount_path, size);
+        log_debug("Resizing \"%s\" to %"PRIu64" bytes...", id, size);
         r = resize_fs(mount_fd, size, &newsize);
         if (r < 0)
-                return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", mount_path, size);
+                return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", id, size);
 
         if (newsize == size)
                 log_debug("Successfully resized \"%s\" to %s bytes.",
-                          mount_path, FORMAT_BYTES(newsize));
+                          id, FORMAT_BYTES(newsize));
         else {
                 assert(newsize < size);
                 log_debug("Successfully resized \"%s\" to %s bytes (%"PRIu64" bytes lost due to blocksize).",
-                          mount_path, FORMAT_BYTES(newsize), size - newsize);
+                          id, FORMAT_BYTES(newsize), size - newsize);
         }
 
         return 0;
@@ -1863,6 +1885,14 @@ int partition_pick_mount_options(
         return 0;
 }
 
+static bool need_user_mapping(uid_t uid_shift, uid_t uid_range) {
+
+        if (!uid_is_valid(uid_shift))
+                return false;
+
+        return uid_shift != 0 || uid_range != UINT32_MAX;
+}
+
 static int mount_partition(
                 PartitionDesignator d,
                 DissectedPartition *m,
@@ -1870,81 +1900,92 @@ static int mount_partition(
                 const char *directory,
                 uid_t uid_shift,
                 uid_t uid_range,
+                int userns_fd,
                 DissectImageFlags flags) {
 
         _cleanup_free_ char *chased = NULL, *options = NULL;
-        bool rw, discard, remap_uid_gid = false;
-        const char *p, *node, *fstype;
+        const char *p = NULL, *node, *fstype = NULL;
+        bool rw, discard, grow;
         unsigned long ms_flags;
         int r;
 
         assert(m);
-        assert(where);
 
-        if (m->mount_node_fd < 0)
+        if (!m->found)
                 return 0;
 
-        /* Use decrypted node and matching fstype if available, otherwise use the original device */
-        node = FORMAT_PROC_FD_PATH(m->mount_node_fd);
-        fstype = dissected_partition_fstype(m);
+        /* Check the various combinations when we can't do anything anymore */
+        if (m->fsmount_fd < 0 && m->mount_node_fd < 0)
+                return 0;
+        if (m->fsmount_fd >= 0 && !where)
+                return 0;
+        if (!where && m->mount_node_fd < 0)
+                return 0;
 
-        if (!fstype)
-                return -EAFNOSUPPORT;
+        if (m->fsmount_fd < 0) {
+                fstype = dissected_partition_fstype(m);
+                if (!fstype)
+                        return -EAFNOSUPPORT;
 
-        /* We are looking at an encrypted partition? This either means stacked encryption, or the caller
-         * didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this
-         * case. */
-        if (streq(fstype, "crypto_LUKS"))
-                return -EUNATCH;
+                /* We are looking at an encrypted partition? This either means stacked encryption, or the
+                 * caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error
+                 * for this case. */
+                if (streq(fstype, "crypto_LUKS"))
+                        return -EUNATCH;
 
-        r = dissect_fstype_ok(fstype);
-        if (r < 0)
-                return r;
-        if (!r)
-                return -EIDRM; /* Recognizable error */
+                r = dissect_fstype_ok(fstype);
+                if (r < 0)
+                        return r;
+                if (!r)
+                        return -EIDRM; /* Recognizable error */
+        }
 
+        node = m->mount_node_fd < 0 ? NULL : FORMAT_PROC_FD_PATH(m->mount_node_fd);
         rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
 
         discard = ((flags & DISSECT_IMAGE_DISCARD) ||
-                   ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0));
+                   ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && (m->node && is_loop_device(m->node) > 0)));
 
-        if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
+        grow = rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS);
+
+        if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw && m->mount_node_fd >= 0 && m->fsmount_fd < 0) {
                 r = run_fsck(m->mount_node_fd, fstype);
                 if (r < 0)
                         return r;
         }
 
-        if (directory) {
-                /* Automatically create missing mount points inside the image, if necessary. */
-                r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755, NULL);
-                if (r < 0 && r != -EROFS)
-                        return r;
-
-                r = chase(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
-                if (r < 0)
-                        return r;
+        if (where) {
+                if (directory) {
+                        /* Automatically create missing mount points inside the image, if necessary. */
+                        r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755, NULL);
+                        if (r < 0 && r != -EROFS)
+                                return r;
 
-                p = chased;
-        } else {
-                /* Create top-level mount if missing – but only if this is asked for. This won't modify the
-                 * image (as the branch above does) but the host hierarchy, and the created directory might
-                 * survive our mount in the host hierarchy hence. */
-                if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
-                        r = mkdir_p(where, 0755);
+                        r = chase(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
                         if (r < 0)
                                 return r;
-                }
 
-                p = where;
-        }
+                        p = chased;
+                } else {
+                        /* Create top-level mount if missing – but only if this is asked for. This won't modify the
+                         * image (as the branch above does) but the host hierarchy, and the created directory might
+                         * survive our mount in the host hierarchy hence. */
+                        if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
+                                r = mkdir_p(where, 0755);
+                                if (r < 0)
+                                        return r;
+                        }
 
-        r = partition_pick_mount_options(d, dissected_partition_fstype(m), rw, discard, &options, &ms_flags);
-        if (r < 0)
-                return r;
+                        p = where;
+                }
+        }
 
-        if (uid_is_valid(uid_shift) && uid_shift != 0) {
+        if (m->fsmount_fd < 0) {
+                r = partition_pick_mount_options(d, fstype, rw, discard, &options, &ms_flags);
+                if (r < 0)
+                        return r;
 
-                if (fstype_can_uid_gid(fstype)) {
+                if (need_user_mapping(uid_shift, uid_range) && fstype_can_uid_gid(fstype)) {
                         _cleanup_free_ char *uid_option = NULL;
 
                         if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
@@ -1952,31 +1993,56 @@ static int mount_partition(
 
                         if (!strextend_with_separator(&options, ",", uid_option))
                                 return -ENOMEM;
-                } else if (FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED))
-                        remap_uid_gid = true;
+
+                        userns_fd = -EBADF; /* Not needed */
+                }
+
+                if (!isempty(m->mount_options))
+                        if (!strextend_with_separator(&options, ",", m->mount_options))
+                                return -ENOMEM;
         }
 
-        if (!isempty(m->mount_options))
-                if (!strextend_with_separator(&options, ",", m->mount_options))
-                        return -ENOMEM;
+        if (p) {
+                if (m->fsmount_fd >= 0) {
+                        /* Case #1: Attach existing fsmount fd to the file system */
 
-        r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
-        if (r < 0)
-                return r;
+                        if (move_mount(m->fsmount_fd, "", -EBADF, p, MOVE_MOUNT_F_EMPTY_PATH) < 0)
+                                return -errno;
+
+                } else {
+                        assert(node);
 
-        if (rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS))
-                (void) fs_grow(node, p);
+                        /* Case #2: Mount directly into place */
+                        r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
+                        if (r < 0)
+                                return r;
 
-        if (remap_uid_gid) {
-                r = remount_idmap(p, uid_shift, uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
-                if (r < 0)
-                        return r;
+                        if (grow)
+                                (void) fs_grow(node, -EBADF, p);
+
+                        if (userns_fd >= 0) {
+                                r = remount_idmap_fd(p, userns_fd);
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+        } else {
+                assert(node);
+
+                /* Case #3: Create fsmount fd */
+
+                m->fsmount_fd = make_fsmount(LOG_DEBUG, node, fstype, ms_flags, options, userns_fd);
+                if (m->fsmount_fd < 0)
+                        return m->fsmount_fd;
+
+                if (grow)
+                        (void) fs_grow(node, m->fsmount_fd, NULL);
         }
 
         return 1;
 }
 
-static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlags flags) {
+static int mount_root_tmpfs(const char *where, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags) {
         _cleanup_free_ char *options = NULL;
         int r;
 
@@ -1990,7 +2056,7 @@ static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlag
                         return r;
         }
 
-        if (uid_is_valid(uid_shift)) {
+        if (need_user_mapping(uid_shift, uid_range)) {
                 if (asprintf(&options, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
                         return -ENOMEM;
         }
@@ -2028,14 +2094,25 @@ int dissected_image_mount(
                 const char *where,
                 uid_t uid_shift,
                 uid_t uid_range,
+                int userns_fd,
                 DissectImageFlags flags) {
 
+        _cleanup_close_ int my_userns_fd = -EBADF;
         int r;
 
         assert(m);
-        assert(where);
 
-        /* Returns:
+        /* If 'where' is NULL then we'll use the new mount API to create fsmount() fds for the mounts and
+         * store them in DissectedPartition.fsmount_fd.
+         *
+         * If 'where' is not NULL then we'll either mount the partitions to the right places ourselves,
+         * or use DissectedPartition.fsmount_fd and bind it to the right places.
+         *
+         * This allows splitting the setting up of the superblocks and the binding to file system paths into
+         * two distinct and differently privileged components: one that gets the fsmount fds, and the other
+         * that then applies them.
+         *
+         * Returns:
          *
          *  -ENXIO        → No root partition found
          *  -EMEDIUMTYPE  → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
@@ -2046,82 +2123,103 @@ int dissected_image_mount(
          *  -EIDRM        → File system is not among allowlisted "common" file systems
          */
 
+        if (!where && (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0)
+                return -EOPNOTSUPP; /* for now, not supported */
+
         if (!(m->partitions[PARTITION_ROOT].found ||
               (m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
                 return -ENXIO; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */
 
+        if (userns_fd < 0 && need_user_mapping(uid_shift, uid_range) && FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED)) {
+
+                my_userns_fd = make_userns(uid_shift, uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
+                if (my_userns_fd < 0)
+                        return my_userns_fd;
+
+                userns_fd = my_userns_fd;
+        }
+
         if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
 
                 /* First mount the root fs. If there's none we use a tmpfs. */
-                if (m->partitions[PARTITION_ROOT].found)
-                        r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
-                else
-                        r = mount_root_tmpfs(where, uid_shift, flags);
-                if (r < 0)
-                        return r;
+                if (m->partitions[PARTITION_ROOT].found) {
+                        r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, userns_fd, flags);
+                        if (r < 0)
+                                return r;
+
+                } else if (where) {
+                        r = mount_root_tmpfs(where, uid_shift, uid_range, flags);
+                        if (r < 0)
+                                return r;
+                }
 
                 /* For us mounting root always means mounting /usr as well */
-                r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
+                r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, userns_fd, flags);
                 if (r < 0)
                         return r;
+        }
 
-                if ((flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
-                        /* If either one of the validation flags are set, ensure that the image qualifies
-                         * as one or the other (or both). */
-                        bool ok = false;
+        if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0 &&
+            (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
+                /* If either one of the validation flags are set, ensure that the image qualifies as
+                 * one or the other (or both). */
+                bool ok = false;
 
-                        if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
-                                r = path_is_os_tree(where);
+                assert(where);
+
+                if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
+                        r = path_is_os_tree(where);
+                        if (r < 0)
+                                return r;
+                        if (r > 0)
+                                ok = true;
+                }
+                if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT)) {
+                        r = extension_has_forbidden_content(where);
+                        if (r < 0)
+                                return r;
+                        if (r == 0) {
+                                r = path_is_extension_tree(IMAGE_SYSEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
+                                if (r == 0)
+                                        r = path_is_extension_tree(IMAGE_CONFEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
                                 if (r < 0)
                                         return r;
                                 if (r > 0)
                                         ok = true;
                         }
-                        if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT)) {
-                                r = extension_has_forbidden_content(where);
-                                if (r < 0)
-                                        return r;
-                                if (r == 0) {
-                                        r = path_is_extension_tree(IMAGE_SYSEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
-                                        if (r == 0)
-                                                r = path_is_extension_tree(IMAGE_CONFEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
-                                        if (r < 0)
-                                                return r;
-                                        if (r > 0)
-                                                ok = true;
-                                }
-                        }
-
-                        if (!ok)
-                                return -ENOMEDIUM;
                 }
+
+                if (!ok)
+                        return -ENOMEDIUM;
         }
 
         if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
                 return 0;
 
-        r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
+        r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, userns_fd, flags);
         if (r < 0)
                 return r;
 
-        r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
+        r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, userns_fd, flags);
         if (r < 0)
                 return r;
 
-        r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
+        r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, userns_fd, flags);
         if (r < 0)
                 return r;
 
-        r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
+        r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, userns_fd, flags);
         if (r < 0)
                 return r;
 
-        int slash_boot_is_available;
-        r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok = */ true);
-        if (r < 0)
-                return r;
-        if (r > 0) {
-                r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
+        int slash_boot_is_available = 0;
+        if (where) {
+                r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok = */ true);
+                if (r < 0)
+                        return r;
+        }
+        if (!where || slash_boot_is_available) {
+                r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, userns_fd, flags);
                 if (r < 0)
                         return r;
                 slash_boot_is_available = !r;
@@ -2130,31 +2228,32 @@ int dissected_image_mount(
         if (m->partitions[PARTITION_ESP].found) {
                 const char *esp_path = NULL;
 
-                /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the XBOOTLDR
-                 * partition into it. Otherwise, use /efi instead, but only if it exists and is empty. */
+                if (where) {
+                        /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the
+                         * XBOOTLDR partition into it. Otherwise, use /efi instead, but only if it exists
+                         * and is empty. */
 
-                if (slash_boot_is_available) {
-                        r = mount_point_is_available(where, "/boot", /* missing_ok = */ false);
-                        if (r < 0)
-                                return r;
-                        if (r > 0)
-                                esp_path = "/boot";
-                }
+                        if (slash_boot_is_available) {
+                                r = mount_point_is_available(where, "/boot", /* missing_ok = */ false);
+                                if (r < 0)
+                                        return r;
+                                if (r > 0)
+                                        esp_path = "/boot";
+                        }
 
-                if (!esp_path) {
-                        r = mount_point_is_available(where, "/efi", /* missing_ok = */ true);
-                        if (r < 0)
-                                return r;
-                        if (r > 0)
-                                esp_path = "/efi";
+                        if (!esp_path) {
+                                r = mount_point_is_available(where, "/efi", /* missing_ok = */ true);
+                                if (r < 0)
+                                        return r;
+                                if (r > 0)
+                                        esp_path = "/efi";
+                        }
                 }
 
-                if (esp_path) {
-                        /* OK, let's mount the ESP now (possibly creating the dir if missing) */
-                        r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, flags);
-                        if (r < 0)
-                                return r;
-                }
+                /* OK, let's mount the ESP now (possibly creating the dir if missing) */
+                r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, userns_fd, flags);
+                if (r < 0)
+                        return r;
         }
 
         return 0;
@@ -2165,14 +2264,14 @@ int dissected_image_mount_and_warn(
                 const char *where,
                 uid_t uid_shift,
                 uid_t uid_range,
+                int userns_fd,
                 DissectImageFlags flags) {
 
         int r;
 
         assert(m);
-        assert(where);
 
-        r = dissected_image_mount(m, where, uid_shift, uid_range, flags);
+        r = dissected_image_mount(m, where, uid_shift, uid_range, userns_fd, flags);
         if (r == -ENXIO)
                 return log_error_errno(r, "Not root file system found in image.");
         if (r == -EMEDIUMTYPE)
@@ -3315,8 +3414,9 @@ int dissected_image_acquire_metadata(DissectedImage *m, DissectImageFlags extra_
                 r = dissected_image_mount(
                                 m,
                                 t,
-                                UID_INVALID,
-                                UID_INVALID,
+                                /* uid_shift= */ UID_INVALID,
+                                /* uid_range= */ UID_INVALID,
+                                /* userns_fd= */ -EBADF,
                                 extra_flags |
                                 DISSECT_IMAGE_READ_ONLY |
                                 DISSECT_IMAGE_MOUNT_ROOT_ONLY |
@@ -3755,6 +3855,7 @@ int mount_image_privately_interactively(
                         "/run/systemd/mount-rootfs",
                         /* uid_shift= */ UID_INVALID,
                         /* uid_range= */ UID_INVALID,
+                        /* userns_fd= */ -EBADF,
                         flags);
         if (r < 0)
                 return r;
@@ -3882,7 +3983,13 @@ int verity_dissect_and_mount(
         if (r < 0)
                 return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
 
-        r = dissected_image_mount(dissected_image, dest, UID_INVALID, UID_INVALID, dissect_image_flags);
+        r = dissected_image_mount(
+                        dissected_image,
+                        dest,
+                        /* uid_shift= */ UID_INVALID,
+                        /* uid_range= */ UID_INVALID,
+                        /* userns_fd= */ -EBADF,
+                        dissect_image_flags);
         if (r < 0)
                 return log_debug_errno(r, "Failed to mount image: %m");
 
index 508085fd1a75ad42daa92f060eb4a02562efde33..7a8d5ff81dd9ef19eb910425c50cfad6881259eb 100644 (file)
@@ -36,6 +36,7 @@ struct DissectedPartition {
         uint64_t size;
         uint64_t offset;
         uint64_t gpt_flags;
+        int fsmount_fd;
 };
 
 #define DISSECTED_PARTITION_NULL                                        \
@@ -43,6 +44,7 @@ struct DissectedPartition {
                 .partno = -1,                                           \
                 .architecture = _ARCHITECTURE_INVALID,                  \
                 .mount_node_fd = -EBADF,                                \
+                .fsmount_fd = -EBADF,                                   \
         })
 #define TAKE_PARTITION(p)                                       \
         ({                                                      \
@@ -160,8 +162,8 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
 
 int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags);
 int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags);
-int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags);
-int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags);
+int dissected_image_mount(DissectedImage *m, const char *dest, uid_t uid_shift, uid_t uid_range, int userns_fd, DissectImageFlags flags);
+int dissected_image_mount_and_warn(DissectedImage *m, const char *where, uid_t uid_shift, uid_t uid_range, int userns_fd, DissectImageFlags flags);
 
 int dissected_image_acquire_metadata(DissectedImage *m, DissectImageFlags extra_flags);
 
index 5099bdcb4ea6e25fc274a69d04eb888db9d3dd2f..33bfb3dc80a5b7c70ba772c812e5bd9e7f399da8 100644 (file)
@@ -712,8 +712,9 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
                         r = dissected_image_mount_and_warn(
                                         m,
                                         p,
-                                        UID_INVALID,
-                                        UID_INVALID,
+                                        /* uid_shift= */ UID_INVALID,
+                                        /* uid_range= */ UID_INVALID,
+                                        /* userns_fd= */ -EBADF,
                                         flags);
                         if (r < 0 && r != -ENOMEDIUM)
                                 return r;
index f76d3543eb9f5f8680b6a3d77afa42af7cc8eac6..1bd00d1af110860d00643bcc28da0684064e3177 100644 (file)
@@ -101,7 +101,13 @@ static void* thread_func(void *ptr) {
 
                 verify_dissected_image(dissected);
 
-                r = dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, DISSECT_IMAGE_READ_ONLY);
+                r = dissected_image_mount(
+                                dissected,
+                                mounted,
+                                /* uid_shift= */ UID_INVALID,
+                                /* uid_range= */ UID_INVALID,
+                                /* userns_fd= */ -EBADF,
+                                DISSECT_IMAGE_READ_ONLY);
                 log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
                 assert_se(r >= 0);
 
@@ -291,7 +297,13 @@ static int run(int argc, char *argv[]) {
         assert_se(detach_mount_namespace() >= 0);
 
         /* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
-        assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
+        assert_se(dissected_image_mount(
+                                  dissected,
+                                  mounted,
+                                  /* uid_shift= */ UID_INVALID,
+                                  /* uid_range= */ UID_INVALID,
+                                  /* userns_fd= */ -EBADF,
+                                  0) >= 0);
 
         /* Now we mounted everything, the partitions are pinned. Now it's fine to release the lock
          * fully. This means udev could now issue BLKRRPART again, but that's OK given this will fail because