#include "import-util.h"
#include "io-util.h"
#include "missing_mount.h"
+#include "missing_syscall.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
"btrfs",
"erofs",
"ext4",
+ "f2fs",
"squashfs",
"vfat",
"xfs");
free(p->decrypted_node);
free(p->mount_options);
safe_close(p->mount_node_fd);
+ safe_close(p->fsmount_fd);
*p = DISSECTED_PARTITION_NULL;
}
.mount_node_fd = TAKE_FD(mount_node_fd),
.offset = 0,
.size = UINT64_MAX,
+ .fsmount_fd = -EBADF,
};
return 0;
.offset = (uint64_t) start * 512,
.size = (uint64_t) size * 512,
.gpt_flags = pflags,
+ .fsmount_fd = -EBADF,
};
}
.mount_node_fd = TAKE_FD(mount_node_fd),
.offset = (uint64_t) start * 512,
.size = (uint64_t) size * 512,
+ .fsmount_fd = -EBADF,
};
break;
.mount_node_fd = TAKE_FD(mount_node_fd),
.offset = UINT64_MAX,
.size = UINT64_MAX,
+ .fsmount_fd = -EBADF,
};
}
}
if (verity->designator >= 0 && !m->partitions[verity->designator].found)
return -EADDRNOTAVAIL;
- bool have_verity_sig_partition =
- m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR_VERITY_SIG : PARTITION_ROOT_VERITY_SIG].found;
+ bool have_verity_sig_partition;
+ if (verity->designator >= 0)
+ have_verity_sig_partition = m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR_VERITY_SIG : PARTITION_ROOT_VERITY_SIG].found;
+ else
+ have_verity_sig_partition = m->partitions[PARTITION_USR_VERITY_SIG].found || m->partitions[PARTITION_ROOT_VERITY_SIG].found;
if (verity->root_hash) {
/* If we have an explicit root hash and found the partitions for it, then we are ready to use
/* If we found an embedded signature partition, we are ready, too. */
m->verity_ready = m->verity_sig_ready = true;
- m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR : PARTITION_ROOT].rw = false;
+ if (verity->designator >= 0)
+ m->partitions[verity->designator == PARTITION_USR ? PARTITION_USR : PARTITION_ROOT].rw = false;
+ else if (m->partitions[PARTITION_USR_VERITY_SIG].found)
+ m->partitions[PARTITION_USR].rw = false;
+ else if (m->partitions[PARTITION_ROOT_VERITY_SIG].found)
+ m->partitions[PARTITION_ROOT].rw = false;
}
}
#if HAVE_BLKID
_cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
_cleanup_close_ int fd = -EBADF;
+ struct stat st;
int r;
assert(path);
if (fd < 0)
return -errno;
- r = fd_verify_regular(fd);
+ if (fstat(fd, &st) < 0)
+ return -errno;
+
+ r = stat_verify_regular(&st);
if (r < 0)
return r;
if (r < 0)
return r;
+ m->image_size = st.st_size;
+
r = probe_sector_size(fd, &m->sector_size);
if (r < 0)
return r;
strv_free(m->machine_info);
strv_free(m->os_release);
strv_free(m->initrd_release);
- strv_free(m->extension_release);
+ strv_free(m->confext_release);
+ strv_free(m->sysext_release);
return mfree(m);
}
"(fsck)",
NULL,
&node_fd, 1, /* Leave the node fd open */
- FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG|FORK_REARRANGE_STDIO|FORK_CLOEXEC_OFF,
+ FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_RLIMIT_NOFILE_SAFE|FORK_DEATHSIG_SIGTERM|FORK_REARRANGE_STDIO|FORK_CLOEXEC_OFF,
&pid);
if (r < 0)
return log_debug_errno(r, "Failed to fork off fsck: %m");
return 0;
}
-static int fs_grow(const char *node_path, const char *mount_path) {
- _cleanup_close_ int mount_fd = -EBADF, node_fd = -EBADF;
+static int fs_grow(const char *node_path, int mount_fd, const char *mount_path) {
+ _cleanup_close_ int _mount_fd = -EBADF, node_fd = -EBADF;
uint64_t size, newsize;
+ const char *id;
int r;
+ assert(node_path);
+ assert(mount_fd >= 0 || mount_path);
+
node_fd = open(node_path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (node_fd < 0)
return log_debug_errno(errno, "Failed to open node device %s: %m", node_path);
- if (ioctl(node_fd, BLKGETSIZE64, &size) != 0)
- return log_debug_errno(errno, "Failed to get block device size of %s: %m", node_path);
+ r = blockdev_get_device_size(node_fd, &size);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to get block device size of %s: %m", node_path);
+
+ if (mount_fd < 0) {
+ assert(mount_path);
- mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
- if (mount_fd < 0)
- return log_debug_errno(errno, "Failed to open mountd file system %s: %m", mount_path);
+ _mount_fd = open(mount_path, O_RDONLY|O_DIRECTORY|O_CLOEXEC);
+ if (_mount_fd < 0)
+ return log_debug_errno(errno, "Failed to open mounted file system %s: %m", mount_path);
- log_debug("Resizing \"%s\" to %"PRIu64" bytes...", mount_path, size);
+ mount_fd = _mount_fd;
+ } else {
+ mount_fd = fd_reopen_condition(mount_fd, O_RDONLY|O_DIRECTORY|O_CLOEXEC, O_RDONLY|O_DIRECTORY|O_CLOEXEC, &_mount_fd);
+ if (mount_fd < 0)
+ return log_debug_errno(errno, "Failed to reopen mount node: %m");
+ }
+
+ id = mount_path ?: node_path;
+
+ log_debug("Resizing \"%s\" to %"PRIu64" bytes...", id, size);
r = resize_fs(mount_fd, size, &newsize);
if (r < 0)
- return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", mount_path, size);
+ return log_debug_errno(r, "Failed to resize \"%s\" to %"PRIu64" bytes: %m", id, size);
if (newsize == size)
log_debug("Successfully resized \"%s\" to %s bytes.",
- mount_path, FORMAT_BYTES(newsize));
+ id, FORMAT_BYTES(newsize));
else {
assert(newsize < size);
log_debug("Successfully resized \"%s\" to %s bytes (%"PRIu64" bytes lost due to blocksize).",
- mount_path, FORMAT_BYTES(newsize), size - newsize);
+ id, FORMAT_BYTES(newsize), size - newsize);
}
return 0;
* anymore (since in some cases the kernel implementations will refuse mounting when corrupted,
* read-only and "norecovery" is specified). But I think for the case of automatically determined
* mount options for loopback devices this is the right choice, since otherwise using the same
- * loopback file twice even in read-only mode, is going to fail badly sooner or later. The usecase of
+ * loopback file twice even in read-only mode, is going to fail badly sooner or later. The use case of
* making reuse of the immutable images "just work" is more relevant to us than having read-only
* access that actually modifies stuff work on such image files. Or to say this differently: if
* people want their file systems to be fixed up they should just open them in writable mode, where
return 0;
}
+static bool need_user_mapping(uid_t uid_shift, uid_t uid_range) {
+
+ if (!uid_is_valid(uid_shift))
+ return false;
+
+ return uid_shift != 0 || uid_range != UINT32_MAX;
+}
+
static int mount_partition(
PartitionDesignator d,
DissectedPartition *m,
const char *directory,
uid_t uid_shift,
uid_t uid_range,
+ int userns_fd,
DissectImageFlags flags) {
_cleanup_free_ char *chased = NULL, *options = NULL;
- bool rw, discard, remap_uid_gid = false;
- const char *p, *node, *fstype;
+ const char *p = NULL, *node, *fstype = NULL;
+ bool rw, discard, grow;
unsigned long ms_flags;
int r;
assert(m);
- assert(where);
- if (m->mount_node_fd < 0)
+ if (!m->found)
return 0;
- /* Use decrypted node and matching fstype if available, otherwise use the original device */
- node = FORMAT_PROC_FD_PATH(m->mount_node_fd);
- fstype = dissected_partition_fstype(m);
+ /* Check the various combinations when we can't do anything anymore */
+ if (m->fsmount_fd < 0 && m->mount_node_fd < 0)
+ return 0;
+ if (m->fsmount_fd >= 0 && !where)
+ return 0;
+ if (!where && m->mount_node_fd < 0)
+ return 0;
- if (!fstype)
- return -EAFNOSUPPORT;
+ if (m->fsmount_fd < 0) {
+ fstype = dissected_partition_fstype(m);
+ if (!fstype)
+ return -EAFNOSUPPORT;
- /* We are looking at an encrypted partition? This either means stacked encryption, or the caller
- * didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error for this
- * case. */
- if (streq(fstype, "crypto_LUKS"))
- return -EUNATCH;
+ /* We are looking at an encrypted partition? This either means stacked encryption, or the
+ * caller didn't call dissected_image_decrypt() beforehand. Let's return a recognizable error
+ * for this case. */
+ if (streq(fstype, "crypto_LUKS"))
+ return -EUNATCH;
- r = dissect_fstype_ok(fstype);
- if (r < 0)
- return r;
- if (!r)
- return -EIDRM; /* Recognizable error */
+ r = dissect_fstype_ok(fstype);
+ if (r < 0)
+ return r;
+ if (!r)
+ return -EIDRM; /* Recognizable error */
+ }
+ node = m->mount_node_fd < 0 ? NULL : FORMAT_PROC_FD_PATH(m->mount_node_fd);
rw = m->rw && !(flags & DISSECT_IMAGE_MOUNT_READ_ONLY);
discard = ((flags & DISSECT_IMAGE_DISCARD) ||
- ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && is_loop_device(m->node) > 0));
+ ((flags & DISSECT_IMAGE_DISCARD_ON_LOOP) && (m->node && is_loop_device(m->node) > 0)));
+
+ grow = rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS);
- if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw) {
+ if (FLAGS_SET(flags, DISSECT_IMAGE_FSCK) && rw && m->mount_node_fd >= 0 && m->fsmount_fd < 0) {
r = run_fsck(m->mount_node_fd, fstype);
if (r < 0)
return r;
}
- if (directory) {
- /* Automatically create missing mount points inside the image, if necessary. */
- r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755, NULL);
- if (r < 0 && r != -EROFS)
- return r;
-
- r = chase(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
- if (r < 0)
- return r;
+ if (where) {
+ if (directory) {
+ /* Automatically create missing mount points inside the image, if necessary. */
+ r = mkdir_p_root(where, directory, uid_shift, (gid_t) uid_shift, 0755, NULL);
+ if (r < 0 && r != -EROFS)
+ return r;
- p = chased;
- } else {
- /* Create top-level mount if missing – but only if this is asked for. This won't modify the
- * image (as the branch above does) but the host hierarchy, and the created directory might
- * survive our mount in the host hierarchy hence. */
- if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
- r = mkdir_p(where, 0755);
+ r = chase(directory, where, CHASE_PREFIX_ROOT, &chased, NULL);
if (r < 0)
return r;
- }
- p = where;
- }
+ p = chased;
+ } else {
+ /* Create top-level mount if missing – but only if this is asked for. This won't modify the
+ * image (as the branch above does) but the host hierarchy, and the created directory might
+ * survive our mount in the host hierarchy hence. */
+ if (FLAGS_SET(flags, DISSECT_IMAGE_MKDIR)) {
+ r = mkdir_p(where, 0755);
+ if (r < 0)
+ return r;
+ }
- r = partition_pick_mount_options(d, dissected_partition_fstype(m), rw, discard, &options, &ms_flags);
- if (r < 0)
- return r;
+ p = where;
+ }
+ }
- if (uid_is_valid(uid_shift) && uid_shift != 0) {
+ if (m->fsmount_fd < 0) {
+ r = partition_pick_mount_options(d, fstype, rw, discard, &options, &ms_flags);
+ if (r < 0)
+ return r;
- if (fstype_can_uid_gid(fstype)) {
+ if (need_user_mapping(uid_shift, uid_range) && fstype_can_uid_gid(fstype)) {
_cleanup_free_ char *uid_option = NULL;
if (asprintf(&uid_option, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
if (!strextend_with_separator(&options, ",", uid_option))
return -ENOMEM;
- } else if (FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED))
- remap_uid_gid = true;
+
+ userns_fd = -EBADF; /* Not needed */
+ }
+
+ if (!isempty(m->mount_options))
+ if (!strextend_with_separator(&options, ",", m->mount_options))
+ return -ENOMEM;
}
- if (!isempty(m->mount_options))
- if (!strextend_with_separator(&options, ",", m->mount_options))
- return -ENOMEM;
+ if (p) {
+ if (m->fsmount_fd >= 0) {
+ /* Case #1: Attach existing fsmount fd to the file system */
- r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
- if (r < 0)
- return r;
+ r = mount_exchange_graceful(
+ m->fsmount_fd,
+ p,
+ FLAGS_SET(flags, DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE));
+ if (r < 0)
+ return log_debug_errno(r, "Failed to mount image on '%s': %m", p);
- if (rw && m->growfs && FLAGS_SET(flags, DISSECT_IMAGE_GROWFS))
- (void) fs_grow(node, p);
+ } else {
+ assert(node);
- if (remap_uid_gid) {
- r = remount_idmap(p, uid_shift, uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
- if (r < 0)
- return r;
+ /* Case #2: Mount directly into place */
+ r = mount_nofollow_verbose(LOG_DEBUG, node, p, fstype, ms_flags, options);
+ if (r < 0)
+ return r;
+
+ if (grow)
+ (void) fs_grow(node, -EBADF, p);
+
+ if (userns_fd >= 0) {
+ r = remount_idmap_fd(STRV_MAKE(p), userns_fd);
+ if (r < 0)
+ return r;
+ }
+ }
+ } else {
+ assert(node);
+
+ /* Case #3: Create fsmount fd */
+
+ m->fsmount_fd = make_fsmount(LOG_DEBUG, node, fstype, ms_flags, options, userns_fd);
+ if (m->fsmount_fd < 0)
+ return m->fsmount_fd;
+
+ if (grow)
+ (void) fs_grow(node, m->fsmount_fd, NULL);
}
return 1;
}
-static int mount_root_tmpfs(const char *where, uid_t uid_shift, DissectImageFlags flags) {
+static int mount_root_tmpfs(const char *where, uid_t uid_shift, uid_t uid_range, DissectImageFlags flags) {
_cleanup_free_ char *options = NULL;
int r;
return r;
}
- if (uid_is_valid(uid_shift)) {
+ if (need_user_mapping(uid_shift, uid_range)) {
if (asprintf(&options, "uid=" UID_FMT ",gid=" GID_FMT, uid_shift, (gid_t) uid_shift) < 0)
return -ENOMEM;
}
const char *where,
uid_t uid_shift,
uid_t uid_range,
+ int userns_fd,
DissectImageFlags flags) {
+ _cleanup_close_ int my_userns_fd = -EBADF;
int r;
assert(m);
- assert(where);
- /* Returns:
+ /* If 'where' is NULL then we'll use the new mount API to create fsmount() fds for the mounts and
+ * store them in DissectedPartition.fsmount_fd.
+ *
+ * If 'where' is not NULL then we'll either mount the partitions to the right places ourselves,
+ * or use DissectedPartition.fsmount_fd and bind it to the right places.
+ *
+ * This allows splitting the setting up up the superblocks and the binding to file systems paths into
+ * two distinct and differently privileged components: one that gets the fsmount fds, and the other
+ * that then applies them.
+ *
+ * Returns:
*
* -ENXIO → No root partition found
* -EMEDIUMTYPE → DISSECT_IMAGE_VALIDATE_OS set but no os-release/extension-release file found
* -EIDRM → File system is not among allowlisted "common" file systems
*/
+ if (!where && (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0)
+ return -EOPNOTSUPP; /* for now, not supported */
+
if (!(m->partitions[PARTITION_ROOT].found ||
(m->partitions[PARTITION_USR].found && FLAGS_SET(flags, DISSECT_IMAGE_USR_NO_ROOT))))
return -ENXIO; /* Require a root fs or at least a /usr/ fs (the latter is subject to a flag of its own) */
+ if (userns_fd < 0 && need_user_mapping(uid_shift, uid_range) && FLAGS_SET(flags, DISSECT_IMAGE_MOUNT_IDMAPPED)) {
+
+ my_userns_fd = make_userns(uid_shift, uid_range, UID_INVALID, REMOUNT_IDMAPPING_HOST_ROOT);
+ if (my_userns_fd < 0)
+ return my_userns_fd;
+
+ userns_fd = my_userns_fd;
+ }
+
if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0) {
/* First mount the root fs. If there's none we use a tmpfs. */
- if (m->partitions[PARTITION_ROOT].found)
- r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, flags);
- else
- r = mount_root_tmpfs(where, uid_shift, flags);
- if (r < 0)
- return r;
+ if (m->partitions[PARTITION_ROOT].found) {
+ r = mount_partition(PARTITION_ROOT, m->partitions + PARTITION_ROOT, where, NULL, uid_shift, uid_range, userns_fd, flags);
+ if (r < 0)
+ return r;
+
+ } else if (where) {
+ r = mount_root_tmpfs(where, uid_shift, uid_range, flags);
+ if (r < 0)
+ return r;
+ }
/* For us mounting root always means mounting /usr as well */
- r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, flags);
+ r = mount_partition(PARTITION_USR, m->partitions + PARTITION_USR, where, "/usr", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
+ }
+
+ if ((flags & DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY) == 0 &&
+ (flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
+ /* If either one of the validation flags are set, ensure that the image qualifies as
+ * one or the other (or both). */
+ bool ok = false;
- if ((flags & (DISSECT_IMAGE_VALIDATE_OS|DISSECT_IMAGE_VALIDATE_OS_EXT)) != 0) {
- /* If either one of the validation flags are set, ensure that the image qualifies
- * as one or the other (or both). */
- bool ok = false;
+ assert(where);
- if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
- r = path_is_os_tree(where);
+ if (FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS)) {
+ r = path_is_os_tree(where);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ ok = true;
+ }
+ if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT) && m->image_name) {
+ r = extension_has_forbidden_content(where);
+ if (r < 0)
+ return r;
+ if (r == 0) {
+ r = path_is_extension_tree(IMAGE_SYSEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
+ if (r == 0)
+ r = path_is_extension_tree(IMAGE_CONFEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
if (r < 0)
return r;
if (r > 0)
ok = true;
}
- if (!ok && FLAGS_SET(flags, DISSECT_IMAGE_VALIDATE_OS_EXT)) {
- r = extension_has_forbidden_content(where);
- if (r < 0)
- return r;
- if (r == 0) {
- r = path_is_extension_tree(IMAGE_SYSEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
- if (r == 0)
- r = path_is_extension_tree(IMAGE_CONFEXT, where, m->image_name, FLAGS_SET(flags, DISSECT_IMAGE_RELAX_EXTENSION_CHECK));
- if (r < 0)
- return r;
- if (r > 0)
- ok = true;
- }
- }
-
- if (!ok)
- return -ENOMEDIUM;
}
+
+ if (!ok)
+ return -ENOMEDIUM;
}
if (flags & DISSECT_IMAGE_MOUNT_ROOT_ONLY)
return 0;
- r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, flags);
+ r = mount_partition(PARTITION_HOME, m->partitions + PARTITION_HOME, where, "/home", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
- r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, flags);
+ r = mount_partition(PARTITION_SRV, m->partitions + PARTITION_SRV, where, "/srv", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
- r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, flags);
+ r = mount_partition(PARTITION_VAR, m->partitions + PARTITION_VAR, where, "/var", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
- r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, flags);
+ r = mount_partition(PARTITION_TMP, m->partitions + PARTITION_TMP, where, "/var/tmp", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
- int slash_boot_is_available;
- r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok = */ true);
- if (r < 0)
- return r;
- if (r > 0) {
- r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, flags);
+ int slash_boot_is_available = 0;
+ if (where) {
+ r = slash_boot_is_available = mount_point_is_available(where, "/boot", /* missing_ok = */ true);
+ if (r < 0)
+ return r;
+ }
+ if (!where || slash_boot_is_available) {
+ r = mount_partition(PARTITION_XBOOTLDR, m->partitions + PARTITION_XBOOTLDR, where, "/boot", uid_shift, uid_range, userns_fd, flags);
if (r < 0)
return r;
slash_boot_is_available = !r;
if (m->partitions[PARTITION_ESP].found) {
const char *esp_path = NULL;
- /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the XBOOTLDR
- * partition into it. Otherwise, use /efi instead, but only if it exists and is empty. */
+ if (where) {
+ /* Mount the ESP to /boot/ if it exists and is empty and we didn't already mount the
+ * XBOOTLDR partition into it. Otherwise, use /efi instead, but only if it exists
+ * and is empty. */
- if (slash_boot_is_available) {
- r = mount_point_is_available(where, "/boot", /* missing_ok = */ false);
- if (r < 0)
- return r;
- if (r > 0)
- esp_path = "/boot";
- }
+ if (slash_boot_is_available) {
+ r = mount_point_is_available(where, "/boot", /* missing_ok = */ false);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ esp_path = "/boot";
+ }
- if (!esp_path) {
- r = mount_point_is_available(where, "/efi", /* missing_ok = */ true);
- if (r < 0)
- return r;
- if (r > 0)
- esp_path = "/efi";
+ if (!esp_path) {
+ r = mount_point_is_available(where, "/efi", /* missing_ok = */ true);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ esp_path = "/efi";
+ }
}
- if (esp_path) {
- /* OK, let's mount the ESP now (possibly creating the dir if missing) */
- r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, flags);
- if (r < 0)
- return r;
- }
+ /* OK, let's mount the ESP now (possibly creating the dir if missing) */
+ r = mount_partition(PARTITION_ESP, m->partitions + PARTITION_ESP, where, esp_path, uid_shift, uid_range, userns_fd, flags);
+ if (r < 0)
+ return r;
}
return 0;
const char *where,
uid_t uid_shift,
uid_t uid_range,
+ int userns_fd,
DissectImageFlags flags) {
int r;
assert(m);
- assert(where);
- r = dissected_image_mount(m, where, uid_shift, uid_range, flags);
+ r = dissected_image_mount(m, where, uid_shift, uid_range, userns_fd, flags);
if (r == -ENXIO)
return log_error_errno(r, "Not root file system found in image.");
if (r == -EMEDIUMTYPE)
const VeritySettings *verity) {
bool check_signature;
- int r;
+ int r, k;
assert(cd);
assert(name);
if (r >= 0)
return r;
- log_debug("Validation of dm-verity signature failed via the kernel, trying userspace validation instead.");
+ log_debug_errno(r, "Validation of dm-verity signature failed via the kernel, trying userspace validation instead: %m");
#else
log_debug("Activation of verity device with signature requested, but not supported via the kernel by %s due to missing crypt_activate_by_signed_key(), trying userspace validation instead.",
program_invocation_short_name);
+ r = 0; /* Set for the propagation below */
#endif
/* So this didn't work via the kernel, then let's try userspace validation instead. If that
* works we'll try to activate without telling the kernel the signature. */
- r = validate_signature_userspace(verity);
- if (r < 0)
- return r;
- if (r == 0)
- return log_debug_errno(SYNTHETIC_ERRNO(ENOKEY),
+ /* Preferably propagate the original kernel error, so that the fallback logic can work,
+ * as the device-mapper is finicky around concurrent activations of the same volume */
+ k = validate_signature_userspace(verity);
+ if (k < 0)
+ return r < 0 ? r : k;
+ if (k == 0)
+ return log_debug_errno(r < 0 ? r : SYNTHETIC_ERRNO(ENOKEY),
"Activation of signed Verity volume worked neither via the kernel nor in userspace, can't activate.");
}
DecryptedImage *d) {
_cleanup_(sym_crypt_freep) struct crypt_device *cd = NULL;
- _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
_cleanup_free_ char *node = NULL, *name = NULL;
_cleanup_close_ int mount_node_fd = -EBADF;
int r;
* In case of ENODEV/ENOENT, which can happen if another process is activating at the exact same time,
* retry a few times before giving up. */
for (unsigned i = 0; i < N_DEVICE_NODE_LIST_ATTEMPTS; i++) {
+ _cleanup_(dm_deferred_remove_cleanp) char *restore_deferred_remove = NULL;
_cleanup_(sym_crypt_freep) struct crypt_device *existing_cd = NULL;
_cleanup_close_ int fd = -EBADF;
return log_debug_errno(r, "Failed to activate verity device %s: %m", node);
check:
- if (!restore_deferred_remove){
- /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
- r = dm_deferred_remove_cancel(name);
- /* -EBUSY and -ENXIO: the device has already been removed or being removed. We cannot
- * use the device, try to open again. See target_message() in drivers/md/dm-ioctl.c
- * and dm_cancel_deferred_remove() in drivers/md/dm.c */
- if (IN_SET(r, -EBUSY, -ENXIO))
- goto try_again;
- if (r < 0)
- return log_debug_errno(r, "Failed to disable automated deferred removal for verity device %s: %m", node);
+ /* To avoid races, disable automatic removal on umount while setting up the new device. Restore it on failure. */
+ r = dm_deferred_remove_cancel(name);
+ /* -EBUSY and -ENXIO: the device has already been removed or being removed. We cannot
+ * use the device, try to open again. See target_message() in drivers/md/dm-ioctl.c
+ * and dm_cancel_deferred_remove() in drivers/md/dm.c */
+ if (IN_SET(r, -EBUSY, -ENXIO))
+ goto try_again;
+ if (r < 0)
+ return log_debug_errno(r, "Failed to disable automated deferred removal for verity device %s: %m", node);
- restore_deferred_remove = strdup(name);
- if (!restore_deferred_remove)
- return log_oom_debug();
- }
+ restore_deferred_remove = strdup(name);
+ if (!restore_deferred_remove)
+ return log_oom_debug();
r = verity_can_reuse(verity, name, &existing_cd);
/* Same as above, -EINVAL can randomly happen when it actually means -EEXIST */
}
}
+ /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
+ restore_deferred_remove = mfree(restore_deferred_remove);
+
mount_node_fd = TAKE_FD(fd);
if (existing_cd)
crypt_free_and_replace(cd, existing_cd);
return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), "All attempts to activate verity device %s failed.", name);
success:
- /* Everything looks good and we'll be able to mount the device, so deferred remove will be re-enabled at that point. */
- restore_deferred_remove = mfree(restore_deferred_remove);
-
d->decrypted[d->n_decrypted++] = (DecryptedPartition) {
.name = TAKE_PTR(name),
.device = TAKE_PTR(cd),
META_MACHINE_INFO,
META_OS_RELEASE,
META_INITRD_RELEASE,
- META_EXTENSION_RELEASE,
+ META_SYSEXT_RELEASE,
+ META_CONFEXT_RELEASE,
META_HAS_INIT_SYSTEM,
_META_MAX,
};
[META_HOSTNAME] = "/etc/hostname\0",
[META_MACHINE_ID] = "/etc/machine-id\0",
[META_MACHINE_INFO] = "/etc/machine-info\0",
- [META_OS_RELEASE] = ("/etc/os-release\0"
- "/usr/lib/os-release\0"),
- [META_INITRD_RELEASE] = ("/etc/initrd-release\0"
- "/usr/lib/initrd-release\0"),
- [META_EXTENSION_RELEASE] = "extension-release\0", /* Used only for logging. */
+ [META_OS_RELEASE] = "/etc/os-release\0"
+ "/usr/lib/os-release\0",
+ [META_INITRD_RELEASE] = "/etc/initrd-release\0"
+ "/usr/lib/initrd-release\0",
+ [META_SYSEXT_RELEASE] = "sysext-release\0", /* String used only for logging. */
+ [META_CONFEXT_RELEASE] = "confext-release\0", /* ditto */
[META_HAS_INIT_SYSTEM] = "has-init-system\0", /* ditto */
};
- _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **initrd_release = NULL, **extension_release = NULL;
- _cleanup_close_pair_ int error_pipe[2] = PIPE_EBADF;
+ _cleanup_strv_free_ char **machine_info = NULL, **os_release = NULL, **initrd_release = NULL, **sysext_release = NULL, **confext_release = NULL;
+ _cleanup_close_pair_ int error_pipe[2] = EBADF_PAIR;
_cleanup_(rmdir_and_freep) char *t = NULL;
_cleanup_(sigkill_waitp) pid_t child = 0;
sd_id128_t machine_id = SD_ID128_NULL;
int fds[2 * _META_MAX], r, v;
int has_init_system = -1;
ssize_t n;
- ImageClass image_class = IMAGE_SYSEXT;
BLOCK_SIGNALS(SIGCHLD);
assert(m);
- assert(image_class);
for (; n_meta_initialized < _META_MAX; n_meta_initialized ++) {
- if (!paths[n_meta_initialized]) {
- fds[2*n_meta_initialized] = fds[2*n_meta_initialized+1] = -EBADF;
- continue;
- }
+ assert(paths[n_meta_initialized]);
if (pipe2(fds + 2*n_meta_initialized, O_CLOEXEC) < 0) {
r = -errno;
goto finish;
}
- r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
+ r = safe_fork("(sd-dissect)", FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGTERM|FORK_NEW_MOUNTNS|FORK_MOUNTNS_SLAVE, &child);
if (r < 0)
goto finish;
if (r == 0) {
r = dissected_image_mount(
m,
t,
- UID_INVALID,
- UID_INVALID,
+ /* uid_shift= */ UID_INVALID,
+ /* uid_range= */ UID_INVALID,
+ /* userns_fd= */ -EBADF,
extra_flags |
DISSECT_IMAGE_READ_ONLY |
DISSECT_IMAGE_MOUNT_ROOT_ONLY |
for (unsigned k = 0; k < _META_MAX; k++) {
_cleanup_close_ int fd = -ENOENT;
- if (!paths[k])
- continue;
+ assert(paths[k]);
fds[2*k] = safe_close(fds[2*k]);
switch (k) {
- case META_EXTENSION_RELEASE: {
- /* As per the os-release spec, if the image is an extension it will have a file
- * named after the image name in extension-release.d/ - we use the image name
- * and try to resolve it with the extension-release helpers, as sometimes
- * the image names are mangled on deployment and do not match anymore.
- * Unlike other paths this is not fixed, and the image name
- * can be mangled on deployment, so by calling into the helper
- * we allow a fallback that matches on the first extension-release
- * file found in the directory, if one named after the image cannot
- * be found first. */
- ImageClass class = IMAGE_SYSEXT;
- r = open_extension_release(t, IMAGE_SYSEXT, m->image_name, /* relax_extension_release_check= */ false, NULL, &fd);
- if (r == -ENOENT) {
- r = open_extension_release(t, IMAGE_CONFEXT, m->image_name, /* relax_extension_release_check= */ false, NULL, &fd);
- if (r >= 0)
- class = IMAGE_CONFEXT;
- }
+ case META_SYSEXT_RELEASE:
+ if (!m->image_name)
+ goto next;
+
+ /* As per the os-release spec, if the image is an extension it will have a
+ * file named after the image name in extension-release.d/ - we use the image
+ * name and try to resolve it with the extension-release helpers, as
+ * sometimes the image names are mangled on deployment and do not match
+ * anymore. Unlike other paths this is not fixed, and the image name can be
+ * mangled on deployment, so by calling into the helper we allow a fallback
+ * that matches on the first extension-release file found in the directory,
+ * if one named after the image cannot be found first. */
+ r = open_extension_release(
+ t,
+ IMAGE_SYSEXT,
+ m->image_name,
+ /* relax_extension_release_check= */ false,
+ /* ret_path= */ NULL,
+ &fd);
+ if (r < 0)
+ fd = r;
+ break;
+
+ case META_CONFEXT_RELEASE:
+ if (!m->image_name)
+ goto next;
+
+ /* As above */
+ r = open_extension_release(
+ t,
+ IMAGE_CONFEXT,
+ m->image_name,
+ /* relax_extension_release_check= */ false,
+ /* ret_path= */ NULL,
+ &fd);
if (r < 0)
fd = r;
- else {
- r = loop_write(fds[2*k+1], &class, sizeof(class), false);
- if (r < 0)
- goto inner_fail; /* Propagate the error to the parent */
- }
break;
- }
case META_HAS_INIT_SYSTEM: {
bool found = false;
FOREACH_STRING(init,
- "/usr/lib/systemd/systemd", /* systemd on /usr merged system */
- "/lib/systemd/systemd", /* systemd on /usr non-merged systems */
+ "/usr/lib/systemd/systemd", /* systemd on /usr/ merged system */
+ "/lib/systemd/systemd", /* systemd on /usr/ non-merged systems */
"/sbin/init") { /* traditional path the Linux kernel invokes */
r = chase(init, t, CHASE_PREFIX_ROOT, NULL, NULL);
}
}
- r = loop_write(fds[2*k+1], &found, sizeof(found), false);
+ r = loop_write(fds[2*k+1], &found, sizeof(found));
if (r < 0)
goto inner_fail;
- continue;
+ goto next;
}
default:
if (fd < 0) {
log_debug_errno(fd, "Failed to read %s file of image, ignoring: %m", paths[k]);
- fds[2*k+1] = safe_close(fds[2*k+1]);
- continue;
+ goto next;
}
r = copy_bytes(fd, fds[2*k+1], UINT64_MAX, 0);
if (r < 0)
goto inner_fail;
+ next:
fds[2*k+1] = safe_close(fds[2*k+1]);
}
for (unsigned k = 0; k < _META_MAX; k++) {
_cleanup_fclose_ FILE *f = NULL;
- if (!paths[k])
- continue;
+ assert(paths[k]);
fds[2*k+1] = safe_close(fds[2*k+1]);
break;
- case META_EXTENSION_RELEASE: {
- ImageClass cl = IMAGE_SYSEXT;
- size_t nr;
+ case META_SYSEXT_RELEASE:
+ r = load_env_file_pairs(f, "sysext-release", &sysext_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read sysext release file of image: %m");
- errno = 0;
- nr = fread(&cl, 1, sizeof(cl), f);
- if (nr != sizeof(cl))
- log_debug_errno(errno_or_else(EIO), "Failed to read class of extension image: %m");
- else {
- image_class = cl;
- r = load_env_file_pairs(f, "extension-release", &extension_release);
- if (r < 0)
- log_debug_errno(r, "Failed to read extension release file of image: %m");
- }
+ break;
+
+ case META_CONFEXT_RELEASE:
+ r = load_env_file_pairs(f, "confext-release", &confext_release);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read confext release file of image: %m");
break;
- }
case META_HAS_INIT_SYSTEM: {
bool b = false;
r = wait_for_terminate_and_check("(sd-dissect)", child, 0);
child = 0;
if (r < 0)
- return r;
+ goto finish;
n = read(error_pipe[0], &v, sizeof(v));
- if (n < 0)
- return -errno;
- if (n == sizeof(v))
- return v; /* propagate error sent to us from child */
- if (n != 0)
- return -EIO;
-
- if (r != EXIT_SUCCESS)
- return -EPROTO;
+ if (n < 0) {
+ r = -errno;
+ goto finish;
+ }
+ if (n == sizeof(v)) {
+ r = v; /* propagate error sent to us from child */
+ goto finish;
+ }
+ if (n != 0) {
+ r = -EIO;
+ goto finish;
+ }
+ if (r != EXIT_SUCCESS) {
+ r = -EPROTO;
+ goto finish;
+ }
free_and_replace(m->hostname, hostname);
m->machine_id = machine_id;
strv_free_and_replace(m->machine_info, machine_info);
strv_free_and_replace(m->os_release, os_release);
strv_free_and_replace(m->initrd_release, initrd_release);
- strv_free_and_replace(m->extension_release, extension_release);
+ strv_free_and_replace(m->sysext_release, sysext_release);
+ strv_free_and_replace(m->confext_release, confext_release);
m->has_init_system = has_init_system;
- m->image_class = image_class;
finish:
for (unsigned k = 0; k < n_meta_initialized; k++)
return r;
m->loop = loop_device_ref(loop);
+ m->image_size = m->loop->device_size;
m->sector_size = m->loop->sector_size;
r = dissect_image(m, loop->fd, loop->node, verity, mount_options, image_policy, flags);
"/run/systemd/mount-rootfs",
/* uid_shift= */ UID_INVALID,
/* uid_range= */ UID_INVALID,
+ /* userns_fd= */ -EBADF,
flags);
if (r < 0)
return r;
const char *required_host_os_release_id,
const char *required_host_os_release_version_id,
const char *required_host_os_release_sysext_level,
- const char *required_sysext_scope) {
+ const char *required_host_os_release_confext_level,
+ const char *required_sysext_scope,
+ DissectedImage **ret_image) {
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
int r;
assert(src);
- assert(dest);
+ /* Verifying release metadata requires mounted image for now, so ensure the check is skipped when
+ * opening an image without mounting it immediately (i.e.: 'dest' is NULL). */
+ assert(!required_host_os_release_id || dest);
relax_extension_release_check = mount_options_relax_extension_release_checks(options);
if (r < 0)
return log_debug_errno(r, "Failed to decrypt dissected image: %m");
- r = mkdir_p_label(dest, 0755);
- if (r < 0)
- return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
- r = umount_recursive(dest, 0);
- if (r < 0)
- return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
+ if (dest) {
+ r = mkdir_p_label(dest, 0755);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to create destination directory %s: %m", dest);
+ r = umount_recursive(dest, 0);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to umount under destination directory %s: %m", dest);
+ }
- r = dissected_image_mount(dissected_image, dest, UID_INVALID, UID_INVALID, dissect_image_flags);
+ r = dissected_image_mount(
+ dissected_image,
+ dest,
+ /* uid_shift= */ UID_INVALID,
+ /* uid_range= */ UID_INVALID,
+ /* userns_fd= */ -EBADF,
+ dissect_image_flags);
if (r < 0)
return log_debug_errno(r, "Failed to mount image: %m");
dissected_image->image_name,
required_host_os_release_id,
required_host_os_release_version_id,
- required_host_os_release_sysext_level,
+ class == IMAGE_SYSEXT ? required_host_os_release_sysext_level : required_host_os_release_confext_level,
required_sysext_scope,
extension_release,
class);
if (r < 0)
return log_debug_errno(r, "Failed to relinquish dissected image: %m");
+ if (ret_image)
+ *ret_image = TAKE_PTR(dissected_image);
+
return 0;
}