#include "fd-util.h"
#include "format-util.h"
#include "glyph-util.h"
-#include "label.h"
+#include "label-util.h"
#include "list.h"
#include "lock-util.h"
#include "loop-util.h"
/* ProtectKernelModules= option */
static const MountEntry protect_kernel_modules_table[] = {
-#if HAVE_SPLIT_USR
- { "/lib/modules", INACCESSIBLE, true },
-#endif
{ "/usr/lib/modules", INACCESSIBLE, true },
};
{ "/usr", READONLY, false },
{ "/boot", READONLY, true },
{ "/efi", READONLY, true },
-#if HAVE_SPLIT_USR
- { "/lib", READONLY, true },
- { "/lib64", READONLY, true },
- { "/bin", READONLY, true },
-# if HAVE_SPLIT_BIN
- { "/sbin", READONLY, true },
-# endif
-#endif
};
/* ProtectSystem=full includes ProtectSystem=yes */
{ "/boot", READONLY, true },
{ "/efi", READONLY, true },
{ "/etc", READONLY, false },
-#if HAVE_SPLIT_USR
- { "/lib", READONLY, true },
- { "/lib64", READONLY, true },
- { "/bin", READONLY, true },
-# if HAVE_SPLIT_BIN
- { "/sbin", READONLY, true },
-# endif
-#endif
};
/*
};
static const char * const mount_mode_table[_MOUNT_MODE_MAX] = {
- [INACCESSIBLE] = "inaccessible",
- [OVERLAY_MOUNT] = "overlay",
- [BIND_MOUNT] = "bind",
- [BIND_MOUNT_RECURSIVE] = "rbind",
- [PRIVATE_TMP] = "private-tmp",
- [PRIVATE_DEV] = "private-dev",
- [BIND_DEV] = "bind-dev",
- [EMPTY_DIR] = "empty",
- [PRIVATE_SYSFS] = "private-sysfs",
- [BIND_SYSFS] = "bind-sysfs",
- [PROCFS] = "procfs",
- [READONLY] = "read-only",
- [READWRITE] = "read-write",
- [TMPFS] = "tmpfs",
- [MOUNT_IMAGES] = "mount-images",
- [READWRITE_IMPLICIT] = "rw-implicit",
- [EXEC] = "exec",
- [NOEXEC] = "noexec",
- [MQUEUEFS] = "mqueuefs",
+ [INACCESSIBLE] = "inaccessible",
+ [OVERLAY_MOUNT] = "overlay",
+ [MOUNT_IMAGES] = "mount-images",
+ [BIND_MOUNT] = "bind",
+ [BIND_MOUNT_RECURSIVE] = "rbind",
+ [PRIVATE_TMP] = "private-tmp",
+ [PRIVATE_TMP_READONLY] = "private-tmp-read-only",
+ [PRIVATE_DEV] = "private-dev",
+ [BIND_DEV] = "bind-dev",
+ [EMPTY_DIR] = "empty",
+ [PRIVATE_SYSFS] = "private-sysfs",
+ [BIND_SYSFS] = "bind-sysfs",
+ [PROCFS] = "procfs",
+ [READONLY] = "read-only",
+ [READWRITE] = "read-write",
+ [NOEXEC] = "noexec",
+ [EXEC] = "exec",
+ [TMPFS] = "tmpfs",
+ [RUN] = "run",
+ [EXTENSION_DIRECTORIES] = "extension-directories",
+ [EXTENSION_IMAGES] = "extension-images",
+ [MQUEUEFS] = "mqueuefs",
+ [READWRITE_IMPLICIT] = "read-write-implicit",
+};
+
+/* Per-image-class lookup table, indexed by ImageClass, that names the os-release "level" field to
+ * use for that class. It lets the sysext and confext code paths share one implementation instead of
+ * hard-coding SYSEXT_LEVEL. Classes without an initializer below get NULL members. */
+static const struct {
+ const char *level_env; /* field name handed to parse_os_release() */
+ const char *level_env_print; /* " NAME=" prefix used when logging the host's level */
+} image_class_info[_IMAGE_CLASS_MAX] = {
+ [IMAGE_SYSEXT] = {
+ .level_env = "SYSEXT_LEVEL",
+ .level_env_print = " SYSEXT_LEVEL=",
+ },
+ [IMAGE_CONFEXT] = {
+ .level_env = "CONFEXT_LEVEL",
+ .level_env_print = " CONFEXT_LEVEL=",
+ }
};
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(mount_mode, MountMode);
*((*p)++) = (MountEntry) {
.path_malloc = TAKE_PTR(mount_point),
- .source_const = TAKE_PTR(source),
+ .source_malloc = TAKE_PTR(source),
.mode = EXTENSION_DIRECTORIES,
.ignore = ignore_enoent,
.has_prefix = true,
return 0;
}
-static int mount_private_dev(MountEntry *m) {
+static char *settle_runtime_dir(RuntimeScope scope) {
+ char *runtime_dir;
+ /* Returns the base runtime directory for the given scope as a newly allocated string that the
+  * caller must free: "/run/" for every scope other than RUNTIME_SCOPE_USER, and
+  * "/run/user/<effective UID>" for the user scope. Returns NULL if the allocation fails. */
+ if (scope != RUNTIME_SCOPE_USER)
+ return strdup("/run/");
+
+ if (asprintf(&runtime_dir, "/run/user/" UID_FMT, geteuid()) < 0)
+ return NULL;
+
+ return runtime_dir;
+}
+
+static int mount_private_dev(MountEntry *m, RuntimeScope scope) {
static const char devnodes[] =
"/dev/null\0"
"/dev/zero\0"
"/dev/urandom\0"
"/dev/tty\0";
- char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+ _cleanup_free_ char *runtime_dir = NULL, *temporary_mount = NULL;
const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
bool can_mknod = true;
int r;
assert(m);
+ runtime_dir = settle_runtime_dir(scope);
+ if (!runtime_dir)
+ return log_oom_debug();
+
+ temporary_mount = path_join(runtime_dir, "systemd/namespace-dev-XXXXXX");
+ if (!temporary_mount)
+ return log_oom_debug();
+
if (!mkdtemp(temporary_mount))
return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount);
if (r < 0)
log_debug_errno(r, "Failed to set up basic device tree at '%s', ignoring: %m", temporary_mount);
+ /* Make the bind mount read-only. */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, dev, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
+ if (r < 0)
+ return r;
+
/* Create the /dev directory if missing. It is more likely to be missing when the service is started
* with RootDirectory. This is consistent with mount units creating the mount points when missing. */
(void) mkdir_p_label(mount_entry_path(m), 0755);
if (r > 0) /* make this a NOP if /dev is already a mount point */
return 0;
- r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
- if (r < 0)
- return r;
-
- return 1;
-}
-
-static int mount_private_sysfs(const MountEntry *m) {
- const char *p = mount_entry_path(ASSERT_PTR(m));
- int r;
-
- (void) mkdir_p_label(p, 0755);
-
- r = remount_sysfs(p);
- if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) {
- /* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling
- * back to bind mount the host's version so that we get all child mounts of it, too. */
-
- log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p);
-
- (void) umount_recursive(p, 0);
-
- r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL);
- }
- if (r < 0)
- return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p);
-
- return 1;
+ return mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
}
static int mount_bind_sysfs(const MountEntry *m) {
return 0;
/* Bind mount the host's version so that we get all child mounts of it, too. */
- r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
- if (r < 0)
+ return mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL);
+}
+
+static int mount_private_sysfs(const MountEntry *m) {
+ const char *entry_path = mount_entry_path(ASSERT_PTR(m));
+ int r, n;
+ /* Mounts a new sysfs instance on the entry's path. Returns 0 once a new instance is mounted, the
+  * result of mount_bind_sysfs() when falling back to it for lack of privileges, or the negative
+  * error from the mount attempt otherwise. */
+ (void) mkdir_p_label(entry_path, 0755);
+ /* The mkdir above is best effort (result ignored). Next, detach what is currently mounted at the
+  * target; n is kept so the fallback below can tell whether anything was actually unmounted. */
+ n = umount_recursive(entry_path, 0);
+ /* Try to mount a brand-new sysfs instance with nosuid, noexec and nodev set. */
+ r = mount_nofollow_verbose(LOG_DEBUG, "sysfs", entry_path, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
+ if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+ /* When we do not have enough privileges to mount sysfs, fall back to use existing /sys. */
+
+ if (n > 0)
+ /* /sys or some of sub-mounts are umounted in the above. Refuse incomplete tree.
+ * Propagate the original error code returned by mount() in the above. */
+ return r;
+
+ return mount_bind_sysfs(m);
+
+ } else if (r < 0)
return r;
- return 1;
+ /* We mounted a new instance now. Let's bind mount the children over now. */
+ (void) bind_mount_submounts("/sys", entry_path);
+ return 0;
}
static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) {
* means we really don't want to use it, since it would affect our host's /proc
* mount. Hence let's gracefully fallback to a classic, unrestricted version. */
r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL);
- if (r == -EPERM) {
- /* When we do not have enough privileges to mount /proc, fallback to use existing /proc. */
+ if (ERRNO_IS_NEG_PRIVILEGE(r)) {
+ /* When we do not have enough privileges to mount /proc, fall back to use existing /proc. */
if (n > 0)
/* /proc or some of sub-mounts are umounted in the above. Refuse incomplete tree.
* Propagate the original error code returned by mount() in the above. */
- return -EPERM;
+ return r;
r = path_is_mount_point(entry_path, NULL, 0);
if (r < 0)
return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m");
- if (r == 0) {
- /* We lack permissions to mount a new instance of /proc, and it is not already
- * mounted. But we can access the host's, so as a final fallback bind-mount it to
- * the destination, as most likely we are inside a user manager in an unprivileged
- * user namespace. */
- r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
- if (r < 0)
- return -EPERM;
- }
+ if (r > 0)
+ return 0;
+
+ /* We lack permissions to mount a new instance of /proc, and it is not already mounted. But
+ * we can access the host's, so as a final fallback bind-mount it to the destination, as most
+ * likely we are inside a user manager in an unprivileged user namespace. */
+ return mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL);
+
} else if (r < 0)
return r;
- return 1;
+ /* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn
+ * where a bunch of files are overmounted, in particular the boot id */
+ (void) bind_mount_submounts("/proc", entry_path);
+ return 0;
}
static int mount_tmpfs(const MountEntry *m) {
if (r < 0)
return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path);
- return 1;
+ return 0;
}
static int mount_run(const MountEntry *m) {
return 0;
}
-static int mount_image(const MountEntry *m, const char *root_directory) {
+static int mount_image(
+ const MountEntry *m,
+ const char *root_directory,
+ const ImagePolicy *image_policy) {
_cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL,
- *host_os_release_sysext_level = NULL;
+ *host_os_release_level = NULL, *extension_name = NULL;
+ _cleanup_strv_free_ char **extension_release = NULL;
+ ImageClass class = IMAGE_SYSEXT;
int r;
assert(m);
+ r = path_extract_filename(mount_entry_source(m), &extension_name);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extract extension name from %s: %m", mount_entry_source(m));
+
if (m->mode == EXTENSION_IMAGES) {
+ r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release);
+ if (r == -ENOENT) {
+ r = load_extension_release_pairs(mount_entry_source(m), IMAGE_CONFEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release);
+ if (r >= 0)
+ class = IMAGE_CONFEXT;
+ }
+ if (r == -ENOENT)
+ return r;
+
r = parse_os_release(
empty_to_root(root_directory),
"ID", &host_os_release_id,
"VERSION_ID", &host_os_release_version_id,
- "SYSEXT_LEVEL", &host_os_release_sysext_level,
+ image_class_info[class].level_env, &host_os_release_level,
NULL);
if (r < 0)
return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
}
r = verity_dissect_and_mount(
- /* src_fd= */ -1, mount_entry_source(m), mount_entry_path(m), m->image_options,
- host_os_release_id, host_os_release_version_id, host_os_release_sysext_level, NULL);
+ /* src_fd= */ -1,
+ mount_entry_source(m),
+ mount_entry_path(m),
+ m->image_options,
+ image_policy,
+ host_os_release_id,
+ host_os_release_version_id,
+ host_os_release_level,
+ NULL);
if (r == -ENOENT && m->ignore)
return 0;
if (r == -ESTALE && host_os_release_id)
host_os_release_id,
host_os_release_version_id ? " VERSION_ID=" : "",
strempty(host_os_release_version_id),
- host_os_release_sysext_level ? " SYSEXT_LEVEL=" : "",
- strempty(host_os_release_sysext_level));
+ host_os_release_level ? image_class_info[class].level_env_print : "",
+ strempty(host_os_release_level));
if (r < 0)
return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m));
- return 1;
+ return 0;
}
static int mount_overlay(const MountEntry *m) {
r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options);
if (r == -ENOENT && m->ignore)
return 0;
- if (r < 0)
- return r;
- return 1;
+ return r;
}
static int follow_symlink(
static int apply_one_mount(
const char *root_directory,
MountEntry *m,
- const NamespaceInfo *ns_info) {
+ const ImagePolicy *mount_image_policy,
+ const ImagePolicy *extension_image_policy,
+ const NamespaceInfo *ns_info,
+ RuntimeScope scope) {
_cleanup_free_ char *inaccessible = NULL;
bool rbind = true, make = false;
switch (m->mode) {
case INACCESSIBLE: {
- _cleanup_free_ char *tmp = NULL;
- const char *runtime_dir;
+ _cleanup_free_ char *runtime_dir = NULL;
struct stat target;
/* First, get rid of everything that is below if there
mount_entry_path(m));
}
- if (geteuid() == 0)
- runtime_dir = "/run";
- else {
- if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0)
- return -ENOMEM;
-
- runtime_dir = tmp;
- }
+ /* We don't pass the literal runtime scope through here but one based purely on our UID. This
+ * means that the root user's --user services will use the host's inaccessible inodes rather
+ * than root's private ones. This is preferable since it means device nodes that are
+ * overmounted to make them inaccessible will be overmounted with a device node, rather than
+ * an AF_UNIX socket inode. */
+ runtime_dir = settle_runtime_dir(geteuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER);
+ if (!runtime_dir)
+ return log_oom_debug();
r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible);
if (r < 0)
case EXTENSION_DIRECTORIES: {
_cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL,
- *host_os_release_sysext_level = NULL, *extension_name = NULL;
+ *host_os_release_level = NULL, *extension_name = NULL;
_cleanup_strv_free_ char **extension_release = NULL;
+ ImageClass class = IMAGE_SYSEXT;
r = path_extract_filename(mount_entry_source(m), &extension_name);
if (r < 0)
return log_debug_errno(r, "Failed to extract extension name from %s: %m", mount_entry_source(m));
+ r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release);
+ if (r == -ENOENT) {
+ r = load_extension_release_pairs(mount_entry_source(m), IMAGE_CONFEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release);
+ if (r >= 0)
+ class = IMAGE_CONFEXT;
+ }
+ if (r == -ENOENT)
+ return r;
+
r = parse_os_release(
empty_to_root(root_directory),
"ID", &host_os_release_id,
"VERSION_ID", &host_os_release_version_id,
- "SYSEXT_LEVEL", &host_os_release_sysext_level,
+ image_class_info[class].level_env, &host_os_release_level,
NULL);
if (r < 0)
return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
if (isempty(host_os_release_id))
return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory));
- r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release);
+ r = load_extension_release_pairs(mount_entry_source(m), class, extension_name, /* relax_extension_release_check= */ false, &extension_release);
if (r == -ENOENT && m->ignore)
return 0;
if (r < 0)
extension_name,
host_os_release_id,
host_os_release_version_id,
- host_os_release_sysext_level,
- /* host_sysext_scope */ NULL, /* Leave empty, we need to accept both system and portable */
+ host_os_release_level,
+ /* host_extension_scope */ NULL, /* Leave empty, we need to accept both system and portable */
extension_release,
- IMAGE_SYSEXT);
+ class);
if (r == 0)
return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Directory %s extension-release metadata does not match the root's", extension_name);
if (r < 0)
break;
case PRIVATE_DEV:
- return mount_private_dev(m);
+ return mount_private_dev(m, scope);
case BIND_DEV:
return mount_bind_dev(m);
return mount_mqueuefs(m);
case MOUNT_IMAGES:
- return mount_image(m, NULL);
+ return mount_image(m, NULL, mount_image_policy);
case EXTENSION_IMAGES:
- return mount_image(m, root_directory);
+ return mount_image(m, root_directory, extension_image_policy);
case OVERLAY_MOUNT:
return mount_overlay(m);
const char *creds_path,
const char* log_namespace,
bool setup_propagate,
- const char* notify_socket) {
+ const char* notify_socket,
+ const char* host_os_release) {
size_t protect_home_cnt;
size_t protect_system_cnt =
!!log_namespace +
setup_propagate + /* /run/systemd/incoming */
!!notify_socket +
+ !!host_os_release +
ns_info->private_network + /* /sys */
ns_info->private_ipc; /* /dev/mqueue */
}
return 0;
}
+static void mount_entry_path_debug_string(const char *root, MountEntry *m, char **error_path) {
+ assert(m);
+
+ /* Create a string suitable for debugging logs, stripping for example the local working directory.
+ * For example, with a BindPaths=/var/bar that does not exist on the host:
+ *
+ * Before:
+ * foo.service: Failed to set up mount namespacing: /run/systemd/unit-root/var/bar: No such file or directory
+ * After:
+ * foo.service: Failed to set up mount namespacing: /var/bar: No such file or directory
+ *
+ * Note that this is an error path, so no OOM check is done on purpose. */
+ /* Output contract: if error_path is NULL nothing is written. Otherwise *error_path is overwritten
+  * with either NULL (the entry has no path, or strdup() failed) or a newly allocated string that
+  * the caller is expected to free. root may be NULL, in which case no prefix is stripped. */
+ if (!error_path)
+ return;
+
+ if (!mount_entry_path(m)) {
+ *error_path = NULL;
+ return;
+ }
+ /* When the entry path begins with root, report only what follows that prefix (cf. the example
+  * above); otherwise fall through and report the whole path. */
+ if (root) {
+ const char *e = startswith(mount_entry_path(m), root);
+ if (e) {
+ *error_path = strdup(e);
+ return;
+ }
+ }
+
+ *error_path = strdup(mount_entry_path(m));
+ return;
+}
+
static int apply_mounts(
const char *root,
+ const ImagePolicy *mount_image_policy,
+ const ImagePolicy *extension_image_policy,
const NamespaceInfo *ns_info,
MountEntry *mounts,
size_t *n_mounts,
- char **exec_dir_symlinks,
+ RuntimeScope scope,
+ char **symlinks,
char **error_path) {
_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
/* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */
r = follow_symlink(!IN_SET(m->mode, EXTENSION_IMAGES, EXTENSION_DIRECTORIES) ? root : NULL, m);
if (r < 0) {
- if (error_path && mount_entry_path(m))
- *error_path = strdup(mount_entry_path(m));
+ mount_entry_path_debug_string(root, m, error_path);
return r;
}
if (r == 0) {
break;
}
- r = apply_one_mount(root, m, ns_info);
+ r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info, scope);
if (r < 0) {
- if (error_path && mount_entry_path(m))
- *error_path = strdup(mount_entry_path(m));
+ mount_entry_path_debug_string(root, m, error_path);
return r;
}
}
/* Now that all filesystems have been set up, but before the
- * read-only switches are flipped, create the exec dirs symlinks.
+ * read-only switches are flipped, create the exec dirs and other symlinks.
* Note that when /var/lib is not empty/tmpfs, these symlinks will already
* exist, which means this will be a no-op. */
- r = create_symlinks_from_tuples(root, exec_dir_symlinks);
+ r = create_symlinks_from_tuples(root, symlinks);
if (r < 0)
- return log_debug_errno(r, "Failed to set up ExecDirectories symlinks inside mount namespace: %m");
+ return log_debug_errno(r, "Failed to set up symlinks inside mount namespace: %m");
/* Create a deny list we can pass to bind_mount_recursive() */
deny_list = new(char*, (*n_mounts)+1);
for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) {
r = make_read_only(m, deny_list, proc_self_mountinfo);
if (r < 0) {
- if (error_path && mount_entry_path(m))
- *error_path = strdup(mount_entry_path(m));
+ mount_entry_path_debug_string(root, m, error_path);
return r;
}
}
for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) {
r = make_noexec(m, deny_list, proc_self_mountinfo);
if (r < 0) {
- if (error_path && mount_entry_path(m))
- *error_path = strdup(mount_entry_path(m));
+ mount_entry_path_debug_string(root, m, error_path);
return r;
}
}
for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) {
r = make_nosuid(m, proc_self_mountinfo);
if (r < 0) {
- if (error_path && mount_entry_path(m))
- *error_path = strdup(mount_entry_path(m));
+ mount_entry_path_debug_string(root, m, error_path);
return r;
}
}
return false;
}
-static int verity_settings_prepare(
- VeritySettings *verity,
- const char *root_image,
- const void *root_hash,
- size_t root_hash_size,
- const char *root_hash_path,
- const void *root_hash_sig,
- size_t root_hash_sig_size,
- const char *root_hash_sig_path,
- const char *verity_data_path) {
-
- int r;
-
- assert(verity);
-
- if (root_hash) {
- void *d;
-
- d = memdup(root_hash, root_hash_size);
- if (!d)
- return -ENOMEM;
-
- free_and_replace(verity->root_hash, d);
- verity->root_hash_size = root_hash_size;
- verity->designator = PARTITION_ROOT;
- }
-
- if (root_hash_sig) {
- void *d;
-
- d = memdup(root_hash_sig, root_hash_sig_size);
- if (!d)
- return -ENOMEM;
-
- free_and_replace(verity->root_hash_sig, d);
- verity->root_hash_sig_size = root_hash_sig_size;
- verity->designator = PARTITION_ROOT;
- }
-
- if (verity_data_path) {
- r = free_and_strdup(&verity->data_path, verity_data_path);
- if (r < 0)
- return r;
- }
-
- r = verity_settings_load(
- verity,
- root_image,
- root_hash_path,
- root_hash_sig_path);
- if (r < 0)
- return log_debug_errno(r, "Failed to load root hash: %m");
-
- return 0;
-}
-
int setup_namespace(
const char* root_directory,
const char* root_image,
- const MountOptions *root_image_options,
+ const MountOptions *root_image_mount_options,
+ const ImagePolicy *root_image_policy,
const NamespaceInfo *ns_info,
char** read_write_paths,
char** read_only_paths,
char** exec_paths,
char** no_exec_paths,
char** empty_directories,
- char** exec_dir_symlinks,
+ char** symlinks,
const BindMount *bind_mounts,
size_t n_bind_mounts,
const TemporaryFileSystem *temporary_filesystems,
size_t n_temporary_filesystems,
const MountImage *mount_images,
size_t n_mount_images,
+ const ImagePolicy *mount_image_policy,
const char* tmp_dir,
const char* var_tmp_dir,
const char *creds_path,
const char *log_namespace,
unsigned long mount_propagation_flag,
- const void *root_hash,
- size_t root_hash_size,
- const char *root_hash_path,
- const void *root_hash_sig,
- size_t root_hash_sig_size,
- const char *root_hash_sig_path,
- const char *verity_data_path,
+ VeritySettings *verity,
const MountImage *extension_images,
size_t n_extension_images,
+ const ImagePolicy *extension_image_policy,
char **extension_directories,
const char *propagate_dir,
const char *incoming_dir,
const char *extension_dir,
const char *notify_socket,
+ const char *host_os_release_stage,
+ RuntimeScope scope,
char **error_path) {
_cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL;
- _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT;
_cleanup_strv_free_ char **hierarchies = NULL;
MountEntry *m = NULL, *mounts = NULL;
bool require_prefix = false, setup_propagate = false;
strv_isempty(read_write_paths))
dissect_image_flags |= DISSECT_IMAGE_READ_ONLY;
- r = verity_settings_prepare(
- &verity,
- root_image,
- root_hash, root_hash_size, root_hash_path,
- root_hash_sig, root_hash_sig_size, root_hash_sig_path,
- verity_data_path);
- if (r < 0)
- return r;
-
- SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, verity.data_path);
+ SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, verity && verity->data_path);
r = loop_device_make_by_path(
root_image,
r = dissect_loop_device(
loop_device,
- &verity,
- root_image_options,
+ verity,
+ root_image_mount_options,
+ root_image_policy,
dissect_image_flags,
&dissected_image);
if (r < 0)
r = dissected_image_load_verity_sig_partition(
dissected_image,
loop_device->fd,
- &verity);
+ verity);
if (r < 0)
return r;
r = dissected_image_decrypt(
dissected_image,
NULL,
- &verity,
+ verity,
dissect_image_flags);
if (r < 0)
return log_debug_errno(r, "Failed to decrypt dissected image: %m");
* in the root. The temporary directory prevents any mounts from being potentially obscured
* my other mounts we already applied. We use the same mount point for all images, which is
* safe, since they all live in their own namespaces after all, and hence won't see each
- * other. */
+ * other. (Note: this directory is also created by PID 1 early on, we create it here for
+ * similar reasons as /run/systemd/ first.) */
+ root = "/run/systemd/mount-rootfs";
+ (void) mkdir_label(root, 0555);
- root = "/run/systemd/unit-root";
- (void) mkdir_label(root, 0700);
require_prefix = true;
}
if (n_extension_images > 0 || !strv_isempty(extension_directories)) {
- r = parse_env_extension_hierarchies(&hierarchies, "SYSTEMD_SYSEXT_HIERARCHIES");
+ /* Hierarchy population needs to be done for sysext and confext extension images */
+ r = parse_env_extension_hierarchies(&hierarchies, "SYSTEMD_SYSEXT_AND_CONFEXT_HIERARCHIES");
if (r < 0)
return r;
}
creds_path,
log_namespace,
setup_propagate,
- notify_socket);
+ notify_socket,
+ host_os_release_stage);
if (n_mounts > 0) {
m = mounts = new0(MountEntry, n_mounts);
.mode = BIND_MOUNT,
.read_only = true,
.source_const = creds_path,
+ .ignore = true,
};
} else {
/* If our service has no credentials store configured, then make the whole
.read_only = true,
};
+ if (host_os_release_stage)
+ *(m++) = (MountEntry) {
+ .path_const = "/run/host/.os-release-stage/",
+ .source_const = host_os_release_stage,
+ .mode = BIND_MOUNT,
+ .read_only = true,
+ .ignore = true, /* Live copy, don't hard-fail if it goes missing */
+ };
+
assert(mounts + n_mounts == m);
/* Prepend the root directory where that's necessary */
(void) base_filesystem_create(root, UID_INVALID, GID_INVALID);
/* Now make the magic happen */
- r = apply_mounts(root, ns_info, mounts, &n_mounts, exec_dir_symlinks, error_path);
+ r = apply_mounts(root,
+ mount_image_policy,
+ extension_image_policy,
+ ns_info,
+ mounts, &n_mounts,
+ scope,
+ symlinks,
+ error_path);
if (r < 0)
goto finish;