X-Git-Url: http://git.ipfire.org/?a=blobdiff_plain;f=src%2Fcore%2Fnamespace.c;h=206453f30f9cf45c3f276a09fe14f04e1a4198ac;hb=1e5e902f60fdaf6c88f5ffb5c15b84f3e9afe60b;hp=a71beeb18bc123c46e83208c322ef086befc2cad;hpb=ddd43f31e3be45a276ec4191eace333f844b98b8;p=thirdparty%2Fsystemd.git diff --git a/src/core/namespace.c b/src/core/namespace.c index a71beeb18bc..206453f30f9 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -22,7 +22,7 @@ #include "fd-util.h" #include "format-util.h" #include "glyph-util.h" -#include "label.h" +#include "label-util.h" #include "list.h" #include "lock-util.h" #include "loop-util.h" @@ -138,9 +138,6 @@ static const MountEntry protect_kernel_tunables_sys_table[] = { /* ProtectKernelModules= option */ static const MountEntry protect_kernel_modules_table[] = { -#if HAVE_SPLIT_USR - { "/lib/modules", INACCESSIBLE, true }, -#endif { "/usr/lib/modules", INACCESSIBLE, true }, }; @@ -182,14 +179,6 @@ static const MountEntry protect_system_yes_table[] = { { "/usr", READONLY, false }, { "/boot", READONLY, true }, { "/efi", READONLY, true }, -#if HAVE_SPLIT_USR - { "/lib", READONLY, true }, - { "/lib64", READONLY, true }, - { "/bin", READONLY, true }, -# if HAVE_SPLIT_BIN - { "/sbin", READONLY, true }, -# endif -#endif }; /* ProtectSystem=full includes ProtectSystem=yes */ @@ -198,14 +187,6 @@ static const MountEntry protect_system_full_table[] = { { "/boot", READONLY, true }, { "/efi", READONLY, true }, { "/etc", READONLY, false }, -#if HAVE_SPLIT_USR - { "/lib", READONLY, true }, - { "/lib64", READONLY, true }, - { "/bin", READONLY, true }, -# if HAVE_SPLIT_BIN - { "/sbin", READONLY, true }, -# endif -#endif }; /* @@ -227,25 +208,44 @@ static const MountEntry protect_system_strict_table[] = { }; static const char * const mount_mode_table[_MOUNT_MODE_MAX] = { - [INACCESSIBLE] = "inaccessible", - [OVERLAY_MOUNT] = "overlay", - [BIND_MOUNT] = "bind", - [BIND_MOUNT_RECURSIVE] = "rbind", - [PRIVATE_TMP] = "private-tmp", - [PRIVATE_DEV] = "private-dev", - [BIND_DEV] = "bind-dev", - [EMPTY_DIR] = "empty", - [PRIVATE_SYSFS] = "private-sysfs", - [BIND_SYSFS] = "bind-sysfs", - [PROCFS] = "procfs", - [READONLY] = "read-only", - [READWRITE] = "read-write", - [TMPFS] = "tmpfs", - [MOUNT_IMAGES] = "mount-images", - [READWRITE_IMPLICIT] = "rw-implicit", - [EXEC] = "exec", - [NOEXEC] = "noexec", - [MQUEUEFS] = "mqueuefs", + [INACCESSIBLE] = "inaccessible", + [OVERLAY_MOUNT] = "overlay", + [MOUNT_IMAGES] = "mount-images", + [BIND_MOUNT] = "bind", + [BIND_MOUNT_RECURSIVE] = "rbind", + [PRIVATE_TMP] = "private-tmp", + [PRIVATE_TMP_READONLY] = "private-tmp-read-only", + [PRIVATE_DEV] = "private-dev", + [BIND_DEV] = "bind-dev", + [EMPTY_DIR] = "empty", + [PRIVATE_SYSFS] = "private-sysfs", + [BIND_SYSFS] = "bind-sysfs", + [PROCFS] = "procfs", + [READONLY] = "read-only", + [READWRITE] = "read-write", + [NOEXEC] = "noexec", + [EXEC] = "exec", + [TMPFS] = "tmpfs", + [RUN] = "run", + [EXTENSION_DIRECTORIES] = "extension-directories", + [EXTENSION_IMAGES] = "extension-images", + [MQUEUEFS] = "mqueuefs", + [READWRITE_IMPLICIT] = "read-write-implicit", +}; + +/* Helper struct for naming simplicity and reusability */ +static const struct { + const char *level_env; + const char *level_env_print; +} image_class_info[_IMAGE_CLASS_MAX] = { + [IMAGE_SYSEXT] = { + .level_env = "SYSEXT_LEVEL", + .level_env_print = " SYSEXT_LEVEL=", + }, + [IMAGE_CONFEXT] = { + .level_env = "CONFEXT_LEVEL", + .level_env_print = " CONFEXT_LEVEL=", + } }; DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(mount_mode, MountMode); @@ -536,7 +536,7 @@ static int append_extensions( *((*p)++) = (MountEntry) { .path_malloc = TAKE_PTR(mount_point), - .source_const = TAKE_PTR(source), + .source_malloc = TAKE_PTR(source), .mode = EXTENSION_DIRECTORIES, .ignore = ignore_enoent, .has_prefix = true, @@ -909,7 +909,19 @@ add_symlink: return 0; } -static int mount_private_dev(MountEntry *m) { +static char *settle_runtime_dir(RuntimeScope scope) { + char *runtime_dir; + + if (scope != RUNTIME_SCOPE_USER) + return strdup("/run/"); + + if (asprintf(&runtime_dir, "/run/user/" UID_FMT, geteuid()) < 0) + return NULL; + + return runtime_dir; +} + +static int mount_private_dev(MountEntry *m, RuntimeScope scope) { static const char devnodes[] = "/dev/null\0" "/dev/zero\0" @@ -918,13 +930,21 @@ static int mount_private_dev(MountEntry *m) { "/dev/urandom\0" "/dev/tty\0"; - char temporary_mount[] = "/tmp/namespace-dev-XXXXXX"; + _cleanup_free_ char *runtime_dir = NULL, *temporary_mount = NULL; const char *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL; bool can_mknod = true; int r; assert(m); + runtime_dir = settle_runtime_dir(scope); + if (!runtime_dir) + return log_oom_debug(); + + temporary_mount = path_join(runtime_dir, "systemd/namespace-dev-XXXXXX"); + if (!temporary_mount) + return log_oom_debug(); + if (!mkdtemp(temporary_mount)) return log_debug_errno(errno, "Failed to create temporary directory '%s': %m", temporary_mount); @@ -995,6 +1015,11 @@ static int mount_private_dev(MountEntry *m) { if (r < 0) log_debug_errno(r, "Failed to set up basic device tree at '%s', ignoring: %m", temporary_mount); + /* Make the bind mount read-only. */ + r = mount_nofollow_verbose(LOG_DEBUG, NULL, dev, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL); + if (r < 0) + return r; + /* Create the /dev directory if missing. It is more likely to be missing when the service is started * with RootDirectory. This is consistent with mount units creating the mount points when missing. */ (void) mkdir_p_label(mount_entry_path(m), 0755); @@ -1049,34 +1074,7 @@ static int mount_bind_dev(const MountEntry *m) { if (r > 0) /* make this a NOP if /dev is already a mount point */ return 0; - r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); - if (r < 0) - return r; - - return 1; -} - -static int mount_private_sysfs(const MountEntry *m) { - const char *p = mount_entry_path(ASSERT_PTR(m)); - int r; - - (void) mkdir_p_label(p, 0755); - - r = remount_sysfs(p); - if (r < 0 && (ERRNO_IS_PRIVILEGE(r) || ERRNO_IS_NOT_SUPPORTED(r))) { - /* Running with an unprivileged user (PrivateUsers=yes), or the kernel seems old. Falling - * back to bind mount the host's version so that we get all child mounts of it, too. */ - - log_debug_errno(r, "Failed to remount sysfs on %s, falling back to bind mount: %m", p); - - (void) umount_recursive(p, 0); - - r = mount_nofollow_verbose(LOG_DEBUG, "/sys", p, NULL, MS_BIND|MS_REC, NULL); - } - if (r < 0) - return log_debug_errno(r, "Failed to remount sysfs on %s: %m", p); - - return 1; + return mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); } static int mount_bind_sysfs(const MountEntry *m) { @@ -1093,11 +1091,34 @@ static int mount_bind_sysfs(const MountEntry *m) { return 0; /* Bind mount the host's version so that we get all child mounts of it, too. */ - r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); - if (r < 0) + return mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); +} + +static int mount_private_sysfs(const MountEntry *m) { + const char *entry_path = mount_entry_path(ASSERT_PTR(m)); + int r, n; + + (void) mkdir_p_label(entry_path, 0755); + + n = umount_recursive(entry_path, 0); + + r = mount_nofollow_verbose(LOG_DEBUG, "sysfs", entry_path, "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); + if (ERRNO_IS_NEG_PRIVILEGE(r)) { + /* When we do not have enough privileges to mount sysfs, fall back to use existing /sys. */ + + if (n > 0) + /* /sys or some of sub-mounts are umounted in the above. Refuse incomplete tree. + * Propagate the original error code returned by mount() in the above. */ + return r; + + return mount_bind_sysfs(m); + + } else if (r < 0) return r; - return 1; + /* We mounted a new instance now. Let's bind mount the children over now. */ + (void) bind_mount_submounts("/sys", entry_path); + return 0; } static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) { @@ -1156,30 +1177,32 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) { * means we really don't want to use it, since it would affect our host's /proc * mount. Hence let's gracefully fallback to a classic, unrestricted version. */ r = mount_nofollow_verbose(LOG_DEBUG, "proc", entry_path, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL); - if (r == -EPERM) { - /* When we do not have enough privileges to mount /proc, fallback to use existing /proc. */ + if (ERRNO_IS_NEG_PRIVILEGE(r)) { + /* When we do not have enough privileges to mount /proc, fall back to use existing /proc. */ if (n > 0) /* /proc or some of sub-mounts are umounted in the above. Refuse incomplete tree. * Propagate the original error code returned by mount() in the above. */ - return -EPERM; + return r; r = path_is_mount_point(entry_path, NULL, 0); if (r < 0) return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m"); - if (r == 0) { - /* We lack permissions to mount a new instance of /proc, and it is not already - * mounted. But we can access the host's, so as a final fallback bind-mount it to - * the destination, as most likely we are inside a user manager in an unprivileged - * user namespace. */ - r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL); - if (r < 0) - return -EPERM; - } + if (r > 0) + return 0; + + /* We lack permissions to mount a new instance of /proc, and it is not already mounted. But + * we can access the host's, so as a final fallback bind-mount it to the destination, as most + * likely we are inside a user manager in an unprivileged user namespace. */ + return mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL); + } else if (r < 0) return r; - return 1; + /* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn + * where a bunch of files are overmounted, in particular the boot id */ + (void) bind_mount_submounts("/proc", entry_path); + return 0; } static int mount_tmpfs(const MountEntry *m) { @@ -1205,7 +1228,7 @@ static int mount_tmpfs(const MountEntry *m) { if (r < 0) return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path); - return 1; + return 0; } static int mount_run(const MountEntry *m) { @@ -1240,20 +1263,38 @@ static int mount_mqueuefs(const MountEntry *m) { return 0; } -static int mount_image(const MountEntry *m, const char *root_directory) { +static int mount_image( + const MountEntry *m, + const char *root_directory, + const ImagePolicy *image_policy) { _cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL, - *host_os_release_sysext_level = NULL; + *host_os_release_level = NULL, *extension_name = NULL; + _cleanup_strv_free_ char **extension_release = NULL; + ImageClass class = IMAGE_SYSEXT; int r; assert(m); + r = path_extract_filename(mount_entry_source(m), &extension_name); + if (r < 0) + return log_debug_errno(r, "Failed to extract extension name from %s: %m", mount_entry_source(m)); + if (m->mode == EXTENSION_IMAGES) { + r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release); + if (r == -ENOENT) { + r = load_extension_release_pairs(mount_entry_source(m), IMAGE_CONFEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release); + if (r >= 0) + class = IMAGE_CONFEXT; + } + if (r == -ENOENT) + return r; + r = parse_os_release( empty_to_root(root_directory), "ID", &host_os_release_id, "VERSION_ID", &host_os_release_version_id, - "SYSEXT_LEVEL", &host_os_release_sysext_level, + image_class_info[class].level_env, &host_os_release_level, NULL); if (r < 0) return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory)); @@ -1262,8 +1303,15 @@ static int mount_image(const MountEntry *m, const char *root_directory) { } r = verity_dissect_and_mount( - /* src_fd= */ -1, mount_entry_source(m), mount_entry_path(m), m->image_options, - host_os_release_id, host_os_release_version_id, host_os_release_sysext_level, NULL); + /* src_fd= */ -1, + mount_entry_source(m), + mount_entry_path(m), + m->image_options, + image_policy, + host_os_release_id, + host_os_release_version_id, + host_os_release_level, + NULL); if (r == -ENOENT && m->ignore) return 0; if (r == -ESTALE && host_os_release_id) @@ -1273,12 +1321,12 @@ static int mount_image(const MountEntry *m, const char *root_directory) { host_os_release_id, host_os_release_version_id ? " VERSION_ID=" : "", strempty(host_os_release_version_id), - host_os_release_sysext_level ? " SYSEXT_LEVEL=" : "", - strempty(host_os_release_sysext_level)); + host_os_release_level ? image_class_info[class].level_env_print : "", + strempty(host_os_release_level)); if (r < 0) return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m)); - return 1; + return 0; } static int mount_overlay(const MountEntry *m) { @@ -1294,10 +1342,8 @@ static int mount_overlay(const MountEntry *m) { r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options); if (r == -ENOENT && m->ignore) return 0; - if (r < 0) - return r; - return 1; + return r; } static int follow_symlink( @@ -1336,7 +1382,10 @@ static int follow_symlink( static int apply_one_mount( const char *root_directory, MountEntry *m, - const NamespaceInfo *ns_info) { + const ImagePolicy *mount_image_policy, + const ImagePolicy *extension_image_policy, + const NamespaceInfo *ns_info, + RuntimeScope scope) { _cleanup_free_ char *inaccessible = NULL; bool rbind = true, make = false; @@ -1351,8 +1400,7 @@ static int apply_one_mount( switch (m->mode) { case INACCESSIBLE: { - _cleanup_free_ char *tmp = NULL; - const char *runtime_dir; + _cleanup_free_ char *runtime_dir = NULL; struct stat target; /* First, get rid of everything that is below if there @@ -1368,14 +1416,14 @@ static int apply_one_mount( mount_entry_path(m)); } - if (geteuid() == 0) - runtime_dir = "/run"; - else { - if (asprintf(&tmp, "/run/user/" UID_FMT, geteuid()) < 0) - return -ENOMEM; - - runtime_dir = tmp; - } + /* We don't pass the literal runtime scope through here but one based purely on our UID. This + * means that the root user's --user services will use the host's inaccessible inodes rather + * then root's private ones. This is preferable since it means device nodes that are + * overmounted to make them inaccessible will be overmounted with a device node, rather than + * an AF_UNIX socket inode. */ + runtime_dir = settle_runtime_dir(geteuid() == 0 ? RUNTIME_SCOPE_SYSTEM : RUNTIME_SCOPE_USER); + if (!runtime_dir) + return log_oom_debug(); r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible); if (r < 0) @@ -1405,25 +1453,35 @@ static int apply_one_mount( case EXTENSION_DIRECTORIES: { _cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL, - *host_os_release_sysext_level = NULL, *extension_name = NULL; + *host_os_release_level = NULL, *extension_name = NULL; _cleanup_strv_free_ char **extension_release = NULL; + ImageClass class = IMAGE_SYSEXT; r = path_extract_filename(mount_entry_source(m), &extension_name); if (r < 0) return log_debug_errno(r, "Failed to extract extension name from %s: %m", mount_entry_source(m)); + r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release); + if (r == -ENOENT) { + r = load_extension_release_pairs(mount_entry_source(m), IMAGE_CONFEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release); + if (r >= 0) + class = IMAGE_CONFEXT; + } + if (r == -ENOENT) + return r; + r = parse_os_release( empty_to_root(root_directory), "ID", &host_os_release_id, "VERSION_ID", &host_os_release_version_id, - "SYSEXT_LEVEL", &host_os_release_sysext_level, + image_class_info[class].level_env, &host_os_release_level, NULL); if (r < 0) return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory)); if (isempty(host_os_release_id)) return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "'ID' field not found or empty in 'os-release' data of OS tree '%s': %m", empty_to_root(root_directory)); - r = load_extension_release_pairs(mount_entry_source(m), IMAGE_SYSEXT, extension_name, /* relax_extension_release_check= */ false, &extension_release); + r = load_extension_release_pairs(mount_entry_source(m), class, extension_name, /* relax_extension_release_check= */ false, &extension_release); if (r == -ENOENT && m->ignore) return 0; if (r < 0) @@ -1433,10 +1491,10 @@ static int apply_one_mount( extension_name, host_os_release_id, host_os_release_version_id, - host_os_release_sysext_level, - /* host_sysext_scope */ NULL, /* Leave empty, we need to accept both system and portable */ + host_os_release_level, + /* host_extension_scope */ NULL, /* Leave empty, we need to accept both system and portable */ extension_release, - IMAGE_SYSEXT); + class); if (r == 0) return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Directory %s extension-release metadata does not match the root's", extension_name); if (r < 0) @@ -1485,7 +1543,7 @@ static int apply_one_mount( break; case PRIVATE_DEV: - return mount_private_dev(m); + return mount_private_dev(m, scope); case BIND_DEV: return mount_bind_dev(m); @@ -1506,10 +1564,10 @@ static int apply_one_mount( return mount_mqueuefs(m); case MOUNT_IMAGES: - return mount_image(m, NULL); + return mount_image(m, NULL, mount_image_policy); case EXTENSION_IMAGES: - return mount_image(m, root_directory); + return mount_image(m, root_directory, extension_image_policy); case OVERLAY_MOUNT: return mount_overlay(m); @@ -1685,7 +1743,8 @@ static size_t namespace_calculate_mounts( const char *creds_path, const char* log_namespace, bool setup_propagate, - const char* notify_socket) { + const char* notify_socket, + const char* host_os_release) { size_t protect_home_cnt; size_t protect_system_cnt = @@ -1730,6 +1789,7 @@ static size_t namespace_calculate_mounts( !!log_namespace + setup_propagate + /* /run/systemd/incoming */ !!notify_socket + + !!host_os_release + ns_info->private_network + /* /sys */ ns_info->private_ipc; /* /dev/mqueue */ } @@ -1777,12 +1837,48 @@ static int create_symlinks_from_tuples(const char *root, char **strv_symlinks) { return 0; } +static void mount_entry_path_debug_string(const char *root, MountEntry *m, char **error_path) { + assert(m); + + /* Create a string suitable for debugging logs, stripping for example the local working directory. + * For example, with a BindPaths=/var/bar that does not exist on the host: + * + * Before: + * foo.service: Failed to set up mount namespacing: /run/systemd/unit-root/var/bar: No such file or directory + * After: + * foo.service: Failed to set up mount namespacing: /var/bar: No such file or directory + * + * Note that this is an error path, so no OOM check is done on purpose. */ + + if (!error_path) + return; + + if (!mount_entry_path(m)) { + *error_path = NULL; + return; + } + + if (root) { + const char *e = startswith(mount_entry_path(m), root); + if (e) { + *error_path = strdup(e); + return; + } + } + + *error_path = strdup(mount_entry_path(m)); + return; +} + static int apply_mounts( const char *root, + const ImagePolicy *mount_image_policy, + const ImagePolicy *extension_image_policy, const NamespaceInfo *ns_info, MountEntry *mounts, size_t *n_mounts, - char **exec_dir_symlinks, + RuntimeScope scope, + char **symlinks, char **error_path) { _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; @@ -1820,8 +1916,7 @@ static int apply_mounts( /* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */ r = follow_symlink(!IN_SET(m->mode, EXTENSION_IMAGES, EXTENSION_DIRECTORIES) ? root : NULL, m); if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); + mount_entry_path_debug_string(root, m, error_path); return r; } if (r == 0) { @@ -1833,10 +1928,9 @@ static int apply_mounts( break; } - r = apply_one_mount(root, m, ns_info); + r = apply_one_mount(root, m, mount_image_policy, extension_image_policy, ns_info, scope); if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); + mount_entry_path_debug_string(root, m, error_path); return r; } @@ -1850,12 +1944,12 @@ static int apply_mounts( } /* Now that all filesystems have been set up, but before the - * read-only switches are flipped, create the exec dirs symlinks. + * read-only switches are flipped, create the exec dirs and other symlinks. * Note that when /var/lib is not empty/tmpfs, these symlinks will already * exist, which means this will be a no-op. */ - r = create_symlinks_from_tuples(root, exec_dir_symlinks); + r = create_symlinks_from_tuples(root, symlinks); if (r < 0) - return log_debug_errno(r, "Failed to set up ExecDirectories symlinks inside mount namespace: %m"); + return log_debug_errno(r, "Failed to set up symlinks inside mount namespace: %m"); /* Create a deny list we can pass to bind_mount_recursive() */ deny_list = new(char*, (*n_mounts)+1); @@ -1869,8 +1963,7 @@ static int apply_mounts( for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { r = make_read_only(m, deny_list, proc_self_mountinfo); if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); + mount_entry_path_debug_string(root, m, error_path); return r; } } @@ -1884,8 +1977,7 @@ static int apply_mounts( for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { r = make_noexec(m, deny_list, proc_self_mountinfo); if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); + mount_entry_path_debug_string(root, m, error_path); return r; } } @@ -1895,8 +1987,7 @@ static int apply_mounts( for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { r = make_nosuid(m, proc_self_mountinfo); if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); + mount_entry_path_debug_string(root, m, error_path); return r; } } @@ -1953,66 +2044,11 @@ static bool home_read_only( return false; } -static int verity_settings_prepare( - VeritySettings *verity, - const char *root_image, - const void *root_hash, - size_t root_hash_size, - const char *root_hash_path, - const void *root_hash_sig, - size_t root_hash_sig_size, - const char *root_hash_sig_path, - const char *verity_data_path) { - - int r; - - assert(verity); - - if (root_hash) { - void *d; - - d = memdup(root_hash, root_hash_size); - if (!d) - return -ENOMEM; - - free_and_replace(verity->root_hash, d); - verity->root_hash_size = root_hash_size; - verity->designator = PARTITION_ROOT; - } - - if (root_hash_sig) { - void *d; - - d = memdup(root_hash_sig, root_hash_sig_size); - if (!d) - return -ENOMEM; - - free_and_replace(verity->root_hash_sig, d); - verity->root_hash_sig_size = root_hash_sig_size; - verity->designator = PARTITION_ROOT; - } - - if (verity_data_path) { - r = free_and_strdup(&verity->data_path, verity_data_path); - if (r < 0) - return r; - } - - r = verity_settings_load( - verity, - root_image, - root_hash_path, - root_hash_sig_path); - if (r < 0) - return log_debug_errno(r, "Failed to load root hash: %m"); - - return 0; -} - int setup_namespace( const char* root_directory, const char* root_image, - const MountOptions *root_image_options, + const MountOptions *root_image_mount_options, + const ImagePolicy *root_image_policy, const NamespaceInfo *ns_info, char** read_write_paths, char** read_only_paths, @@ -2020,37 +2056,34 @@ int setup_namespace( char** exec_paths, char** no_exec_paths, char** empty_directories, - char** exec_dir_symlinks, + char** symlinks, const BindMount *bind_mounts, size_t n_bind_mounts, const TemporaryFileSystem *temporary_filesystems, size_t n_temporary_filesystems, const MountImage *mount_images, size_t n_mount_images, + const ImagePolicy *mount_image_policy, const char* tmp_dir, const char* var_tmp_dir, const char *creds_path, const char *log_namespace, unsigned long mount_propagation_flag, - const void *root_hash, - size_t root_hash_size, - const char *root_hash_path, - const void *root_hash_sig, - size_t root_hash_sig_size, - const char *root_hash_sig_path, - const char *verity_data_path, + VeritySettings *verity, const MountImage *extension_images, size_t n_extension_images, + const ImagePolicy *extension_image_policy, char **extension_directories, const char *propagate_dir, const char *incoming_dir, const char *extension_dir, const char *notify_socket, + const char *host_os_release_stage, + RuntimeScope scope, char **error_path) { _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; - _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; _cleanup_strv_free_ char **hierarchies = NULL; MountEntry *m = NULL, *mounts = NULL; bool require_prefix = false, setup_propagate = false; @@ -2090,16 +2123,7 @@ int setup_namespace( strv_isempty(read_write_paths)) dissect_image_flags |= DISSECT_IMAGE_READ_ONLY; - r = verity_settings_prepare( - &verity, - root_image, - root_hash, root_hash_size, root_hash_path, - root_hash_sig, root_hash_sig_size, root_hash_sig_path, - verity_data_path); - if (r < 0) - return r; - - SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, verity.data_path); + SET_FLAG(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE, verity && verity->data_path); r = loop_device_make_by_path( root_image, @@ -2113,8 +2137,9 @@ int setup_namespace( r = dissect_loop_device( loop_device, - &verity, - root_image_options, + verity, + root_image_mount_options, + root_image_policy, dissect_image_flags, &dissected_image); if (r < 0) @@ -2123,14 +2148,14 @@ int setup_namespace( r = dissected_image_load_verity_sig_partition( dissected_image, loop_device->fd, - &verity); + verity); if (r < 0) return r; r = dissected_image_decrypt( dissected_image, NULL, - &verity, + verity, dissect_image_flags); if (r < 0) return log_debug_errno(r, "Failed to decrypt dissected image: %m"); @@ -2148,15 +2173,17 @@ int setup_namespace( * in the root. The temporary directory prevents any mounts from being potentially obscured * my other mounts we already applied. We use the same mount point for all images, which is * safe, since they all live in their own namespaces after all, and hence won't see each - * other. */ + * other. (Note: this directory is also created by PID 1 early on, we create it here for + * similar reasons as /run/systemd/ first.) */ + root = "/run/systemd/mount-rootfs"; + (void) mkdir_label(root, 0555); - root = "/run/systemd/unit-root"; - (void) mkdir_label(root, 0700); require_prefix = true; } if (n_extension_images > 0 || !strv_isempty(extension_directories)) { - r = parse_env_extension_hierarchies(&hierarchies, "SYSTEMD_SYSEXT_HIERARCHIES"); + /* Hierarchy population needs to be done for sysext and confext extension images */ + r = parse_env_extension_hierarchies(&hierarchies, "SYSTEMD_SYSEXT_AND_CONFEXT_HIERARCHIES"); if (r < 0) return r; } @@ -2179,7 +2206,8 @@ int setup_namespace( creds_path, log_namespace, setup_propagate, - notify_socket); + notify_socket, + host_os_release_stage); if (n_mounts > 0) { m = mounts = new0(MountEntry, n_mounts); @@ -2367,6 +2395,7 @@ int setup_namespace( .mode = BIND_MOUNT, .read_only = true, .source_const = creds_path, + .ignore = true, }; } else { /* If our service has no credentials store configured, then make the whole @@ -2413,6 +2442,15 @@ int setup_namespace( .read_only = true, }; + if (host_os_release_stage) + *(m++) = (MountEntry) { + .path_const = "/run/host/.os-release-stage/", + .source_const = host_os_release_stage, + .mode = BIND_MOUNT, + .read_only = true, + .ignore = true, /* Live copy, don't hard-fail if it goes missing */ + }; + assert(mounts + n_mounts == m); /* Prepend the root directory where that's necessary */ @@ -2502,7 +2540,14 @@ int setup_namespace( (void) base_filesystem_create(root, UID_INVALID, GID_INVALID); /* Now make the magic happen */ - r = apply_mounts(root, ns_info, mounts, &n_mounts, exec_dir_symlinks, error_path); + r = apply_mounts(root, + mount_image_policy, + extension_image_policy, + ns_info, + mounts, &n_mounts, + scope, + symlinks, + error_path); if (r < 0) goto finish;