From: Mike Yuan Date: Sun, 16 Mar 2025 20:55:29 +0000 (+0100) Subject: core/namespace: stop applying mount options on private cgroupfs mount X-Git-Tag: v258-rc1~952^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1614d0c45190eed6cccae3b98856cefcd9f0bcbc;p=thirdparty%2Fsystemd.git core/namespace: stop applying mount options on private cgroupfs mount We always unshare cgroup ns for ProtectControlGroups=private/strict, while the mount options only apply to the cgroupfs instance in the initial cgroup namespace (cf. https://github.com/torvalds/linux/blob/b69bb476dee99d564d65d418e9a20acca6f32c3f/kernel/cgroup/cgroup.c#L1984). Hence let's drop the thing wholesale. Also, as noted in the comment already, mount_private_apivfs() internally enforces nosuid/noexec, so drop explicit flags too. --- diff --git a/src/core/namespace.c b/src/core/namespace.c index 56a3f93c3ea..aecc827797f 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -26,7 +26,6 @@ #include "loopback-setup.h" #include "missing_syscall.h" #include "mkdir-label.h" -#include "mount-setup.h" #include "mount-util.h" #include "mountpoint-util.h" #include "namespace-util.h" @@ -207,14 +206,14 @@ static const MountEntry protect_control_groups_yes_table[] = { }; /* ProtectControlGroups=private table. Note mount_private_apivfs() always use MS_NOSUID|MS_NOEXEC|MS_NODEV so - * flags is not set here. nsdelegate has been supported since kernels >= 4.13 so it is safe to use. */ + * flags is not set here. 
*/ static const MountEntry protect_control_groups_private_table[] = { - { "/sys/fs/cgroup", MOUNT_PRIVATE_CGROUP2FS, false, .read_only = false, .nosuid = true, .noexec = true, .options_const = "nsdelegate" }, + { "/sys/fs/cgroup", MOUNT_PRIVATE_CGROUP2FS, false, .read_only = false }, }; /* ProtectControlGroups=strict table */ static const MountEntry protect_control_groups_strict_table[] = { - { "/sys/fs/cgroup", MOUNT_PRIVATE_CGROUP2FS, false, .read_only = true, .nosuid = true, .noexec = true, .options_const = "nsdelegate" }, + { "/sys/fs/cgroup", MOUNT_PRIVATE_CGROUP2FS, false, .read_only = true }, }; /* ProtectSystem=yes table */ @@ -338,7 +337,7 @@ static bool mount_entry_read_only(const MountEntry *p) { static bool mount_entry_noexec(const MountEntry *p) { assert(p); - return p->noexec || IN_SET(p->mode, MOUNT_NOEXEC, MOUNT_INACCESSIBLE, MOUNT_PRIVATE_SYSFS, MOUNT_BIND_SYSFS, MOUNT_PROCFS); + return p->noexec || IN_SET(p->mode, MOUNT_NOEXEC, MOUNT_INACCESSIBLE, MOUNT_PRIVATE_SYSFS, MOUNT_BIND_SYSFS, MOUNT_PROCFS, MOUNT_PRIVATE_CGROUP2FS); } static bool mount_entry_exec(const MountEntry *p) { @@ -1375,18 +1374,9 @@ static int mount_private_sysfs(const MountEntry *m, const NamespaceParameters *p } static int mount_private_cgroup2fs(const MountEntry *m, const NamespaceParameters *p) { - _cleanup_free_ char *opts = NULL; - assert(m); assert(p); - - if (cgroupfs_recursiveprot_supported()) { - opts = strextend_with_separator(NULL, ",", mount_entry_options(m) ?: POINTER_MAX, "memory_recursiveprot"); - if (!opts) - return -ENOMEM; - } - - return mount_private_apivfs("cgroup2", mount_entry_path(m), "/sys/fs/cgroup", opts ?: mount_entry_options(m), p->runtime_scope); + return mount_private_apivfs("cgroup2", mount_entry_path(m), "/sys/fs/cgroup", /* opts = */ NULL, p->runtime_scope); } static int mount_procfs(const MountEntry *m, const NamespaceParameters *p) { diff --git a/src/shared/mount-setup.c b/src/shared/mount-setup.c index db963df39e6..c628c879420 100644 --- 
a/src/shared/mount-setup.c +++ b/src/shared/mount-setup.c @@ -52,7 +52,7 @@ typedef struct MountPoint { MountMode mode; } MountPoint; -bool cgroupfs_recursiveprot_supported(void) { +static bool cgroupfs_recursiveprot_supported(void) { int r; /* Added in kernel 5.7 */ diff --git a/src/shared/mount-setup.h b/src/shared/mount-setup.h index c07fe86364d..34de1dad0be 100644 --- a/src/shared/mount-setup.h +++ b/src/shared/mount-setup.h @@ -8,5 +8,3 @@ bool mount_point_ignore(const char *path); int mount_setup_early(void); int mount_setup(bool loaded_policy, bool leave_propagation); - -bool cgroupfs_recursiveprot_supported(void);