From fbc8d90bd5e65aae5eb9ed57ecaf76b97a7116cb Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sun, 25 Feb 2024 09:07:17 +0800 Subject: [PATCH] shared/mount-setup: split out mount_cgroup_legacy_controllers --- src/core/main.c | 14 ++- src/shared/mount-setup.c | 240 ++++++++++++++++++++------------------- src/shared/mount-setup.h | 2 +- 3 files changed, 132 insertions(+), 124 deletions(-) diff --git a/src/core/main.c b/src/core/main.c index f09f19458e9..20937e565f4 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -2290,12 +2290,6 @@ static int initialize_runtime( install_crash_handler(); if (!skip_setup) { - r = mount_cgroup_controllers(); - if (r < 0) { - *ret_error_message = "Failed to mount cgroup hierarchies"; - return r; - } - /* Pull credentials from various sources into a common credential directory (we do * this here, before setting up the machine ID, so that we can use credential info * for setting up the machine ID) */ @@ -3022,6 +3016,14 @@ int main(int argc, char *argv[]) { goto finish; } + if (!skip_setup) { + r = mount_cgroup_legacy_controllers(loaded_policy); + if (r < 0) { + error_message = "Failed to mount cgroup v1 hierarchy"; + goto finish; + } + } + /* The efivarfs is now mounted, let's lock down the system token. */ lock_down_efi_variables(); diff --git a/src/shared/mount-setup.c b/src/shared/mount-setup.c index 5dfb30ccd1b..03690518c03 100644 --- a/src/shared/mount-setup.c +++ b/src/shared/mount-setup.c @@ -107,16 +107,6 @@ static const MountPoint mount_table[] = { cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, { "cgroup2", "/sys/fs/cgroup", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_unified_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, - cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_IN_CONTAINER }, - { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, #if ENABLE_PSTORE { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL, MNT_NONE }, @@ -296,81 +286,6 @@ static int symlink_controller(const char *target, const char *alias) { return 0; } -int mount_cgroup_controllers(void) { - _cleanup_set_free_ Set *controllers = NULL; - int r; - - if (!cg_is_legacy_wanted()) - return 0; - - /* Mount all available cgroup controllers that are built into the kernel. */ - r = cg_kernel_controllers(&controllers); - if (r < 0) - return log_error_errno(r, "Failed to enumerate cgroup controllers: %m"); - - for (;;) { - _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL; - const char *other_controller; - MountPoint p = { - .what = "cgroup", - .type = "cgroup", - .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, - .mode = MNT_IN_CONTAINER, - }; - - controller = set_steal_first(controllers); - if (!controller) - break; - - /* Check if we shall mount this together with another controller */ - other_controller = join_with(controller); - if (other_controller) { - _cleanup_free_ char *c = NULL; - - /* Check if the other controller is actually available in the kernel too */ - c = set_remove(controllers, other_controller); - if (c) { - - /* Join the two controllers into one string, and maintain a stable ordering */ - if (strcmp(controller, other_controller) < 0) - options = strjoin(controller, ",", other_controller); - else - options = strjoin(other_controller, ",", controller); - if (!options) - return log_oom(); - } - } - - /* The simple case, where there's only one controller to mount together */ - if (!options) - options = TAKE_PTR(controller); - - where = path_join("/sys/fs/cgroup", options); - if (!where) - return log_oom(); - - p.where = where; - p.options = options; - - r = mount_one(&p, true); - if (r < 0) - return r; - - /* Create symlinks from the individual controller names, in case we have a joined mount */ - if (controller) - (void) symlink_controller(options, controller); - if (other_controller) - (void) symlink_controller(options, other_controller); - } - - /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */ - (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs", - MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, - "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP); - - return 0; -} - #if HAVE_SELINUX || ENABLE_SMACK static int relabel_cb( RecurseDirEvent event, @@ -414,34 +329,6 @@ static int relabel_tree(const char *path) { return r; } -static int relabel_cgroup_filesystems(void) { - int r; - struct statfs st; - - r = cg_all_unified(); - if (r == 0) { - /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this - only when the filesystem has been already populated by a previous instance of systemd - running from initrd. Otherwise don't remount anything and leave the filesystem read-write - for the cgroup filesystems to be mounted inside. */ - if (statfs("/sys/fs/cgroup", &st) < 0) - return log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup: %m"); - - if (st.f_flags & ST_RDONLY) - (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL); - - (void) label_fix("/sys/fs/cgroup", 0); - (void) relabel_tree("/sys/fs/cgroup"); - - if (st.f_flags & ST_RDONLY) - (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL); - - } else if (r < 0) - return log_error_errno(r, "Failed to determine whether we are in all unified mode: %m"); - - return 0; -} - static int relabel_extra(void) { _cleanup_strv_free_ char **files = NULL; int r, c = 0; @@ -532,14 +419,12 @@ int mount_setup(bool loaded_policy, bool leave_propagation) { FOREACH_STRING(i, "/dev", "/dev/shm", "/run") (void) relabel_tree(i); - (void) relabel_cgroup_filesystems(); - n_extra = relabel_extra(); after_relabel = now(CLOCK_MONOTONIC); - log_info("Relabeled /dev, /dev/shm, /run, /sys/fs/cgroup%s in %s.", - n_extra > 0 ? ", additional files" : "", + log_info("Relabeled /dev/, /dev/shm/, /run/%s in %s.", + n_extra > 0 ? ", and additional files" : "", FORMAT_TIMESPAN(after_relabel - before_relabel, 0)); } #endif @@ -588,3 +473,124 @@ int mount_setup(bool loaded_policy, bool leave_propagation) { return 0; } + +static const MountPoint cgroupv1_mount_table[] = { + { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP, MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, + cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", "nsdelegate", MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, + { "cgroup2", "/sys/fs/cgroup/unified", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_hybrid_wanted, MNT_IN_CONTAINER|MNT_CHECK_WRITABLE }, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_legacy_wanted, MNT_IN_CONTAINER }, + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, +}; + +static void relabel_cgroup_legacy_hierarchy(void) { +#if HAVE_SELINUX || ENABLE_SMACK + struct statfs st; + + assert(cg_is_legacy_wanted()); + + /* Temporarily remount the root cgroup filesystem to give it a proper label. Do this + only when the filesystem has been already populated by a previous instance of systemd + running from initrd. Otherwise don't remount anything and leave the filesystem read-write + for the cgroup filesystems to be mounted inside. */ + if (statfs("/sys/fs/cgroup", &st) < 0) + return (void) log_error_errno(errno, "Failed to determine mount flags for /sys/fs/cgroup/: %m"); + + if (st.f_flags & ST_RDONLY) + (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT, NULL); + + (void) label_fix("/sys/fs/cgroup", 0); + (void) relabel_tree("/sys/fs/cgroup"); + + if (st.f_flags & ST_RDONLY) + (void) mount_nofollow(NULL, "/sys/fs/cgroup", NULL, MS_REMOUNT|MS_RDONLY, NULL); +#endif +} + +int mount_cgroup_legacy_controllers(bool loaded_policy) { + _cleanup_set_free_ Set *controllers = NULL; + int r; + + if (!cg_is_legacy_wanted()) + return 0; + + FOREACH_ARRAY(mp, cgroupv1_mount_table, ELEMENTSOF(cgroupv1_mount_table)) { + r = mount_one(mp, loaded_policy); + if (r < 0) + return r; + } + + if (loaded_policy) + relabel_cgroup_legacy_hierarchy(); + + /* Mount all available cgroup controllers that are built into the kernel. */ + r = cg_kernel_controllers(&controllers); + if (r < 0) + return log_error_errno(r, "Failed to enumerate cgroup controllers: %m"); + + for (;;) { + _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL; + const char *other_controller; + MountPoint p = { + .what = "cgroup", + .type = "cgroup", + .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, + .mode = MNT_IN_CONTAINER, + }; + + controller = set_steal_first(controllers); + if (!controller) + break; + + /* Check if we shall mount this together with another controller */ + other_controller = join_with(controller); + if (other_controller) { + _cleanup_free_ char *c = NULL; + + /* Check if the other controller is actually available in the kernel too */ + c = set_remove(controllers, other_controller); + if (c) { + + /* Join the two controllers into one string, and maintain a stable ordering */ + if (strcmp(controller, other_controller) < 0) + options = strjoin(controller, ",", other_controller); + else + options = strjoin(other_controller, ",", controller); + if (!options) + return log_oom(); + } + } + + /* The simple case, where there's only one controller to mount together */ + if (!options) + options = TAKE_PTR(controller); + + where = path_join("/sys/fs/cgroup", options); + if (!where) + return log_oom(); + + p.where = where; + p.options = options; + + r = mount_one(&p, true); + if (r < 0) + return r; + + /* Create symlinks from the individual controller names, in case we have a joined mount */ + if (controller) + (void) symlink_controller(options, controller); + if (other_controller) + (void) symlink_controller(options, other_controller); + } + + /* Now that we mounted everything, let's make the tmpfs the cgroup file systems are mounted into read-only. */ + (void) mount_nofollow("tmpfs", "/sys/fs/cgroup", "tmpfs", + MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, + "mode=0755" TMPFS_LIMITS_SYS_FS_CGROUP); + + return 1; +} diff --git a/src/shared/mount-setup.h b/src/shared/mount-setup.h index 584722b48ef..9fc49331d91 100644 --- a/src/shared/mount-setup.h +++ b/src/shared/mount-setup.h @@ -9,4 +9,4 @@ bool mount_point_ignore(const char *path); int mount_setup_early(void); int mount_setup(bool loaded_policy, bool leave_propagation); -int mount_cgroup_controllers(void); +int mount_cgroup_legacy_controllers(bool loaded_policy); -- 2.39.5