From 720f0a2f3c928cc9379501a52146be9fbb4d9be2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 2 May 2018 14:24:54 +0200 Subject: [PATCH] nspawn: move nspawn cgroup hierarchy one level down unconditionally We need to do this in all cases, including on cgroupsv1 in order to ensure the host systemd and any systemd in the payload won't fight for the cgroup attributes of the top-level cgroup of the payload. This is because systemd for Delegate=yes units will only delegate the right to create children as well as their attributes. However, nspawn expects that the cgroup delegated covers both the right to create children and the attributes of the cgroup itself. Hence, to clear this up, let's unconditionally insert a intermediary cgroup, on cgroupsv1 as well as cgroupsv2, unconditionally. This is also nice as it reduces the differences in the various setups and exposes very close behaviour everywhere. --- src/nspawn/nspawn-cgroup.c | 59 ++++++++++++++++++++++---------------- src/nspawn/nspawn-cgroup.h | 2 +- src/nspawn/nspawn.c | 8 ++---- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 682ea65080c..761d737dc9d 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -141,44 +141,53 @@ finish: return r; } -int create_subcgroup(pid_t pid, CGroupUnified unified_requested) { +int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) { _cleanup_free_ char *cgroup = NULL; - const char *child; - int r; CGroupMask supported; + const char *payload; + int r; - /* In the unified hierarchy inner nodes may only contain - * subgroups, but not processes. Hence, if we running in the - * unified hierarchy and the container does the same, and we - * did not create a scope unit for the container move us and - * the container into two separate subcgroups. */ - - if (unified_requested == CGROUP_UNIFIED_NONE) - return 0; - - r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER); - if (r < 0) - return log_error_errno(r, "Failed to determine whether the systemd controller is unified: %m"); - if (r == 0) - return 0; + assert(pid > 1); + + /* In the unified hierarchy inner nodes may only contain subgroups, but not processes. Hence, if we running in + * the unified hierarchy and the container does the same, and we did not create a scope unit for the container + * move us and the container into two separate subcgroups. + * + * Moreover, container payloads such as systemd try to manage the cgroup they run in in full (i.e. including + * its attributes), while the host systemd will only delegate cgroups for children of the cgroup created for a + * delegation unit, instead of the cgroup itself. This means, if we'd pass on the cgroup allocated from the + * host systemd directly to the payload, the host and payload systemd might fight for the cgroup + * attributes. Hence, let's insert an intermediary cgroup to cover that case too. + * + * Note that we only bother with the main hierarchy here, not with any secondary ones. On the unified setup + * that's fine because there's only one hiearchy anyway and controllers are enabled directly on it. On the + * legacy setup, this is fine too, since delegation of controllers is generally not safe there, hence we won't + * do it. */ r = cg_mask_supported(&supported); if (r < 0) return log_error_errno(r, "Failed to determine supported controllers: %m"); - r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + if (keep_unit) + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup); + else + r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup); if (r < 0) return log_error_errno(r, "Failed to get our control group: %m"); - child = strjoina(cgroup, "/payload"); - r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid); + payload = strjoina(cgroup, "/payload"); + r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, payload, pid); if (r < 0) - return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + return log_error_errno(r, "Failed to create %s subcgroup: %m", payload); - child = strjoina(cgroup, "/supervisor"); - r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0); - if (r < 0) - return log_error_errno(r, "Failed to create %s subcgroup: %m", child); + if (keep_unit) { + const char *supervisor; + + supervisor = strjoina(cgroup, "/supervisor"); + r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, supervisor, 0); + if (r < 0) + return log_error_errno(r, "Failed to create %s subcgroup: %m", supervisor); + } /* Try to enable as many controllers as possible for the new payload. */ (void) cg_enable_everywhere(supported, supported, cgroup); diff --git a/src/nspawn/nspawn-cgroup.h b/src/nspawn/nspawn-cgroup.h index 3a8e98e1221..7639b483ae7 100644 --- a/src/nspawn/nspawn-cgroup.h +++ b/src/nspawn/nspawn-cgroup.h @@ -14,4 +14,4 @@ int chown_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); int sync_cgroup(pid_t pid, CGroupUnified unified_requested, uid_t uid_shift); -int create_subcgroup(pid_t pid, CGroupUnified unified_requested); +int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 162f3508b9f..d7ceb4ed445 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3643,11 +3643,9 @@ static int run(int master, if (r < 0) return r; - if (arg_keep_unit) { - r = create_subcgroup(*pid, arg_unified_cgroup_hierarchy); - if (r < 0) - return r; - } + r = create_subcgroup(*pid, arg_keep_unit, arg_unified_cgroup_hierarchy); + if (r < 0) + return r; r = chown_cgroup(*pid, arg_unified_cgroup_hierarchy, arg_uid_shift); if (r < 0) -- 2.39.5