From: Christian Brauner Date: Wed, 17 Feb 2021 09:03:42 +0000 (+0100) Subject: cgroups: rework cgroup tree creation X-Git-Tag: lxc-5.0.0~281^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a6aeb9f1b927ce314c589e5b0db2cb4eb15aef6f;p=thirdparty%2Flxc.git cgroups: rework cgroup tree creation Signed-off-by: Christian Brauner --- diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 396d81675..e0126775c 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -859,6 +859,11 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, return; } + if (!ops->container_limit_cgroup) { + WARN("Uninitialized limit cgroup"); + return; + } + #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX ret = bpf_program_cgroup_detach(handler->cgroup_ops->cgroup2_devices); if (ret < 0) @@ -1097,30 +1102,27 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode, } static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, - struct hierarchy *h, const char *cgroup_leaf, - const char *cgroup_limit_dir, bool payload) + struct hierarchy *h, const char *cgroup_limit_dir, + const char *cgroup_leaf, bool payload) { __do_close int fd_limit = -EBADF, fd_final = -EBADF; __do_free char *path = NULL, *limit_path = NULL; bool cpuset_v1 = false; - /* Don't bother with all the rest if the final cgroup already exists. */ - if (exists_dir_at(h->dfd_base, cgroup_leaf)) - return syswarn(false, "The %d(%s) cgroup already existed", h->dfd_base, cgroup_leaf); - /* * The legacy cpuset controller needs massaging in case inheriting * settings from its immediate ancestor cgroup hasn't been turned on. */ cpuset_v1 = !is_unified_hierarchy(h) && string_in_list(h->controllers, "cpuset"); - if (payload && cgroup_limit_dir) { + if (payload && cgroup_leaf) { /* With isolation both parts need to not already exist. */ fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false); if (fd_limit < 0) return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir); - limit_path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL); + TRACE("Created limit cgroup %d->%d(%s)", + fd_limit, h->dfd_base, cgroup_limit_dir); /* * With isolation the devices legacy cgroup needs to be @@ -1131,13 +1133,27 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, if (string_in_list(h->controllers, "devices") && !ops->setup_limits_legacy(ops, conf, true)) return log_error(false, "Failed to setup legacy device limits"); - } - fd_final = __cgroup_tree_create(h->dfd_base, cgroup_leaf, 0755, cpuset_v1, false); + limit_path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL); + path = must_make_path(limit_path, cgroup_leaf, NULL); + + /* + * If we use a separate limit cgroup, the leaf cgroup, i.e. the + * cgroup the container actually resides in, is below fd_limit. + */ + fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false); + TRACE("Created container cgroup %d->%d(%s)", + fd_final, fd_limit, cgroup_leaf); + } else { + fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false); + TRACE("Created %s cgroup %d->%d(%s)", payload ? "payload" : "monitor", + fd_final, h->dfd_base, cgroup_leaf); + + path = must_make_path(h->mountpoint, h->container_base_path, cgroup_limit_dir, NULL); + } if (fd_final < 0) return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir); - path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL); if (payload) { h->cgfd_con = move_fd(fd_final); h->container_full_path = move_ptr(path); @@ -1147,10 +1163,10 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, else h->cgfd_limit = move_fd(fd_limit); - if (!limit_path) - h->container_limit_path = h->container_full_path; - else + if (limit_path) h->container_limit_path = move_ptr(limit_path); + else + h->container_limit_path = h->container_full_path; } else { h->cgfd_mon = move_fd(fd_final); h->monitor_full_path = move_ptr(path); @@ -1379,7 +1395,8 @@ __cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lx */ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler) { - __do_free char *container_cgroup = NULL, *limiting_cgroup = NULL; + __do_free char *container_cgroup = NULL, *__limit_cgroup = NULL; + char *limit_cgroup; int idx = 0; int i; size_t len; @@ -1404,23 +1421,26 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx return false; if (conf->cgroup_meta.container_dir) { - limiting_cgroup = strdup(conf->cgroup_meta.container_dir); - if (!limiting_cgroup) + __limit_cgroup = strdup(conf->cgroup_meta.container_dir); + if (!__limit_cgroup) return ret_set_errno(false, ENOMEM); if (conf->cgroup_meta.namespace_dir) { - container_cgroup = must_make_path(limiting_cgroup, + container_cgroup = must_make_path(__limit_cgroup, conf->cgroup_meta.namespace_dir, NULL); + limit_cgroup = __limit_cgroup; } else { /* explicit paths but without isolation */ - container_cgroup = move_ptr(limiting_cgroup); + limit_cgroup = move_ptr(__limit_cgroup); + container_cgroup = limit_cgroup; } } else if (conf->cgroup_meta.dir) { - container_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", - DEFAULT_PAYLOAD_CGROUP_PREFIX, - handler->name, - CGROUP_CREATE_RETRY, NULL); + limit_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", + DEFAULT_PAYLOAD_CGROUP_PREFIX, + handler->name, + CGROUP_CREATE_RETRY, NULL); + container_cgroup = limit_cgroup; } else if (ops->cgroup_pattern) { __do_free char *cgroup_tree = NULL; @@ -1428,15 +1448,17 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx if (!cgroup_tree) return ret_set_errno(false, ENOMEM); - container_cgroup = must_concat(&len, cgroup_tree, "/", - DEFAULT_PAYLOAD_CGROUP, - CGROUP_CREATE_RETRY, NULL); + limit_cgroup = must_concat(&len, cgroup_tree, "/", + DEFAULT_PAYLOAD_CGROUP, + CGROUP_CREATE_RETRY, NULL); + container_cgroup = limit_cgroup; } else { - container_cgroup = must_concat(&len, DEFAULT_PAYLOAD_CGROUP_PREFIX, - handler->name, - CGROUP_CREATE_RETRY, NULL); + limit_cgroup = must_concat(&len, DEFAULT_PAYLOAD_CGROUP_PREFIX, + handler->name, + CGROUP_CREATE_RETRY, NULL); + container_cgroup = limit_cgroup; } - if (!container_cgroup) + if (!limit_cgroup) return ret_set_errno(false, ENOMEM); if (!conf->cgroup_meta.container_dir) { @@ -1449,17 +1471,15 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx for (i = 0; ops->hierarchies[i]; i++) { if (cgroup_tree_create(ops, handler->conf, - ops->hierarchies[i], - container_cgroup, - limiting_cgroup, + ops->hierarchies[i], limit_cgroup, + conf->cgroup_meta.namespace_dir, true)) continue; DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); for (int j = 0; j <= i; j++) cgroup_tree_prune_leaf(ops->hierarchies[j], - limiting_cgroup ?: container_cgroup, - true); + limit_cgroup, true); idx++; break; @@ -1470,11 +1490,12 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx return log_error_errno(false, ERANGE, "Failed to create container cgroup"); ops->container_cgroup = move_ptr(container_cgroup); - if (limiting_cgroup) - ops->container_limit_cgroup = move_ptr(limiting_cgroup); + if (__limit_cgroup) + ops->container_limit_cgroup = move_ptr(__limit_cgroup); else ops->container_limit_cgroup = ops->container_cgroup; - INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); + INFO("The container process uses \"%s\" as inner and \"%s\" as limit cgroup", + ops->container_cgroup, ops->container_limit_cgroup); return true; }