From c55fe36d28129391f231ae9aeef7cd3b4db31b0e Mon Sep 17 00:00:00 2001
From: Christian Brauner
Date: Tue, 16 Feb 2021 23:05:23 +0100
Subject: [PATCH] cgroups: fd-only cgroup tree pruning

Stop removing the monitor and payload cgroup trees through lxc_rm_rf() on
an absolute path. Add cgroup_tree_prune(dfd, path) instead, which removes
the directory tree at @path relative to the directory file descriptor @dfd
(callers pass the hierarchy's dfd_base): it opens the directory, recurses
into every subdirectory and finally unlinkat()s the directory itself with
AT_REMOVEDIR. Empty paths and "." are rejected with EINVAL because
unlinkat() cannot remove the directory a file descriptor refers to.

To have a dfd_base-relative path available when the payload is destroyed,
struct cgroup_ops gains container_limit_cgroup. It takes ownership of the
limiting cgroup string when there is one and otherwise aliases
container_cgroup, which is why cgroup_exit() only frees it when the two
pointers differ.

A failed recursive cgroup_tree_prune() call is propagated through its own
return value rather than by re-reading errno, since errno is read after
the nested call has already released its file descriptors and may no
longer describe the original failure.

Signed-off-by: Christian Brauner
---
 src/lxc/cgroups/cgfsng.c       | 32 ++++++++++-------
 src/lxc/cgroups/cgroup.c       |  7 +++-
 src/lxc/cgroups/cgroup.h       |  1 +
 src/lxc/cgroups/cgroup_utils.c | 64 ++++++++++++++++++++++++++++++++++
 src/lxc/cgroups/cgroup_utils.h |  2 ++
 src/lxc/file_utils.h           |  1 +
 6 files changed, 94 insertions(+), 13 deletions(-)

diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index f1c35760a..80137a2a7 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -777,9 +777,9 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
 		TRACE("named subsystem %d: %s", k, *it);
 }
 
-static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *container_cgroup)
+static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *path_prune)
 {
-	if (!container_cgroup || !hierarchies)
+	if (!path_prune || !hierarchies)
 		return 0;
 
 	for (int i = 0; hierarchies[i]; i++) {
@@ -789,9 +789,11 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *contai
 		if (!h->container_limit_path)
 			continue;
 
-		ret = lxc_rm_rf(h->container_limit_path);
+		ret = cgroup_tree_prune(h->dfd_base, path_prune);
 		if (ret < 0)
-			WARN("Failed to destroy \"%s\"", h->container_limit_path);
+			SYSWARN("Failed to destroy %d(%s)", h->dfd_base, path_prune);
+		else
+			TRACE("Removed cgroup tree %d(%s)", h->dfd_base, path_prune);
 
 		if (h->container_limit_path != h->container_full_path)
 			free_disarm(h->container_limit_path);
@@ -803,7 +805,7 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *contai
 
 struct generic_userns_exec_data {
 	struct hierarchy **hierarchies;
-	const char *container_cgroup;
+	const char *path_prune;
 	struct lxc_conf *conf;
 	uid_t origuid; /* target uid in parent namespace */
 	char *path;
@@ -829,7 +831,7 @@ static int cgroup_tree_remove_wrapper(void *data)
 		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
 				       (int)nsuid, (int)nsuid, (int)nsuid);
 
-	return cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
+	return cgroup_tree_remove(arg->hierarchies, arg->path_prune);
 }
 
 __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
@@ -864,14 +866,14 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 	if (!lxc_list_empty(&handler->conf->id_map)) {
 		struct generic_userns_exec_data wrap = {
 			.conf = handler->conf,
-			.container_cgroup = ops->container_cgroup,
+			.path_prune = ops->container_limit_cgroup,
 			.hierarchies = ops->hierarchies,
 			.origuid = 0,
 		};
 		ret = userns_exec_1(handler->conf, cgroup_tree_remove_wrapper,
 				    &wrap, "cgroup_tree_remove_wrapper");
 	} else {
-		ret = cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+		ret = cgroup_tree_remove(ops->hierarchies, ops->container_limit_cgroup);
 	}
 	if (ret < 0)
 		SYSWARN("Failed to destroy cgroups");
@@ -1221,7 +1223,7 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 		/* Monitor might have died before we entered the cgroup. */
 		if (handler->monitor_pid <= 0) {
 			WARN("No valid monitor process found while destroying cgroups");
-			goto try_lxc_rm_rf;
+			goto cgroup_prune_tree;
 		}
 
 		if (conf->cgroup_meta.monitor_pivot_dir)
@@ -1247,10 +1249,12 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 			continue;
 		}
 
-try_lxc_rm_rf:
-		ret = lxc_rm_rf(h->monitor_full_path);
+cgroup_prune_tree:
+		ret = cgroup_tree_prune(h->dfd_base, ops->monitor_cgroup);
 		if (ret < 0)
-			WARN("Failed to destroy \"%s\"", h->monitor_full_path);
+			SYSWARN("Failed to destroy %d(%s)", h->dfd_base, ops->monitor_cgroup);
+		else
+			TRACE("Removed cgroup tree %d(%s)", h->dfd_base, ops->monitor_cgroup);
 	}
 }
 
@@ -1468,6 +1472,10 @@ __cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lx
 		return log_error_errno(false, ERANGE, "Failed to create container cgroup");
 
 	ops->container_cgroup = move_ptr(container_cgroup);
+	if (limiting_cgroup)
+		ops->container_limit_cgroup = move_ptr(limiting_cgroup);
+	else
+		ops->container_limit_cgroup = ops->container_cgroup;
 	INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
 	return true;
 }
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c
index 96049b359..4fb92698e 100644
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -66,9 +66,14 @@ void cgroup_exit(struct cgroup_ops *ops)
 		free(*cur);
 
 	free(ops->cgroup_pattern);
-	free(ops->container_cgroup);
 	free(ops->monitor_cgroup);
 
+	{
+		if (ops->container_cgroup != ops->container_limit_cgroup)
+			free(ops->container_limit_cgroup);
+		free(ops->container_cgroup);
+	}
+
 	if (ops->cgroup2_devices)
 		bpf_program_free(ops->cgroup2_devices);
 
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
index 2ec5f0a7c..7a620a190 100644
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -134,6 +134,7 @@ struct cgroup_ops {
 	char **cgroup_use;
 	char *cgroup_pattern;
 	char *container_cgroup;
+	char *container_limit_cgroup;
 	char *monitor_cgroup;
 
 	/* @hierarchies
diff --git a/src/lxc/cgroups/cgroup_utils.c b/src/lxc/cgroups/cgroup_utils.c
index fb936393d..ac748fe6d 100644
--- a/src/lxc/cgroups/cgroup_utils.c
+++ b/src/lxc/cgroups/cgroup_utils.c
@@ -13,10 +13,13 @@
 #include "cgroup_utils.h"
 #include "config.h"
 #include "file_utils.h"
+#include "log.h"
 #include "macro.h"
 #include "memory_utils.h"
 #include "utils.h"
 
+lxc_log_define(cgroup_utils, lxc);
+
 int get_cgroup_version(char *line)
 {
 	if (is_cgroupfs_v1(line))
@@ -95,3 +98,64 @@ int unified_cgroup_fd(int fd)
 
 	return false;
 }
+
+int cgroup_tree_prune(int dfd, const char *path)
+{
+	__do_close int dfd_disown = -EBADF, dfd_dup = -EBADF;
+	__do_closedir DIR *dir = NULL;
+	int ret;
+	struct dirent *direntp;
+
+	/*
+	 * The unlinkat() syscall doesn't work with empty paths, i.e. it isn't
+	 * possible to remove the fd itself.
+	 */
+	if (is_empty_string(path) || strequal(path, "."))
+		return ret_errno(EINVAL);
+
+	/*
+	 * Note that O_PATH file descriptors can't be used with getdents() and
+	 * therefore with readdir().
+	 */
+	dfd_disown = open_at(dfd, path, PROTECT_OPEN,
+			     PROTECT_LOOKUP_BENEATH_WITH_SYMLINKS, 0);
+	if (dfd_disown < 0)
+		return -errno;
+
+	dfd_dup = dup_cloexec(dfd_disown);
+	if (dfd_dup < 0)
+		return -errno;
+
+	dir = fdopendir(dfd_disown);
+	if (!dir)
+		return -errno;
+
+	/* Transfer ownership to fdopendir(). */
+	move_fd(dfd_disown);
+
+	while ((direntp = readdir(dir))) {
+		struct stat st;
+
+		if (strequal(direntp->d_name, ".") ||
+		    strequal(direntp->d_name, ".."))
+			continue;
+
+		ret = fstatat(dfd_dup, direntp->d_name, &st,
+			      AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW);
+		if (ret < 0)
+			continue;
+
+		if (!S_ISDIR(st.st_mode))
+			continue;
+
+		ret = cgroup_tree_prune(dfd_dup, direntp->d_name);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = unlinkat(dfd, path, AT_REMOVEDIR);
+	if (ret < 0)
+		return -errno;
+
+	return 0;
+}
diff --git a/src/lxc/cgroups/cgroup_utils.h b/src/lxc/cgroups/cgroup_utils.h
index f85ac35d4..142b4db79 100644
--- a/src/lxc/cgroups/cgroup_utils.h
+++ b/src/lxc/cgroups/cgroup_utils.h
@@ -41,4 +41,6 @@ static inline bool cgns_supported(void)
 	return supported == 1;
 }
 
+__hidden extern int cgroup_tree_prune(int dfd, const char *path);
+
 #endif /* __LXC_CGROUP_UTILS_H */
diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h
index af8014e7b..cd9f447ff 100644
--- a/src/lxc/file_utils.h
+++ b/src/lxc/file_utils.h
@@ -13,6 +13,7 @@
 #include
 
 #include "compiler.h"
+#include "memory_utils.h"
 #include "syscall_wrappers.h"
 
 /* read and write whole files */
-- 
2.47.2