From: Christian Brauner Date: Thu, 4 Feb 2021 13:00:18 +0000 (+0100) Subject: cgroups: switch to fd-based cgroup mounting X-Git-Tag: lxc-5.0.0~301^2~13 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=02efd04151fab52c9ad0703f76c697a474a5558f;p=thirdparty%2Flxc.git cgroups: switch to fd-based cgroup mounting Signed-off-by: Christian Brauner --- diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 5bae07aaa..bdfbeaebc 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -1759,50 +1759,72 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h, * cgroups for the LXC_AUTO_CGROUP_FULL option. */ static int __cg_mount_direct(int type, struct hierarchy *h, - const char *controllerpath) + struct lxc_rootfs *rootfs, + int dfd_mnt_cgroupfs, const char *hierarchy_mnt) { - __do_free char *controllers = NULL; - char *fstype = "cgroup2"; - unsigned long flags = 0; - int ret; + __do_free char *controllers = NULL; + unsigned long flags = 0; + char *fstype; + int ret; + + if (dfd_mnt_cgroupfs < 0) + return ret_errno(EINVAL); + + flags |= MS_NOSUID; + flags |= MS_NOEXEC; + flags |= MS_NODEV; + flags |= MS_RELATIME; - flags |= MS_NOSUID; - flags |= MS_NOEXEC; - flags |= MS_NODEV; - flags |= MS_RELATIME; + if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) + flags |= MS_RDONLY; - if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) - flags |= MS_RDONLY; + if (is_unified_hierarchy(h)) { + fstype = "cgroup2"; + } else { + fstype = "cgroup"; - if (h->version != CGROUP2_SUPER_MAGIC) { - controllers = lxc_string_join(",", (const char **)h->controllers, false); - if (!controllers) - return -ENOMEM; - fstype = "cgroup"; + controllers = lxc_string_join(",", (const char **)h->controllers, false); + if (!controllers) + return ret_errno(ENOMEM); } - ret = mount("cgroup", controllerpath, fstype, flags, controllers); + ret = mount_at(dfd_mnt_cgroupfs, NULL, hierarchy_mnt, + PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, fstype, + flags, controllers); + if (ret < 0 && errno == ENOSYS) { + __do_free char *target = NULL; + const char *rootfs_mnt; + + rootfs_mnt = get_rootfs_mnt(rootfs); + target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL); + ret = safe_mount(NULL, target, fstype, flags, controllers, rootfs_mnt); + } if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s", - controllerpath, fstype); + return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)", + fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt)); - DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype); + DEBUG("Mounted cgroup filesystem %s onto %d(%s)", + fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt)); return 0; } static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h, - const char *controllerpath) + struct lxc_rootfs *rootfs, + int dfd_mnt_cgroupfs, + const char *hierarchy_mnt) { - return __cg_mount_direct(type, h, controllerpath); + return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt); } static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, - const char *controllerpath) + struct lxc_rootfs *rootfs, + int dfd_mnt_cgroupfs, + const char *hierarchy_mnt) { if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) return 0; - return __cg_mount_direct(type, h, controllerpath); + return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt); } __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, @@ -1812,7 +1834,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, __do_free char *cgroup_root = NULL; bool has_cgns = false, wants_force_mount = false; struct lxc_rootfs *rootfs = &conf->rootfs; - const char *root = rootfs->path ? rootfs->mount : ""; + const char *rootfs_mnt = get_rootfs_mnt(rootfs); int ret; if (!ops) @@ -1858,18 +1880,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) type = LXC_AUTO_CGROUP_FULL_MIXED; - cgroup_root = must_make_path(root, DEFAULT_CGROUP_MOUNTPOINT, NULL); - if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { + /* This is really the codepath that we want. */ + if (pure_unified_layout(ops)) { + dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd, + DEFAULT_CGROUP_MOUNTPOINT_RELATIVE, + PROTECT_OPATH_DIRECTORY, + PROTECT_LOOKUP_BENEATH_XDEV, 0); + if (dfd_mnt_cgroupfs < 0) + return log_error_errno(-errno, errno, "Failed to open %d(%s)", + rootfs->mntpt_fd, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE); + if (has_cgns && wants_force_mount) { /* * If cgroup namespaces are supported but the container * will not have CAP_SYS_ADMIN after it has started we * need to mount the cgroups manually. */ - return cg_mount_in_cgroup_namespace(type, ops->unified, cgroup_root) == 0; + return cg_mount_in_cgroup_namespace(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0; } - return cg_mount_cgroup_full(type, ops->unified, cgroup_root) == 0; + return cg_mount_cgroup_full(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0; } /* @@ -1881,18 +1911,16 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, "size=10240k,mode=755"); - if (ret < 0) { - if (errno != ENOSYS) - return log_error_errno(false, errno, - "Failed to mount tmpfs on %s", - DEFAULT_CGROUP_MOUNTPOINT_RELATIVE); + if (ret < 0 && errno == ENOSYS) { + cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL); ret = safe_mount(NULL, cgroup_root, "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, - "size=10240k,mode=755", root); + "size=10240k,mode=755", rootfs_mnt); } if (ret < 0) - return false; + return log_error_errno(false, errno, "Failed to mount tmpfs on %s", + DEFAULT_CGROUP_MOUNTPOINT_RELATIVE); dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE, @@ -1911,41 +1939,41 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, continue; controller++; - controllerpath = must_make_path(cgroup_root, controller, NULL); - if (dir_exists(controllerpath)) - continue; - ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000); if (ret < 0) - return log_error_errno(false, errno, "Error creating cgroup path: %s", controllerpath); + return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_cgroupfs, controller); if (has_cgns && wants_force_mount) { - /* If cgroup namespaces are supported but the container + /* + * If cgroup namespaces are supported but the container * will not have CAP_SYS_ADMIN after it has started we * need to mount the cgroups manually. */ - ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); + ret = cg_mount_in_cgroup_namespace(type, h, rootfs, dfd_mnt_cgroupfs, controller); if (ret < 0) return false; continue; } - ret = cg_mount_cgroup_full(type, h, controllerpath); + /* Here is where the ancient kernel section begins. */ + ret = cg_mount_cgroup_full(type, h, rootfs, dfd_mnt_cgroupfs, controller); if (ret < 0) return false; if (!cg_mount_needs_subdirs(type)) continue; - path2 = must_make_path(controllerpath, h->container_base_path, - ops->container_cgroup, NULL); + controllerpath = must_make_path(cgroup_root, controller, NULL); + if (dir_exists(controllerpath)) + continue; + + path2 = must_make_path(controllerpath, h->container_base_path, ops->container_cgroup, NULL); ret = mkdir_p(path2, 0755); if (ret < 0) return false; - ret = cg_legacy_mount_controllers(type, h, controllerpath, - path2, ops->container_cgroup); + ret = cg_legacy_mount_controllers(type, h, controllerpath, path2, ops->container_cgroup); if (ret < 0) return false; } diff --git a/src/lxc/conf.h b/src/lxc/conf.h index ab0a2bacc..3b45f0e61 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -23,6 +23,7 @@ #include "memory_utils.h" #include "ringbuf.h" #include "start.h" +#include "string_utils.h" #include "terminal.h" #if HAVE_SYS_RESOURCE_H @@ -547,4 +548,11 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con __hidden int lxc_setup_devpts_parent(struct lxc_handler *handler); +static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs) +{ + static const char *s = "/"; + + return !is_empty_string(rootfs->path) ? rootfs->mount : s; +} + #endif /* __LXC_CONF_H */ diff --git a/src/lxc/string_utils.h b/src/lxc/string_utils.h index d358b4954..a489cac44 100644 --- a/src/lxc/string_utils.h +++ b/src/lxc/string_utils.h @@ -117,6 +117,8 @@ static inline bool is_empty_string(const char *s) return !s || strcmp(s, "") == 0; } +#define maybe_empty(s) ((!is_empty_string(s)) ? (s) : ("(null)")) + static inline ssize_t safe_strlcat(char *src, const char *append, size_t len) { size_t new_len; diff --git a/src/lxc/utils.c b/src/lxc/utils.c index 0938ffee1..e664e6276 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -1231,9 +1231,6 @@ int mount_at(int dfd, if (!is_empty_string(src_buf) && *src_buf == '/') return log_error_errno(-EINVAL, EINVAL, "Absolute path specified"); - if (is_empty_string(dst_under_dfd)) - return log_error_errno(-EINVAL, EINVAL, "No target path specified"); - if (!is_empty_string(src_under_dfd)) { source_fd = openat2(dfd, src_under_dfd, &how, sizeof(how)); if (source_fd < 0) @@ -1244,11 +1241,17 @@ int mount_at(int dfd, return -EIO; } - target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how)); - if (target_fd < 0) - return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd); + if (!is_empty_string(dst_under_dfd)) { + target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how)); + if (target_fd < 0) + return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd); - ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd); + TRACE("Mounting %d(%s) through /proc/self/fd/%d", target_fd, dst_under_dfd, target_fd); + ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd); + } else { + TRACE("Mounting %d through /proc/self/fd/%d", dfd, dfd); + ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", dfd); + } if (ret < 0 || ret >= sizeof(dst_buf)) return -EIO;