git.ipfire.org Git - thirdparty/lxc.git/commitdiff
cgroups: stash host's cgroupfs file descriptor
author Christian Brauner <christian.brauner@ubuntu.com>
Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
src/lxc/cgroups/cgfsng.c
src/lxc/cgroups/cgroup.c
src/lxc/cgroups/cgroup.h
src/lxc/cgroups/cgroup_utils.c
src/lxc/cgroups/cgroup_utils.h

index b65d392fc66a3282dfb436142b8138a2c6cfe5c2..5aa6eb81d15b628aef5c774ca5f4f5c0a3ab6597 100644 (file)
@@ -3474,18 +3474,9 @@ static char *cg_unified_get_current_cgroup(bool relative)
 static int cg_unified_init(struct cgroup_ops *ops, bool relative,
                           bool unprivileged)
 {
-       __do_close int cgroup_root_fd = -EBADF;
        __do_free char *base_cgroup = NULL, *controllers_path = NULL;
        __do_free_string_list char **delegatable = NULL;
        __do_free struct hierarchy *new = NULL;
-       int ret;
-
-       ret = unified_cgroup_hierarchy();
-       if (ret == -ENOMEDIUM)
-               return ret_errno(ENOMEDIUM);
-
-       if (ret != CGROUP2_SUPER_MAGIC)
-               return 0;
 
        base_cgroup = cg_unified_get_current_cgroup(relative);
        if (!base_cgroup)
@@ -3493,18 +3484,13 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
        if (!relative)
                prune_init_scope(base_cgroup);
 
-       cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
-                               O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
-       if (cgroup_root_fd < 0)
-               return -errno;
-
        /*
         * We assume that the cgroup we're currently in has been delegated to
         * us and we are free to further delegate all of the controllers listed
         * in cgroup.controllers further down the hierarchy.
         */
        controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
-       delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path);
+       delegatable = cg_unified_get_controllers(ops->dfd_mnt_cgroupfs_host, controllers_path);
        if (!delegatable)
                delegatable = cg_unified_make_empty_controller();
        if (!delegatable[0])
@@ -3539,9 +3525,23 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
 
 static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 {
+       __do_close int dfd = -EBADF;
+       bool relative = conf->cgroup_meta.relative;
        int ret;
        const char *tmp;
-       bool relative = conf->cgroup_meta.relative;
+
+       if (ops->dfd_mnt_cgroupfs_host >= 0)
+               return ret_errno(EINVAL);
+
+       /*
+        * I don't see the need for allowing symlinks here. If users want to
+        * have their hierarchy available in different locations I strongly
+        * suggest bind-mounts.
+        */
+       dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
+                       PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
+       if (dfd < 0)
+               return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
 
        tmp = lxc_global_config_value("lxc.cgroup.use");
        if (tmp) {
@@ -3555,14 +3555,23 @@ static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
                        must_append_string(&ops->cgroup_use, cur);
        }
 
-       ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
-       if (ret < 0)
-               return -1;
+       /*
+        * Stash dfd in ops for the init functions but leave it owned by the
+        * __do_close cleanup handler. Ownership only moves to ops once
+        * initialization succeeded, so dfd is closed for us on failure.
+        */
+       ops->dfd_mnt_cgroupfs_host = dfd;
 
-       if (ret == CGROUP2_SUPER_MAGIC)
-               return 0;
+       if (unified_cgroup_fd(dfd))
+               ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       else
+               ret = cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       if (ret < 0)
+               return syserrno(ret, "Failed to initialize cgroups");
 
-       return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       /* Transfer ownership to cgroup_ops. */
+       move_fd(dfd);
+       return 0;
 }
 
 __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
@@ -3589,6 +3598,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
                return ret_set_errno(NULL, ENOMEM);
 
        cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+       cgfsng_ops->dfd_mnt_cgroupfs_host = -EBADF;
 
        if (__cgroup_init(cgfsng_ops, conf))
                return NULL;
index acfda8af3cb88bead183064db6a1280b13b9a702..874cdf5789d6e04c42ce7f4e0d54e2cf1f36e190 100644 (file)
@@ -68,6 +68,9 @@ void cgroup_exit(struct cgroup_ops *ops)
        if (ops->cgroup2_devices)
                bpf_program_free(ops->cgroup2_devices);
 
+       if (ops->dfd_mnt_cgroupfs_host >= 0)
+               close(ops->dfd_mnt_cgroupfs_host);
+
        for (struct hierarchy **it = ops->hierarchies; it && *it; it++) {
                for (char **p = (*it)->controllers; p && *p; p++)
                        free(*p);
index 67c52e509173fa4aacf974e4eb989b8314988779..c7abd475c78b59faf2936cd659529d8a6244bce5 100644 (file)
@@ -106,6 +106,18 @@ struct cgroup_ops {
        /* string constant */
        const char *version;
 
+       /*
+        * File descriptor for the host's cgroupfs mount.  On
+        * CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID systems
+        * @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
+        * controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
+        * be a cgroupfs fd itself.
+        *
+        * So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
+        * mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
+        */
+       int dfd_mnt_cgroupfs_host;
+
        /* What controllers is the container supposed to use. */
        char **cgroup_use;
        char *cgroup_pattern;
index 7961d0c90292791ceb45b7352fc5dfa31a55b516..fb936393d42095e33ee101cf8cb3db7d3f3b38dd 100644 (file)
@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
        return (access(cgroup_threads_file, W_OK) == 0);
 }
 
-int unified_cgroup_hierarchy(void)
-{
-
-       int ret;
-       struct statfs fs;
-
-       ret = statfs(DEFAULT_CGROUP_MOUNTPOINT, &fs);
-       if (ret < 0)
-               return -ENOMEDIUM;
-
-       if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
-               return CGROUP2_SUPER_MAGIC;
-
-       return 0;
-}
-
 int unified_cgroup_fd(int fd)
 {
 
index 02f4b474904ad402fc6db0cb586e4613321cc4dc..f85ac35d46fa260f098e1604dab6223363298a8a 100644 (file)
@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
  */
 __hidden extern bool test_writeable_v2(char *mountpoint, char *path);
 
-__hidden extern int unified_cgroup_hierarchy(void);
-
 __hidden extern int unified_cgroup_fd(int fd);
 
 static inline bool cgns_supported(void)