cgroups: stash host's cgroupfs file descriptor

author Christian Brauner <christian.brauner@ubuntu.com>

Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)

committer Christian Brauner <christian.brauner@ubuntu.com>

Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
author Christian Brauner <christian.brauner@ubuntu.com>
Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index b65d392fc66a3282dfb436142b8138a2c6cfe5c2..5aa6eb81d15b628aef5c774ca5f4f5c0a3ab6597 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -3474,18 +3474,9 @@ static char *cg_unified_get_current_cgroup(bool relative)
  static int cg_unified_init(struct cgroup_ops *ops, bool relative,
                            bool unprivileged)
  {
-       __do_close int cgroup_root_fd = -EBADF;
         __do_free char *base_cgroup = NULL, *controllers_path = NULL;
         __do_free_string_list char **delegatable = NULL;
         __do_free struct hierarchy *new = NULL;
-       int ret;
-
-       ret = unified_cgroup_hierarchy();
-       if (ret == -ENOMEDIUM)
-               return ret_errno(ENOMEDIUM);
-
-       if (ret != CGROUP2_SUPER_MAGIC)
-               return 0;
  
         base_cgroup = cg_unified_get_current_cgroup(relative);
         if (!base_cgroup)
@@ -3493,18 +3484,13 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
         if (!relative)
                 prune_init_scope(base_cgroup);
  
-       cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
-                               O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
-       if (cgroup_root_fd < 0)
-               return -errno;
-
         /*
          * We assume that the cgroup we're currently in has been delegated to
          * us and we are free to further delege all of the controllers listed
          * in cgroup.controllers further down the hierarchy.
          */
         controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
-       delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path);
+       delegatable = cg_unified_get_controllers(ops->dfd_mnt_cgroupfs_host, controllers_path);
         if (!delegatable)
                 delegatable = cg_unified_make_empty_controller();
         if (!delegatable[0])
@@ -3539,9 +3525,23 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
  
  static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
  {
+       __do_close int dfd = -EBADF;
+       bool relative = conf->cgroup_meta.relative;
         int ret;
         const char *tmp;
-       bool relative = conf->cgroup_meta.relative;
+
+       if (ops->dfd_mnt_cgroupfs_host >= 0)
+               return ret_errno(EINVAL);
+
+       /*
+        * I don't see the need for allowing symlinks here. If users want to
+        * have their hierarchy available in different locations I strongly
+        * suggest bind-mounts.
+        */
+       dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
+                       PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
+       if (dfd < 0)
+               return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
  
         tmp = lxc_global_config_value("lxc.cgroup.use");
         if (tmp) {
@@ -3555,14 +3555,23 @@ static int __cgroup_init(struct cgroup_ops *ops, struct lxc_conf *conf)
                         must_append_string(&ops->cgroup_use, cur);
         }
  
-       ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
-       if (ret < 0)
-               return -1;
+       /*
+        * Store dfd in ops so the initialization below can use it, but leave
+        * ownership with the cleanup attribute until we know initialization
+        * succeeded. So if we fail, dfd is closed automatically.
+        */
+       ops->dfd_mnt_cgroupfs_host = dfd;
  
-       if (ret == CGROUP2_SUPER_MAGIC)
-               return 0;
+       if (unified_cgroup_fd(dfd))
+               ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       else
+               ret = cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       if (ret < 0)
+               return syserrno(ret, "Failed to initialize cgroups");
  
-       return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
+       /* Transfer ownership to cgroup_ops. */
+       move_fd(dfd);
+       return 0;
  }
  
  __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
@@ -3589,6 +3598,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
                 return ret_set_errno(NULL, ENOMEM);
  
         cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+       cgfsng_ops->dfd_mnt_cgroupfs_host = -EBADF;
  
         if (__cgroup_init(cgfsng_ops, conf))
                 return NULL;
diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c

index acfda8af3cb88bead183064db6a1280b13b9a702..874cdf5789d6e04c42ce7f4e0d54e2cf1f36e190 100644 (file)
--- a/src/lxc/cgroups/cgroup.c
+++ b/src/lxc/cgroups/cgroup.c
@@ -68,6 +68,9 @@ void cgroup_exit(struct cgroup_ops *ops)
         if (ops->cgroup2_devices)
                 bpf_program_free(ops->cgroup2_devices);
  
+       if (ops->dfd_mnt_cgroupfs_host >= 0)
+               close(ops->dfd_mnt_cgroupfs_host);
+
         for (struct hierarchy **it = ops->hierarchies; it && *it; it++) {
                 for (char **p = (*it)->controllers; p && *p; p++)
                         free(*p);
diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h

index 67c52e509173fa4aacf974e4eb989b8314988779..c7abd475c78b59faf2936cd659529d8a6244bce5 100644 (file)
--- a/src/lxc/cgroups/cgroup.h
+++ b/src/lxc/cgroups/cgroup.h
@@ -106,6 +106,18 @@ struct cgroup_ops {
         /* string constant */
         const char *version;
  
+       /*
+        * File descriptor for the host's cgroupfs mount.  On
+        * CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID systems
+        * @dfd_mnt_cgroupfs_host will be a tmpfs fd and the individual
+        * controllers will be cgroupfs fds. On CGROUP_LAYOUT_UNIFIED it will
+        * be a cgroupfs fd itself.
+        *
+        * So for CGROUP_LAYOUT_LEGACY or CGROUP_LAYOUT_HYBRID we allow
+        * mountpoint crossing iff we cross from a tmpfs into a cgroupfs mount.
+        */
+       int dfd_mnt_cgroupfs_host;
+
         /* What controllers is the container supposed to use. */
         char **cgroup_use;
         char *cgroup_pattern;
diff --git a/src/lxc/cgroups/cgroup_utils.c b/src/lxc/cgroups/cgroup_utils.c

index 7961d0c90292791ceb45b7352fc5dfa31a55b516..fb936393d42095e33ee101cf8cb3db7d3f3b38dd 100644 (file)
--- a/src/lxc/cgroups/cgroup_utils.c
+++ b/src/lxc/cgroups/cgroup_utils.c
@@ -83,22 +83,6 @@ bool test_writeable_v2(char *mountpoint, char *path)
         return (access(cgroup_threads_file, W_OK) == 0);
  }
  
-int unified_cgroup_hierarchy(void)
-{
-
-       int ret;
-       struct statfs fs;
-
-       ret = statfs(DEFAULT_CGROUP_MOUNTPOINT, &fs);
-       if (ret < 0)
-               return -ENOMEDIUM;
-
-       if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
-               return CGROUP2_SUPER_MAGIC;
-
-       return 0;
-}
-
  int unified_cgroup_fd(int fd)
  {
  
diff --git a/src/lxc/cgroups/cgroup_utils.h b/src/lxc/cgroups/cgroup_utils.h

index 02f4b474904ad402fc6db0cb586e4613321cc4dc..f85ac35d46fa260f098e1604dab6223363298a8a 100644 (file)
--- a/src/lxc/cgroups/cgroup_utils.h
+++ b/src/lxc/cgroups/cgroup_utils.h
@@ -29,8 +29,6 @@ __hidden extern bool test_writeable_v1(char *mountpoint, char *path);
   */
  __hidden extern bool test_writeable_v2(char *mountpoint, char *path);
  
-__hidden extern int unified_cgroup_hierarchy(void);
-
  __hidden extern int unified_cgroup_fd(int fd);
  
  static inline bool cgns_supported(void)
author	Christian Brauner <christian.brauner@ubuntu.com>
	Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Tue, 16 Feb 2021 10:22:56 +0000 (11:22 +0100)
src/lxc/cgroups/cgfsng.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup.h		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup_utils.c		patch \| blob \| blame \| history
src/lxc/cgroups/cgroup_utils.h		patch \| blob \| blame \| history