cgroups: switch to fd-based cgroup mounting

author Christian Brauner <christian.brauner@ubuntu.com>
Thu, 4 Feb 2021 13:00:18 +0000 (14:00 +0100)
committer Christian Brauner <christian.brauner@ubuntu.com>
Thu, 4 Feb 2021 14:59:52 +0000 (15:59 +0100)
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 5bae07aaa11e5b65022803e2501711e409591e17..bdfbeaebc71da45e90a1ef6e2d0cf883ff3e62e1 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -1759,50 +1759,72 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
   * cgroups for the LXC_AUTO_CGROUP_FULL option.
   */
  static int __cg_mount_direct(int type, struct hierarchy *h,
-                            const char *controllerpath)
+                            struct lxc_rootfs *rootfs,
+                            int dfd_mnt_cgroupfs, const char *hierarchy_mnt)
  {
-        __do_free char *controllers = NULL;
-        char *fstype = "cgroup2";
-        unsigned long flags = 0;
-        int ret;
+       __do_free char *controllers = NULL;
+       unsigned long flags = 0;
+       char *fstype;
+       int ret;
+
+       if (dfd_mnt_cgroupfs < 0)
+               return ret_errno(EINVAL);
+
+       flags |= MS_NOSUID;
+       flags |= MS_NOEXEC;
+       flags |= MS_NODEV;
+       flags |= MS_RELATIME;
  
-        flags |= MS_NOSUID;
-        flags |= MS_NOEXEC;
-        flags |= MS_NODEV;
-        flags |= MS_RELATIME;
+       if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
+               flags |= MS_RDONLY;
  
-        if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
-                flags |= MS_RDONLY;
+       if (is_unified_hierarchy(h)) {
+               fstype = "cgroup2";
+       } else {
+               fstype = "cgroup";
  
-        if (h->version != CGROUP2_SUPER_MAGIC) {
-                controllers = lxc_string_join(",", (const char **)h->controllers, false);
-                if (!controllers)
-                        return -ENOMEM;
-                fstype = "cgroup";
+               controllers = lxc_string_join(",", (const char **)h->controllers, false);
+               if (!controllers)
+                       return ret_errno(ENOMEM);
         }
  
-       ret = mount("cgroup", controllerpath, fstype, flags, controllers);
+       ret = mount_at(dfd_mnt_cgroupfs, NULL, hierarchy_mnt,
+                      PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, fstype,
+                      flags, controllers);
+       if (ret < 0 && errno == ENOSYS) {
+               __do_free char *target = NULL;
+               const char *rootfs_mnt;
+
+               rootfs_mnt = get_rootfs_mnt(rootfs);
+               target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL);
+               ret = safe_mount(NULL, target, fstype, flags, controllers, rootfs_mnt);
+       }
         if (ret < 0)
-               return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
-                                      controllerpath, fstype);
+               return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)",
+                                      fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
  
-       DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
+       DEBUG("Mounted cgroup filesystem %s onto %d(%s)",
+             fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
         return 0;
  }
  
  static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
-                                              const char *controllerpath)
+                                              struct lxc_rootfs *rootfs,
+                                              int dfd_mnt_cgroupfs,
+                                              const char *hierarchy_mnt)
  {
-       return __cg_mount_direct(type, h, controllerpath);
+       return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
  }
  
  static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
-                                      const char *controllerpath)
+                                      struct lxc_rootfs *rootfs,
+                                      int dfd_mnt_cgroupfs,
+                                      const char *hierarchy_mnt)
  {
         if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
                 return 0;
  
-       return __cg_mount_direct(type, h, controllerpath);
+       return __cg_mount_direct(type, h, rootfs, dfd_mnt_cgroupfs, hierarchy_mnt);
  }
  
  __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
@@ -1812,7 +1834,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
         __do_free char *cgroup_root = NULL;
         bool has_cgns = false, wants_force_mount = false;
         struct lxc_rootfs *rootfs = &conf->rootfs;
-       const char *root = rootfs->path ? rootfs->mount : "";
+       const char *rootfs_mnt = get_rootfs_mnt(rootfs);
         int ret;
  
         if (!ops)
@@ -1858,18 +1880,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
         else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
                 type = LXC_AUTO_CGROUP_FULL_MIXED;
  
-       cgroup_root = must_make_path(root, DEFAULT_CGROUP_MOUNTPOINT, NULL);
-       if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+       /* This is really the codepath that we want. */
+       if (pure_unified_layout(ops)) {
+               dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
+                                          DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
+                                          PROTECT_OPATH_DIRECTORY,
+                                          PROTECT_LOOKUP_BENEATH_XDEV, 0);
+               if (dfd_mnt_cgroupfs < 0)
+                       return log_error_errno(-errno, errno, "Failed to open %d(%s)",
+                                              rootfs->mntpt_fd, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+
                 if (has_cgns && wants_force_mount) {
                         /*
                          * If cgroup namespaces are supported but the container
                          * will not have CAP_SYS_ADMIN after it has started we
                          * need to mount the cgroups manually.
                          */
-                       return cg_mount_in_cgroup_namespace(type, ops->unified, cgroup_root) == 0;
+                       return cg_mount_in_cgroup_namespace(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
                 }
  
-               return cg_mount_cgroup_full(type, ops->unified, cgroup_root) == 0;
+               return cg_mount_cgroup_full(type, ops->unified, rootfs, dfd_mnt_cgroupfs, "") == 0;
         }
  
         /*
@@ -1881,18 +1911,16 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                        PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,
                        "tmpfs", MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
                        "size=10240k,mode=755");
-       if (ret < 0) {
-               if (errno != ENOSYS)
-                       return log_error_errno(false, errno,
-                                              "Failed to mount tmpfs on %s",
-                                              DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+       if (ret < 0 && errno == ENOSYS) {
+               cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
  
                 ret = safe_mount(NULL, cgroup_root, "tmpfs",
                                  MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
-                                "size=10240k,mode=755", root);
+                                "size=10240k,mode=755", rootfs_mnt);
         }
         if (ret < 0)
-               return false;
+               return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
+                                      DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
  
         dfd_mnt_cgroupfs = open_at(rootfs->mntpt_fd,
                                    DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
@@ -1911,41 +1939,41 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                         continue;
                 controller++;
  
-               controllerpath = must_make_path(cgroup_root, controller, NULL);
-               if (dir_exists(controllerpath))
-                       continue;
-
                 ret = mkdirat(dfd_mnt_cgroupfs, controller, 0000);
                 if (ret < 0)
-                       return log_error_errno(false, errno, "Error creating cgroup path: %s", controllerpath);
+                       return log_error_errno(false, errno, "Failed to create cgroup mountpoint %d(%s)", dfd_mnt_cgroupfs, controller);
  
                 if (has_cgns && wants_force_mount) {
-                       /* If cgroup namespaces are supported but the container
+                       /*
+                        * If cgroup namespaces are supported but the container
                          * will not have CAP_SYS_ADMIN after it has started we
                          * need to mount the cgroups manually.
                          */
-                       ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+                       ret = cg_mount_in_cgroup_namespace(type, h, rootfs, dfd_mnt_cgroupfs, controller);
                         if (ret < 0)
                                 return false;
  
                         continue;
                 }
  
-               ret = cg_mount_cgroup_full(type, h, controllerpath);
+               /* Here is where the ancient kernel section begins. */
+               ret = cg_mount_cgroup_full(type, h, rootfs, dfd_mnt_cgroupfs, controller);
                 if (ret < 0)
                         return false;
  
                 if (!cg_mount_needs_subdirs(type))
                         continue;
  
-               path2 = must_make_path(controllerpath, h->container_base_path,
-                                      ops->container_cgroup, NULL);
+               controllerpath = must_make_path(cgroup_root, controller, NULL);
+               if (dir_exists(controllerpath))
+                       continue;
+
+               path2 = must_make_path(controllerpath, h->container_base_path, ops->container_cgroup, NULL);
                 ret = mkdir_p(path2, 0755);
                 if (ret < 0)
                         return false;
  
-               ret = cg_legacy_mount_controllers(type, h, controllerpath,
-                                                 path2, ops->container_cgroup);
+               ret = cg_legacy_mount_controllers(type, h, controllerpath, path2, ops->container_cgroup);
                 if (ret < 0)
                         return false;
         }
diff --git a/src/lxc/conf.h b/src/lxc/conf.h

index ab0a2bacca8323beb095d0f14d9ff9bd2b71b035..3b45f0e61ee2642e570f9edb0d2eb091b1cdac1f 100644 (file)
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -23,6 +23,7 @@
  #include "memory_utils.h"
  #include "ringbuf.h"
  #include "start.h"
+#include "string_utils.h"
  #include "terminal.h"
  
  #if HAVE_SYS_RESOURCE_H
@@ -547,4 +548,11 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con
  
  __hidden int lxc_setup_devpts_parent(struct lxc_handler *handler);
  
+static inline const char *get_rootfs_mnt(const struct lxc_rootfs *rootfs)
+{
+       static const char *s = "/";
+
+       return !is_empty_string(rootfs->path) ? rootfs->mount : s;
+}
+
  #endif /* __LXC_CONF_H */
diff --git a/src/lxc/string_utils.h b/src/lxc/string_utils.h

index d358b49540dd8b553aef42e88978883b52cdc84e..a489cac445988104e5ffad180383307a9c0eb172 100644 (file)
--- a/src/lxc/string_utils.h
+++ b/src/lxc/string_utils.h
@@ -117,6 +117,8 @@ static inline bool is_empty_string(const char *s)
         return !s || strcmp(s, "") == 0;
  }
  
+#define maybe_empty(s) ((!is_empty_string(s)) ? (s) : ("(null)"))
+
  static inline ssize_t safe_strlcat(char *src, const char *append, size_t len)
  {
         size_t new_len;
diff --git a/src/lxc/utils.c b/src/lxc/utils.c

index 0938ffee160acea8ae77b6b8839583aa175ec472..e664e62769480a63a2a9f0440bff41cdd5f0ed9f 100644 (file)
--- a/src/lxc/utils.c
+++ b/src/lxc/utils.c
@@ -1231,9 +1231,6 @@ int mount_at(int dfd,
         if (!is_empty_string(src_buf) && *src_buf == '/')
                 return log_error_errno(-EINVAL, EINVAL, "Absolute path specified");
  
-       if (is_empty_string(dst_under_dfd))
-               return log_error_errno(-EINVAL, EINVAL, "No target path specified");
-
         if (!is_empty_string(src_under_dfd)) {
                 source_fd = openat2(dfd, src_under_dfd, &how, sizeof(how));
                 if (source_fd < 0)
@@ -1244,11 +1241,17 @@ int mount_at(int dfd,
                         return -EIO;
         }
  
-       target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how));
-       if (target_fd < 0)
-               return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd);
+       if (!is_empty_string(dst_under_dfd)) {
+               target_fd = openat2(dfd, dst_under_dfd, &how, sizeof(how));
+               if (target_fd < 0)
+                       return log_error_errno(-errno, errno, "Failed to open %d(%s)", dfd, dst_under_dfd);
  
-       ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd);
+               TRACE("Mounting %d(%s) through /proc/self/fd/%d", target_fd, dst_under_dfd, target_fd);
+               ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", target_fd);
+       } else {
+               TRACE("Mounting %d through /proc/self/fd/%d", dfd, dfd);
+               ret = snprintf(dst_buf, sizeof(dst_buf), "/proc/self/fd/%d", dfd);
+       }
         if (ret < 0 || ret >= sizeof(dst_buf))
                 return -EIO;
author	Christian Brauner <christian.brauner@ubuntu.com>
	Thu, 4 Feb 2021 13:00:18 +0000 (14:00 +0100)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Thu, 4 Feb 2021 14:59:52 +0000 (15:59 +0100)
src/lxc/cgroups/cgfsng.c		patch | blob | blame | history
src/lxc/conf.h		patch | blob | blame | history
src/lxc/string_utils.h		patch | blob | blame | history
src/lxc/utils.c		patch | blob | blame | history