]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
cgroups: refactor cgroup handling
author Christian Brauner <christian.brauner@ubuntu.com>
Fri, 18 May 2018 18:16:22 +0000 (20:16 +0200)
committer Christian Brauner <christian.brauner@ubuntu.com>
Sat, 19 May 2018 20:33:34 +0000 (22:33 +0200)
This replaces the constructor implementation of cgroup handling with a simpler,
thread-safe on-demand model of cgroup driver initialization.
Making the cgroup initialization code run in a constructor means that each time
the shared library gets mapped the cgroup parsing code gets run. That's
unnecessary overhead.
It also feels to me that this is only accidentally thread-safe because
constructors are only run once. But should threads actually end up manipulating
or freeing memory that is file-global to cgfsng.c we'd be screwed. Now, I might
be wrong here but the cleaner implementation is to allocate a cgroup driver on
demand whenever we need it.
Take the chance and rework the cgroup_ops interface to make the functions it
wants to have implemented a lot cleaner.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
13 files changed:
src/lxc/attach.c
src/lxc/cgroups/cgfsng.c
src/lxc/cgroups/cgroup.c
src/lxc/cgroups/cgroup.h
src/lxc/commands.c
src/lxc/conf.c
src/lxc/criu.c
src/lxc/freezer.c
src/lxc/lxc.h
src/lxc/lxccontainer.c
src/lxc/start.c
src/lxc/start.h
src/tests/cgpath.c

index e1699b137d48f55ff48618bb3808b7b88745d43a..b62dcecfac481cd7ae59add162197c0355957e53 100644 (file)
@@ -1272,10 +1272,17 @@ int lxc_attach(const char *name, const char *lxcpath,
 
                /* Attach to cgroup, if requested. */
                if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
-                       if (!cgroup_attach(name, lxcpath, pid))
+                       struct cgroup_ops *cgroup_ops;
+
+                       cgroup_ops = cgroup_init(NULL);
+                       if (!cgroup_ops)
+                               goto on_error;
+
+                       if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
                                goto on_error;
-                       TRACE("Moved intermediate process %d into container's "
-                             "cgroups", pid);
+
+                       cgroup_exit(cgroup_ops);
+                       TRACE("Moved intermediate process %d into container's cgroups", pid);
                }
 
                /* Setup /proc limits */
index 2f9c15c56ea02147756a1164096984de09d48ba4..544772c61897b9f1a59f8b0b3c29ae6e041ab547 100644 (file)
 
 lxc_log_define(lxc_cgfsng, lxc);
 
-static struct cgroup_ops cgfsng_ops;
-
-/* A descriptor for a mounted hierarchy
- *
- * @controllers
- * - legacy hierarchy
- *   Either NULL, or a null-terminated list of all the co-mounted controllers.
- * - unified hierarchy
- *   Either NULL, or a null-terminated list of all enabled controllers.
- *
- * @mountpoint
- * - The mountpoint we will use.
- * - legacy hierarchy
- *   It will be either /sys/fs/cgroup/controller or
- *   /sys/fs/cgroup/controllerlist.
- * - unified hierarchy
- *   It will either be /sys/fs/cgroup or /sys/fs/cgroup/<mountpoint-name>
- *   depending on whether this is a hybrid cgroup layout (mix of legacy and
- *   unified hierarchies) or a pure unified cgroup layout.
- *
- * @base_cgroup
- * - The cgroup under which the container cgroup path
- *   is created. This will be either the caller's cgroup (if not root), or
- *   init's cgroup (if root).
- *
- * @fullcgpath
- * - The full path to the containers cgroup.
- *
- * @version
- * - legacy hierarchy
- *   If the hierarchy is a legacy hierarchy this will be set to
- *   CGROUP_SUPER_MAGIC.
- * - unified hierarchy
- *   If the hierarchy is a unified hierarchy this will be set to
- *   CGROUP2_SUPER_MAGIC.
- */
-struct hierarchy {
-       char **controllers;
-       char *mountpoint;
-       char *base_cgroup;
-       char *fullcgpath;
-       int version;
-};
-
-/* The cgroup data which is attached to the lxc_handler.
- *
- * @cgroup_pattern
- * - A copy of lxc.cgroup.pattern.
- *
- * @container_cgroup
- * - If not null, the cgroup which was created for the container. For each
- *   hierarchy, it is created under the @hierarchy->base_cgroup directory.
- *   Relative to the base_cgroup it is the same for all hierarchies.
- *
- * @name
- * - The name of the container.
- *
- * @cgroup_meta
- * - A copy of the container's cgroup information. This overrides
- *   @cgroup_pattern.
- *
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- *   - CGROUP_LAYOUT_UNKNOWN
- *     The cgroup layout could not be determined. This should be treated as an
- *     error condition.
- *   - CGROUP_LAYOUT_LEGACY
- *     The container is running with all controllers mounted into legacy cgroup
- *     hierarchies.
- *   - CGROUP_LAYOUT_HYBRID
- *     The container is running with at least one controller mounted into a
- *     legacy cgroup hierarchy and a mountpoint for the unified hierarchy.  The
- *     unified hierarchy can be empty (no controllers enabled) or non-empty
- *     (controllers enabled).
- *   - CGROUP_LAYOUT_UNIFIED
- *     The container is running on a pure unified cgroup hierarchy. The unified
- *     hierarchy can be empty (no controllers enabled) or non-empty (controllers
- *     enabled).
- */
-struct cgfsng_handler_data {
-       char *cgroup_pattern;
-       char *container_cgroup; /* cgroup we created for the container */
-       char *name; /* container name */
-       /* per-container cgroup information */
-       struct lxc_cgroup cgroup_meta;
-       cgroup_layout_t cgroup_layout;
-};
-
-/* @hierarchies
- * - A NULL-terminated array of struct hierarchy, one per legacy hierarchy. No
- *   duplicates. First sufficient, writeable mounted hierarchy wins.
- */
-struct hierarchy **hierarchies;
-/* Pointer to the unified hierarchy in the null terminated list @hierarchies.
- * This is merely a convenience for hybrid cgroup layouts to easily retrieve the
- * unified hierarchy without iterating through @hierarchies.
- */
-struct hierarchy *unified;
-/*
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- *   - CGROUP_LAYOUT_UNKNOWN
- *     The cgroup layout could not be determined. This should be treated as an
- *     error condition.
- *   - CGROUP_LAYOUT_LEGACY
- *     The container is running with all controllers mounted into legacy cgroup
- *     hierarchies.
- *   - CGROUP_LAYOUT_HYBRID
- *     The container is running with at least one controller mounted into a
- *     legacy cgroup hierarchy and a mountpoint for the unified hierarchy.  The
- *     unified hierarchy can be empty (no controllers enabled) or non-empty
- *     (controllers enabled).
- *   - CGROUP_LAYOUT_UNIFIED
- *     The container is running on a pure unified cgroup hierarchy. The unified
- *     hierarchy can be empty (no controllers enabled) or non-empty (controllers
- *     enabled).
- */
-cgroup_layout_t cgroup_layout;
-/* What controllers is the container supposed to use. */
-char *cgroup_use;
-
-/* @lxc_cgfsng_debug
- * - Whether to print debug info to stdout for the cgfsng driver.
- */
-static bool lxc_cgfsng_debug;
-
-#define CGFSNG_DEBUG(format, ...)                                              \
-       do {                                                                   \
-               if (lxc_cgfsng_debug)                                          \
-                       printf("cgfsng: " format, ##__VA_ARGS__);              \
-       } while (0)
-
 static void free_string_list(char **clist)
 {
        int i;
@@ -298,40 +166,28 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
        (*clist)[newentry] = copy;
 }
 
-static void free_handler_data(struct cgfsng_handler_data *d)
-{
-       free(d->cgroup_pattern);
-       free(d->container_cgroup);
-       free(d->name);
-       if (d->cgroup_meta.dir)
-               free(d->cgroup_meta.dir);
-       if (d->cgroup_meta.controllers)
-               free(d->cgroup_meta.controllers);
-       free(d);
-}
-
 /* Given a handler's cgroup data, return the struct hierarchy for the controller
  * @c, or NULL if there is none.
  */
-struct hierarchy *get_hierarchy(const char *c)
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *c)
 {
        int i;
 
-       if (!hierarchies)
+       if (!ops->hierarchies)
                return NULL;
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                if (!c) {
                        /* This is the empty unified hierarchy. */
-                       if (hierarchies[i]->controllers &&
-                           !hierarchies[i]->controllers[0])
-                               return hierarchies[i];
+                       if (ops->hierarchies[i]->controllers &&
+                           !ops->hierarchies[i]->controllers[0])
+                               return ops->hierarchies[i];
 
                        continue;
                }
 
-               if (string_in_list(hierarchies[i]->controllers, c))
-                       return hierarchies[i];
+               if (string_in_list(ops->hierarchies[i]->controllers, c))
+                       return ops->hierarchies[i];
        }
 
        return NULL;
@@ -829,23 +685,23 @@ static bool controller_found(struct hierarchy **hlist, char *entry)
 /* Return true if all of the controllers which we require have been found.  The
  * required list is  freezer and anything in lxc.cgroup.use.
  */
-static bool all_controllers_found(void)
+static bool all_controllers_found(struct cgroup_ops *ops)
 {
        char *p;
        char *saveptr = NULL;
-       struct hierarchy **hlist = hierarchies;
+       struct hierarchy **hlist = ops->hierarchies;
 
        if (!controller_found(hlist, "freezer")) {
-               CGFSNG_DEBUG("No freezer controller mountpoint found\n");
+               ERROR("No freezer controller mountpoint found");
                return false;
        }
 
-       if (!cgroup_use)
+       if (!ops->cgroup_use)
                return true;
 
-       for (; (p = strtok_r(cgroup_use, ",", &saveptr)); cgroup_use = NULL)
+       for (; (p = strtok_r(ops->cgroup_use, ",", &saveptr)); ops->cgroup_use = NULL)
                if (!controller_found(hlist, p)) {
-                       CGFSNG_DEBUG("No %s controller mountpoint found\n", p);
+                       ERROR("No %s controller mountpoint found", p);
                        return false;
                }
 
@@ -879,14 +735,14 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
         * verify /sys/fs/cgroup/ in this field.
         */
        if (strncmp(p, "/sys/fs/cgroup/", 15) != 0) {
-               CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p);
+               ERROR("Found hierarchy not under /sys/fs/cgroup: \"%s\"", p);
                return NULL;
        }
 
        p += 15;
        p2 = strchr(p, ' ');
        if (!p2) {
-               CGFSNG_DEBUG("Corrupt mountinfo\n");
+               ERROR("Corrupt mountinfo");
                return NULL;
        }
        *p2 = '\0';
@@ -944,7 +800,7 @@ static char **cg_unified_get_controllers(const char *file)
        return aret;
 }
 
-static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
                                       char *base_cgroup, int type)
 {
        struct hierarchy *new;
@@ -957,8 +813,8 @@ static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
        new->fullcgpath = NULL;
        new->version = type;
 
-       newentry = append_null_to_list((void ***)&hierarchies);
-       hierarchies[newentry] = new;
+       newentry = append_null_to_list((void ***)h);
+       (*h)[newentry] = new;
        return new;
 }
 
@@ -1137,39 +993,26 @@ static void trim(char *s)
                s[--len] = '\0';
 }
 
-static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d)
-{
-       printf("Cgroup information:\n");
-       printf("  container name: %s\n", d->name ? d->name : "(null)");
-       printf("  lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)");
-       printf("  lxc.cgroup.pattern: %s\n",
-              d->cgroup_pattern ? d->cgroup_pattern : "(null)");
-       printf("  lxc.cgroup.dir: %s\n",
-              d->cgroup_meta.dir ? d->cgroup_meta.dir : "(null)");
-       printf("  cgroup: %s\n",
-              d->container_cgroup ? d->container_cgroup : "(null)");
-}
-
-static void lxc_cgfsng_print_hierarchies()
+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
 {
        int i;
        struct hierarchy **it;
 
-       if (!hierarchies) {
-               printf("  No hierarchies found\n");
+       if (!ops->hierarchies) {
+               TRACE("  No hierarchies found");
                return;
        }
 
-       printf("  Hierarchies:\n");
-       for (i = 0, it = hierarchies; it && *it; it++, i++) {
+       TRACE("  Hierarchies:");
+       for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
                int j;
                char **cit;
 
-               printf("  %d: base_cgroup: %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
-               printf("      mountpoint:  %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
-               printf("      controllers:\n");
+               TRACE("  %d: base_cgroup: %s", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
+               TRACE("      mountpoint:  %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
+               TRACE("      controllers:");
                for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
-                       printf("      %d: %s\n", j, *cit);
+                       TRACE("      %d: %s", j, *cit);
        }
 }
 
@@ -1179,491 +1022,155 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
        int k;
        char **it;
 
-       printf("basecginfo is:\n");
-       printf("%s\n", basecginfo);
+       TRACE("basecginfo is:");
+       TRACE("%s", basecginfo);
 
        for (k = 0, it = klist; it && *it; it++, k++)
-               printf("kernel subsystem %d: %s\n", k, *it);
+               TRACE("kernel subsystem %d: %s", k, *it);
 
        for (k = 0, it = nlist; it && *it; it++, k++)
-               printf("named subsystem %d: %s\n", k, *it);
+               TRACE("named subsystem %d: %s", k, *it);
 }
 
-static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
-{
-       lxc_cgfsng_print_handler_data(d);
-       lxc_cgfsng_print_hierarchies();
-}
-
-/* At startup, parse_hierarchies finds all the info we need about cgroup
- * mountpoints and current cgroups, and stores it in @d.
- */
-static bool cg_hybrid_init(void)
+static int recursive_destroy(char *dirname)
 {
        int ret;
-       char *basecginfo;
-       bool will_escape;
-       FILE *f;
-       size_t len = 0;
-       char *line = NULL;
-       char **klist = NULL, **nlist = NULL;
-
-       /* Root spawned containers escape the current cgroup, so use init's
-        * cgroups as our base in that case.
-        */
-       will_escape = (geteuid() == 0);
-       if (will_escape)
-               basecginfo = read_file("/proc/1/cgroup");
-       else
-               basecginfo = read_file("/proc/self/cgroup");
-       if (!basecginfo)
-               return false;
-
-       ret = get_existing_subsystems(&klist, &nlist);
-       if (ret < 0) {
-               CGFSNG_DEBUG("Failed to retrieve available legacy cgroup controllers\n");
-               free(basecginfo);
-               return false;
-       }
-
-       f = fopen("/proc/self/mountinfo", "r");
-       if (!f) {
-               CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
-               free(basecginfo);
-               return false;
-       }
-
-       if (lxc_cgfsng_debug)
-               lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
-
-       while (getline(&line, &len, f) != -1) {
-               int type;
-               bool writeable;
-               struct hierarchy *new;
-               char *base_cgroup = NULL, *mountpoint = NULL;
-               char **controller_list = NULL;
-
-               type = get_cgroup_version(line);
-               if (type == 0)
-                       continue;
+       struct dirent *direntp;
+       DIR *dir;
+       int r = 0;
 
-               if (type == CGROUP2_SUPER_MAGIC && unified)
-                       continue;
+       dir = opendir(dirname);
+       if (!dir)
+               return -1;
 
-               if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
-                       if (type == CGROUP2_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_UNIFIED;
-                       else if (type == CGROUP_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_LEGACY;
-               } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
-                       if (type == CGROUP_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_HYBRID;
-               } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) {
-                       if (type == CGROUP2_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_HYBRID;
-               }
+       while ((direntp = readdir(dir))) {
+               char *pathname;
+               struct stat mystat;
 
-               controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
-               if (!controller_list && type == CGROUP_SUPER_MAGIC)
+               if (!strcmp(direntp->d_name, ".") ||
+                   !strcmp(direntp->d_name, ".."))
                        continue;
 
-               if (type == CGROUP_SUPER_MAGIC)
-                       if (controller_list_is_dup(hierarchies, controller_list))
-                               goto next;
-
-               mountpoint = cg_hybrid_get_mountpoint(line);
-               if (!mountpoint) {
-                       CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line);
-                       goto next;
-               }
+               pathname = must_make_path(dirname, direntp->d_name, NULL);
 
-               if (type == CGROUP_SUPER_MAGIC)
-                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
-               else
-                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
-               if (!base_cgroup) {
-                       CGFSNG_DEBUG("Failed to find current cgroup\n");
+               ret = lstat(pathname, &mystat);
+               if (ret < 0) {
+                       if (!r)
+                               WARN("Failed to stat \"%s\"", pathname);
+                       r = -1;
                        goto next;
                }
 
-               trim(base_cgroup);
-               prune_init_scope(base_cgroup);
-               if (type == CGROUP2_SUPER_MAGIC)
-                       writeable = test_writeable_v2(mountpoint, base_cgroup);
-               else
-                       writeable = test_writeable_v1(mountpoint, base_cgroup);
-               if (!writeable)
+               if (!S_ISDIR(mystat.st_mode))
                        goto next;
 
-               if (type == CGROUP2_SUPER_MAGIC) {
-                       char *cgv2_ctrl_path;
-
-                       cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
-                                                       "cgroup.controllers",
-                                                       NULL);
-
-                       controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
-                       free(cgv2_ctrl_path);
-                       if (!controller_list) {
-                               controller_list = cg_unified_make_empty_controller();
-                               CGFSNG_DEBUG("No controllers are enabled for "
-                                            "delegation in the unified hierarchy\n");
-                       }
-               }
-
-               new = add_hierarchy(controller_list, mountpoint, base_cgroup, type);
-               if (type == CGROUP2_SUPER_MAGIC && !unified)
-                       unified = new;
-
-               continue;
-
+               ret = recursive_destroy(pathname);
+               if (ret < 0)
+                       r = -1;
        next:
-               free_string_list(controller_list);
-               free(mountpoint);
-               free(base_cgroup);
+               free(pathname);
        }
 
-       free_string_list(klist);
-       free_string_list(nlist);
-
-       free(basecginfo);
-
-       fclose(f);
-       free(line);
-
-       if (lxc_cgfsng_debug) {
-               printf("Writable cgroup hierarchies:\n");
-               lxc_cgfsng_print_hierarchies();
+       ret = rmdir(dirname);
+       if (ret < 0) {
+               if (!r)
+                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
+               r = -1;
        }
 
-       /* verify that all controllers in cgroup.use and all crucial
-        * controllers are accounted for
-        */
-       if (!all_controllers_found())
-               return false;
+       ret = closedir(dir);
+       if (ret < 0) {
+               if (!r)
+                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
+               r = -1;
+       }
 
-       return true;
+       return r;
 }
 
-static int cg_is_pure_unified(void)
+static int cgroup_rmdir(struct hierarchy **hierarchies,
+                       const char *container_cgroup)
 {
+       int i;
 
-       int ret;
-       struct statfs fs;
+       if (!container_cgroup || !hierarchies)
+               return 0;
 
-       ret = statfs("/sys/fs/cgroup", &fs);
-       if (ret < 0)
-               return -ENOMEDIUM;
+       for (i = 0; hierarchies[i]; i++) {
+               int ret;
+               struct hierarchy *h = hierarchies[i];
 
-       if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
-               return CGROUP2_SUPER_MAGIC;
+               if (!h->fullcgpath)
+                       continue;
+
+               ret = recursive_destroy(h->fullcgpath);
+               if (ret < 0)
+                       WARN("Failed to destroy \"%s\"", h->fullcgpath);
+
+               free(h->fullcgpath);
+               h->fullcgpath = NULL;
+       }
 
        return 0;
 }
 
-/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
-static char *cg_unified_get_current_cgroup(void)
-{
-       char *basecginfo, *base_cgroup;
-       bool will_escape;
-       char *copy = NULL;
+struct generic_userns_exec_data {
+       struct hierarchy **hierarchies;
+       const char *container_cgroup;
+       struct lxc_conf *conf;
+       uid_t origuid; /* target uid in parent namespace */
+       char *path;
+};
 
-       will_escape = (geteuid() == 0);
-       if (will_escape)
-               basecginfo = read_file("/proc/1/cgroup");
-       else
-               basecginfo = read_file("/proc/self/cgroup");
-       if (!basecginfo)
-               return NULL;
+static int cgroup_rmdir_wrapper(void *data)
+{
+       int ret;
+       struct generic_userns_exec_data *arg = data;
+       uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+       gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
 
-       base_cgroup = strstr(basecginfo, "0::/");
-       if (!base_cgroup)
-               goto cleanup_on_err;
+       ret = setresgid(nsgid, nsgid, nsgid);
+       if (ret < 0) {
+               SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
+                        (int)nsgid, (int)nsgid);
+               return -1;
+       }
 
-       base_cgroup = base_cgroup + 3;
-       copy = copy_to_eol(base_cgroup);
-       if (!copy)
-               goto cleanup_on_err;
+       ret = setresuid(nsuid, nsuid, nsuid);
+       if (ret < 0) {
+               SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
+                        (int)nsuid, (int)nsuid);
+               return -1;
+       }
 
-cleanup_on_err:
-       free(basecginfo);
-       if (copy)
-               trim(copy);
+       ret = setgroups(0, NULL);
+       if (ret < 0 && errno != EPERM) {
+               SYSERROR("Failed to setgroups(0, NULL)");
+               return -1;
+       }
 
-       return copy;
+       return cgroup_rmdir(arg->hierarchies, arg->container_cgroup);
 }
 
-static int cg_unified_init(void)
+static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler)
 {
        int ret;
-       char *mountpoint, *subtree_path;
-       char **delegatable;
-       char *base_cgroup = NULL;
-
-       ret = cg_is_pure_unified();
-       if (ret == -ENOMEDIUM)
-               return -ENOMEDIUM;
-
-       if (ret != CGROUP2_SUPER_MAGIC)
-               return 0;
-
-       base_cgroup = cg_unified_get_current_cgroup();
-       if (!base_cgroup)
-               return -EINVAL;
-       prune_init_scope(base_cgroup);
-
-       /* We assume that we have already been given controllers to delegate
-        * further down the hierarchy. If not it is up to the user to delegate
-        * them to us.
-        */
-       mountpoint = must_copy_string("/sys/fs/cgroup");
-       subtree_path = must_make_path(mountpoint, base_cgroup,
-                                     "cgroup.subtree_control", NULL);
-       delegatable = cg_unified_get_controllers(subtree_path);
-       free(subtree_path);
-       if (!delegatable)
-               delegatable = cg_unified_make_empty_controller();
-       if (!delegatable[0])
-               CGFSNG_DEBUG("No controllers are enabled for delegation\n");
-
-       /* TODO: If the user requested specific controllers via lxc.cgroup.use
-        * we should verify here. The reason I'm not doing it right is that I'm
-        * not convinced that lxc.cgroup.use will be the future since it is a
-        * global property. I much rather have an option that lets you request
-        * controllers per container.
-        */
-
-       add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
-       unified = hierarchies[0];
-
-       cgroup_layout = CGROUP_LAYOUT_UNIFIED;
-       return CGROUP2_SUPER_MAGIC;
-}
-
-static bool cg_init(void)
-{
-       int ret;
-       const char *tmp;
-
-       errno = 0;
-       tmp = lxc_global_config_value("lxc.cgroup.use");
-       if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
-               CGFSNG_DEBUG("Failed to retrieve list of cgroups to use\n");
-               return false;
-       }
-       cgroup_use = must_copy_string(tmp);
-
-       ret = cg_unified_init();
-       if (ret < 0)
-               return false;
-
-       if (ret == CGROUP2_SUPER_MAGIC)
-               return true;
-
-       return cg_hybrid_init();
-}
-
-static void *cgfsng_init(struct lxc_handler *handler)
-{
-       const char *cgroup_pattern;
-       struct cgfsng_handler_data *d;
-
-       d = must_alloc(sizeof(*d));
-       memset(d, 0, sizeof(*d));
-
-       /* copy container name */
-       d->name = must_copy_string(handler->name);
-
-       /* copy per-container cgroup information */
-       d->cgroup_meta.dir = NULL;
-       d->cgroup_meta.controllers = NULL;
-       if (handler->conf) {
-               d->cgroup_meta.dir = must_copy_string(handler->conf->cgroup_meta.dir);
-               d->cgroup_meta.controllers = must_copy_string(handler->conf->cgroup_meta.controllers);
-       }
-
-       /* copy system-wide cgroup information */
-       cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
-       if (!cgroup_pattern) {
-               /* lxc.cgroup.pattern is only NULL on error. */
-               ERROR("Failed to retrieve cgroup pattern");
-               goto out_free;
-       }
-       d->cgroup_pattern = must_copy_string(cgroup_pattern);
-
-       d->cgroup_layout = cgroup_layout;
-       if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY)
-               TRACE("Running with legacy cgroup layout");
-       else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID)
-               TRACE("Running with hybrid cgroup layout");
-       else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
-               TRACE("Running with unified cgroup layout");
-       else
-               WARN("Running with unknown cgroup layout");
-
-       if (lxc_cgfsng_debug)
-               lxc_cgfsng_print_debuginfo(d);
-
-       return d;
-
-out_free:
-       free_handler_data(d);
-       return NULL;
-}
-
-static int recursive_destroy(char *dirname)
-{
-       int ret;
-       struct dirent *direntp;
-       DIR *dir;
-       int r = 0;
-
-       dir = opendir(dirname);
-       if (!dir)
-               return -1;
-
-       while ((direntp = readdir(dir))) {
-               char *pathname;
-               struct stat mystat;
-
-               if (!strcmp(direntp->d_name, ".") ||
-                   !strcmp(direntp->d_name, ".."))
-                       continue;
-
-               pathname = must_make_path(dirname, direntp->d_name, NULL);
-
-               ret = lstat(pathname, &mystat);
-               if (ret < 0) {
-                       if (!r)
-                               WARN("Failed to stat \"%s\"", pathname);
-                       r = -1;
-                       goto next;
-               }
-
-               if (!S_ISDIR(mystat.st_mode))
-                       goto next;
-
-               ret = recursive_destroy(pathname);
-               if (ret < 0)
-                       r = -1;
-       next:
-               free(pathname);
-       }
-
-       ret = rmdir(dirname);
-       if (ret < 0) {
-               if (!r)
-                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
-               r = -1;
-       }
-
-       ret = closedir(dir);
-       if (ret < 0) {
-               if (!r)
-                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
-               r = -1;
-       }
-
-       return r;
-}
-
-static int cgroup_rmdir(char *container_cgroup)
-{
-       int i;
-
-       if (!container_cgroup || !hierarchies)
-               return 0;
-
-       for (i = 0; hierarchies[i]; i++) {
-               int ret;
-               struct hierarchy *h = hierarchies[i];
-
-               if (!h->fullcgpath)
-                       continue;
-
-               ret = recursive_destroy(h->fullcgpath);
-               if (ret < 0)
-                       WARN("Failed to destroy \"%s\"", h->fullcgpath);
-
-               free(h->fullcgpath);
-               h->fullcgpath = NULL;
-       }
-
-       return 0;
-}
-
-struct generic_userns_exec_data {
-       struct cgfsng_handler_data *d;
-       struct lxc_conf *conf;
-       uid_t origuid; /* target uid in parent namespace */
-       char *path;
-};
-
-static int cgroup_rmdir_wrapper(void *data)
-{
-       int ret;
-       struct generic_userns_exec_data *arg = data;
-       uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
-       gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
-
-       ret = setresgid(nsgid, nsgid, nsgid);
-       if (ret < 0) {
-               SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
-                        (int)nsgid, (int)nsgid);
-               return -1;
-       }
-
-       ret = setresuid(nsuid, nsuid, nsuid);
-       if (ret < 0) {
-               SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
-                        (int)nsuid, (int)nsuid);
-               return -1;
-       }
-
-       ret = setgroups(0, NULL);
-       if (ret < 0 && errno != EPERM) {
-               SYSERROR("Failed to setgroups(0, NULL)");
-               return -1;
-       }
-
-       return cgroup_rmdir(arg->d->container_cgroup);
-}
-
-static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
-{
-       int ret;
-       struct cgfsng_handler_data *d = hdata;
-       struct generic_userns_exec_data wrap;
-
-       if (!d)
-               return;
+       struct generic_userns_exec_data wrap;
 
        wrap.origuid = 0;
-       wrap.d = hdata;
-       wrap.conf = conf;
+       wrap.container_cgroup = ops->container_cgroup;
+       wrap.hierarchies = ops->hierarchies;
+       wrap.conf = handler->conf;
 
-       if (conf && !lxc_list_empty(&conf->id_map))
-               ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
+       if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
+               ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
                                    "cgroup_rmdir_wrapper");
        else
-               ret = cgroup_rmdir(d->container_cgroup);
+               ret = cgroup_rmdir(ops->hierarchies, ops->container_cgroup);
        if (ret < 0) {
                WARN("Failed to destroy cgroups");
                return;
        }
-
-       free_handler_data(d);
-}
-
-struct cgroup_ops *cgfsng_ops_init(void)
-{
-       if (getenv("LXC_DEBUG_CGFSNG"))
-               lxc_cgfsng_debug = true;
-
-       if (!cg_init())
-               return NULL;
-
-       return &cgfsng_ops;
 }
 
 static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
@@ -1769,26 +1276,28 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
 /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
  * next cgroup_pattern-1, -2, ..., -999.
  */
-static inline bool cgfsng_create(void *hdata)
+static inline bool cgfsng_create(struct cgroup_ops *ops,
+                                struct lxc_handler *handler)
 {
        int i;
        size_t len;
        char *container_cgroup, *offset, *tmp;
        int idx = 0;
-       struct cgfsng_handler_data *d = hdata;
+       struct lxc_conf *conf = handler->conf;
+       const char *join_args[] = {conf->cgroup_meta.dir, handler->name, NULL};
 
-       if (!d)
+       if (ops->container_cgroup) {
+               WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
                return false;
+       }
 
-       if (d->container_cgroup) {
-               WARN("cgfsng_create called a second time");
+       if (!conf)
                return false;
-       }
 
-       if (d->cgroup_meta.dir)
-               tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false);
+       if (conf->cgroup_meta.dir)
+               tmp = lxc_string_join("/", join_args, false);
        else
-               tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
+               tmp = lxc_string_replace("%n", handler->name, ops->cgroup_pattern);
        if (!tmp) {
                ERROR("Failed expanding cgroup name pattern");
                return false;
@@ -1820,20 +1329,20 @@ again:
                }
        }
 
-       for (i = 0; hierarchies[i]; i++) {
-               if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
+       for (i = 0; ops->hierarchies[i]; i++) {
+               if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
                        int j;
-                       ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath);
-                       free(hierarchies[i]->fullcgpath);
-                       hierarchies[i]->fullcgpath = NULL;
+                       ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath);
+                       free(ops->hierarchies[i]->fullcgpath);
+                       ops->hierarchies[i]->fullcgpath = NULL;
                        for (j = 0; j < i; j++)
-                               remove_path_for_hierarchy(hierarchies[j], container_cgroup);
+                               remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup);
                        idx++;
                        goto again;
                }
        }
 
-       d->container_cgroup = container_cgroup;
+       ops->container_cgroup = container_cgroup;
 
        return true;
 
@@ -1843,7 +1352,7 @@ out_free:
        return false;
 }
 
-static bool cgfsng_enter(void *hdata, pid_t pid)
+static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid)
 {
        int i, len;
        char pidstr[25];
@@ -1852,11 +1361,11 @@ static bool cgfsng_enter(void *hdata, pid_t pid)
        if (len < 0 || len >= 25)
                return false;
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                int ret;
                char *fullpath;
 
-               fullpath = must_make_path(hierarchies[i]->fullcgpath,
+               fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
                                          "cgroup.procs", NULL);
                ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
                if (ret != 0) {
@@ -1929,9 +1438,9 @@ static int chown_cgroup_wrapper(void *data)
 
        destuid = get_ns_uid(arg->origuid);
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; arg->hierarchies[i]; i++) {
                char *fullpath;
-               char *path = hierarchies[i]->fullcgpath;
+               char *path = arg->hierarchies[i]->fullcgpath;
 
                ret = chowmod(path, destuid, nsgid, 0775);
                if (ret < 0)
@@ -1944,17 +1453,17 @@ static int chown_cgroup_wrapper(void *data)
                 * files (which systemd in wily insists on doing).
                 */
 
-               if (hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
+               if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
                        fullpath = must_make_path(path, "tasks", NULL);
                        (void)chowmod(fullpath, destuid, nsgid, 0664);
                        free(fullpath);
                }
 
                fullpath = must_make_path(path, "cgroup.procs", NULL);
-               (void)chowmod(fullpath, destuid, 0, 0664);
+               (void)chowmod(fullpath, destuid, nsgid, 0664);
                free(fullpath);
 
-               if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
+               if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
                        continue;
 
                fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
@@ -1969,20 +1478,16 @@ static int chown_cgroup_wrapper(void *data)
        return 0;
 }
 
-static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
+static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf)
 {
-       struct cgfsng_handler_data *d = hdata;
        struct generic_userns_exec_data wrap;
 
-       if (!d)
-               return false;
-
        if (lxc_list_empty(&conf->id_map))
                return true;
 
        wrap.origuid = geteuid();
        wrap.path = NULL;
-       wrap.d = d;
+       wrap.hierarchies = ops->hierarchies;
        wrap.conf = conf;
 
        if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
@@ -2122,13 +1627,12 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
        return __cg_mount_direct(type, h, controllerpath);
 }
 
-static bool cgfsng_mount(void *hdata, const char *root, int type)
+static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler,
+                        const char *root, int type)
 {
        int i, ret;
        char *tmpfspath = NULL;
        bool has_cgns = false, retval = false, wants_force_mount = false;
-       struct lxc_handler *handler = hdata;
-       struct cgfsng_handler_data *d = handler->cgroup_data;
 
        if ((type & LXC_AUTO_CGROUP_MASK) == 0)
                return true;
@@ -2162,9 +1666,9 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
        if (ret < 0)
                goto on_error;
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                char *controllerpath, *path2;
-               struct hierarchy *h = hierarchies[i];
+               struct hierarchy *h = ops->hierarchies[i];
                char *controller = strrchr(h->mountpoint, '/');
 
                if (!controller)
@@ -2209,7 +1713,7 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                }
 
                path2 = must_make_path(controllerpath, h->base_cgroup,
-                                      d->container_cgroup, NULL);
+                                      ops->container_cgroup, NULL);
                ret = mkdir_p(path2, 0755);
                if (ret < 0) {
                        free(controllerpath);
@@ -2218,7 +1722,7 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                }
 
                ret = cg_legacy_mount_controllers(type, h, controllerpath,
-                                                 path2, d->container_cgroup);
+                                                 path2, ops->container_cgroup);
                free(controllerpath);
                free(path2);
                if (ret < 0)
@@ -2276,35 +1780,34 @@ static int recursive_count_nrtasks(char *dirname)
        return count;
 }
 
-static int cgfsng_nrtasks(void *hdata)
+static int cgfsng_nrtasks(struct cgroup_ops *ops)
 {
        int count;
        char *path;
-       struct cgfsng_handler_data *d = hdata;
 
-       if (!d || !d->container_cgroup || !hierarchies)
+       if (!ops->container_cgroup || !ops->hierarchies)
                return -1;
 
-       path = must_make_path(hierarchies[0]->fullcgpath, NULL);
+       path = must_make_path(ops->hierarchies[0]->fullcgpath, NULL);
        count = recursive_count_nrtasks(path);
        free(path);
        return count;
 }
 
 /* Only root needs to escape to the cgroup of its init. */
-static bool cgfsng_escape()
+static bool cgfsng_escape(const struct cgroup_ops *ops)
 {
        int i;
 
        if (geteuid())
                return true;
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                int ret;
                char *fullpath;
 
-               fullpath = must_make_path(hierarchies[i]->mountpoint,
-                                         hierarchies[i]->base_cgroup,
+               fullpath = must_make_path(ops->hierarchies[i]->mountpoint,
+                                         ops->hierarchies[i]->base_cgroup,
                                          "cgroup.procs", NULL);
                ret = lxc_write_to_file(fullpath, "0", 2, false, 0666);
                if (ret != 0) {
@@ -2318,26 +1821,26 @@ static bool cgfsng_escape()
        return true;
 }
 
-static int cgfsng_num_hierarchies(void)
+static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
 {
        int i;
 
-       for (i = 0; hierarchies[i]; i++)
+       for (i = 0; ops->hierarchies[i]; i++)
                ;
 
        return i;
 }
 
-static bool cgfsng_get_hierarchies(int n, char ***out)
+static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
 {
        int i;
 
        /* sanity check n */
        for (i = 0; i < n; i++)
-               if (!hierarchies[i])
+               if (!ops->hierarchies[i])
                        return false;
 
-       *out = hierarchies[i]->controllers;
+       *out = ops->hierarchies[i]->controllers;
 
        return true;
 }
@@ -2348,13 +1851,13 @@ static bool cgfsng_get_hierarchies(int n, char ***out)
 /* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
  * to be adapted.
  */
-static bool cgfsng_unfreeze(void *hdata)
+static bool cgfsng_unfreeze(struct cgroup_ops *ops)
 {
        int ret;
        char *fullpath;
        struct hierarchy *h;
 
-       h = get_hierarchy("freezer");
+       h = get_hierarchy(ops, "freezer");
        if (!h)
                return false;
 
@@ -2367,14 +1870,15 @@ static bool cgfsng_unfreeze(void *hdata)
        return true;
 }
 
-static const char *cgfsng_get_cgroup(void *hdata, const char *controller)
+static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+                                    const char *controller)
 {
        struct hierarchy *h;
 
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
        if (!h) {
-               SYSERROR("Failed to find hierarchy for controller \"%s\"",
-                        controller ? controller : "(null)");
+               WARN("Failed to find hierarchy for controller \"%s\"",
+                    controller ? controller : "(null)");
                return NULL;
        }
 
@@ -2465,7 +1969,8 @@ on_error:
        return fret;
 }
 
-static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
+static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
+                         const char *lxcpath, pid_t pid)
 {
        int i, len, ret;
        char pidstr[25];
@@ -2474,10 +1979,10 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
        if (len < 0 || len >= 25)
                return false;
 
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                char *path;
                char *fullpath = NULL;
-               struct hierarchy *h = hierarchies[i];
+               struct hierarchy *h = ops->hierarchies[i];
 
                if (h->version == CGROUP2_SUPER_MAGIC) {
                        ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
@@ -2511,8 +2016,8 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
  */
-static int cgfsng_get(const char *filename, char *value, size_t len,
-                     const char *name, const char *lxcpath)
+static int cgfsng_get(struct cgroup_ops *ops, const char *filename, char *value,
+                     size_t len, const char *name, const char *lxcpath)
 {
        int ret = -1;
        size_t controller_len;
@@ -2531,7 +2036,7 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
        if (!path)
                return -1;
 
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
        if (h) {
                char *fullpath;
 
@@ -2548,8 +2053,8 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
  */
-static int cgfsng_set(const char *filename, const char *value, const char *name,
-                     const char *lxcpath)
+static int cgfsng_set(struct cgroup_ops *ops, const char *filename,
+                     const char *value, const char *name, const char *lxcpath)
 {
        int ret = -1;
        size_t controller_len;
@@ -2568,7 +2073,7 @@ static int cgfsng_set(const char *filename, const char *value, const char *name,
        if (!path)
                return -1;
 
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
        if (h) {
                char *fullpath;
 
@@ -2662,8 +2167,8 @@ out:
 /* Called from setup_limits - here we have the container's cgroup_data because
  * we created the cgroups.
  */
-static int cg_legacy_set_data(const char *filename, const char *value,
-                             struct cgfsng_handler_data *d)
+static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+                             const char *value)
 {
        size_t len;
        char *fullpath, *p;
@@ -2687,7 +2192,7 @@ static int cg_legacy_set_data(const char *filename, const char *value,
                value = converted_value;
        }
 
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
        if (!h) {
                ERROR("Failed to setup limits for the \"%s\" controller. "
                      "The controller seems to be unused by \"cgfsng\" cgroup "
@@ -2703,13 +2208,12 @@ static int cg_legacy_set_data(const char *filename, const char *value,
        return ret;
 }
 
-static bool __cg_legacy_setup_limits(void *hdata,
+static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
                                     struct lxc_list *cgroup_settings,
                                     bool do_devices)
 {
        struct lxc_list *iterator, *next, *sorted_cgroup_settings;
        struct lxc_cgroup *cg;
-       struct cgfsng_handler_data *d = hdata;
        bool ret = false;
 
        if (lxc_list_empty(cgroup_settings))
@@ -2723,7 +2227,7 @@ static bool __cg_legacy_setup_limits(void *hdata,
                cg = iterator->elem;
 
                if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
-                       if (cg_legacy_set_data(cg->subsystem, cg->value, d)) {
+                       if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
                                if (do_devices && (errno == EACCES || errno == EPERM)) {
                                        WARN("Failed to set \"%s\" to \"%s\"",
                                             cg->subsystem, cg->value);
@@ -2749,11 +2253,11 @@ out:
        return ret;
 }
 
-static bool __cg_unified_setup_limits(void *hdata,
+static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
                                      struct lxc_list *cgroup_settings)
 {
        struct lxc_list *iterator;
-       struct hierarchy *h = unified;
+       struct hierarchy *h = ops->unified;
 
        if (lxc_list_empty(cgroup_settings))
                return true;
@@ -2781,35 +2285,328 @@ static bool __cg_unified_setup_limits(void *hdata,
        return true;
 }
 
-static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf,
+static bool cgfsng_setup_limits(struct cgroup_ops *ops, struct lxc_conf *conf,
                                bool do_devices)
 {
        bool bret;
 
-       bret = __cg_legacy_setup_limits(hdata, &conf->cgroup, do_devices);
+       bret = __cg_legacy_setup_limits(ops, &conf->cgroup, do_devices);
        if (!bret)
                return false;
 
-       return __cg_unified_setup_limits(hdata, &conf->cgroup2);
-}
-
-static struct cgroup_ops cgfsng_ops = {
-       .init = cgfsng_init,
-       .destroy = cgfsng_destroy,
-       .create = cgfsng_create,
-       .enter = cgfsng_enter,
-       .escape = cgfsng_escape,
-       .num_hierarchies = cgfsng_num_hierarchies,
-       .get_hierarchies = cgfsng_get_hierarchies,
-       .get_cgroup = cgfsng_get_cgroup,
-       .get = cgfsng_get,
-       .set = cgfsng_set,
-       .unfreeze = cgfsng_unfreeze,
-       .setup_limits = cgfsng_setup_limits,
-       .driver = "cgfsng",
-       .version = "1.0.0",
-       .attach = cgfsng_attach,
-       .chown = cgfsng_chown,
-       .mount_cgroup = cgfsng_mount,
-       .nrtasks = cgfsng_nrtasks,
-};
+       return __cg_unified_setup_limits(ops, &conf->cgroup2);
+}
+
+/* At startup, cg_hybrid_init finds all the info we need about cgroup
+ * mountpoints and current cgroups, and stores it in @ops.
+ */
+static bool cg_hybrid_init(struct cgroup_ops *ops)
+{
+       int ret;
+       char *basecginfo;
+       bool will_escape;
+       FILE *f;
+       size_t len = 0;
+       char *line = NULL;
+       char **klist = NULL, **nlist = NULL;
+
+       /* Root spawned containers escape the current cgroup, so use init's
+        * cgroups as our base in that case.
+        */
+       will_escape = (geteuid() == 0);
+       if (will_escape)
+               basecginfo = read_file("/proc/1/cgroup");
+       else
+               basecginfo = read_file("/proc/self/cgroup");
+       if (!basecginfo)
+               return false;
+
+       ret = get_existing_subsystems(&klist, &nlist);
+       if (ret < 0) {
+               ERROR("Failed to retrieve available legacy cgroup controllers");
+               free(basecginfo);
+               return false;
+       }
+
+       f = fopen("/proc/self/mountinfo", "r");
+       if (!f) {
+               ERROR("Failed to open \"/proc/self/mountinfo\"");
+               free(basecginfo);
+               return false;
+       }
+
+       lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+
+       while (getline(&line, &len, f) != -1) {
+               int type;
+               bool writeable;
+               struct hierarchy *new;
+               char *base_cgroup = NULL, *mountpoint = NULL;
+               char **controller_list = NULL;
+
+               type = get_cgroup_version(line);
+               if (type == 0)
+                       continue;
+
+               if (type == CGROUP2_SUPER_MAGIC && ops->unified)
+                       continue;
+
+               if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+                       if (type == CGROUP2_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+                       else if (type == CGROUP_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+               } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+                       if (type == CGROUP_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+               } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+                       if (type == CGROUP2_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+               }
+
+               controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+               if (!controller_list && type == CGROUP_SUPER_MAGIC)
+                       continue;
+
+               if (type == CGROUP_SUPER_MAGIC)
+                       if (controller_list_is_dup(ops->hierarchies, controller_list))
+                               goto next;
+
+               mountpoint = cg_hybrid_get_mountpoint(line);
+               if (!mountpoint) {
+                       ERROR("Failed parsing mountpoint from \"%s\"", line);
+                       goto next;
+               }
+
+               if (type == CGROUP_SUPER_MAGIC)
+                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+               else
+                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+               if (!base_cgroup) {
+                       ERROR("Failed to find current cgroup");
+                       goto next;
+               }
+
+               trim(base_cgroup);
+               prune_init_scope(base_cgroup);
+               if (type == CGROUP2_SUPER_MAGIC)
+                       writeable = test_writeable_v2(mountpoint, base_cgroup);
+               else
+                       writeable = test_writeable_v1(mountpoint, base_cgroup);
+               if (!writeable)
+                       goto next;
+
+               if (type == CGROUP2_SUPER_MAGIC) {
+                       char *cgv2_ctrl_path;
+
+                       cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+                                                       "cgroup.controllers",
+                                                       NULL);
+
+                       controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+                       free(cgv2_ctrl_path);
+                       if (!controller_list) {
+                               controller_list = cg_unified_make_empty_controller();
+                               TRACE("No controllers are enabled for "
+                                     "delegation in the unified hierarchy");
+                       }
+               }
+
+               new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
+               if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
+                       ops->unified = new;
+
+               continue;
+
+       next:
+               free_string_list(controller_list);
+               free(mountpoint);
+               free(base_cgroup);
+       }
+
+       free_string_list(klist);
+       free_string_list(nlist);
+
+       free(basecginfo);
+
+       fclose(f);
+       free(line);
+
+       TRACE("Writable cgroup hierarchies:");
+       lxc_cgfsng_print_hierarchies(ops);
+
+       /* verify that all controllers in cgroup.use and all crucial
+        * controllers are accounted for
+        */
+       if (!all_controllers_found(ops))
+               return false;
+
+       return true;
+}
+
+static int cg_is_pure_unified(void)
+{
+
+       int ret;
+       struct statfs fs;
+
+       ret = statfs("/sys/fs/cgroup", &fs);
+       if (ret < 0)
+               return -ENOMEDIUM;
+
+       if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
+               return CGROUP2_SUPER_MAGIC;
+
+       return 0;
+}
+
+/* Get current cgroupfs v2 cgroup from /proc/1/cgroup (root) or /proc/self/cgroup. */
+static char *cg_unified_get_current_cgroup(void)
+{
+       char *basecginfo, *base_cgroup;
+       bool will_escape;
+       char *copy = NULL;
+
+       will_escape = (geteuid() == 0);
+       if (will_escape)
+               basecginfo = read_file("/proc/1/cgroup");
+       else
+               basecginfo = read_file("/proc/self/cgroup");
+       if (!basecginfo)
+               return NULL;
+
+       base_cgroup = strstr(basecginfo, "0::/");
+       if (!base_cgroup)
+               goto cleanup_on_err;
+
+       base_cgroup = base_cgroup + 3;
+       copy = copy_to_eol(base_cgroup);
+       if (!copy)
+               goto cleanup_on_err;
+
+cleanup_on_err:
+       free(basecginfo);
+       if (copy)
+               trim(copy);
+
+       return copy;
+}
+
+static int cg_unified_init(struct cgroup_ops *ops)
+{
+       int ret;
+       char *mountpoint, *subtree_path;
+       char **delegatable;
+       char *base_cgroup = NULL;
+
+       ret = cg_is_pure_unified();
+       if (ret == -ENOMEDIUM)
+               return -ENOMEDIUM;
+
+       if (ret != CGROUP2_SUPER_MAGIC)
+               return 0;
+
+       base_cgroup = cg_unified_get_current_cgroup();
+       if (!base_cgroup)
+               return -EINVAL;
+       prune_init_scope(base_cgroup);
+
+       /* We assume that we have already been given controllers to delegate
+        * further down the hierarchy. If not it is up to the user to delegate
+        * them to us.
+        */
+       mountpoint = must_copy_string("/sys/fs/cgroup");
+       subtree_path = must_make_path(mountpoint, base_cgroup,
+                                     "cgroup.subtree_control", NULL);
+       delegatable = cg_unified_get_controllers(subtree_path);
+       free(subtree_path);
+       if (!delegatable)
+               delegatable = cg_unified_make_empty_controller();
+       if (!delegatable[0])
+               TRACE("No controllers are enabled for delegation");
+
+       /* TODO: If the user requested specific controllers via lxc.cgroup.use
+        * we should verify here. The reason I'm not doing it right now is that
+        * I'm not convinced that lxc.cgroup.use will be the future since it is
+        * a global property. I would much rather have an option that lets you
+        * request controllers per container.
+        */
+
+       add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
+
+       ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+       return CGROUP2_SUPER_MAGIC;
+}
+
+static bool cg_init(struct cgroup_ops *ops)
+{
+       int ret;
+       const char *tmp;
+
+       tmp = lxc_global_config_value("lxc.cgroup.use");
+       if (tmp)
+               ops->cgroup_use = must_copy_string(tmp);
+
+       ret = cg_unified_init(ops);
+       if (ret < 0)
+               return false;
+
+       if (ret == CGROUP2_SUPER_MAGIC)
+               return true;
+
+       return cg_hybrid_init(ops);
+}
+
+static bool cgfsng_data_init(struct cgroup_ops *ops)
+{
+       const char *cgroup_pattern;
+
+       /* copy system-wide cgroup information */
+       cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
+       if (!cgroup_pattern) {
+               /* lxc.cgroup.pattern is only NULL on error. */
+               ERROR("Failed to retrieve cgroup pattern");
+               return false;
+       }
+       ops->cgroup_pattern = must_copy_string(cgroup_pattern);
+
+       return true;
+}
+
+struct cgroup_ops *cgfsng_ops_init(void)
+{
+       struct cgroup_ops *cgfsng_ops;
+
+       cgfsng_ops = malloc(sizeof(struct cgroup_ops));
+       if (!cgfsng_ops)
+               return NULL;
+
+       memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
+       cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+
+       if (!cg_init(cgfsng_ops)) {
+               free(cgfsng_ops);
+               return NULL;
+       }
+
+       cgfsng_ops->data_init = cgfsng_data_init;
+       cgfsng_ops->destroy = cgfsng_destroy;
+       cgfsng_ops->create = cgfsng_create;
+       cgfsng_ops->enter = cgfsng_enter;
+       cgfsng_ops->escape = cgfsng_escape;
+       cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
+       cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
+       cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
+       cgfsng_ops->get = cgfsng_get;
+       cgfsng_ops->set = cgfsng_set;
+       cgfsng_ops->unfreeze = cgfsng_unfreeze;
+       cgfsng_ops->setup_limits = cgfsng_setup_limits;
+       cgfsng_ops->driver = "cgfsng";
+       cgfsng_ops->version = "1.0.0";
+       cgfsng_ops->attach = cgfsng_attach;
+       cgfsng_ops->chown = cgfsng_chown;
+       cgfsng_ops->mount = cgfsng_mount;
+       cgfsng_ops->nrtasks = cgfsng_nrtasks;
+
+       return cgfsng_ops;
+}
index 9e7b26e00b9f7818c56c9638c06dcd6cc749b3f1..cd67c3c5de7836a619728c177a8a81813c7cc8f4 100644 (file)
 
 lxc_log_define(lxc_cgroup, lxc);
 
-static struct cgroup_ops *ops = NULL;
-
 extern struct cgroup_ops *cgfsng_ops_init(void);
 
-__attribute__((constructor)) void cgroup_ops_init(void)
+struct cgroup_ops *cgroup_init(struct lxc_handler *handler)
 {
-       if (ops) {
-               INFO("Running with %s in version %s", ops->driver, ops->version);
-               return;
-       }
-
-       DEBUG("cgroup_init");
-       ops = cgfsng_ops_init();
-       if (ops)
-               INFO("Initialized cgroup driver %s", ops->driver);
-}
+       struct cgroup_ops *cgroup_ops;
 
-bool cgroup_init(struct lxc_handler *handler)
-{
-       if (handler->cgroup_data) {
-               ERROR("cgroup_init called on already initialized handler");
-               return true;
+       cgroup_ops = cgfsng_ops_init();
+       if (!cgroup_ops) {
+               ERROR("Failed to initialize cgroup driver");
+               return NULL;
        }
 
-       if (ops) {
-               INFO("cgroup driver %s initing for %s", ops->driver, handler->name);
-               handler->cgroup_data = ops->init(handler);
-       }
+       if (!cgroup_ops->data_init(cgroup_ops))
+               return NULL;
 
-       return handler->cgroup_data != NULL;
-}
+       TRACE("Initialized cgroup driver %s", cgroup_ops->driver);
 
-void cgroup_destroy(struct lxc_handler *handler)
-{
-       if (ops) {
-               ops->destroy(handler->cgroup_data, handler->conf);
-               handler->cgroup_data = NULL;
-       }
-}
+       if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_LEGACY)
+               TRACE("Running with legacy cgroup layout");
+       else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_HYBRID)
+               TRACE("Running with hybrid cgroup layout");
+       else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+               TRACE("Running with unified cgroup layout");
+       else
+               WARN("Running with unknown cgroup layout");
 
-/* Create the container cgroups for all requested controllers. */
-bool cgroup_create(struct lxc_handler *handler)
-{
-       if (ops)
-               return ops->create(handler->cgroup_data);
-
-       return false;
+       return cgroup_ops;
 }
 
-/* Enter the container init into its new cgroups for all requested controllers. */
-bool cgroup_enter(struct lxc_handler *handler)
+void cgroup_exit(struct cgroup_ops *ops)
 {
-       if (ops)
-               return ops->enter(handler->cgroup_data, handler->pid);
+       struct hierarchy **it;
 
-       return false;
-}
-
-bool cgroup_create_legacy(struct lxc_handler *handler)
-{
-       if (ops && ops->create_legacy)
-               return ops->create_legacy(handler->cgroup_data, handler->pid);
-
-       return true;
-}
-
-const char *cgroup_get_cgroup(struct lxc_handler *handler,
-                             const char *subsystem)
-{
-       if (ops)
-               return ops->get_cgroup(handler->cgroup_data, subsystem);
-
-       return NULL;
-}
-
-bool cgroup_escape(struct lxc_handler *handler)
-{
-       if (ops)
-               return ops->escape(handler->cgroup_data);
-
-       return false;
-}
-
-int cgroup_num_hierarchies(void)
-{
-       if (!ops)
-               return -1;
-
-       return ops->num_hierarchies();
-}
-
-bool cgroup_get_hierarchies(int n, char ***out)
-{
        if (!ops)
-               return false;
-
-       return ops->get_hierarchies(n, out);
-}
-
-bool cgroup_unfreeze(struct lxc_handler *handler)
-{
-       if (ops)
-               return ops->unfreeze(handler->cgroup_data);
-
-       return false;
-}
-
-bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices)
-{
-       if (ops)
-               return ops->setup_limits(handler->cgroup_data,
-                                        handler->conf, with_devices);
-
-       return false;
-}
+               return;
 
-bool cgroup_chown(struct lxc_handler *handler)
-{
-       if (ops && ops->chown)
-               return ops->chown(handler->cgroup_data, handler->conf);
+       free(ops->cgroup_use);
+       free(ops->cgroup_pattern);
+       free(ops->container_cgroup);
 
-       return true;
-}
+       for (it = ops->hierarchies; it && *it; it++) {
+               char **ctrlr;
 
-bool cgroup_mount(const char *root, struct lxc_handler *handler, int type)
-{
-       if (ops)
-               return ops->mount_cgroup(handler, root, type);
-
-       return false;
-}
+               for (ctrlr = (*it)->controllers; ctrlr && *ctrlr; ctrlr++)
+                       free(*ctrlr);
+               free((*it)->controllers);
 
-int cgroup_nrtasks(struct lxc_handler *handler)
-{
-       if (ops) {
-               if (ops->nrtasks)
-                       return ops->nrtasks(handler->cgroup_data);
-               else
-                       WARN("cgroup driver \"%s\" doesn't implement nrtasks", ops->driver);
+               free((*it)->mountpoint);
+               free((*it)->base_cgroup);
+               free((*it)->fullcgpath);
+               free(*it);
        }
+       free(ops->hierarchies);
 
-       return -1;
-}
-
-bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid)
-{
-       if (ops)
-               return ops->attach(name, lxcpath, pid);
-
-       return false;
-}
-
-int lxc_cgroup_set(const char *filename, const char *value, const char *name,
-                  const char *lxcpath)
-{
-       if (ops)
-               return ops->set(filename, value, name, lxcpath);
-
-       return -1;
-}
-
-int lxc_cgroup_get(const char *filename, char *value, size_t len,
-                  const char *name, const char *lxcpath)
-{
-       if (ops)
-               return ops->get(filename, value, len, name, lxcpath);
-
-       return -1;
-}
-
-void cgroup_disconnect(void)
-{
-       if (ops && ops->disconnect)
-               ops->disconnect();
+       return;
 }
 
 #define INIT_SCOPE "/init.scope"
index 0f04e8b7ac6de4f1952f38f9d0257b15523fd492..ae910be8b14f625b2d5b50f2b0a8a1fc134580b5 100644 (file)
@@ -39,48 +39,114 @@ typedef enum {
         CGROUP_LAYOUT_UNIFIED =  2,
 } cgroup_layout_t;
 
+/* A descriptor for a mounted hierarchy
+ *
+ * @controllers
+ * - legacy hierarchy
+ *   Either NULL, or a null-terminated list of all the co-mounted controllers.
+ * - unified hierarchy
+ *   Either NULL, or a null-terminated list of all enabled controllers.
+ *
+ * @mountpoint
+ * - The mountpoint we will use.
+ * - legacy hierarchy
+ *   It will be either /sys/fs/cgroup/controller or
+ *   /sys/fs/cgroup/controllerlist.
+ * - unified hierarchy
+ *   It will either be /sys/fs/cgroup or /sys/fs/cgroup/<mountpoint-name>
+ *   depending on whether this is a hybrid cgroup layout (mix of legacy and
+ *   unified hierarchies) or a pure unified cgroup layout.
+ *
+ * @base_cgroup
+ * - The cgroup under which the container cgroup path
+ *   is created. This will be either the caller's cgroup (if not root), or
+ *   init's cgroup (if root).
+ *
+ * @fullcgpath
+ * - The full path to the container's cgroup.
+ *
+ * @version
+ * - legacy hierarchy
+ *   If the hierarchy is a legacy hierarchy this will be set to
+ *   CGROUP_SUPER_MAGIC.
+ * - unified hierarchy
+ *   If the hierarchy is a unified hierarchy this will be set to
+ *   CGROUP2_SUPER_MAGIC.
+ */
+struct hierarchy {
+       char **controllers;
+       char *mountpoint;
+       char *base_cgroup;
+       char *fullcgpath;
+       int version;
+};
+
 struct cgroup_ops {
+       /* string constant */
        const char *driver;
+
+       /* string constant */
        const char *version;
 
-       void *(*init)(struct lxc_handler *handler);
-       void (*destroy)(void *hdata, struct lxc_conf *conf);
-       bool (*create)(void *hdata);
-       bool (*enter)(void *hdata, pid_t pid);
-       bool (*create_legacy)(void *hdata, pid_t pid);
-       const char *(*get_cgroup)(void *hdata, const char *subsystem);
-       bool (*escape)();
-       int (*num_hierarchies)();
-       bool (*get_hierarchies)(int n, char ***out);
-       int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath);
-       int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
-       bool (*unfreeze)(void *hdata);
-       bool (*setup_limits)(void *hdata, struct lxc_conf *conf, bool with_devices);
-       bool (*chown)(void *hdata, struct lxc_conf *conf);
-       bool (*attach)(const char *name, const char *lxcpath, pid_t pid);
-       bool (*mount_cgroup)(void *hdata, const char *root, int type);
-       int (*nrtasks)(void *hdata);
-       void (*disconnect)(void);
+       /* Which controllers the container is supposed to use. */
+       char *cgroup_use;
+       char *cgroup_pattern;
+       char *container_cgroup;
+
+       /* @hierarchies
+        * - A NULL-terminated array of struct hierarchy, one per legacy
+        *   hierarchy. No duplicates. First sufficient, writeable mounted
+        *   hierarchy wins.
+        */
+       struct hierarchy **hierarchies;
+       struct hierarchy *unified;
+
+       /*
+        * @cgroup_layout
+        * - What cgroup layout the container is running with.
+        *   - CGROUP_LAYOUT_UNKNOWN
+        *     The cgroup layout could not be determined. This should be treated
+        *     as an error condition.
+        *   - CGROUP_LAYOUT_LEGACY
+        *     The container is running with all controllers mounted into legacy
+        *     cgroup hierarchies.
+        *   - CGROUP_LAYOUT_HYBRID
+        *     The container is running with at least one controller mounted
+        *     into a legacy cgroup hierarchy and a mountpoint for the unified
+        *     hierarchy. The unified hierarchy can be empty (no controllers
+        *     enabled) or non-empty (controllers enabled).
+        *   - CGROUP_LAYOUT_UNIFIED
+        *     The container is running on a pure unified cgroup hierarchy. The
+        *     unified hierarchy can be empty (no controllers enabled) or
+        *     non-empty (controllers enabled).
+        */
+       cgroup_layout_t cgroup_layout;
+
+       bool (*data_init)(struct cgroup_ops *ops);
+       void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler);
+       bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler);
+       bool (*enter)(struct cgroup_ops *ops, pid_t pid);
+       const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller);
+       bool (*escape)(const struct cgroup_ops *ops);
+       int (*num_hierarchies)(struct cgroup_ops *ops);
+       bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out);
+       int (*set)(struct cgroup_ops *ops, const char *filename,
+                  const char *value, const char *name, const char *lxcpath);
+       int (*get)(struct cgroup_ops *ops, const char *filename, char *value,
+                  size_t len, const char *name, const char *lxcpath);
+       bool (*unfreeze)(struct cgroup_ops *ops);
+       bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf,
+                            bool with_devices);
+       bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf);
+       bool (*attach)(struct cgroup_ops *ops, const char *name,
+                      const char *lxcpath, pid_t pid);
+       bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler,
+                     const char *root, int type);
+       int (*nrtasks)(struct cgroup_ops *ops);
 };
 
-extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid);
-extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type);
-extern void cgroup_destroy(struct lxc_handler *handler);
-extern bool cgroup_init(struct lxc_handler *handler);
-extern bool cgroup_create(struct lxc_handler *handler);
-extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices);
-extern bool cgroup_chown(struct lxc_handler *handler);
-extern bool cgroup_enter(struct lxc_handler *handler);
-extern void cgroup_cleanup(struct lxc_handler *handler);
-extern bool cgroup_create_legacy(struct lxc_handler *handler);
-extern int cgroup_nrtasks(struct lxc_handler *handler);
-extern const char *cgroup_get_cgroup(struct lxc_handler *handler,
-                                    const char *subsystem);
-extern bool cgroup_escape();
-extern int cgroup_num_hierarchies();
-extern bool cgroup_get_hierarchies(int i, char ***out);
-extern bool cgroup_unfreeze(struct lxc_handler *handler);
-extern void cgroup_disconnect(void);
+extern struct cgroup_ops *cgroup_init(struct lxc_handler *handler);
+extern void cgroup_exit(struct cgroup_ops *ops);
 
 extern void prune_init_scope(char *cg);
 extern bool is_crucial_cgroup_subsystem(const char *s);
index 54e9f75c557e0a934c61bc7bf40523d9e748e171..1ec6c7e70c8bf3b12e481b9f30555bc58a55fde2 100644 (file)
@@ -473,11 +473,12 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req,
 {
        const char *path;
        struct lxc_cmd_rsp rsp;
+       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 
        if (req->datalen > 0)
-               path = cgroup_get_cgroup(handler, req->data);
+               path = cgroup_ops->get_cgroup(cgroup_ops, req->data);
        else
-               path = cgroup_get_cgroup(handler, NULL);
+               path = cgroup_ops->get_cgroup(cgroup_ops, NULL);
        if (!path)
                return -1;
 
@@ -637,6 +638,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
 {
        struct lxc_cmd_rsp rsp;
        int stopsignal = SIGKILL;
+       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 
        if (handler->conf->stopsignal)
                stopsignal = handler->conf->stopsignal;
@@ -648,7 +650,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req,
                 * lxc_unfreeze() would do another cmd (GET_CGROUP) which would
                 * deadlock us.
                 */
-               if (cgroup_unfreeze(handler))
+               if (cgroup_ops->unfreeze(cgroup_ops))
                        return 0;
 
                ERROR("Failed to unfreeze container \"%s\"", handler->name);
index 4f5b0751cf66b442f940d770c19f1f51bff8d4a5..0d7152b711e85b5a15bd1f4a4bfc3456e58e009a 100644 (file)
@@ -757,7 +757,10 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
                if (flags & LXC_AUTO_CGROUP_FORCE)
                        cg_flags |= LXC_AUTO_CGROUP_FORCE;
 
-               if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
+               if (!handler->cgroup_ops->mount(handler->cgroup_ops,
+                                               handler,
+                                               conf->rootfs.path ? conf->rootfs.mount : "",
+                                               cg_flags)) {
                        SYSERROR("Failed to mount \"/sys/fs/cgroup\"");
                        return -1;
                }
index 9c70c592198f09382600730766e38dfc31db4ed7..de2bc677390cbe33a81b658e643715f057a65fe9 100644 (file)
@@ -171,7 +171,7 @@ static int cmp_version(const char *v1, const char *v2)
        return -1;
 }
 
-static void exec_criu(struct criu_opts *opts)
+static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts)
 {
        char **argv, log[PATH_MAX];
        int static_args = 23, argc = 0, i, ret;
@@ -190,7 +190,7 @@ static void exec_criu(struct criu_opts *opts)
         * /actual/ root cgroup so that lxcfs thinks criu has enough rights to
         * see all cgroups.
         */
-       if (!cgroup_escape()) {
+       if (!cgroup_ops->escape(cgroup_ops)) {
                ERROR("failed to escape cgroups");
                return;
        }
@@ -248,8 +248,8 @@ static void exec_criu(struct criu_opts *opts)
                return;
        }
 
-       if (cgroup_num_hierarchies() > 0)
-               static_args += 2 * cgroup_num_hierarchies();
+       if (cgroup_ops->num_hierarchies(cgroup_ops) > 0)
+               static_args += 2 * cgroup_ops->num_hierarchies(cgroup_ops);
 
        if (opts->user->verbose)
                static_args++;
@@ -306,11 +306,11 @@ static void exec_criu(struct criu_opts *opts)
        DECLARE_ARG("-o");
        DECLARE_ARG(log);
 
-       for (i = 0; i < cgroup_num_hierarchies(); i++) {
+       for (i = 0; i < cgroup_ops->num_hierarchies(cgroup_ops); i++) {
                char **controllers = NULL, *fullname;
                char *path, *tmp;
 
-               if (!cgroup_get_hierarchies(i, &controllers)) {
+               if (!cgroup_ops->get_hierarchies(cgroup_ops, i, &controllers)) {
                        ERROR("failed to get hierarchy %d", i);
                        goto err;
                }
@@ -328,7 +328,7 @@ static void exec_criu(struct criu_opts *opts)
                } else {
                        const char *p;
 
-                       p = cgroup_get_cgroup(opts->handler, controllers[0]);
+                       p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]);
                        if (!p) {
                                ERROR("failed to get cgroup path for %s", controllers[0]);
                                goto err;
@@ -937,6 +937,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
        struct lxc_handler *handler;
        int status = 0;
        int pipes[2] = {-1, -1};
+       struct cgroup_ops *cgroup_ops;
 
        /* Try to detach from the current controlling tty if it exists.
         * Othwerise, lxc_init (via lxc_console) will attach the container's
@@ -958,12 +959,12 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
        if (lxc_init(c->name, handler) < 0)
                goto out;
 
-       if (!cgroup_init(handler)) {
-               ERROR("failed initing cgroups");
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
                goto out_fini_handler;
-       }
+       handler->cgroup_ops = cgroup_ops;
 
-       if (!cgroup_create(handler)) {
+       if (!cgroup_ops->create(cgroup_ops, handler)) {
                ERROR("failed creating groups");
                goto out_fini_handler;
        }
@@ -1052,7 +1053,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_
                os.console_name = c->lxc_conf->console.name;
 
                /* exec_criu() returning is an error */
-               exec_criu(&os);
+               exec_criu(cgroup_ops, &os);
                umount(rootfs->mount);
                rmdir(rootfs->mount);
                goto out_fini_handler;
@@ -1253,16 +1254,21 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op
        if (pid == 0) {
                struct criu_opts os;
                struct lxc_handler h;
+               struct cgroup_ops *cgroup_ops;
 
                close(criuout[0]);
 
                lxc_zero_handler(&h);
 
                h.name = c->name;
-               if (!cgroup_init(&h)) {
+
+               cgroup_ops = cgroup_init(NULL);
+               if (!cgroup_ops) {
                        ERROR("failed to cgroup_init()");
                        _exit(EXIT_FAILURE);
+                       return -1;
                }
+               h.cgroup_ops = cgroup_ops;
 
                os.pipefd = criuout[1];
                os.action = mode;
@@ -1278,7 +1284,7 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op
                }
 
                /* exec_criu() returning is an error */
-               exec_criu(&os);
+               exec_criu(cgroup_ops, &os);
                free(criu_version);
                _exit(EXIT_FAILURE);
        } else {
index 96d54be3deb7c34dd96f05ebd902687a45b444eb..c842b293248f43ec2d102484bdeddd89b1ab618d 100644 (file)
@@ -31,6 +31,7 @@
 #include <sys/types.h>
 #include <sys/param.h>
 
+#include "cgroup.h"
 #include "commands.h"
 #include "error.h"
 #include "log.h"
@@ -45,8 +46,14 @@ lxc_state_t freezer_state(const char *name, const char *lxcpath)
 {
        int ret;
        char v[100];
+       struct cgroup_ops *cgroup_ops;
 
-       ret = lxc_cgroup_get("freezer.state", v, sizeof(v), name, lxcpath);
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
+               return -1;
+
+       ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v), name, lxcpath);
+       cgroup_exit(cgroup_ops);
        if (ret < 0)
                return -1;
 
@@ -60,19 +67,26 @@ static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath)
 {
        int ret;
        char v[100];
+       struct cgroup_ops *cgroup_ops;
        const char *state = freeze ? "FROZEN" : "THAWED";
        size_t state_len = 6;
        lxc_state_t new_state = freeze ? FROZEN : THAWED;
 
-       ret = lxc_cgroup_set("freezer.state", state, name, lxcpath);
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
+               return -1;
+
+       ret = cgroup_ops->set(cgroup_ops, "freezer.state", state, name, lxcpath);
        if (ret < 0) {
+               cgroup_exit(cgroup_ops);
                ERROR("Failed to freeze %s", name);
                return -1;
        }
 
        for (;;) {
-               ret = lxc_cgroup_get("freezer.state", v, sizeof(v), name, lxcpath);
+               ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v), name, lxcpath);
                if (ret < 0) {
+                       cgroup_exit(cgroup_ops);
                        ERROR("Failed to get freezer state of %s", name);
                        return -1;
                }
@@ -82,6 +96,7 @@ static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath)
 
                ret = strncmp(v, state, state_len);
                if (ret == 0) {
+                       cgroup_exit(cgroup_ops);
                        lxc_cmd_serve_state_clients(name, lxcpath, new_state);
                        lxc_monitor_send_state(name, new_state, lxcpath);
                        return 0;
index d3c08ddf22341e1385063af1ddbe58bf96e4889e..8ae8c717d785388ce8c969c1299ccbbbaae8cac1 100644 (file)
@@ -97,29 +97,6 @@ extern int lxc_unfreeze(const char *name, const char *lxcpath);
  */
 extern lxc_state_t lxc_state(const char *name, const char *lxcpath);
 
-/*
- * Set a specified value for a specified subsystem. The specified
- * subsystem must be fully specified, eg. "cpu.shares"
- * @filename  : the cgroup attribute filename
- * @value     : the value to be set
- * @name      : the name of the container
- * @lxcpath   : lxc config path for container
- * Returns 0 on success, < 0 otherwise
- */
-extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath);
-
-/*
- * Get a specified value for a specified subsystem. The specified
- * subsystem must be fully specified, eg. "cpu.shares"
- * @filename  : the cgroup attribute filename
- * @value     : the value to be set
- * @len       : the len of the value variable
- * @name      : the name of the container
- * @lxcpath   : lxc config path for container
- * Returns the number of bytes read, < 0 on error
- */
-extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath);
-
 /*
  * Create and return a new lxccontainer struct.
  */
index a9041c860140ff77b205266f8dada046da41a667..21f5ce6d8c8c8e442131d52f0b3dc79d08552e8d 100644 (file)
@@ -3141,6 +3141,7 @@ WRAP_API_1(bool, lxcapi_set_config_path, const char *)
 static bool do_lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys, const char *value)
 {
        int ret;
+       struct cgroup_ops *cgroup_ops;
 
        if (!c)
                return false;
@@ -3148,12 +3149,19 @@ static bool do_lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsy
        if (is_stopped(c))
                return false;
 
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
+               return false;
+
        if (container_disk_lock(c))
                return false;
 
-       ret = lxc_cgroup_set(subsys, value, c->name, c->config_path);
+       ret = cgroup_ops->set(cgroup_ops, subsys, value, c->name, c->config_path);
 
        container_disk_unlock(c);
+
+       cgroup_exit(cgroup_ops);
+
        return ret == 0;
 }
 
@@ -3162,6 +3170,7 @@ WRAP_API_2(bool, lxcapi_set_cgroup_item, const char *, const char *)
 static int do_lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, char *retv, int inlen)
 {
        int ret;
+       struct cgroup_ops *cgroup_ops;
 
        if (!c)
                return -1;
@@ -3169,12 +3178,20 @@ static int do_lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys
        if (is_stopped(c))
                return -1;
 
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
+               return -1;
+
        if (container_disk_lock(c))
                return -1;
 
-       ret = lxc_cgroup_get(subsys, retv, inlen, c->name, c->config_path);
+       ret = cgroup_ops->get(cgroup_ops, subsys, retv, inlen, c->name,
+                             c->config_path);
 
        container_disk_unlock(c);
+
+       cgroup_exit(cgroup_ops);
+
        return ret;
 }
 
index b8404f99e0cc18e6ea50e412b44da31ffef48567..f4f8e520c2d45a511e224e16f36dd3e1cf533e56 100644 (file)
@@ -849,6 +849,13 @@ int lxc_init(const char *name, struct lxc_handler *handler)
        }
        TRACE("Chowned console");
 
+       handler->cgroup_ops = cgroup_init(handler);
+       if (!handler->cgroup_ops) {
+               ERROR("Failed to initialize cgroup driver");
+               goto out_restore_sigmask;
+       }
+       TRACE("Initialized cgroup driver");
+
        INFO("Container \"%s\" is initialized", name);
        return 0;
 
@@ -871,6 +878,7 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
        struct lxc_list *cur, *next;
        char *namespaces[LXC_NS_MAX + 1];
        size_t namespace_count = 0;
+       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 
        /* The STOPPING state is there for future cleanup code which can take
         * awhile.
@@ -935,7 +943,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
        while (namespace_count--)
                free(namespaces[namespace_count]);
 
-       cgroup_destroy(handler);
+       cgroup_ops->destroy(cgroup_ops, handler);
+       cgroup_exit(cgroup_ops);
 
        if (handler->conf->reboot == 0) {
                /* For all new state clients simply close the command socket.
@@ -1506,8 +1515,9 @@ static int lxc_spawn(struct lxc_handler *handler)
        struct lxc_list *id_map;
        const char *name = handler->name;
        const char *lxcpath = handler->lxcpath;
-       bool cgroups_connected = false, share_ns = false;
+       bool share_ns = false;
        struct lxc_conf *conf = handler->conf;
+       struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
 
        id_map = &conf->id_map;
        wants_to_map_ids = !lxc_list_empty(id_map);
@@ -1567,14 +1577,7 @@ static int lxc_spawn(struct lxc_handler *handler)
                }
        }
 
-       if (!cgroup_init(handler)) {
-               ERROR("Failed initializing cgroup support");
-               goto out_delete_net;
-       }
-
-       cgroups_connected = true;
-
-       if (!cgroup_create(handler)) {
+       if (!cgroup_ops->create(cgroup_ops, handler)) {
                ERROR("Failed creating cgroups");
                goto out_delete_net;
        }
@@ -1663,15 +1666,15 @@ static int lxc_spawn(struct lxc_handler *handler)
        if (ret < 0)
                goto out_delete_net;
 
-       if (!cgroup_setup_limits(handler, false)) {
+       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) {
                ERROR("Failed to setup cgroup limits for container \"%s\"", name);
                goto out_delete_net;
        }
 
-       if (!cgroup_enter(handler))
+       if (!cgroup_ops->enter(cgroup_ops, handler->pid))
                goto out_delete_net;
 
-       if (!cgroup_chown(handler))
+       if (!cgroup_ops->chown(cgroup_ops, handler->conf))
                goto out_delete_net;
 
        /* Now we're ready to preserve the network namespace */
@@ -1736,15 +1739,12 @@ static int lxc_spawn(struct lxc_handler *handler)
        if (ret < 0)
                goto out_delete_net;
 
-       if (!cgroup_setup_limits(handler, true)) {
+       if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) {
                ERROR("Failed to setup legacy device cgroup controller limits");
                goto out_delete_net;
        }
        TRACE("Set up legacy device cgroup controller limits");
 
-       cgroup_disconnect();
-       cgroups_connected = false;
-
        if (handler->ns_clone_flags & CLONE_NEWCGROUP) {
                /* Now we're ready to preserve the cgroup namespace */
                ret = lxc_try_preserve_ns(handler->pid, "cgroup");
@@ -1821,9 +1821,6 @@ static int lxc_spawn(struct lxc_handler *handler)
        return 0;
 
 out_delete_net:
-       if (cgroups_connected)
-               cgroup_disconnect();
-
        if (handler->ns_clone_flags & CLONE_NEWNET)
                lxc_delete_network(handler);
 
index 5455ca5f35f3f7207ef438b494da6e87d288d046..466dbf5f3059652552fd2db465420547d802afb1 100644 (file)
@@ -132,6 +132,8 @@ struct lxc_handler {
         * true.
         */
        int exit_status;
+
+       struct cgroup_ops *cgroup_ops;
 };
 
 struct execute_args {
index 42c84bcdd86720d64e1338cf9407e65373a04243..e794e565f01931f946aefb470b32bf057fe32172 100644 (file)
@@ -53,6 +53,7 @@ static int test_running_container(const char *lxcpath,
        char *cgrelpath;
        char  relpath[PATH_MAX+1];
        char  value[NAME_MAX], value_save[NAME_MAX];
+       struct cgroup_ops *cgroup_ops;
 
        sprintf(relpath, "%s/%s", group ? group : "lxc", name);
 
@@ -75,36 +76,41 @@ static int test_running_container(const char *lxcpath,
                goto err3;
        }
 
+       cgroup_ops = cgroup_init(NULL);
+       if (!cgroup_ops)
+               goto err3;
+
        /* test get/set value using memory.soft_limit_in_bytes file */
-       ret = lxc_cgroup_get("memory.soft_limit_in_bytes", value, sizeof(value),
-                            c->name, c->config_path);
+       ret = cgroup_ops->get(cgroup_ops, "memory.soft_limit_in_bytes", value,
+                             sizeof(value), c->name, c->config_path);
        if (ret < 0) {
-               TSTERR("lxc_cgroup_get failed");
+               TSTERR("cgroup_get failed");
                goto err3;
        }
        strcpy(value_save, value);
 
-       ret = lxc_cgroup_set("memory.soft_limit_in_bytes", "512M", c->name, c->config_path);
+       ret = cgroup_ops->set(cgroup_ops, "memory.soft_limit_in_bytes", "512M",
+                             c->name, c->config_path);
        if (ret < 0) {
-               TSTERR("lxc_cgroup_set failed %d %d", ret, errno);
+               TSTERR("cgroup_set failed %d %d", ret, errno);
                goto err3;
        }
-       ret = lxc_cgroup_get("memory.soft_limit_in_bytes", value, sizeof(value),
-                            c->name, c->config_path);
+       ret = cgroup_ops->get(cgroup_ops, "memory.soft_limit_in_bytes", value,
+                             sizeof(value), c->name, c->config_path);
        if (ret < 0) {
-               TSTERR("lxc_cgroup_get failed");
+               TSTERR("cgroup_get failed");
                goto err3;
        }
        if (strcmp(value, "536870912\n")) {
-               TSTERR("lxc_cgroup_set_bypath failed to set value >%s<", value);
+               TSTERR("cgroup_set_bypath failed to set value >%s<", value);
                goto err3;
        }
 
        /* restore original value */
-       ret = lxc_cgroup_set("memory.soft_limit_in_bytes", value_save,
-                            c->name, c->config_path);
+       ret = cgroup_ops->set(cgroup_ops, "memory.soft_limit_in_bytes",
+                             value_save, c->name, c->config_path);
        if (ret < 0) {
-               TSTERR("lxc_cgroup_set failed");
+               TSTERR("cgroup_set failed");
                goto err3;
        }