From: Christian Brauner Date: Fri, 18 May 2018 18:16:22 +0000 (+0200) Subject: cgroups: refactor cgroup handling X-Git-Tag: lxc-3.1.0~298^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2202afc960d7d90df1fd5a1bb27be2529fcc235b;p=thirdparty%2Flxc.git cgroups: refactor cgroup handling This replaces the constructor implementation of cgroup handling with a simpler, thread-safe on-demand model of cgroup driver initialization. Making the cgroup initialization code run in a constructor means that each time the shared library gets mapped the cgroup parsing code gets run. That's unnecessary overhead. It also feels to me that this is only accidentally thread-safe because constructors are only run once. But should threads actually end up manipulating or freeing memory that is file-global to cgfsng.c we'd be screwed. Now, I might be wrong here but the cleaner implementation is to allocate a cgroup driver on demand whenever we need it. Take the chance and rework the cgroup_ops interface to make the functions it wants to have implemented a lot cleaner. Signed-off-by: Christian Brauner --- diff --git a/src/lxc/attach.c b/src/lxc/attach.c index e1699b137..b62dcecfa 100644 --- a/src/lxc/attach.c +++ b/src/lxc/attach.c @@ -1272,10 +1272,17 @@ int lxc_attach(const char *name, const char *lxcpath, /* Attach to cgroup, if requested. 
*/ if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { - if (!cgroup_attach(name, lxcpath, pid)) + struct cgroup_ops *cgroup_ops; + + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + goto on_error; + + if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid)) goto on_error; - TRACE("Moved intermediate process %d into container's " - "cgroups", pid); + + cgroup_exit(cgroup_ops); + TRACE("Moved intermediate process %d into container's cgroups", pid); } /* Setup /proc limits */ diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 2f9c15c56..544772c61 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -60,138 +60,6 @@ lxc_log_define(lxc_cgfsng, lxc); -static struct cgroup_ops cgfsng_ops; - -/* A descriptor for a mounted hierarchy - * - * @controllers - * - legacy hierarchy - * Either NULL, or a null-terminated list of all the co-mounted controllers. - * - unified hierarchy - * Either NULL, or a null-terminated list of all enabled controllers. - * - * @mountpoint - * - The mountpoint we will use. - * - legacy hierarchy - * It will be either /sys/fs/cgroup/controller or - * /sys/fs/cgroup/controllerlist. - * - unified hierarchy - * It will either be /sys/fs/cgroup or /sys/fs/cgroup/ - * depending on whether this is a hybrid cgroup layout (mix of legacy and - * unified hierarchies) or a pure unified cgroup layout. - * - * @base_cgroup - * - The cgroup under which the container cgroup path - * is created. This will be either the caller's cgroup (if not root), or - * init's cgroup (if root). - * - * @fullcgpath - * - The full path to the containers cgroup. - * - * @version - * - legacy hierarchy - * If the hierarchy is a legacy hierarchy this will be set to - * CGROUP_SUPER_MAGIC. - * - unified hierarchy - * If the hierarchy is a legacy hierarchy this will be set to - * CGROUP2_SUPER_MAGIC. 
- */ -struct hierarchy { - char **controllers; - char *mountpoint; - char *base_cgroup; - char *fullcgpath; - int version; -}; - -/* The cgroup data which is attached to the lxc_handler. - * - * @cgroup_pattern - * - A copy of lxc.cgroup.pattern. - * - * @container_cgroup - * - If not null, the cgroup which was created for the container. For each - * hierarchy, it is created under the @hierarchy->base_cgroup directory. - * Relative to the base_cgroup it is the same for all hierarchies. - * - * @name - * - The name of the container. - * - * @cgroup_meta - * - A copy of the container's cgroup information. This overrides - * @cgroup_pattern. - * - * @cgroup_layout - * - What cgroup layout the container is running with. - * - CGROUP_LAYOUT_UNKNOWN - * The cgroup layout could not be determined. This should be treated as an - * error condition. - * - CGROUP_LAYOUT_LEGACY - * The container is running with all controllers mounted into legacy cgroup - * hierarchies. - * - CGROUP_LAYOUT_HYBRID - * The container is running with at least one controller mounted into a - * legacy cgroup hierarchy and a mountpoint for the unified hierarchy. The - * unified hierarchy can be empty (no controllers enabled) or non-empty - * (controllers enabled). - * - CGROUP_LAYOUT_UNIFIED - * The container is running on a pure unified cgroup hierarchy. The unified - * hierarchy can be empty (no controllers enabled) or non-empty (controllers - * enabled). - */ -struct cgfsng_handler_data { - char *cgroup_pattern; - char *container_cgroup; /* cgroup we created for the container */ - char *name; /* container name */ - /* per-container cgroup information */ - struct lxc_cgroup cgroup_meta; - cgroup_layout_t cgroup_layout; -}; - -/* @hierarchies - * - A NULL-terminated array of struct hierarchy, one per legacy hierarchy. No - * duplicates. First sufficient, writeable mounted hierarchy wins. 
- */ -struct hierarchy **hierarchies; -/* Pointer to the unified hierarchy in the null terminated list @hierarchies. - * This is merely a convenience for hybrid cgroup layouts to easily retrieve the - * unified hierarchy without iterating throught @hierarchies. - */ -struct hierarchy *unified; -/* - * @cgroup_layout - * - What cgroup layout the container is running with. - * - CGROUP_LAYOUT_UNKNOWN - * The cgroup layout could not be determined. This should be treated as an - * error condition. - * - CGROUP_LAYOUT_LEGACY - * The container is running with all controllers mounted into legacy cgroup - * hierarchies. - * - CGROUP_LAYOUT_HYBRID - * The container is running with at least one controller mounted into a - * legacy cgroup hierarchy and a mountpoint for the unified hierarchy. The - * unified hierarchy can be empty (no controllers enabled) or non-empty - * (controllers enabled). - * - CGROUP_LAYOUT_UNIFIED - * The container is running on a pure unified cgroup hierarchy. The unified - * hierarchy can be empty (no controllers enabled) or non-empty (controllers - * enabled). - */ -cgroup_layout_t cgroup_layout; -/* What controllers is the container supposed to use. */ -char *cgroup_use; - -/* @lxc_cgfsng_debug - * - Whether to print debug info to stdout for the cgfsng driver. - */ -static bool lxc_cgfsng_debug; - -#define CGFSNG_DEBUG(format, ...) 
\ - do { \ - if (lxc_cgfsng_debug) \ - printf("cgfsng: " format, ##__VA_ARGS__); \ - } while (0) - static void free_string_list(char **clist) { int i; @@ -298,40 +166,28 @@ static void must_append_controller(char **klist, char **nlist, char ***clist, (*clist)[newentry] = copy; } -static void free_handler_data(struct cgfsng_handler_data *d) -{ - free(d->cgroup_pattern); - free(d->container_cgroup); - free(d->name); - if (d->cgroup_meta.dir) - free(d->cgroup_meta.dir); - if (d->cgroup_meta.controllers) - free(d->cgroup_meta.controllers); - free(d); -} - /* Given a handler's cgroup data, return the struct hierarchy for the controller * @c, or NULL if there is none. */ -struct hierarchy *get_hierarchy(const char *c) +struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *c) { int i; - if (!hierarchies) + if (!ops->hierarchies) return NULL; - for (i = 0; hierarchies[i]; i++) { + for (i = 0; ops->hierarchies[i]; i++) { if (!c) { /* This is the empty unified hierarchy. */ - if (hierarchies[i]->controllers && - !hierarchies[i]->controllers[0]) - return hierarchies[i]; + if (ops->hierarchies[i]->controllers && + !ops->hierarchies[i]->controllers[0]) + return ops->hierarchies[i]; continue; } - if (string_in_list(hierarchies[i]->controllers, c)) - return hierarchies[i]; + if (string_in_list(ops->hierarchies[i]->controllers, c)) + return ops->hierarchies[i]; } return NULL; @@ -829,23 +685,23 @@ static bool controller_found(struct hierarchy **hlist, char *entry) /* Return true if all of the controllers which we require have been found. The * required list is freezer and anything in lxc.cgroup.use. 
*/ -static bool all_controllers_found(void) +static bool all_controllers_found(struct cgroup_ops *ops) { char *p; char *saveptr = NULL; - struct hierarchy **hlist = hierarchies; + struct hierarchy **hlist = ops->hierarchies; if (!controller_found(hlist, "freezer")) { - CGFSNG_DEBUG("No freezer controller mountpoint found\n"); + ERROR("No freezer controller mountpoint found"); return false; } - if (!cgroup_use) + if (!ops->cgroup_use) return true; - for (; (p = strtok_r(cgroup_use, ",", &saveptr)); cgroup_use = NULL) + for (; (p = strtok_r(ops->cgroup_use, ",", &saveptr)); ops->cgroup_use = NULL) if (!controller_found(hlist, p)) { - CGFSNG_DEBUG("No %s controller mountpoint found\n", p); + ERROR("No %s controller mountpoint found", p); return false; } @@ -879,14 +735,14 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, * verify /sys/fs/cgroup/ in this field. */ if (strncmp(p, "/sys/fs/cgroup/", 15) != 0) { - CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p); + ERROR("Found hierarchy not under /sys/fs/cgroup: \"%s\"", p); return NULL; } p += 15; p2 = strchr(p, ' '); if (!p2) { - CGFSNG_DEBUG("Corrupt mountinfo\n"); + ERROR("Corrupt mountinfo"); return NULL; } *p2 = '\0'; @@ -944,7 +800,7 @@ static char **cg_unified_get_controllers(const char *file) return aret; } -static struct hierarchy *add_hierarchy(char **clist, char *mountpoint, +static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint, char *base_cgroup, int type) { struct hierarchy *new; @@ -957,8 +813,8 @@ static struct hierarchy *add_hierarchy(char **clist, char *mountpoint, new->fullcgpath = NULL; new->version = type; - newentry = append_null_to_list((void ***)&hierarchies); - hierarchies[newentry] = new; + newentry = append_null_to_list((void ***)h); + (*h)[newentry] = new; return new; } @@ -1137,39 +993,26 @@ static void trim(char *s) s[--len] = '\0'; } -static void lxc_cgfsng_print_handler_data(const struct 
cgfsng_handler_data *d) -{ - printf("Cgroup information:\n"); - printf(" container name: %s\n", d->name ? d->name : "(null)"); - printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)"); - printf(" lxc.cgroup.pattern: %s\n", - d->cgroup_pattern ? d->cgroup_pattern : "(null)"); - printf(" lxc.cgroup.dir: %s\n", - d->cgroup_meta.dir ? d->cgroup_meta.dir : "(null)"); - printf(" cgroup: %s\n", - d->container_cgroup ? d->container_cgroup : "(null)"); -} - -static void lxc_cgfsng_print_hierarchies() +static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops) { int i; struct hierarchy **it; - if (!hierarchies) { - printf(" No hierarchies found\n"); + if (!ops->hierarchies) { + TRACE(" No hierarchies found"); return; } - printf(" Hierarchies:\n"); - for (i = 0, it = hierarchies; it && *it; it++, i++) { + TRACE(" Hierarchies:"); + for (i = 0, it = ops->hierarchies; it && *it; it++, i++) { int j; char **cit; - printf(" %d: base_cgroup: %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)"); - printf(" mountpoint: %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)"); - printf(" controllers:\n"); + TRACE(" %d: base_cgroup: %s", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)"); + TRACE(" mountpoint: %s", (*it)->mountpoint ? 
(*it)->mountpoint : "(null)"); + TRACE(" controllers:"); for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++) - printf(" %d: %s\n", j, *cit); + TRACE(" %d: %s", j, *cit); } } @@ -1179,491 +1022,155 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, int k; char **it; - printf("basecginfo is:\n"); - printf("%s\n", basecginfo); + TRACE("basecginfo is:"); + TRACE("%s", basecginfo); for (k = 0, it = klist; it && *it; it++, k++) - printf("kernel subsystem %d: %s\n", k, *it); + TRACE("kernel subsystem %d: %s", k, *it); for (k = 0, it = nlist; it && *it; it++, k++) - printf("named subsystem %d: %s\n", k, *it); + TRACE("named subsystem %d: %s", k, *it); } -static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d) -{ - lxc_cgfsng_print_handler_data(d); - lxc_cgfsng_print_hierarchies(); -} - -/* At startup, parse_hierarchies finds all the info we need about cgroup - * mountpoints and current cgroups, and stores it in @d. - */ -static bool cg_hybrid_init(void) +static int recursive_destroy(char *dirname) { int ret; - char *basecginfo; - bool will_escape; - FILE *f; - size_t len = 0; - char *line = NULL; - char **klist = NULL, **nlist = NULL; - - /* Root spawned containers escape the current cgroup, so use init's - * cgroups as our base in that case. 
- */ - will_escape = (geteuid() == 0); - if (will_escape) - basecginfo = read_file("/proc/1/cgroup"); - else - basecginfo = read_file("/proc/self/cgroup"); - if (!basecginfo) - return false; - - ret = get_existing_subsystems(&klist, &nlist); - if (ret < 0) { - CGFSNG_DEBUG("Failed to retrieve available legacy cgroup controllers\n"); - free(basecginfo); - return false; - } - - f = fopen("/proc/self/mountinfo", "r"); - if (!f) { - CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n"); - free(basecginfo); - return false; - } - - if (lxc_cgfsng_debug) - lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist); - - while (getline(&line, &len, f) != -1) { - int type; - bool writeable; - struct hierarchy *new; - char *base_cgroup = NULL, *mountpoint = NULL; - char **controller_list = NULL; - - type = get_cgroup_version(line); - if (type == 0) - continue; + struct dirent *direntp; + DIR *dir; + int r = 0; - if (type == CGROUP2_SUPER_MAGIC && unified) - continue; + dir = opendir(dirname); + if (!dir) + return -1; - if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) { - if (type == CGROUP2_SUPER_MAGIC) - cgroup_layout = CGROUP_LAYOUT_UNIFIED; - else if (type == CGROUP_SUPER_MAGIC) - cgroup_layout = CGROUP_LAYOUT_LEGACY; - } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) { - if (type == CGROUP_SUPER_MAGIC) - cgroup_layout = CGROUP_LAYOUT_HYBRID; - } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) { - if (type == CGROUP2_SUPER_MAGIC) - cgroup_layout = CGROUP_LAYOUT_HYBRID; - } + while ((direntp = readdir(dir))) { + char *pathname; + struct stat mystat; - controller_list = cg_hybrid_get_controllers(klist, nlist, line, type); - if (!controller_list && type == CGROUP_SUPER_MAGIC) + if (!strcmp(direntp->d_name, ".") || + !strcmp(direntp->d_name, "..")) continue; - if (type == CGROUP_SUPER_MAGIC) - if (controller_list_is_dup(hierarchies, controller_list)) - goto next; - - mountpoint = cg_hybrid_get_mountpoint(line); - if (!mountpoint) { - CGFSNG_DEBUG("Failed parsing mountpoint 
from \"%s\"\n", line); - goto next; - } + pathname = must_make_path(dirname, direntp->d_name, NULL); - if (type == CGROUP_SUPER_MAGIC) - base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC); - else - base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); - if (!base_cgroup) { - CGFSNG_DEBUG("Failed to find current cgroup\n"); + ret = lstat(pathname, &mystat); + if (ret < 0) { + if (!r) + WARN("Failed to stat \"%s\"", pathname); + r = -1; goto next; } - trim(base_cgroup); - prune_init_scope(base_cgroup); - if (type == CGROUP2_SUPER_MAGIC) - writeable = test_writeable_v2(mountpoint, base_cgroup); - else - writeable = test_writeable_v1(mountpoint, base_cgroup); - if (!writeable) + if (!S_ISDIR(mystat.st_mode)) goto next; - if (type == CGROUP2_SUPER_MAGIC) { - char *cgv2_ctrl_path; - - cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup, - "cgroup.controllers", - NULL); - - controller_list = cg_unified_get_controllers(cgv2_ctrl_path); - free(cgv2_ctrl_path); - if (!controller_list) { - controller_list = cg_unified_make_empty_controller(); - CGFSNG_DEBUG("No controllers are enabled for " - "delegation in the unified hierarchy\n"); - } - } - - new = add_hierarchy(controller_list, mountpoint, base_cgroup, type); - if (type == CGROUP2_SUPER_MAGIC && !unified) - unified = new; - - continue; - + ret = recursive_destroy(pathname); + if (ret < 0) + r = -1; next: - free_string_list(controller_list); - free(mountpoint); - free(base_cgroup); + free(pathname); } - free_string_list(klist); - free_string_list(nlist); - - free(basecginfo); - - fclose(f); - free(line); - - if (lxc_cgfsng_debug) { - printf("Writable cgroup hierarchies:\n"); - lxc_cgfsng_print_hierarchies(); + ret = rmdir(dirname); + if (ret < 0) { + if (!r) + WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname); + r = -1; } - /* verify that all controllers in cgroup.use and all crucial - * controllers are accounted for - */ - if 
(!all_controllers_found()) - return false; + ret = closedir(dir); + if (ret < 0) { + if (!r) + WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname); + r = -1; + } - return true; + return r; } -static int cg_is_pure_unified(void) +static int cgroup_rmdir(struct hierarchy **hierarchies, + const char *container_cgroup) { + int i; - int ret; - struct statfs fs; + if (!container_cgroup || !hierarchies) + return 0; - ret = statfs("/sys/fs/cgroup", &fs); - if (ret < 0) - return -ENOMEDIUM; + for (i = 0; hierarchies[i]; i++) { + int ret; + struct hierarchy *h = hierarchies[i]; - if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC)) - return CGROUP2_SUPER_MAGIC; + if (!h->fullcgpath) + continue; + + ret = recursive_destroy(h->fullcgpath); + if (ret < 0) + WARN("Failed to destroy \"%s\"", h->fullcgpath); + + free(h->fullcgpath); + h->fullcgpath = NULL; + } return 0; } -/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */ -static char *cg_unified_get_current_cgroup(void) -{ - char *basecginfo, *base_cgroup; - bool will_escape; - char *copy = NULL; +struct generic_userns_exec_data { + struct hierarchy **hierarchies; + const char *container_cgroup; + struct lxc_conf *conf; + uid_t origuid; /* target uid in parent namespace */ + char *path; +}; - will_escape = (geteuid() == 0); - if (will_escape) - basecginfo = read_file("/proc/1/cgroup"); - else - basecginfo = read_file("/proc/self/cgroup"); - if (!basecginfo) - return NULL; +static int cgroup_rmdir_wrapper(void *data) +{ + int ret; + struct generic_userns_exec_data *arg = data; + uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; + gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 
0 : arg->conf->init_gid; - base_cgroup = strstr(basecginfo, "0::/"); - if (!base_cgroup) - goto cleanup_on_err; + ret = setresgid(nsgid, nsgid, nsgid); + if (ret < 0) { + SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid, + (int)nsgid, (int)nsgid); + return -1; + } - base_cgroup = base_cgroup + 3; - copy = copy_to_eol(base_cgroup); - if (!copy) - goto cleanup_on_err; + ret = setresuid(nsuid, nsuid, nsuid); + if (ret < 0) { + SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid, + (int)nsuid, (int)nsuid); + return -1; + } -cleanup_on_err: - free(basecginfo); - if (copy) - trim(copy); + ret = setgroups(0, NULL); + if (ret < 0 && errno != EPERM) { + SYSERROR("Failed to setgroups(0, NULL)"); + return -1; + } - return copy; + return cgroup_rmdir(arg->hierarchies, arg->container_cgroup); } -static int cg_unified_init(void) +static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler) { int ret; - char *mountpoint, *subtree_path; - char **delegatable; - char *base_cgroup = NULL; - - ret = cg_is_pure_unified(); - if (ret == -ENOMEDIUM) - return -ENOMEDIUM; - - if (ret != CGROUP2_SUPER_MAGIC) - return 0; - - base_cgroup = cg_unified_get_current_cgroup(); - if (!base_cgroup) - return -EINVAL; - prune_init_scope(base_cgroup); - - /* We assume that we have already been given controllers to delegate - * further down the hierarchy. If not it is up to the user to delegate - * them to us. - */ - mountpoint = must_copy_string("/sys/fs/cgroup"); - subtree_path = must_make_path(mountpoint, base_cgroup, - "cgroup.subtree_control", NULL); - delegatable = cg_unified_get_controllers(subtree_path); - free(subtree_path); - if (!delegatable) - delegatable = cg_unified_make_empty_controller(); - if (!delegatable[0]) - CGFSNG_DEBUG("No controllers are enabled for delegation\n"); - - /* TODO: If the user requested specific controllers via lxc.cgroup.use - * we should verify here. 
The reason I'm not doing it right is that I'm - * not convinced that lxc.cgroup.use will be the future since it is a - * global property. I much rather have an option that lets you request - * controllers per container. - */ - - add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC); - unified = hierarchies[0]; - - cgroup_layout = CGROUP_LAYOUT_UNIFIED; - return CGROUP2_SUPER_MAGIC; -} - -static bool cg_init(void) -{ - int ret; - const char *tmp; - - errno = 0; - tmp = lxc_global_config_value("lxc.cgroup.use"); - if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */ - CGFSNG_DEBUG("Failed to retrieve list of cgroups to use\n"); - return false; - } - cgroup_use = must_copy_string(tmp); - - ret = cg_unified_init(); - if (ret < 0) - return false; - - if (ret == CGROUP2_SUPER_MAGIC) - return true; - - return cg_hybrid_init(); -} - -static void *cgfsng_init(struct lxc_handler *handler) -{ - const char *cgroup_pattern; - struct cgfsng_handler_data *d; - - d = must_alloc(sizeof(*d)); - memset(d, 0, sizeof(*d)); - - /* copy container name */ - d->name = must_copy_string(handler->name); - - /* copy per-container cgroup information */ - d->cgroup_meta.dir = NULL; - d->cgroup_meta.controllers = NULL; - if (handler->conf) { - d->cgroup_meta.dir = must_copy_string(handler->conf->cgroup_meta.dir); - d->cgroup_meta.controllers = must_copy_string(handler->conf->cgroup_meta.controllers); - } - - /* copy system-wide cgroup information */ - cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); - if (!cgroup_pattern) { - /* lxc.cgroup.pattern is only NULL on error. 
*/ - ERROR("Failed to retrieve cgroup pattern"); - goto out_free; - } - d->cgroup_pattern = must_copy_string(cgroup_pattern); - - d->cgroup_layout = cgroup_layout; - if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY) - TRACE("Running with legacy cgroup layout"); - else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID) - TRACE("Running with hybrid cgroup layout"); - else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED) - TRACE("Running with unified cgroup layout"); - else - WARN("Running with unknown cgroup layout"); - - if (lxc_cgfsng_debug) - lxc_cgfsng_print_debuginfo(d); - - return d; - -out_free: - free_handler_data(d); - return NULL; -} - -static int recursive_destroy(char *dirname) -{ - int ret; - struct dirent *direntp; - DIR *dir; - int r = 0; - - dir = opendir(dirname); - if (!dir) - return -1; - - while ((direntp = readdir(dir))) { - char *pathname; - struct stat mystat; - - if (!strcmp(direntp->d_name, ".") || - !strcmp(direntp->d_name, "..")) - continue; - - pathname = must_make_path(dirname, direntp->d_name, NULL); - - ret = lstat(pathname, &mystat); - if (ret < 0) { - if (!r) - WARN("Failed to stat \"%s\"", pathname); - r = -1; - goto next; - } - - if (!S_ISDIR(mystat.st_mode)) - goto next; - - ret = recursive_destroy(pathname); - if (ret < 0) - r = -1; - next: - free(pathname); - } - - ret = rmdir(dirname); - if (ret < 0) { - if (!r) - WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname); - r = -1; - } - - ret = closedir(dir); - if (ret < 0) { - if (!r) - WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname); - r = -1; - } - - return r; -} - -static int cgroup_rmdir(char *container_cgroup) -{ - int i; - - if (!container_cgroup || !hierarchies) - return 0; - - for (i = 0; hierarchies[i]; i++) { - int ret; - struct hierarchy *h = hierarchies[i]; - - if (!h->fullcgpath) - continue; - - ret = recursive_destroy(h->fullcgpath); - if (ret < 0) - WARN("Failed to destroy \"%s\"", h->fullcgpath); - - free(h->fullcgpath); - h->fullcgpath = NULL; - } - - 
return 0; -} - -struct generic_userns_exec_data { - struct cgfsng_handler_data *d; - struct lxc_conf *conf; - uid_t origuid; /* target uid in parent namespace */ - char *path; -}; - -static int cgroup_rmdir_wrapper(void *data) -{ - int ret; - struct generic_userns_exec_data *arg = data; - uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; - gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid; - - ret = setresgid(nsgid, nsgid, nsgid); - if (ret < 0) { - SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid, - (int)nsgid, (int)nsgid); - return -1; - } - - ret = setresuid(nsuid, nsuid, nsuid); - if (ret < 0) { - SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid, - (int)nsuid, (int)nsuid); - return -1; - } - - ret = setgroups(0, NULL); - if (ret < 0 && errno != EPERM) { - SYSERROR("Failed to setgroups(0, NULL)"); - return -1; - } - - return cgroup_rmdir(arg->d->container_cgroup); -} - -static void cgfsng_destroy(void *hdata, struct lxc_conf *conf) -{ - int ret; - struct cgfsng_handler_data *d = hdata; - struct generic_userns_exec_data wrap; - - if (!d) - return; + struct generic_userns_exec_data wrap; wrap.origuid = 0; - wrap.d = hdata; - wrap.conf = conf; + wrap.container_cgroup = ops->container_cgroup; + wrap.hierarchies = ops->hierarchies; + wrap.conf = handler->conf; - if (conf && !lxc_list_empty(&conf->id_map)) - ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap, + if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) + ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap, "cgroup_rmdir_wrapper"); else - ret = cgroup_rmdir(d->container_cgroup); + ret = cgroup_rmdir(ops->hierarchies, ops->container_cgroup); if (ret < 0) { WARN("Failed to destroy cgroups"); return; } - - free_handler_data(d); -} - -struct cgroup_ops *cgfsng_ops_init(void) -{ - if (getenv("LXC_DEBUG_CGFSNG")) - lxc_cgfsng_debug = true; - - if (!cg_init()) - return NULL; - - return &cgfsng_ops; } static bool 
cg_unified_create_cgroup(struct hierarchy *h, char *cgname) @@ -1769,26 +1276,28 @@ static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname) /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; * next cgroup_pattern-1, -2, ..., -999. */ -static inline bool cgfsng_create(void *hdata) +static inline bool cgfsng_create(struct cgroup_ops *ops, + struct lxc_handler *handler) { int i; size_t len; char *container_cgroup, *offset, *tmp; int idx = 0; - struct cgfsng_handler_data *d = hdata; + struct lxc_conf *conf = handler->conf; + const char *join_args[] = {conf->cgroup_meta.dir, handler->name, NULL}; - if (!d) + if (ops->container_cgroup) { + WARN("cgfsng_create called a second time: %s", ops->container_cgroup); return false; + } - if (d->container_cgroup) { - WARN("cgfsng_create called a second time"); + if (!conf) return false; - } - if (d->cgroup_meta.dir) - tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false); + if (conf->cgroup_meta.dir) + tmp = lxc_string_join("/", join_args, false); else - tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern); + tmp = lxc_string_replace("%n", handler->name, ops->cgroup_pattern); if (!tmp) { ERROR("Failed expanding cgroup name pattern"); return false; @@ -1820,20 +1329,20 @@ again: } } - for (i = 0; hierarchies[i]; i++) { - if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) { + for (i = 0; ops->hierarchies[i]; i++) { + if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) { int j; - ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath); - free(hierarchies[i]->fullcgpath); - hierarchies[i]->fullcgpath = NULL; + ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath); + free(ops->hierarchies[i]->fullcgpath); + ops->hierarchies[i]->fullcgpath = NULL; for (j = 0; j < i; j++) - remove_path_for_hierarchy(hierarchies[j], container_cgroup); + remove_path_for_hierarchy(ops->hierarchies[j], 
container_cgroup); idx++; goto again; } } - d->container_cgroup = container_cgroup; + ops->container_cgroup = container_cgroup; return true; @@ -1843,7 +1352,7 @@ out_free: return false; } -static bool cgfsng_enter(void *hdata, pid_t pid) +static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid) { int i, len; char pidstr[25]; @@ -1852,11 +1361,11 @@ static bool cgfsng_enter(void *hdata, pid_t pid) if (len < 0 || len >= 25) return false; - for (i = 0; hierarchies[i]; i++) { + for (i = 0; ops->hierarchies[i]; i++) { int ret; char *fullpath; - fullpath = must_make_path(hierarchies[i]->fullcgpath, + fullpath = must_make_path(ops->hierarchies[i]->fullcgpath, "cgroup.procs", NULL); ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); if (ret != 0) { @@ -1929,9 +1438,9 @@ static int chown_cgroup_wrapper(void *data) destuid = get_ns_uid(arg->origuid); - for (i = 0; hierarchies[i]; i++) { + for (i = 0; arg->hierarchies[i]; i++) { char *fullpath; - char *path = hierarchies[i]->fullcgpath; + char *path = arg->hierarchies[i]->fullcgpath; ret = chowmod(path, destuid, nsgid, 0775); if (ret < 0) @@ -1944,17 +1453,17 @@ static int chown_cgroup_wrapper(void *data) * files (which systemd in wily insists on doing). 
*/ - if (hierarchies[i]->version == CGROUP_SUPER_MAGIC) { + if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) { fullpath = must_make_path(path, "tasks", NULL); (void)chowmod(fullpath, destuid, nsgid, 0664); free(fullpath); } fullpath = must_make_path(path, "cgroup.procs", NULL); - (void)chowmod(fullpath, destuid, 0, 0664); + (void)chowmod(fullpath, destuid, nsgid, 0664); free(fullpath); - if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC) + if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) continue; fullpath = must_make_path(path, "cgroup.subtree_control", NULL); @@ -1969,20 +1478,16 @@ static int chown_cgroup_wrapper(void *data) return 0; } -static bool cgfsng_chown(void *hdata, struct lxc_conf *conf) +static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf) { - struct cgfsng_handler_data *d = hdata; struct generic_userns_exec_data wrap; - if (!d) - return false; - if (lxc_list_empty(&conf->id_map)) return true; wrap.origuid = geteuid(); wrap.path = NULL; - wrap.d = d; + wrap.hierarchies = ops->hierarchies; wrap.conf = conf; if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, @@ -2122,13 +1627,12 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, return __cg_mount_direct(type, h, controllerpath); } -static bool cgfsng_mount(void *hdata, const char *root, int type) +static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler, + const char *root, int type) { int i, ret; char *tmpfspath = NULL; bool has_cgns = false, retval = false, wants_force_mount = false; - struct lxc_handler *handler = hdata; - struct cgfsng_handler_data *d = handler->cgroup_data; if ((type & LXC_AUTO_CGROUP_MASK) == 0) return true; @@ -2162,9 +1666,9 @@ static bool cgfsng_mount(void *hdata, const char *root, int type) if (ret < 0) goto on_error; - for (i = 0; hierarchies[i]; i++) { + for (i = 0; ops->hierarchies[i]; i++) { char *controllerpath, *path2; - struct hierarchy *h = hierarchies[i]; + struct hierarchy *h = 
ops->hierarchies[i]; char *controller = strrchr(h->mountpoint, '/'); if (!controller) @@ -2209,7 +1713,7 @@ static bool cgfsng_mount(void *hdata, const char *root, int type) } path2 = must_make_path(controllerpath, h->base_cgroup, - d->container_cgroup, NULL); + ops->container_cgroup, NULL); ret = mkdir_p(path2, 0755); if (ret < 0) { free(controllerpath); @@ -2218,7 +1722,7 @@ static bool cgfsng_mount(void *hdata, const char *root, int type) } ret = cg_legacy_mount_controllers(type, h, controllerpath, - path2, d->container_cgroup); + path2, ops->container_cgroup); free(controllerpath); free(path2); if (ret < 0) @@ -2276,35 +1780,34 @@ static int recursive_count_nrtasks(char *dirname) return count; } -static int cgfsng_nrtasks(void *hdata) +static int cgfsng_nrtasks(struct cgroup_ops *ops) { int count; char *path; - struct cgfsng_handler_data *d = hdata; - if (!d || !d->container_cgroup || !hierarchies) + if (!ops->container_cgroup || !ops->hierarchies) return -1; - path = must_make_path(hierarchies[0]->fullcgpath, NULL); + path = must_make_path(ops->hierarchies[0]->fullcgpath, NULL); count = recursive_count_nrtasks(path); free(path); return count; } /* Only root needs to escape to the cgroup of its init. 
*/ -static bool cgfsng_escape() +static bool cgfsng_escape(const struct cgroup_ops *ops) { int i; if (geteuid()) return true; - for (i = 0; hierarchies[i]; i++) { + for (i = 0; ops->hierarchies[i]; i++) { int ret; char *fullpath; - fullpath = must_make_path(hierarchies[i]->mountpoint, - hierarchies[i]->base_cgroup, + fullpath = must_make_path(ops->hierarchies[i]->mountpoint, + ops->hierarchies[i]->base_cgroup, "cgroup.procs", NULL); ret = lxc_write_to_file(fullpath, "0", 2, false, 0666); if (ret != 0) { @@ -2318,26 +1821,26 @@ static bool cgfsng_escape() return true; } -static int cgfsng_num_hierarchies(void) +static int cgfsng_num_hierarchies(struct cgroup_ops *ops) { int i; - for (i = 0; hierarchies[i]; i++) + for (i = 0; ops->hierarchies[i]; i++) ; return i; } -static bool cgfsng_get_hierarchies(int n, char ***out) +static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out) { int i; /* sanity check n */ for (i = 0; i < n; i++) - if (!hierarchies[i]) + if (!ops->hierarchies[i]) return false; - *out = hierarchies[i]->controllers; + *out = ops->hierarchies[i]->controllers; return true; } @@ -2348,13 +1851,13 @@ static bool cgfsng_get_hierarchies(int n, char ***out) /* TODO: If the unified cgroup hierarchy grows a freezer controller this needs * to be adapted. */ -static bool cgfsng_unfreeze(void *hdata) +static bool cgfsng_unfreeze(struct cgroup_ops *ops) { int ret; char *fullpath; struct hierarchy *h; - h = get_hierarchy("freezer"); + h = get_hierarchy(ops, "freezer"); if (!h) return false; @@ -2367,14 +1870,15 @@ static bool cgfsng_unfreeze(void *hdata) return true; } -static const char *cgfsng_get_cgroup(void *hdata, const char *controller) +static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, + const char *controller) { struct hierarchy *h; - h = get_hierarchy(controller); + h = get_hierarchy(ops, controller); if (!h) { - SYSERROR("Failed to find hierarchy for controller \"%s\"", - controller ? 
controller : "(null)"); + WARN("Failed to find hierarchy for controller \"%s\"", + controller ? controller : "(null)"); return NULL; } @@ -2465,7 +1969,8 @@ on_error: return fret; } -static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid) +static bool cgfsng_attach(struct cgroup_ops *ops, const char *name, + const char *lxcpath, pid_t pid) { int i, len, ret; char pidstr[25]; @@ -2474,10 +1979,10 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid) if (len < 0 || len >= 25) return false; - for (i = 0; hierarchies[i]; i++) { + for (i = 0; ops->hierarchies[i]; i++) { char *path; char *fullpath = NULL; - struct hierarchy *h = hierarchies[i]; + struct hierarchy *h = ops->hierarchies[i]; if (h->version == CGROUP2_SUPER_MAGIC) { ret = __cg_unified_attach(h, name, lxcpath, pidstr, len, @@ -2511,8 +2016,8 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid) * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. */ -static int cgfsng_get(const char *filename, char *value, size_t len, - const char *name, const char *lxcpath) +static int cgfsng_get(struct cgroup_ops *ops, const char *filename, char *value, + size_t len, const char *name, const char *lxcpath) { int ret = -1; size_t controller_len; @@ -2531,7 +2036,7 @@ static int cgfsng_get(const char *filename, char *value, size_t len, if (!path) return -1; - h = get_hierarchy(controller); + h = get_hierarchy(ops, controller); if (h) { char *fullpath; @@ -2548,8 +2053,8 @@ static int cgfsng_get(const char *filename, char *value, size_t len, * don't have a cgroup_data set up, so we ask the running container through the * commands API for the cgroup path. 
*/ -static int cgfsng_set(const char *filename, const char *value, const char *name, - const char *lxcpath) +static int cgfsng_set(struct cgroup_ops *ops, const char *filename, + const char *value, const char *name, const char *lxcpath) { int ret = -1; size_t controller_len; @@ -2568,7 +2073,7 @@ static int cgfsng_set(const char *filename, const char *value, const char *name, if (!path) return -1; - h = get_hierarchy(controller); + h = get_hierarchy(ops, controller); if (h) { char *fullpath; @@ -2662,8 +2167,8 @@ out: /* Called from setup_limits - here we have the container's cgroup_data because * we created the cgroups. */ -static int cg_legacy_set_data(const char *filename, const char *value, - struct cgfsng_handler_data *d) +static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, + const char *value) { size_t len; char *fullpath, *p; @@ -2687,7 +2192,7 @@ static int cg_legacy_set_data(const char *filename, const char *value, value = converted_value; } - h = get_hierarchy(controller); + h = get_hierarchy(ops, controller); if (!h) { ERROR("Failed to setup limits for the \"%s\" controller. 
" "The controller seems to be unused by \"cgfsng\" cgroup " @@ -2703,13 +2208,12 @@ static int cg_legacy_set_data(const char *filename, const char *value, return ret; } -static bool __cg_legacy_setup_limits(void *hdata, +static bool __cg_legacy_setup_limits(struct cgroup_ops *ops, struct lxc_list *cgroup_settings, bool do_devices) { struct lxc_list *iterator, *next, *sorted_cgroup_settings; struct lxc_cgroup *cg; - struct cgfsng_handler_data *d = hdata; bool ret = false; if (lxc_list_empty(cgroup_settings)) @@ -2723,7 +2227,7 @@ static bool __cg_legacy_setup_limits(void *hdata, cg = iterator->elem; if (do_devices == !strncmp("devices", cg->subsystem, 7)) { - if (cg_legacy_set_data(cg->subsystem, cg->value, d)) { + if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) { if (do_devices && (errno == EACCES || errno == EPERM)) { WARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value); @@ -2749,11 +2253,11 @@ out: return ret; } -static bool __cg_unified_setup_limits(void *hdata, +static bool __cg_unified_setup_limits(struct cgroup_ops *ops, struct lxc_list *cgroup_settings) { struct lxc_list *iterator; - struct hierarchy *h = unified; + struct hierarchy *h = ops->unified; if (lxc_list_empty(cgroup_settings)) return true; @@ -2781,35 +2285,328 @@ static bool __cg_unified_setup_limits(void *hdata, return true; } -static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf, +static bool cgfsng_setup_limits(struct cgroup_ops *ops, struct lxc_conf *conf, bool do_devices) { bool bret; - bret = __cg_legacy_setup_limits(hdata, &conf->cgroup, do_devices); + bret = __cg_legacy_setup_limits(ops, &conf->cgroup, do_devices); if (!bret) return false; - return __cg_unified_setup_limits(hdata, &conf->cgroup2); -} - -static struct cgroup_ops cgfsng_ops = { - .init = cgfsng_init, - .destroy = cgfsng_destroy, - .create = cgfsng_create, - .enter = cgfsng_enter, - .escape = cgfsng_escape, - .num_hierarchies = cgfsng_num_hierarchies, - .get_hierarchies = 
cgfsng_get_hierarchies, - .get_cgroup = cgfsng_get_cgroup, - .get = cgfsng_get, - .set = cgfsng_set, - .unfreeze = cgfsng_unfreeze, - .setup_limits = cgfsng_setup_limits, - .driver = "cgfsng", - .version = "1.0.0", - .attach = cgfsng_attach, - .chown = cgfsng_chown, - .mount_cgroup = cgfsng_mount, - .nrtasks = cgfsng_nrtasks, -}; + return __cg_unified_setup_limits(ops, &conf->cgroup2); +} + +/* At startup, parse_hierarchies finds all the info we need about cgroup + * mountpoints and current cgroups, and stores it in @ops. + */ +static bool cg_hybrid_init(struct cgroup_ops *ops) +{ + int ret; + char *basecginfo; + bool will_escape; + FILE *f; + size_t len = 0; + char *line = NULL; + char **klist = NULL, **nlist = NULL; + + /* Root spawned containers escape the current cgroup, so use init's + * cgroups as our base in that case. + */ + will_escape = (geteuid() == 0); + if (will_escape) + basecginfo = read_file("/proc/1/cgroup"); + else + basecginfo = read_file("/proc/self/cgroup"); + if (!basecginfo) + return false; + + ret = get_existing_subsystems(&klist, &nlist); + if (ret < 0) { + ERROR("Failed to retrieve available legacy cgroup controllers"); + free(basecginfo); + return false; + } + + f = fopen("/proc/self/mountinfo", "r"); + if (!f) { + ERROR("Failed to open \"/proc/self/mountinfo\""); + free(basecginfo); + return false; + } + + lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist); + + while (getline(&line, &len, f) != -1) { + int type; + bool writeable; + struct hierarchy *new; + char *base_cgroup = NULL, *mountpoint = NULL; + char **controller_list = NULL; + + type = get_cgroup_version(line); + if (type == 0) + continue; + + if (type == CGROUP2_SUPER_MAGIC && ops->unified) + continue; + + if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) { + if (type == CGROUP2_SUPER_MAGIC) + ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED; + else if (type == CGROUP_SUPER_MAGIC) + ops->cgroup_layout = CGROUP_LAYOUT_LEGACY; + } else if (ops->cgroup_layout == 
CGROUP_LAYOUT_UNIFIED) { + if (type == CGROUP_SUPER_MAGIC) + ops->cgroup_layout = CGROUP_LAYOUT_HYBRID; + } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) { + if (type == CGROUP2_SUPER_MAGIC) + ops->cgroup_layout = CGROUP_LAYOUT_HYBRID; + } + + controller_list = cg_hybrid_get_controllers(klist, nlist, line, type); + if (!controller_list && type == CGROUP_SUPER_MAGIC) + continue; + + if (type == CGROUP_SUPER_MAGIC) + if (controller_list_is_dup(ops->hierarchies, controller_list)) + goto next; + + mountpoint = cg_hybrid_get_mountpoint(line); + if (!mountpoint) { + ERROR("Failed parsing mountpoint from \"%s\"", line); + goto next; + } + + if (type == CGROUP_SUPER_MAGIC) + base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC); + else + base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); + if (!base_cgroup) { + ERROR("Failed to find current cgroup"); + goto next; + } + + trim(base_cgroup); + prune_init_scope(base_cgroup); + if (type == CGROUP2_SUPER_MAGIC) + writeable = test_writeable_v2(mountpoint, base_cgroup); + else + writeable = test_writeable_v1(mountpoint, base_cgroup); + if (!writeable) + goto next; + + if (type == CGROUP2_SUPER_MAGIC) { + char *cgv2_ctrl_path; + + cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup, + "cgroup.controllers", + NULL); + + controller_list = cg_unified_get_controllers(cgv2_ctrl_path); + free(cgv2_ctrl_path); + if (!controller_list) { + controller_list = cg_unified_make_empty_controller(); + TRACE("No controllers are enabled for " + "delegation in the unified hierarchy"); + } + } + + new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type); + if (type == CGROUP2_SUPER_MAGIC && !ops->unified) + ops->unified = new; + + continue; + + next: + free_string_list(controller_list); + free(mountpoint); + free(base_cgroup); + } + + free_string_list(klist); + free_string_list(nlist); + + free(basecginfo); + + fclose(f); + free(line); + 
+ TRACE("Writable cgroup hierarchies:"); + lxc_cgfsng_print_hierarchies(ops); + + /* verify that all controllers in cgroup.use and all crucial + * controllers are accounted for + */ + if (!all_controllers_found(ops)) + return false; + + return true; +} + +static int cg_is_pure_unified(void) +{ + + int ret; + struct statfs fs; + + ret = statfs("/sys/fs/cgroup", &fs); + if (ret < 0) + return -ENOMEDIUM; + + if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC)) + return CGROUP2_SUPER_MAGIC; + + return 0; +} + +/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */ +static char *cg_unified_get_current_cgroup(void) +{ + char *basecginfo, *base_cgroup; + bool will_escape; + char *copy = NULL; + + will_escape = (geteuid() == 0); + if (will_escape) + basecginfo = read_file("/proc/1/cgroup"); + else + basecginfo = read_file("/proc/self/cgroup"); + if (!basecginfo) + return NULL; + + base_cgroup = strstr(basecginfo, "0::/"); + if (!base_cgroup) + goto cleanup_on_err; + + base_cgroup = base_cgroup + 3; + copy = copy_to_eol(base_cgroup); + if (!copy) + goto cleanup_on_err; + +cleanup_on_err: + free(basecginfo); + if (copy) + trim(copy); + + return copy; +} + +static int cg_unified_init(struct cgroup_ops *ops) +{ + int ret; + char *mountpoint, *subtree_path; + char **delegatable; + char *base_cgroup = NULL; + + ret = cg_is_pure_unified(); + if (ret == -ENOMEDIUM) + return -ENOMEDIUM; + + if (ret != CGROUP2_SUPER_MAGIC) + return 0; + + base_cgroup = cg_unified_get_current_cgroup(); + if (!base_cgroup) + return -EINVAL; + prune_init_scope(base_cgroup); + + /* We assume that we have already been given controllers to delegate + * further down the hierarchy. If not it is up to the user to delegate + * them to us. 
+ */ + mountpoint = must_copy_string("/sys/fs/cgroup"); + subtree_path = must_make_path(mountpoint, base_cgroup, + "cgroup.subtree_control", NULL); + delegatable = cg_unified_get_controllers(subtree_path); + free(subtree_path); + if (!delegatable) + delegatable = cg_unified_make_empty_controller(); + if (!delegatable[0]) + TRACE("No controllers are enabled for delegation"); + + /* TODO: If the user requested specific controllers via lxc.cgroup.use + * we should verify here. The reason I'm not doing it right is that I'm + * not convinced that lxc.cgroup.use will be the future since it is a + * global property. I much rather have an option that lets you request + * controllers per container. + */ + + add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC); + + ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED; + return CGROUP2_SUPER_MAGIC; +} + +static bool cg_init(struct cgroup_ops *ops) +{ + int ret; + const char *tmp; + + tmp = lxc_global_config_value("lxc.cgroup.use"); + if (tmp) + ops->cgroup_use = must_copy_string(tmp); + + ret = cg_unified_init(ops); + if (ret < 0) + return false; + + if (ret == CGROUP2_SUPER_MAGIC) + return true; + + return cg_hybrid_init(ops); +} + +static bool cgfsng_data_init(struct cgroup_ops *ops) +{ + const char *cgroup_pattern; + + /* copy system-wide cgroup information */ + cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); + if (!cgroup_pattern) { + /* lxc.cgroup.pattern is only NULL on error. 
*/ + ERROR("Failed to retrieve cgroup pattern"); + return false; + } + ops->cgroup_pattern = must_copy_string(cgroup_pattern); + + return true; +} + +struct cgroup_ops *cgfsng_ops_init(void) +{ + struct cgroup_ops *cgfsng_ops; + + cgfsng_ops = malloc(sizeof(struct cgroup_ops)); + if (!cgfsng_ops) + return NULL; + + memset(cgfsng_ops, 0, sizeof(struct cgroup_ops)); + cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN; + + if (!cg_init(cgfsng_ops)) { + free(cgfsng_ops); + return NULL; + } + + cgfsng_ops->data_init = cgfsng_data_init; + cgfsng_ops->destroy = cgfsng_destroy; + cgfsng_ops->create = cgfsng_create; + cgfsng_ops->enter = cgfsng_enter; + cgfsng_ops->escape = cgfsng_escape; + cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies; + cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies; + cgfsng_ops->get_cgroup = cgfsng_get_cgroup; + cgfsng_ops->get = cgfsng_get; + cgfsng_ops->set = cgfsng_set; + cgfsng_ops->unfreeze = cgfsng_unfreeze; + cgfsng_ops->setup_limits = cgfsng_setup_limits; + cgfsng_ops->driver = "cgfsng"; + cgfsng_ops->version = "1.0.0"; + cgfsng_ops->attach = cgfsng_attach; + cgfsng_ops->chown = cgfsng_chown; + cgfsng_ops->mount = cgfsng_mount; + cgfsng_ops->nrtasks = cgfsng_nrtasks; + + return cgfsng_ops; +} diff --git a/src/lxc/cgroups/cgroup.c b/src/lxc/cgroups/cgroup.c index 9e7b26e00..cd67c3c5d 100644 --- a/src/lxc/cgroups/cgroup.c +++ b/src/lxc/cgroups/cgroup.c @@ -32,180 +32,61 @@ lxc_log_define(lxc_cgroup, lxc); -static struct cgroup_ops *ops = NULL; - extern struct cgroup_ops *cgfsng_ops_init(void); -__attribute__((constructor)) void cgroup_ops_init(void) +struct cgroup_ops *cgroup_init(struct lxc_handler *handler) { - if (ops) { - INFO("Running with %s in version %s", ops->driver, ops->version); - return; - } - - DEBUG("cgroup_init"); - ops = cgfsng_ops_init(); - if (ops) - INFO("Initialized cgroup driver %s", ops->driver); -} + struct cgroup_ops *cgroup_ops; -bool cgroup_init(struct lxc_handler *handler) -{ - if (handler->cgroup_data) { - 
ERROR("cgroup_init called on already initialized handler"); - return true; + cgroup_ops = cgfsng_ops_init(); + if (!cgroup_ops) { + ERROR("Failed to initialize cgroup driver"); + return NULL; } - if (ops) { - INFO("cgroup driver %s initing for %s", ops->driver, handler->name); - handler->cgroup_data = ops->init(handler); - } + if (!cgroup_ops->data_init(cgroup_ops)) + return NULL; - return handler->cgroup_data != NULL; -} + TRACE("Initialized cgroup driver %s", cgroup_ops->driver); -void cgroup_destroy(struct lxc_handler *handler) -{ - if (ops) { - ops->destroy(handler->cgroup_data, handler->conf); - handler->cgroup_data = NULL; - } -} + if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) + TRACE("Running with legacy cgroup layout"); + else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_HYBRID) + TRACE("Running with hybrid cgroup layout"); + else if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) + TRACE("Running with unified cgroup layout"); + else + WARN("Running with unknown cgroup layout"); -/* Create the container cgroups for all requested controllers. */ -bool cgroup_create(struct lxc_handler *handler) -{ - if (ops) - return ops->create(handler->cgroup_data); - - return false; + return cgroup_ops; } -/* Enter the container init into its new cgroups for all requested controllers. 
*/ -bool cgroup_enter(struct lxc_handler *handler) +void cgroup_exit(struct cgroup_ops *ops) { - if (ops) - return ops->enter(handler->cgroup_data, handler->pid); + struct hierarchy **it; - return false; -} - -bool cgroup_create_legacy(struct lxc_handler *handler) -{ - if (ops && ops->create_legacy) - return ops->create_legacy(handler->cgroup_data, handler->pid); - - return true; -} - -const char *cgroup_get_cgroup(struct lxc_handler *handler, - const char *subsystem) -{ - if (ops) - return ops->get_cgroup(handler->cgroup_data, subsystem); - - return NULL; -} - -bool cgroup_escape(struct lxc_handler *handler) -{ - if (ops) - return ops->escape(handler->cgroup_data); - - return false; -} - -int cgroup_num_hierarchies(void) -{ - if (!ops) - return -1; - - return ops->num_hierarchies(); -} - -bool cgroup_get_hierarchies(int n, char ***out) -{ if (!ops) - return false; - - return ops->get_hierarchies(n, out); -} - -bool cgroup_unfreeze(struct lxc_handler *handler) -{ - if (ops) - return ops->unfreeze(handler->cgroup_data); - - return false; -} - -bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices) -{ - if (ops) - return ops->setup_limits(handler->cgroup_data, - handler->conf, with_devices); - - return false; -} + return; -bool cgroup_chown(struct lxc_handler *handler) -{ - if (ops && ops->chown) - return ops->chown(handler->cgroup_data, handler->conf); + free(ops->cgroup_use); + free(ops->cgroup_pattern); + free(ops->container_cgroup); - return true; -} + for (it = ops->hierarchies; it && *it; it++) { + char **ctrlr; -bool cgroup_mount(const char *root, struct lxc_handler *handler, int type) -{ - if (ops) - return ops->mount_cgroup(handler, root, type); - - return false; -} + for (ctrlr = (*it)->controllers; ctrlr && *ctrlr; ctrlr++) + free(*ctrlr); + free((*it)->controllers); -int cgroup_nrtasks(struct lxc_handler *handler) -{ - if (ops) { - if (ops->nrtasks) - return ops->nrtasks(handler->cgroup_data); - else - WARN("cgroup driver \"%s\" doesn't 
implement nrtasks", ops->driver); + free((*it)->mountpoint); + free((*it)->base_cgroup); + free((*it)->fullcgpath); + free(*it); } + free(ops->hierarchies); - return -1; -} - -bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid) -{ - if (ops) - return ops->attach(name, lxcpath, pid); - - return false; -} - -int lxc_cgroup_set(const char *filename, const char *value, const char *name, - const char *lxcpath) -{ - if (ops) - return ops->set(filename, value, name, lxcpath); - - return -1; -} - -int lxc_cgroup_get(const char *filename, char *value, size_t len, - const char *name, const char *lxcpath) -{ - if (ops) - return ops->get(filename, value, len, name, lxcpath); - - return -1; -} - -void cgroup_disconnect(void) -{ - if (ops && ops->disconnect) - ops->disconnect(); + return; } #define INIT_SCOPE "/init.scope" diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h index 0f04e8b7a..ae910be8b 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h @@ -39,48 +39,114 @@ typedef enum { CGROUP_LAYOUT_UNIFIED = 2, } cgroup_layout_t; +/* A descriptor for a mounted hierarchy + * + * @controllers + * - legacy hierarchy + * Either NULL, or a null-terminated list of all the co-mounted controllers. + * - unified hierarchy + * Either NULL, or a null-terminated list of all enabled controllers. + * + * @mountpoint + * - The mountpoint we will use. + * - legacy hierarchy + * It will be either /sys/fs/cgroup/controller or + * /sys/fs/cgroup/controllerlist. + * - unified hierarchy + * It will either be /sys/fs/cgroup or /sys/fs/cgroup/<controller type> + * depending on whether this is a hybrid cgroup layout (mix of legacy and + * unified hierarchies) or a pure unified cgroup layout. + * + * @base_cgroup + * - The cgroup under which the container cgroup path + * is created. This will be either the caller's cgroup (if not root), or + * init's cgroup (if root). + * + * @fullcgpath + * - The full path to the containers cgroup. 
+ * + * @version + * - legacy hierarchy + * If the hierarchy is a legacy hierarchy this will be set to + * CGROUP_SUPER_MAGIC. + * - unified hierarchy + * If the hierarchy is a unified hierarchy this will be set to + * CGROUP2_SUPER_MAGIC. + */ +struct hierarchy { + char **controllers; + char *mountpoint; + char *base_cgroup; + char *fullcgpath; + int version; +}; + struct cgroup_ops { + /* string constant */ const char *driver; + + /* string constant */ const char *version; - void *(*init)(struct lxc_handler *handler); - void (*destroy)(void *hdata, struct lxc_conf *conf); - bool (*create)(void *hdata); - bool (*enter)(void *hdata, pid_t pid); - bool (*create_legacy)(void *hdata, pid_t pid); - const char *(*get_cgroup)(void *hdata, const char *subsystem); - bool (*escape)(); - int (*num_hierarchies)(); - bool (*get_hierarchies)(int n, char ***out); - int (*set)(const char *filename, const char *value, const char *name, const char *lxcpath); - int (*get)(const char *filename, char *value, size_t len, const char *name, const char *lxcpath); - bool (*unfreeze)(void *hdata); - bool (*setup_limits)(void *hdata, struct lxc_conf *conf, bool with_devices); - bool (*chown)(void *hdata, struct lxc_conf *conf); - bool (*attach)(const char *name, const char *lxcpath, pid_t pid); - bool (*mount_cgroup)(void *hdata, const char *root, int type); - int (*nrtasks)(void *hdata); - void (*disconnect)(void); + /* What controllers is the container supposed to use. */ + char *cgroup_use; + char *cgroup_pattern; + char *container_cgroup; + + /* @hierarchies + * - A NULL-terminated array of struct hierarchy, one per legacy + * hierarchy. No duplicates. First sufficient, writeable mounted + * hierarchy wins. + */ + struct hierarchy **hierarchies; + struct hierarchy *unified; + + /* + * @cgroup_layout + * - What cgroup layout the container is running with. + * - CGROUP_LAYOUT_UNKNOWN + * The cgroup layout could not be determined. This should be treated + * as an error condition. 
+ * - CGROUP_LAYOUT_LEGACY + * The container is running with all controllers mounted into legacy + * cgroup hierarchies. + * - CGROUP_LAYOUT_HYBRID + * The container is running with at least one controller mounted + * into a legacy cgroup hierarchy and a mountpoint for the unified + * hierarchy. The unified hierarchy can be empty (no controllers + * enabled) or non-empty (controllers enabled). + * - CGROUP_LAYOUT_UNIFIED + * The container is running on a pure unified cgroup hierarchy. The + * unified hierarchy can be empty (no controllers enabled) or + * non-empty (controllers enabled). + */ + cgroup_layout_t cgroup_layout; + + bool (*data_init)(struct cgroup_ops *ops); + void (*destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); + bool (*create)(struct cgroup_ops *ops, struct lxc_handler *handler); + bool (*enter)(struct cgroup_ops *ops, pid_t pid); + const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); + bool (*escape)(const struct cgroup_ops *ops); + int (*num_hierarchies)(struct cgroup_ops *ops); + bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); + int (*set)(struct cgroup_ops *ops, const char *filename, + const char *value, const char *name, const char *lxcpath); + int (*get)(struct cgroup_ops *ops, const char *filename, char *value, + size_t len, const char *name, const char *lxcpath); + bool (*unfreeze)(struct cgroup_ops *ops); + bool (*setup_limits)(struct cgroup_ops *ops, struct lxc_conf *conf, + bool with_devices); + bool (*chown)(struct cgroup_ops *ops, struct lxc_conf *conf); + bool (*attach)(struct cgroup_ops *ops, const char *name, + const char *lxcpath, pid_t pid); + bool (*mount)(struct cgroup_ops *ops, struct lxc_handler *handler, + const char *root, int type); + int (*nrtasks)(struct cgroup_ops *ops); }; -extern bool cgroup_attach(const char *name, const char *lxcpath, pid_t pid); -extern bool cgroup_mount(const char *root, struct lxc_handler *handler, int type); -extern void 
cgroup_destroy(struct lxc_handler *handler); -extern bool cgroup_init(struct lxc_handler *handler); -extern bool cgroup_create(struct lxc_handler *handler); -extern bool cgroup_setup_limits(struct lxc_handler *handler, bool with_devices); -extern bool cgroup_chown(struct lxc_handler *handler); -extern bool cgroup_enter(struct lxc_handler *handler); -extern void cgroup_cleanup(struct lxc_handler *handler); -extern bool cgroup_create_legacy(struct lxc_handler *handler); -extern int cgroup_nrtasks(struct lxc_handler *handler); -extern const char *cgroup_get_cgroup(struct lxc_handler *handler, - const char *subsystem); -extern bool cgroup_escape(); -extern int cgroup_num_hierarchies(); -extern bool cgroup_get_hierarchies(int i, char ***out); -extern bool cgroup_unfreeze(struct lxc_handler *handler); -extern void cgroup_disconnect(void); +extern struct cgroup_ops *cgroup_init(struct lxc_handler *handler); +extern void cgroup_exit(struct cgroup_ops *ops); extern void prune_init_scope(char *cg); extern bool is_crucial_cgroup_subsystem(const char *s); diff --git a/src/lxc/commands.c b/src/lxc/commands.c index 54e9f75c5..1ec6c7e70 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -473,11 +473,12 @@ static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, { const char *path; struct lxc_cmd_rsp rsp; + struct cgroup_ops *cgroup_ops = handler->cgroup_ops; if (req->datalen > 0) - path = cgroup_get_cgroup(handler, req->data); + path = cgroup_ops->get_cgroup(cgroup_ops, req->data); else - path = cgroup_get_cgroup(handler, NULL); + path = cgroup_ops->get_cgroup(cgroup_ops, NULL); if (!path) return -1; @@ -637,6 +638,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, { struct lxc_cmd_rsp rsp; int stopsignal = SIGKILL; + struct cgroup_ops *cgroup_ops = handler->cgroup_ops; if (handler->conf->stopsignal) stopsignal = handler->conf->stopsignal; @@ -648,7 +650,7 @@ static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, * 
lxc_unfreeze() would do another cmd (GET_CGROUP) which would * deadlock us. */ - if (cgroup_unfreeze(handler)) + if (cgroup_ops->unfreeze(cgroup_ops)) return 0; ERROR("Failed to unfreeze container \"%s\"", handler->name); diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 4f5b0751c..0d7152b71 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -757,7 +757,10 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha if (flags & LXC_AUTO_CGROUP_FORCE) cg_flags |= LXC_AUTO_CGROUP_FORCE; - if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) { + if (!handler->cgroup_ops->mount(handler->cgroup_ops, + handler, + conf->rootfs.path ? conf->rootfs.mount : "", + cg_flags)) { SYSERROR("Failed to mount \"/sys/fs/cgroup\""); return -1; } diff --git a/src/lxc/criu.c b/src/lxc/criu.c index 9c70c5921..de2bc6773 100644 --- a/src/lxc/criu.c +++ b/src/lxc/criu.c @@ -171,7 +171,7 @@ static int cmp_version(const char *v1, const char *v2) return -1; } -static void exec_criu(struct criu_opts *opts) +static void exec_criu(struct cgroup_ops *cgroup_ops, struct criu_opts *opts) { char **argv, log[PATH_MAX]; int static_args = 23, argc = 0, i, ret; @@ -190,7 +190,7 @@ static void exec_criu(struct criu_opts *opts) * /actual/ root cgroup so that lxcfs thinks criu has enough rights to * see all cgroups. 
*/ - if (!cgroup_escape()) { + if (!cgroup_ops->escape(cgroup_ops)) { ERROR("failed to escape cgroups"); return; } @@ -248,8 +248,8 @@ static void exec_criu(struct criu_opts *opts) return; } - if (cgroup_num_hierarchies() > 0) - static_args += 2 * cgroup_num_hierarchies(); + if (cgroup_ops->num_hierarchies(cgroup_ops) > 0) + static_args += 2 * cgroup_ops->num_hierarchies(cgroup_ops); if (opts->user->verbose) static_args++; @@ -306,11 +306,11 @@ static void exec_criu(struct criu_opts *opts) DECLARE_ARG("-o"); DECLARE_ARG(log); - for (i = 0; i < cgroup_num_hierarchies(); i++) { + for (i = 0; i < cgroup_ops->num_hierarchies(cgroup_ops); i++) { char **controllers = NULL, *fullname; char *path, *tmp; - if (!cgroup_get_hierarchies(i, &controllers)) { + if (!cgroup_ops->get_hierarchies(cgroup_ops, i, &controllers)) { ERROR("failed to get hierarchy %d", i); goto err; } @@ -328,7 +328,7 @@ static void exec_criu(struct criu_opts *opts) } else { const char *p; - p = cgroup_get_cgroup(opts->handler, controllers[0]); + p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); if (!p) { ERROR("failed to get cgroup path for %s", controllers[0]); goto err; @@ -937,6 +937,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ struct lxc_handler *handler; int status = 0; int pipes[2] = {-1, -1}; + struct cgroup_ops *cgroup_ops; /* Try to detach from the current controlling tty if it exists. 
* Othwerise, lxc_init (via lxc_console) will attach the container's @@ -958,12 +959,12 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ if (lxc_init(c->name, handler) < 0) goto out; - if (!cgroup_init(handler)) { - ERROR("failed initing cgroups"); + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) goto out_fini_handler; - } + handler->cgroup_ops = cgroup_ops; - if (!cgroup_create(handler)) { + if (!cgroup_ops->create(cgroup_ops, handler)) { ERROR("failed creating groups"); goto out_fini_handler; } @@ -1052,7 +1053,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ os.console_name = c->lxc_conf->console.name; /* exec_criu() returning is an error */ - exec_criu(&os); + exec_criu(cgroup_ops, &os); umount(rootfs->mount); rmdir(rootfs->mount); goto out_fini_handler; @@ -1253,16 +1254,21 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op if (pid == 0) { struct criu_opts os; struct lxc_handler h; + struct cgroup_ops *cgroup_ops; close(criuout[0]); lxc_zero_handler(&h); h.name = c->name; - if (!cgroup_init(&h)) { + + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) { ERROR("failed to cgroup_init()"); _exit(EXIT_FAILURE); + return -1; } + h.cgroup_ops = cgroup_ops; os.pipefd = criuout[1]; os.action = mode; @@ -1278,7 +1284,7 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op } /* exec_criu() returning is an error */ - exec_criu(&os); + exec_criu(cgroup_ops, &os); free(criu_version); _exit(EXIT_FAILURE); } else { diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c index 96d54be3d..c842b2932 100644 --- a/src/lxc/freezer.c +++ b/src/lxc/freezer.c @@ -31,6 +31,7 @@ #include #include +#include "cgroup.h" #include "commands.h" #include "error.h" #include "log.h" @@ -45,8 +46,14 @@ lxc_state_t freezer_state(const char *name, const char *lxcpath) { int ret; char v[100]; + struct cgroup_ops *cgroup_ops; - ret = lxc_cgroup_get("freezer.state", v, 
sizeof(v), name, lxcpath); + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + return -1; + + ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v), name, lxcpath); + cgroup_exit(cgroup_ops); if (ret < 0) return -1; @@ -60,19 +67,26 @@ static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath) { int ret; char v[100]; + struct cgroup_ops *cgroup_ops; const char *state = freeze ? "FROZEN" : "THAWED"; size_t state_len = 6; lxc_state_t new_state = freeze ? FROZEN : THAWED; - ret = lxc_cgroup_set("freezer.state", state, name, lxcpath); + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + return -1; + + ret = cgroup_ops->set(cgroup_ops, "freezer.state", state, name, lxcpath); if (ret < 0) { + cgroup_exit(cgroup_ops); ERROR("Failed to freeze %s", name); return -1; } for (;;) { - ret = lxc_cgroup_get("freezer.state", v, sizeof(v), name, lxcpath); + ret = cgroup_ops->get(cgroup_ops, "freezer.state", v, sizeof(v), name, lxcpath); if (ret < 0) { + cgroup_exit(cgroup_ops); ERROR("Failed to get freezer state of %s", name); return -1; } @@ -82,6 +96,7 @@ static int do_freeze_thaw(bool freeze, const char *name, const char *lxcpath) ret = strncmp(v, state, state_len); if (ret == 0) { + cgroup_exit(cgroup_ops); lxc_cmd_serve_state_clients(name, lxcpath, new_state); lxc_monitor_send_state(name, new_state, lxcpath); return 0; diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h index d3c08ddf2..8ae8c717d 100644 --- a/src/lxc/lxc.h +++ b/src/lxc/lxc.h @@ -97,29 +97,6 @@ extern int lxc_unfreeze(const char *name, const char *lxcpath); */ extern lxc_state_t lxc_state(const char *name, const char *lxcpath); -/* - * Set a specified value for a specified subsystem. The specified - * subsystem must be fully specified, eg. 
"cpu.shares" - * @filename : the cgroup attribute filename - * @value : the value to be set - * @name : the name of the container - * @lxcpath : lxc config path for container - * Returns 0 on success, < 0 otherwise - */ -extern int lxc_cgroup_set(const char *filename, const char *value, const char *name, const char *lxcpath); - -/* - * Get a specified value for a specified subsystem. The specified - * subsystem must be fully specified, eg. "cpu.shares" - * @filename : the cgroup attribute filename - * @value : the value to be set - * @len : the len of the value variable - * @name : the name of the container - * @lxcpath : lxc config path for container - * Returns the number of bytes read, < 0 on error - */ -extern int lxc_cgroup_get(const char *filename, char *value, size_t len, const char *name, const char *lxcpath); - /* * Create and return a new lxccontainer struct. */ diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index a9041c860..21f5ce6d8 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -3141,6 +3141,7 @@ WRAP_API_1(bool, lxcapi_set_config_path, const char *) static bool do_lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys, const char *value) { int ret; + struct cgroup_ops *cgroup_ops; if (!c) return false; @@ -3148,12 +3149,19 @@ static bool do_lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsy if (is_stopped(c)) return false; + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + return false; + if (container_disk_lock(c)) return false; - ret = lxc_cgroup_set(subsys, value, c->name, c->config_path); + ret = cgroup_ops->set(cgroup_ops, subsys, value, c->name, c->config_path); container_disk_unlock(c); + + cgroup_exit(cgroup_ops); + return ret == 0; } @@ -3162,6 +3170,7 @@ WRAP_API_2(bool, lxcapi_set_cgroup_item, const char *, const char *) static int do_lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, char *retv, int inlen) { int ret; + struct cgroup_ops *cgroup_ops; if (!c) 
return -1; @@ -3169,12 +3178,20 @@ static int do_lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys if (is_stopped(c)) return -1; + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + return -1; + if (container_disk_lock(c)) return -1; - ret = lxc_cgroup_get(subsys, retv, inlen, c->name, c->config_path); + ret = cgroup_ops->get(cgroup_ops, subsys, retv, inlen, c->name, + c->config_path); container_disk_unlock(c); + + cgroup_exit(cgroup_ops); + return ret; } diff --git a/src/lxc/start.c b/src/lxc/start.c index b8404f99e..f4f8e520c 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -849,6 +849,13 @@ int lxc_init(const char *name, struct lxc_handler *handler) } TRACE("Chowned console"); + handler->cgroup_ops = cgroup_init(handler); + if (!handler->cgroup_ops) { + ERROR("Failed to initialize cgroup driver"); + goto out_restore_sigmask; + } + TRACE("Initialized cgroup driver"); + INFO("Container \"%s\" is initialized", name); return 0; @@ -871,6 +878,7 @@ void lxc_fini(const char *name, struct lxc_handler *handler) struct lxc_list *cur, *next; char *namespaces[LXC_NS_MAX + 1]; size_t namespace_count = 0; + struct cgroup_ops *cgroup_ops = handler->cgroup_ops; /* The STOPPING state is there for future cleanup code which can take * awhile. @@ -935,7 +943,8 @@ void lxc_fini(const char *name, struct lxc_handler *handler) while (namespace_count--) free(namespaces[namespace_count]); - cgroup_destroy(handler); + cgroup_ops->destroy(cgroup_ops, handler); + cgroup_exit(cgroup_ops); if (handler->conf->reboot == 0) { /* For all new state clients simply close the command socket. 
@@ -1506,8 +1515,9 @@ static int lxc_spawn(struct lxc_handler *handler) struct lxc_list *id_map; const char *name = handler->name; const char *lxcpath = handler->lxcpath; - bool cgroups_connected = false, share_ns = false; + bool share_ns = false; struct lxc_conf *conf = handler->conf; + struct cgroup_ops *cgroup_ops = handler->cgroup_ops; id_map = &conf->id_map; wants_to_map_ids = !lxc_list_empty(id_map); @@ -1567,14 +1577,7 @@ static int lxc_spawn(struct lxc_handler *handler) } } - if (!cgroup_init(handler)) { - ERROR("Failed initializing cgroup support"); - goto out_delete_net; - } - - cgroups_connected = true; - - if (!cgroup_create(handler)) { + if (!cgroup_ops->create(cgroup_ops, handler)) { ERROR("Failed creating cgroups"); goto out_delete_net; } @@ -1663,15 +1666,15 @@ static int lxc_spawn(struct lxc_handler *handler) if (ret < 0) goto out_delete_net; - if (!cgroup_setup_limits(handler, false)) { + if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, false)) { ERROR("Failed to setup cgroup limits for container \"%s\"", name); goto out_delete_net; } - if (!cgroup_enter(handler)) + if (!cgroup_ops->enter(cgroup_ops, handler->pid)) goto out_delete_net; - if (!cgroup_chown(handler)) + if (!cgroup_ops->chown(cgroup_ops, handler->conf)) goto out_delete_net; /* Now we're ready to preserve the network namespace */ @@ -1736,15 +1739,12 @@ static int lxc_spawn(struct lxc_handler *handler) if (ret < 0) goto out_delete_net; - if (!cgroup_setup_limits(handler, true)) { + if (!cgroup_ops->setup_limits(cgroup_ops, handler->conf, true)) { ERROR("Failed to setup legacy device cgroup controller limits"); goto out_delete_net; } TRACE("Set up legacy device cgroup controller limits"); - cgroup_disconnect(); - cgroups_connected = false; - if (handler->ns_clone_flags & CLONE_NEWCGROUP) { /* Now we're ready to preserve the cgroup namespace */ ret = lxc_try_preserve_ns(handler->pid, "cgroup"); @@ -1821,9 +1821,6 @@ static int lxc_spawn(struct lxc_handler *handler) return 0; 
out_delete_net: - if (cgroups_connected) - cgroup_disconnect(); - if (handler->ns_clone_flags & CLONE_NEWNET) lxc_delete_network(handler); diff --git a/src/lxc/start.h b/src/lxc/start.h index 5455ca5f3..466dbf5f3 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -132,6 +132,8 @@ struct lxc_handler { * true. */ int exit_status; + + struct cgroup_ops *cgroup_ops; }; struct execute_args { diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c index 42c84bcdd..e794e565f 100644 --- a/src/tests/cgpath.c +++ b/src/tests/cgpath.c @@ -53,6 +53,7 @@ static int test_running_container(const char *lxcpath, char *cgrelpath; char relpath[PATH_MAX+1]; char value[NAME_MAX], value_save[NAME_MAX]; + struct cgroup_ops *cgroup_ops; sprintf(relpath, "%s/%s", group ? group : "lxc", name); @@ -75,36 +76,41 @@ static int test_running_container(const char *lxcpath, goto err3; } + cgroup_ops = cgroup_init(NULL); + if (!cgroup_ops) + goto err3; + /* test get/set value using memory.soft_limit_in_bytes file */ - ret = lxc_cgroup_get("memory.soft_limit_in_bytes", value, sizeof(value), - c->name, c->config_path); + ret = cgroup_ops->get(cgroup_ops, "memory.soft_limit_in_bytes", value, + sizeof(value), c->name, c->config_path); if (ret < 0) { - TSTERR("lxc_cgroup_get failed"); + TSTERR("cgroup_get failed"); goto err3; } strcpy(value_save, value); - ret = lxc_cgroup_set("memory.soft_limit_in_bytes", "512M", c->name, c->config_path); + ret = cgroup_ops->set(cgroup_ops, "memory.soft_limit_in_bytes", "512M", + c->name, c->config_path); if (ret < 0) { - TSTERR("lxc_cgroup_set failed %d %d", ret, errno); + TSTERR("cgroup_set failed %d %d", ret, errno); goto err3; } - ret = lxc_cgroup_get("memory.soft_limit_in_bytes", value, sizeof(value), - c->name, c->config_path); + ret = cgroup_ops->get(cgroup_ops, "memory.soft_limit_in_bytes", value, + sizeof(value), c->name, c->config_path); if (ret < 0) { - TSTERR("lxc_cgroup_get failed"); + TSTERR("cgroup_get failed"); goto err3; } if (strcmp(value, 
"536870912\n")) { - TSTERR("lxc_cgroup_set_bypath failed to set value >%s<", value); + TSTERR("cgroup_set_bypath failed to set value >%s<", value); goto err3; } /* restore original value */ - ret = lxc_cgroup_set("memory.soft_limit_in_bytes", value_save, - c->name, c->config_path); + ret = cgroup_ops->set(cgroup_ops, "memory.soft_limit_in_bytes", + value_save, c->name, c->config_path); if (ret < 0) { - TSTERR("lxc_cgroup_set failed"); + TSTERR("cgroup_set failed"); goto err3; }