From: Christian Brauner Date: Wed, 31 Jan 2018 15:25:11 +0000 (+0100) Subject: cgroups: get controllers on the unified hierarchy X-Git-Tag: lxc-2.0.10~360 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8f430ad408e776f8cf5349424dc675a4574d94e2;p=thirdparty%2Flxc.git cgroups: get controllers on the unified hierarchy Signed-off-by: Christian Brauner --- diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c index 007301163..c49025555 100644 --- a/src/lxc/cgroups/cgfsng.c +++ b/src/lxc/cgroups/cgfsng.c @@ -78,7 +78,7 @@ struct hierarchy { char *mountpoint; char *base_cgroup; char *fullcgpath; - bool is_cgroup_v2; + int version; }; /* @@ -92,16 +92,19 @@ struct hierarchy { */ struct cgfsng_handler_data { char *cgroup_pattern; - char *container_cgroup; // cgroup we created for the container - char *name; // container name + char *container_cgroup; /* cgroup we created for the container */ + char *name; /* container name */ + cgroup_layout_t cgroup_layout; }; /* * @hierarchies - a NULL-terminated array of struct hierarchy, one per - * hierarchy. No duplicates. First sufficient, writeable mounted - * hierarchy wins + * legacy hierarchy. No duplicates. First sufficient, writeable + * mounted hierarchy wins */ struct hierarchy **hierarchies; +struct hierarchy *unified; +cgroup_layout_t cgroup_layout; /* * @cgroup_use - a copy of the lxc.cgroup.use @@ -179,6 +182,7 @@ static bool string_in_list(char **list, const char *entry) if (!list) return false; + for (i = 0; list[i]; i++) if (strcmp(list[i], entry) == 0) return true; @@ -216,8 +220,6 @@ static void must_append_controller(char **klist, char **nlist, char ***clist, ch copy = must_copy_string(entry); else if (string_in_list(klist, entry)) copy = must_copy_string(entry); - else if (!strcmp(entry, "cgroup2")) - copy = must_copy_string(entry); else copy = must_prefix_named(entry); @@ -242,10 +244,21 @@ struct hierarchy *get_hierarchy(const char *c) if (!hierarchies) return NULL; + for (i = 0; hierarchies[i]; i++) { + if (!c) { + /* This is the empty unified hierarchy. */ + if (hierarchies[i]->controllers && + !hierarchies[i]->controllers[0]) + return hierarchies[i]; + + return NULL; + } + if (string_in_list(hierarchies[i]->controllers, c)) return hierarchies[i]; } + return NULL; } @@ -270,7 +283,7 @@ static void append_line(char **dest, size_t oldlen, char *new, size_t newlen) } /* Slurp in a whole file */ -static char *read_file(char *fnam) +static char *read_file(const char *fnam) { FILE *f; char *line = NULL, *buf = NULL; @@ -705,12 +718,14 @@ static bool controller_list_is_dup(struct hierarchy **hlist, char **clist) static bool controller_found(struct hierarchy **hlist, char *entry) { int i; + if (!hlist) return false; for (i = 0; hlist[i]; i++) if (string_in_list(hlist[i]->controllers, entry)) return true; + return false; } @@ -749,12 +764,13 @@ static bool all_controllers_found(void) * options. But we simply assume that the mountpoint must be * /sys/fs/cgroup/controller-list */ -static char **get_controllers(char **klist, char **nlist, char *line, int type) +static char **get_controllers_on_hybrid_layout(char **klist, char **nlist, + char *line, int type) { /* the fourth field is /sys/fs/cgroup/comma-delimited-controller-list */ int i; char *dup, *p2, *tok; - char *p = line, *saveptr = NULL; + char *p = line, *saveptr = NULL, *sep = ","; char **aret = NULL; for (i = 0; i < 4; i++) { @@ -770,6 +786,7 @@ static char **get_controllers(char **klist, char **nlist, char *line, int type) CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p); return NULL; } + p += 15; p2 = strchr(p, ' '); if (!p2) { @@ -778,30 +795,60 @@ static char **get_controllers(char **klist, char **nlist, char *line, int type) } *p2 = '\0'; - /* cgroup v2 does not have separate mountpoints for controllers */ - if (type == CGROUP_V2) { - must_append_controller(klist, nlist, &aret, "cgroup2"); - return aret; + if (type == CGROUP_SUPER_MAGIC) { + /* strdup() here for v1 hierarchies. Otherwise strtok_r() will + * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct". + */ + dup = strdup(p); + if (!dup) + return NULL; + + for (tok = strtok_r(dup, sep, &saveptr); tok; + tok = strtok_r(NULL, sep, &saveptr)) + must_append_controller(klist, nlist, &aret, tok); + + free(dup); } + *p2 = ' '; + return aret; +} - /* strdup() here for v1 hierarchies. Otherwise strtok_r() will destroy - * mountpoints such as "/sys/fs/cgroup/cpu,cpuacct". - */ - dup = strdup(p); - if (!dup) +static char **cg_unified_make_empty_controller(void) +{ + int newentry; + char **aret = NULL; + + newentry = append_null_to_list((void ***)&aret); + aret[newentry] = NULL; + return aret; +} + +static char **cg_unified_get_controllers(const char *file) +{ + char *buf, *tok; + char *saveptr = NULL, *sep = " \t\n"; + char **aret = NULL; + + buf = read_file(file); + if (!buf) return NULL; - for (tok = strtok_r(dup, ",", &saveptr); tok; - tok = strtok_r(NULL, ",", &saveptr)) { - must_append_controller(klist, nlist, &aret, tok); + for (tok = strtok_r(buf, sep, &saveptr); tok; + tok = strtok_r(NULL, sep, &saveptr)) { + int newentry; + char *copy; + + newentry = append_null_to_list((void ***)&aret); + copy = must_copy_string(tok); + aret[newentry] = copy; } - free(dup); + free(buf); return aret; } -/* Add a controller to our list of hierarchies */ -static void add_controller(char **clist, char *mountpoint, char *base_cgroup) +static struct hierarchy *add_hierarchy(char **clist, char *mountpoint, + char *base_cgroup, int type) { struct hierarchy *new; int newentry; @@ -811,26 +858,24 @@ static void add_controller(char **clist, char *mountpoint, char *base_cgroup) new->mountpoint = mountpoint; new->base_cgroup = base_cgroup; new->fullcgpath = NULL; - - /* record if this is the cgroup v2 hierarchy */ - if (clist && !strcmp(*clist, "cgroup2")) - new->is_cgroup_v2 = true; - else - new->is_cgroup_v2 = false; + new->version = type; newentry = append_null_to_list((void ***)&hierarchies); hierarchies[newentry] = new; + return new; } /* * Get a copy of the mountpoint from @line, which is a line from * /proc/self/mountinfo */ -static char *get_mountpoint(char *line) +static char *get_mountpoint_on_hybrid_layout(char *line) { int i; - char *p = line, *sret; + char *p2; size_t len; + char *p = line; + char *sret = NULL; for (i = 0; i < 4; i++) { p = strchr(p, ' '); @@ -838,7 +883,15 @@ static char *get_mountpoint(char *line) return NULL; p++; } - /* we've already stuck a \0 after the mountpoint */ + + if (strncmp(p, "/sys/fs/cgroup/", 15)) + return NULL; + + p2 = strchr(p + 15, ' '); + if (!p2) + return NULL; + *p2 = '\0'; + len = strlen(p); sret = must_alloc(len + 1); memcpy(sret, p, len); @@ -885,10 +938,11 @@ static bool controller_in_clist(char *cgline, char *c) tmp[len] = '\0'; for (tok = strtok_r(tmp, ",", &saveptr); tok; - tok = strtok_r(NULL, ",", &saveptr)) { + tok = strtok_r(NULL, ",", &saveptr)) { if (strcmp(tok, c) == 0) return true; } + return false; } @@ -896,24 +950,23 @@ static bool controller_in_clist(char *cgline, char *c) * @basecginfo is a copy of /proc/$$/cgroup. Return the current * cgroup for @controller */ -static char *get_current_cgroup(char *basecginfo, char *controller) +static char *get_current_cgroup(char *basecginfo, char *controller, int type) { char *p = basecginfo; - bool is_cgroup_v2; - bool is_cgroup_v2_base_cgroup; - is_cgroup_v2 = !strcmp(controller, "cgroup2"); - while (true) { - is_cgroup_v2_base_cgroup = false; + for (;;) { + bool is_cgv2_base_cgroup = false; + /* cgroup v2 entry in "/proc//cgroup": "0::/some/path" */ - if (is_cgroup_v2 && (*p == '0')) - is_cgroup_v2_base_cgroup = true; + if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0')) + is_cgv2_base_cgroup = true; p = strchr(p, ':'); if (!p) return NULL; p++; - if (is_cgroup_v2_base_cgroup || controller_in_clist(p, controller)) { + + if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) { p = strchr(p, ':'); if (!p) return NULL; @@ -937,14 +990,16 @@ static void must_append_string(char ***list, char *entry) (*list)[newentry] = copy; } -static void get_existing_subsystems(char ***klist, char ***nlist) +static int get_existing_subsystems(char ***klist, char ***nlist) { FILE *f; char *line = NULL; size_t len = 0; - if ((f = fopen("/proc/self/cgroup", "r")) == NULL) - return; + f = fopen("/proc/self/cgroup", "r"); + if (!f) + return -1; + while (getline(&line, &len, f) != -1) { char *p, *p2, *tok, *saveptr = NULL; p = strchr(line, ':'); @@ -969,7 +1024,7 @@ static void get_existing_subsystems(char ***klist, char ***nlist) } for (tok = strtok_r(p, ",", &saveptr); tok; - tok = strtok_r(NULL, ",", &saveptr)) { + tok = strtok_r(NULL, ",", &saveptr)) { if (strncmp(tok, "name=", 5) == 0) must_append_string(nlist, tok); else @@ -979,6 +1034,7 @@ static void get_existing_subsystems(char ***klist, char ***nlist) free(line); fclose(f); + return 0; } static void trim(char *s) @@ -1044,82 +1100,125 @@ static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d) * At startup, parse_hierarchies finds all the info we need about * cgroup mountpoints and current cgroups, and stores it in @d. */ -static bool parse_hierarchies(void) +static bool cg_init_hybrid(void) { + int ret; + char *basecginfo; + bool will_escape; FILE *f; - char * line = NULL, *basecginfo; - char **klist = NULL, **nlist = NULL; size_t len = 0; + char *line = NULL; + char **klist = NULL, **nlist = NULL; /* * Root spawned containers escape the current cgroup, so use init's * cgroups as our base in that case. */ - if (geteuid()) - basecginfo = read_file("/proc/self/cgroup"); - else + will_escape = (geteuid() == 0); + if (will_escape) basecginfo = read_file("/proc/1/cgroup"); + else + basecginfo = read_file("/proc/self/cgroup"); if (!basecginfo) return false; - if ((f = fopen("/proc/self/mountinfo", "r")) == NULL) { - CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n"); + ret = get_existing_subsystems(&klist, &nlist); + if (ret < 0) { + CGFSNG_DEBUG("Failed to retrieve available cgroup v1 controllers\n"); + free(basecginfo); return false; } - get_existing_subsystems(&klist, &nlist); + f = fopen("/proc/self/mountinfo", "r"); + if (!f) { + CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n"); + return false; + } if (lxc_cgfsng_debug) lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist); - /* we support simple cgroup mounts and lxcfs mounts */ while (getline(&line, &len, f) != -1) { - char **controller_list = NULL; - char *mountpoint, *base_cgroup; - bool writeable; int type; + bool writeable; + struct hierarchy *new; + char *mountpoint = NULL, *base_cgroup = NULL; + char **controller_list = NULL; type = get_cgroup_version(line); - if (type < 0) + if (type == 0) continue; - controller_list = get_controllers(klist, nlist, line, type); - if (!controller_list) + if (type == CGROUP2_SUPER_MAGIC && unified) continue; - if (controller_list_is_dup(hierarchies, controller_list)) { - free(controller_list); - continue; + if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) { + if (type == CGROUP2_SUPER_MAGIC) + cgroup_layout = CGROUP_LAYOUT_UNIFIED; + else if (type == CGROUP_SUPER_MAGIC) + cgroup_layout = CGROUP_LAYOUT_LEGACY; + } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) { + if (type == CGROUP_SUPER_MAGIC) + cgroup_layout = CGROUP_LAYOUT_HYBRID; + } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) { + if (type == CGROUP2_SUPER_MAGIC) + cgroup_layout = CGROUP_LAYOUT_HYBRID; } - mountpoint = get_mountpoint(line); + controller_list = get_controllers_on_hybrid_layout(klist, nlist, line, type); + if (!controller_list && type == CGROUP_SUPER_MAGIC) + continue; + + if (type == CGROUP_SUPER_MAGIC) + if (controller_list_is_dup(hierarchies, controller_list)) + goto next; + + mountpoint = get_mountpoint_on_hybrid_layout(line); if (!mountpoint) { CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line); - free_string_list(controller_list); - continue; + goto next; } - base_cgroup = get_current_cgroup(basecginfo, controller_list[0]); + if (type == CGROUP_SUPER_MAGIC) + base_cgroup = get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC); + else + base_cgroup = get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); if (!base_cgroup) { - CGFSNG_DEBUG("Failed to find current cgroup for controller \"%s\"\n", controller_list[0]); - free_string_list(controller_list); - free(mountpoint); - continue; + CGFSNG_DEBUG("Failed to find current cgroup\n"); + goto next; } trim(base_cgroup); prune_init_scope(base_cgroup); - if (type == CGROUP_V2) + if (type == CGROUP2_SUPER_MAGIC) writeable = test_writeable_v2(mountpoint, base_cgroup); else writeable = test_writeable_v1(mountpoint, base_cgroup); - if (!writeable) { - free_string_list(controller_list); - free(mountpoint); - free(base_cgroup); - continue; + if (!writeable) + goto next; + + if (type == CGROUP2_SUPER_MAGIC) { + char *cgv2_ctrl_path; + + cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup, + "cgroup.controllers", + NULL); + + controller_list = cg_unified_get_controllers(cgv2_ctrl_path); + free(cgv2_ctrl_path); + if (!controller_list) + controller_list = cg_unified_make_empty_controller(); } - add_controller(controller_list, mountpoint, base_cgroup); + new = add_hierarchy(controller_list, mountpoint, base_cgroup, type); + if (type == CGROUP2_SUPER_MAGIC && !unified) + unified = new; + + continue; + + next: + free_string_list(controller_list); + free(mountpoint); + free(base_cgroup); } free_string_list(klist); @@ -1144,9 +1243,106 @@ static bool parse_hierarchies(void) return true; } -static bool collect_hierarchy_info(void) +static int cg_is_pure_unified(void) { + + int ret; + struct statfs fs; + + ret = statfs("/sys/fs/cgroup", &fs); + if (ret < 0) + return -ENOMEDIUM; + + if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC)) + return CGROUP2_SUPER_MAGIC; + + return 0; +} + +/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */ +static char *cg_get_current_cgroup_unified(void) { + char *basecginfo; + char *base_cgroup; + bool will_escape; + char *copy = NULL; + + will_escape = (geteuid() == 0); + if (will_escape) + basecginfo = read_file("/proc/1/cgroup"); + else + basecginfo = read_file("/proc/self/cgroup"); + if (!basecginfo) + return NULL; + + base_cgroup = strstr(basecginfo, "0::/"); + if (!base_cgroup) + goto cleanup_on_err; + + base_cgroup = base_cgroup + 3; + copy = copy_to_eol(base_cgroup); + if (!copy) + goto cleanup_on_err; + +cleanup_on_err: + free(basecginfo); + if (copy) + trim(copy); + + return copy; +} + +static int cg_init_unified(void) +{ + int ret; + char *mountpoint, *subtree_path; + char **delegatable; + char *base_cgroup = NULL; + + ret = cg_is_pure_unified(); + if (ret == -ENOMEDIUM) + return -ENOMEDIUM; + + if (ret != CGROUP2_SUPER_MAGIC) + return 0; + + base_cgroup = cg_get_current_cgroup_unified(); + if (!base_cgroup) + return -EINVAL; + prune_init_scope(base_cgroup); + + /* We assume that we have already been given controllers to delegate + * further down the hierarchy. If not it is up to the user to delegate + * them to us. + */ + mountpoint = must_copy_string("/sys/fs/cgroup"); + subtree_path = must_make_path(mountpoint, base_cgroup, + "cgroup.subtree_control", NULL); + delegatable = cg_unified_get_controllers(subtree_path); + free(subtree_path); + if (!delegatable) + delegatable = cg_unified_make_empty_controller(); + if (!delegatable[0]) + CGFSNG_DEBUG("No controllers are enabled for delegation\n"); + + /* TODO: If the user requested specific controllers via lxc.cgroup.use + * we should verify here. The reason I'm not doing it right is that I'm + * not convinced that lxc.cgroup.use will be the future since it is a + * global property. I much rather have an option that lets you request + * controllers per container. + */ + + add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC); + unified = hierarchies[0]; + + cgroup_layout = CGROUP_LAYOUT_UNIFIED; + return CGROUP2_SUPER_MAGIC; +} + +static bool cg_init(void) +{ + int ret; const char *tmp; + errno = 0; tmp = lxc_global_config_value("lxc.cgroup.use"); if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */ @@ -1155,7 +1351,14 @@ static bool collect_hierarchy_info(void) } cgroup_use = must_copy_string(tmp); - return parse_hierarchies(); + ret = cg_init_unified(); + if (ret < 0) + return false; + + if (ret == CGROUP2_SUPER_MAGIC) + return true; + + return cg_init_hybrid(); } static void *cgfsng_init(struct lxc_handler *handler) @@ -1178,6 +1381,16 @@ static void *cgfsng_init(struct lxc_handler *handler) } d->cgroup_pattern = must_copy_string(cgroup_pattern); + d->cgroup_layout = cgroup_layout; + if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY) + TRACE("Running with legacy cgroup layout"); + else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID) + TRACE("Running with hybrid cgroup layout"); + else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED) + TRACE("Running with unified cgroup layout"); + else + WARN("Running with unknown cgroup layout"); + if (lxc_cgfsng_debug) lxc_cgfsng_print_debuginfo(d); @@ -1325,7 +1538,7 @@ struct cgroup_ops *cgfsng_ops_init(void) if (getenv("LXC_DEBUG_CGFSNG")) lxc_cgfsng_debug = true; - if (!collect_hierarchy_info()) + if (!cg_init()) return NULL; return &cgfsng_ops; @@ -1508,7 +1721,7 @@ static int chown_cgroup_wrapper(void *data) WARN("Error chmoding %s: %s", path, strerror(errno)); free(fullpath); - if (!hierarchies[i]->is_cgroup_v2) + if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC) continue; fullpath = must_make_path(path, "cgroup.subtree_control", NULL); @@ -1658,7 +1871,7 @@ static int mount_cgroup_cgns_supported(int type, struct hierarchy *h, const char if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) flags |= MS_RDONLY; - if (!h->is_cgroup_v2) { + if (h->version != CGROUP2_SUPER_MAGIC) { controllers = lxc_string_join(",", (const char **)h->controllers, false); if (!controllers) return -ENOMEM; @@ -1881,25 +2094,33 @@ static bool cgfsng_get_hierarchies(int n, char ***out) #define THAWED "THAWED" #define THAWED_LEN (strlen(THAWED)) +/* TODO: If the unified cgroup hierarchy grows a freezer controller this needs + * to be adapted. + */ static bool cgfsng_unfreeze(void *hdata) { + int ret; char *fullpath; - struct hierarchy *h = get_hierarchy("freezer"); + struct hierarchy *h; + h = get_hierarchy("freezer"); if (!h) return false; + fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL); - if (lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false) != 0) { - free(fullpath); - return false; - } + ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false); free(fullpath); + if (ret < 0) + return false; + return true; } static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem) { - struct hierarchy *h = get_hierarchy(subsystem); + struct hierarchy *h; + + h = get_hierarchy(subsystem); if (!h) return NULL; diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h index f17a6abe0..f409eee7c 100644 --- a/src/lxc/cgroups/cgroup.h +++ b/src/lxc/cgroups/cgroup.h @@ -32,6 +32,13 @@ struct lxc_handler; struct lxc_conf; struct lxc_list; +typedef enum { + CGROUP_LAYOUT_UNKNOWN = -1, + CGROUP_LAYOUT_LEGACY = 0, + CGROUP_LAYOUT_HYBRID = 1, + CGROUP_LAYOUT_UNIFIED = 2, +} cgroup_layout_t; + typedef enum { CGFS, CGMANAGER, diff --git a/src/lxc/cgroups/cgroup_utils.c b/src/lxc/cgroups/cgroup_utils.c index 6dda1a617..8e2a40eda 100644 --- a/src/lxc/cgroups/cgroup_utils.c +++ b/src/lxc/cgroups/cgroup_utils.c @@ -35,12 +35,12 @@ int get_cgroup_version(char *line) { if (is_cgroupfs_v1(line)) - return CGROUP_V1; + return CGROUP_SUPER_MAGIC; if (is_cgroupfs_v2(line)) - return CGROUP_V2; + return CGROUP2_SUPER_MAGIC; - return -1; + return 0; } bool is_cgroupfs_v1(char *line) diff --git a/src/lxc/cgroups/cgroup_utils.h b/src/lxc/cgroups/cgroup_utils.h index e9e4448a6..3a4726e5b 100644 --- a/src/lxc/cgroups/cgroup_utils.h +++ b/src/lxc/cgroups/cgroup_utils.h @@ -28,10 +28,6 @@ #include #include -#define CGROUP_V1 0 -#define CGROUP_V2 1 -#define LXCFS_CGROUP 2 - /* Retrieve the cgroup version of a given entry from /proc//mountinfo. */ extern int get_cgroup_version(char *line);