#include <string.h>
#include <dirent.h>
#include <fcntl.h>
+#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include "error.h"
#include "config.h"
#include "commands.h"
+#include "list.h"
+#include "conf.h"
#include <lxc/log.h>
#include <lxc/cgroup.h>
/*
* Determine mountpoint for a cgroup subsystem.
- * @subsystem: cgroup subsystem (i.e. freezer). If this is NULL, the first
- * cgroup mountpoint with any subsystems is used.
- * @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path
+ * @dest: a passed-in buffer of at least size MAXPATHLEN into which the path
* is copied.
+ * @subsystem: cgroup subsystem (i.e. freezer)
*
- * Returns 0 on success, -1 on error.
+ * Returns true on success, false on error.
*/
-static int get_cgroup_mount(const char *subsystem, char *mnt)
+bool get_subsys_mount(char *dest, const char *subsystem)
{
struct mntent mntent_r;
FILE *file = NULL;
- int ret, err = -1;
-
+ int ret;
+ bool retv = false;
char buf[LARGE_MAXPATHLEN] = {0};
file = setmntent(MTAB, "r");
}
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
- if (strcmp(mntent_r.mnt_type, "cgroup") != 0)
+ if (strcmp(mntent_r.mnt_type, "cgroup"))
continue;
if (subsystem) {
continue;
}
- ret = snprintf(mnt, MAXPATHLEN, "%s", mntent_r.mnt_dir);
+ ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir);
if (ret < 0 || ret >= MAXPATHLEN)
goto fail;
- DEBUG("using cgroup mounted at '%s'", mnt);
- err = 0;
+ retv = true;
goto out;
};
subsystem ? subsystem : "(NULL)");
out:
endmntent(file);
- return err;
+ return retv;
}
/*
- * cgroup_path_get: Get the absolute path to a particular subsystem,
- * plus a passed-in (to be appended) relative cgpath for a container.
- *
- * @subsystem : subsystem of interest (e.g. "freezer")
- * @cgrelpath : a container's relative cgroup path (e.g. "lxc/c1")
- *
- * Returns absolute path on success, NULL on error. The caller must free()
- * the returned path.
- *
- * Note that @subsystem may be the name of an item (e.g. "freezer.state")
- * in which case the subsystem will be determined by taking the string up
- * to the first '.'
+ * is_in_cgroup: check whether pid is found in the passed-in cgroup tasks
+ * file.
+ * @path: in full path to a cgroup tasks file
+ * Note that in most cases the file will simply not exist, which is ok - it
+ * just means that's not our cgroup.
*/
-char *cgroup_path_get(const char *subsystem, const char *cgrelpath)
+static bool is_in_cgroup(pid_t pid, char *path)
{
- int rc;
-
- char *buf = NULL;
- char *cgabspath = NULL;
-
- buf = malloc(MAXPATHLEN * sizeof(char));
- if (!buf) {
- ERROR("malloc failed");
- goto out1;
- }
-
- cgabspath = malloc(MAXPATHLEN * sizeof(char));
- if (!cgabspath) {
- ERROR("malloc failed");
- goto out2;
- }
+ int cmppid;
+ FILE *f = fopen(path, "r");
+ char *line = NULL;
+ size_t sz = 0;
- /* lxc_cgroup_set passes a state object for the subsystem,
- * so trim it to just the subsystem part */
- if (subsystem) {
- rc = snprintf(cgabspath, MAXPATHLEN, "%s", subsystem);
- if (rc < 0 || rc >= MAXPATHLEN) {
- ERROR("subsystem name too long");
- goto err3;
+ if (!f)
+ return false;
+ while (getline(&line, &sz, f) != -1) {
+ if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) {
+ fclose(f);
+ free(line);
+ return true;
}
- char *s = index(cgabspath, '.');
- if (s)
- *s = '\0';
- DEBUG("%s: called for subsys %s name %s\n", __func__,
- subsystem, cgrelpath);
- }
- if (get_cgroup_mount(subsystem ? cgabspath : NULL, buf)) {
- ERROR("cgroup is not mounted");
- goto err3;
}
-
- rc = snprintf(cgabspath, MAXPATHLEN, "%s/%s", buf, cgrelpath);
- if (rc < 0 || rc >= MAXPATHLEN) {
- ERROR("name too long");
- goto err3;
- }
-
- DEBUG("%s: returning %s for subsystem %s relpath %s", __func__,
- cgabspath, subsystem, cgrelpath);
- goto out2;
-
-err3:
- free(cgabspath);
- cgabspath = NULL;
-out2:
- free(buf);
-out1:
- return cgabspath;
+ fclose(f);
+ if (line)
+ free(line);
+ return false;
}
/*
char *lxc_cgroup_path_get(const char *subsystem, const char *name,
const char *lxcpath)
{
- char *cgabspath;
- char *cgrelpath;
+ char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p;
+ pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath);
+ int ret;
+
+ if (initpid < 0) {
+ ERROR("Error getting init pid for container %s:%s",
+ lxcpath, name);
+ return NULL;
+ }
- cgrelpath = lxc_cmd_get_cgroup_path(name, lxcpath);
- if (!cgrelpath)
+ cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
+ if (!cgpath)
return NULL;
- cgabspath = cgroup_path_get(subsystem, cgrelpath);
- free(cgrelpath);
- return cgabspath;
+ if (!get_subsys_mount(path, subsystem))
+ return NULL;
+
+ pathp = path + strlen(path);
+ /*
+ * find a mntpt where i have the subsystem mounted, then find
+ * a subset cgpath under that which has pid in it.
+ *
+ * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
+ * then look for ourselves in:
+ * /x/y/z/a/b/c/d/tasks
+ * /x/y/z/b/c/d/tasks
+ * /x/y/z/c/d/tasks
+ * /x/y/z/d/tasks
+ * /x/y/z/tasks
+ */
+ cgp = cgpath;
+ while (cgp[0]) {
+ ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "%s/tasks", cgp);
+ if (ret < 0 || ret >= MAXPATHLEN)
+ return NULL;
+ if (!is_in_cgroup(initpid, path)) {
+ // does not exist, try the next one
+ cgp = index(cgp+1, '/');
+ if (!cgp)
+ break;
+ continue;
+ }
+ break;
+ }
+ if (!cgp || !*cgp) {
+ // try just the path
+ ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "/tasks");
+ if (ret < 0 || ret >= MAXPATHLEN)
+ return NULL;
+ if (!is_in_cgroup(initpid, path)) {
+ return NULL;
+ }
+ return strdup("/");
+ }
+ // path still has 'tasks' on the end, drop it
+ if ((p = rindex(path, '/')) != NULL)
+ *p = '\0';
+ return strdup(path);
}
/*
return 0;
}
+static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v)
+{
+ char *cgabspath, path[MAXPATHLEN];
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+ bool ret = true;
+
+ // XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
+ // not sure they ever do, but they *could*
+ // right now, I'm assuming they do NOT
+ if (strcmp(v, "a") && strcmp(v, "a *:* rwm"))
+ return false;
+ cgabspath = cgroup_get_subsys_path(h, "devices");
+ if (!cgabspath)
+ return -1;
+
+ ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ ERROR("pathname too long for devices.list");
+ return -1;
+ }
+
+ if (!(f = fopen(path, "r"))) {
+ SYSERROR("Error opening devices.list");
+ return -1;
+ }
+
+ while (getline(&line, &len, f) != -1) {
+ size_t len = strlen(line);
+ if (len > 0 && line[len-1] == '\n')
+ line[len-1] = '\0';
+ if (strcmp(line, "a *:* rwm") == 0) {
+ ret = false;
+ goto out;
+ }
+ }
+
+out:
+ fclose(f);
+ if (line)
+ free(line);
+ return ret;
+}
+
+static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v)
+{
+ char *cgabspath, path[MAXPATHLEN];
+ bool ret = false;
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+
+ cgabspath = cgroup_get_subsys_path(h, "devices");
+ if (!cgabspath)
+ return -1;
+
+ ret = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ ERROR("pathname too long to for devices.list");
+ return -1;
+ }
+
+ if (!(f = fopen(path, "r"))) {
+ SYSERROR("Error opening devices.list");
+ return -1;
+ }
+
+ while (getline(&line, &len, f) != -1) {
+ size_t len = strlen(line);
+ if (len > 0 && line[len-1] == '\n')
+ line[len-1] = '\0';
+ if (strcmp(line, v) == 0) {
+ ret = true;
+ goto out;
+ }
+ }
+
+out:
+ if (line)
+ free(line);
+ fclose(f);
+ return ret;
+}
+
/*
* lxc_cgroup_set_bypath: Write a value into a cgroup file
*
*
* Returns 0 on success, < 0 on error.
*/
-int lxc_cgroup_set_bypath(const char *cgrelpath, const char *filename, const char *value)
+int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename,
+ const char *value)
{
+ char *cgabspath, path[MAXPATHLEN], *p;
int ret;
- char *cgabspath;
- char path[MAXPATHLEN];
- cgabspath = cgroup_path_get(filename, cgrelpath);
+ ret = snprintf(path, MAXPATHLEN, "%s", filename);
+ if (ret < 0 || ret >= MAXPATHLEN)
+ return -1;
+ if ((p = index(path, '.')) != NULL)
+ *p = '\0';
+ cgabspath = cgroup_get_subsys_path(handler, path);
if (!cgabspath)
return -1;
ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
if (ret < 0 || ret >= MAXPATHLEN) {
- ERROR("pathname too long");
- ret = -1;
- goto out;
+ ERROR("pathname too long to set cgroup value %s to %s",
+ filename, value);
+ return -1;
}
- ret = do_cgroup_set(path, value);
-
-out:
- free(cgabspath);
- return ret;
+ return do_cgroup_set(path, value);
}
/*
int ret;
char *cgabspath;
char path[MAXPATHLEN];
+ char *subsystem = alloca(strlen(filename)+1), *p;
+ strcpy(subsystem, filename);
+
+ if ((p = index(subsystem, '.')) != NULL)
+ *p = '\0';
- cgabspath = lxc_cgroup_path_get(filename, name, lxcpath);
+ cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
if (!cgabspath)
return -1;
int fd, ret;
char *cgabspath;
char path[MAXPATHLEN];
+ char *subsystem = alloca(strlen(filename)+1), *p;
- cgabspath = lxc_cgroup_path_get(filename, name, lxcpath);
+ strcpy(subsystem, filename);
+
+ if ((p = index(subsystem, '.')) != NULL)
+ *p = '\0';
+
+ cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
if (!cgabspath)
return -1;
return ret;
}
-int lxc_cgroup_nrtasks(const char *cgrelpath)
+int lxc_cgroup_nrtasks(struct lxc_handler *handler)
{
- char *cgabspath = NULL;
char path[MAXPATHLEN];
int pid, ret;
FILE *file;
- cgabspath = cgroup_path_get(NULL, cgrelpath);
- if (!cgabspath)
+ if (!handler->cgroup)
return -1;
- ret = snprintf(path, MAXPATHLEN, "%s/tasks", cgabspath);
+ /* XXX Should we use a specific subsystem rather than the first one we
+ * found (handler->cgroup->curcgroup)? */
+ ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup);
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("pathname too long");
- ret = -1;
- goto out;
+ return -1;
}
file = fopen(path, "r");
if (!file) {
SYSERROR("fopen '%s' failed", path);
- ret = -1;
- goto out;
+ return -1;
}
ret = 0;
ret++;
fclose(file);
-
-out:
- free(cgabspath);
return ret;
}
-/*
- * If first creating the /sys/fs/cgroup/$subsys/lxc container, then
- * try to set clone_children to 1. Some kernels don't support
- * clone_children, and cgroup maintainer wants to deprecate it. So
- * XXX TODO we should instead after each cgroup mkdir (here and in
- * hooks/mountcgroup) check if cpuset is in the subsystems, and if so
- * manually copy over mems and cpus.
- */
-static void set_clone_children(const char *mntdir)
+static int in_subsys_list(const char *s, const char *list)
+{
+ char *token, *str, *saveptr = NULL;
+
+ if (!list || !s)
+ return 0;
+
+ str = alloca(strlen(list)+1);
+ strcpy(str, list);
+ for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) {
+ if (strcmp(s, token) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static void set_clone_children(struct mntent *m)
{
char path[MAXPATHLEN];
FILE *fout;
int ret;
- ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", mntdir);
- INFO("writing to %s\n", path);
+ if (!in_subsys_list("cpuset", m->mnt_opts))
+ return;
+ ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir);
if (ret < 0 || ret > MAXPATHLEN)
return;
fout = fopen(path, "w");
fclose(fout);
}
-static char *get_all_cgroups(void)
+static bool have_visited(char *opts, char *visited, char *all_subsystems)
+{
+ char *str, *s = NULL, *token;
+
+ str = alloca(strlen(opts)+1);
+ strcpy(str, opts);
+ for (; (token = strtok_r(str, ",", &s)); str = NULL) {
+ if (!in_subsys_list(token, all_subsystems))
+ continue;
+ if (visited && in_subsys_list(token, visited))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems)
+{
+ while (d) {
+ if (have_visited(opts, d->subsystems, all_subsystems))
+ return true;
+ d = d->next;
+ }
+ return false;
+}
+
+static char *record_visited(char *opts, char *all_subsystems)
+{
+ char *s = NULL, *token, *str;
+ int oldlen = 0, newlen, toklen;
+ char *visited = NULL;
+
+ str = alloca(strlen(opts)+1);
+ strcpy(str, opts);
+ for (; (token = strtok_r(str, ",", &s)); str = NULL) {
+ if (!in_subsys_list(token, all_subsystems))
+ continue;
+ toklen = strlen(token);
+ newlen = oldlen + toklen + 1; // ',' + token or token + '\0'
+ visited = realloc(visited, newlen);
+ if (!visited)
+ return (char *)-ENOMEM;
+ if (oldlen)
+ strcat(visited, ",");
+ else
+ *visited = '\0';
+ strcat(visited, token);
+ }
+
+ return visited;
+}
+
+static char *get_all_subsystems(void)
{
FILE *f;
char *line = NULL, *ret = NULL;
return ret;
}
-static int in_cgroup_list(char *s, char *list)
+/*
+ * /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries.
+ * If any of those are present, then lxc will ONLY consider
+ * cgroup filesystems mounted at one of the listed entries.
+ */
+static char *get_cgroup_uselist()
{
- char *token, *str, *saveptr = NULL;
-
- if (!list || !s)
- return 0;
+ FILE *f;
+ char *line = NULL, *ret = NULL;
+ size_t sz = 0, retsz = 0, newsz;
- for (str = strdupa(list); (token = strtok_r(str, ",", &saveptr)); str = NULL) {
- if (strcmp(s, token) == 0)
- return 1;
+ if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL)
+ return NULL;
+ while (getline(&line, &sz, f) != -1) {
+ char *p = line;
+ while (*p && isblank(*p))
+ p++;
+ if (strncmp(p, "lxc.cgroup.use", 14) != 0)
+ continue;
+ p = index(p, '=');
+ if (!p)
+ continue;
+ p++;
+ while (*p && isblank(*p))
+ p++;
+ if (strlen(p) < 1)
+ continue;
+ newsz = retsz + strlen(p);
+ if (retsz == 0)
+ newsz += 1; // for trailing \0
+ // the last line in the file could lack \n
+ if (p[strlen(p)-1] != '\n')
+ newsz += 1;
+ ret = realloc(ret, newsz);
+ if (!ret) {
+ ERROR("Out of memory reading cgroup uselist");
+ fclose(f);
+ free(line);
+ return (char *)-ENOMEM;
+ }
+ if (retsz == 0)
+ strcpy(ret, p);
+ else
+ strcat(ret, p);
+ if (p[strlen(p)-1] != '\n')
+ ret[newsz-2] = '\0';
+ ret[newsz-1] = '\0';
+ retsz = newsz;
}
- return 0;
+ if (line)
+ free(line);
+ return ret;
}
-static int have_visited(char *opts, char *visited, char *allcgroups)
+static bool is_in_uselist(char *uselist, struct mntent *m)
{
- char *str, *s = NULL, *token;
-
- for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) {
- if (!in_cgroup_list(token, allcgroups))
- continue;
- if (visited && in_cgroup_list(token, visited))
- return 1;
+ char *p;
+ if (!uselist)
+ return true;
+ if (!*uselist)
+ return false;
+ while (*uselist) {
+ p = index(uselist, '\n');
+ if (strncmp(m->mnt_dir, uselist, p - uselist) == 0)
+ return true;
+ uselist = p+1;
}
-
- return 0;
+ return false;
}
-static int record_visited(char *opts, char **visitedp, char *allcgroups)
+static bool find_real_cgroup(struct cgroup_desc *d, char *path)
{
- char *s = NULL, *token, *str;
- int oldlen, newlen, ret;
+ FILE *f;
+ char *line = NULL, *p, *p2;
+ int ret = 0;
+ size_t len;
- for (str = strdupa(opts); (token = strtok_r(str, ",", &s)); str = NULL) {
- if (!in_cgroup_list(token, allcgroups))
+ if ((f = fopen("/proc/self/cgroup", "r")) == NULL) {
+ SYSERROR("Error opening /proc/self/cgroups");
+ return false;
+ }
+
+ // If there is no subsystem, ignore the mount. Note we may want
+ // to change this, so that unprivileged users can use a unbound
+ // cgroup mount to arrange their container tasks.
+ if (!d->subsystems) {
+ fclose(f);
+ return false;
+ }
+ while (getline(&line, &len, f) != -1) {
+ if (!(p = index(line, ':')))
continue;
- if (*visitedp && in_cgroup_list(token, *visitedp))
+ if (!(p2 = index(++p, ':')))
continue;
- oldlen = (*visitedp) ? strlen(*visitedp) : 0;
- newlen = oldlen + strlen(token) + 2;
- (*visitedp) = realloc(*visitedp, newlen);
- if (!(*visitedp))
- return -1;
- ret = snprintf((*visitedp)+oldlen, newlen, ",%s", token);
- if (ret < 0 || ret >= newlen)
- return -1;
- }
+ *p2 = '\0';
+ // in case of multiple mounts it may be more correct to
+ // insist all subsystems be the same
+ if (in_subsys_list(p, d->subsystems))
+ goto found;
+ }
- return 0;
+ if (line)
+ free(line);
+ fclose(f);
+ return false;;
+
+found:
+ fclose(f);
+ ret = snprintf(path, MAXPATHLEN, "%s", p2+1);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ free(line);
+ return false;
+ }
+ free(line);
+ return true;
}
+
/*
- * Make sure the 'cgroup group' exists, so that we don't have to worry about
- * that later.
- *
- * @lxcgroup: the cgroup group, i.e. 'lxc' by default.
- *
- * See detailed comments at lxc_cgroup_path_create for more information.
- *
- * Returns 0 on success, -1 on error.
+ * for a given cgroup mount entry, and a to-be-created container,
+ * 1. Figure out full path of the cgroup we are currently in,
+ * 2. Find a new free cgroup which is $path / $lxc_name with an
+ * optional '-$n' where n is an ever-increasing integer.
*/
-static int create_lxcgroups(const char *lxcgroup)
+static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name)
{
- FILE *file = NULL;
- struct mntent mntent_r;
- int ret, retv = -1;
- char path[MAXPATHLEN];
-
- char buf[LARGE_MAXPATHLEN] = {0};
+ char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN];
+ int i = 0, ret;
+ size_t l;
- file = setmntent(MTAB, "r");
- if (!file) {
- SYSERROR("failed to open %s", MTAB);
- return -1;
+ if (!find_real_cgroup(d, cgpath)) {
+ ERROR("Failed to find current cgroup");
+ return NULL;
}
- while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
-
- if (strcmp(mntent_r.mnt_type, "cgroup"))
- continue;
- if (!mount_has_subsystem(&mntent_r))
- continue;
-
- /*
- * TODO - handle case where lxcgroup has subdirs? (i.e. build/l1)
- * We probably only want to support that for /users/joe
- */
- ret = snprintf(path, MAXPATHLEN, "%s/%s", mntent_r.mnt_dir, lxcgroup);
+ /*
+ * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
+ * then look for ourselves in:
+ * /x/y/z/a/b/c/d/tasks
+ * /x/y/z/b/c/d/tasks
+ * /x/y/z/c/d/tasks
+ * /x/y/z/d/tasks
+ * /x/y/z/tasks
+ */
+ cgp = cgpath;
+ while (cgp[0]) {
+ ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp);
if (ret < 0 || ret >= MAXPATHLEN)
- goto fail;
- if (access(path, F_OK)) {
- set_clone_children(mntent_r.mnt_dir);
- ret = mkdir(path, 0755);
- if (ret == -1 && errno != EEXIST) {
- SYSERROR("failed to create '%s' directory", path);
- goto fail;
- }
+ return NULL;
+ if (!is_in_cgroup(getpid(), path)) {
+ // does not exist, try the next one
+ cgp = index(cgp+1, '/');
+ if (!cgp)
+ break;
+ continue;
}
+ break;
+ }
+ if (!cgp || !*cgp) {
+ // try just the path
+ ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt);
+ if (ret < 0 || ret >= MAXPATHLEN)
+ return NULL;
+ if (!is_in_cgroup(getpid(), path))
+ return NULL;
+ }
+ // found it
+ // path has '/tasks' at end, drop that
+ if (!(cgp = rindex(path, '/'))) {
+ ERROR("Got nonsensical path name %s\n", path);
+ return NULL;
+ }
+ *cgp = '\0';
+ if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) {
+ ERROR("Error: cgroup path too long");
+ return NULL;
+ }
+ tail[0] = '\0';
+ while (1) {
+ struct stat sb;
+ int freebytes = MAXPATHLEN - (cgp - path);
+
+ if (i) {
+ ret = snprintf(tail, 20, "-%d", i);
+ if (ret < 0 || ret >= 20)
+ return NULL;
+ }
+ ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail);
+ if (ret < 0 || ret >= freebytes)
+ return NULL;
+ if (stat(path, &sb) == -1)
+ break;
+ i++;
}
- retv = 0;
-fail:
- endmntent(file);
- return retv;
+ l = strlen(cgpath);
+ ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail);
+ if (ret < 0 || ret >= (MAXPATHLEN - l)) {
+ ERROR("Out of memory");
+ return NULL;
+ }
+ if ((d->realcgroup = strdup(cgpath)) == NULL) {
+ ERROR("Out of memory");
+ return NULL;
+ }
+ l = strlen(d->realcgroup);
+ if (l > 0 && d->realcgroup[l-1] == '\n')
+ d->realcgroup[l-1] = '\0';
+ return strdup(path);
}
/*
* is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
* freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
*
- * XXX This should probably be locked globally
- *
* Races won't be determintal, you'll just end up with leftover unused cgroups
*/
-char *lxc_cgroup_path_create(const char *lxcgroup, const char *name)
+struct cgroup_desc *lxc_cgroup_path_create(const char *name)
{
- int i = 0, ret;
- char *retpath, path[MAXPATHLEN];
- char tail[12];
+ struct cgroup_desc *retdesc = NULL, *newdesc = NULL;
FILE *file = NULL;
struct mntent mntent_r;
- char *allcgroups = get_all_cgroups();
- char *visited = NULL;
-
char buf[LARGE_MAXPATHLEN] = {0};
+ char *all_subsystems = get_all_subsystems();
+ char *cgroup_uselist = get_cgroup_uselist();
- if (!lxcgroup || strlen(lxcgroup) == 0 || strcmp(lxcgroup, "/") == 0)
- lxcgroup = "lxc";
- if (!allcgroups)
+ if (cgroup_uselist == (char *)-ENOMEM) {
+ if (all_subsystems)
+ free(all_subsystems);
+ return NULL;
+ }
+ if (!all_subsystems) {
+ ERROR("failed to get a list of all cgroup subsystems");
+ if (cgroup_uselist)
+ free(cgroup_uselist);
return NULL;
-
- if (create_lxcgroups(lxcgroup) < 0)
- goto err1;
-
-again:
- if (visited) {
- /* we're checking for a new name, so start over with all cgroup
- * mounts */
- free(visited);
- visited = NULL;
}
file = setmntent(MTAB, "r");
if (!file) {
SYSERROR("failed to open %s", MTAB);
- goto err1;
+ free(all_subsystems);
+ if (cgroup_uselist)
+ free(cgroup_uselist);
+ return NULL;
}
- if (i)
- snprintf(tail, 12, "-%d", i);
- else
- *tail = '\0';
-
while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
if (strcmp(mntent_r.mnt_type, "cgroup"))
continue;
- if (!mount_has_subsystem(&mntent_r))
+
+ if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r))
continue;
/* make sure we haven't checked this subsystem already */
- if (have_visited(mntent_r.mnt_opts, visited, allcgroups))
+ if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems))
continue;
- if (record_visited(mntent_r.mnt_opts, &visited, allcgroups) < 0)
- goto fail;
- /* find unused mnt_dir + lxcgroup + name + -$i */
- ret = snprintf(path, MAXPATHLEN, "%s/%s/%s%s", mntent_r.mnt_dir,
- lxcgroup, name, tail);
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) {
+ ERROR("Out of memory reading cgroups");
goto fail;
+ }
+ newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems);
+ if (newdesc->subsystems == (char *)-ENOMEM) {
+ ERROR("Out of memory recording cgroup subsystems");
+ free(newdesc);
+ newdesc = NULL;
+ goto fail;
+ }
+ if (!newdesc->subsystems) {
+ free(newdesc);
+ newdesc = NULL;
+ continue;
+ }
+ newdesc->mntpt = strdup(mntent_r.mnt_dir);
+ newdesc->realcgroup = NULL;
+ newdesc->curcgroup = find_free_cgroup(newdesc, name);
+ if (!newdesc->mntpt || !newdesc->curcgroup) {
+ ERROR("Out of memory reading cgroups");
+ goto fail;
+ }
- INFO("lxcgroup %s name %s tail %s, makes path .%s.",
- lxcgroup, name, tail, path);
-
- if (access(path, F_OK) == 0) goto next;
+ set_clone_children(&mntent_r);
- if (mkdir(path, 0755)) {
- ERROR("Error creating cgroup %s", path);
+ if (mkdir(newdesc->curcgroup, 0755)) {
+ ERROR("Error creating cgroup %s", newdesc->curcgroup);
goto fail;
}
-
+ newdesc->next = retdesc;
+ retdesc = newdesc;
}
endmntent(file);
-
- // print out the cgpath part
- ret = snprintf(path, MAXPATHLEN, "%s/%s%s", lxcgroup, name, tail);
- if (ret < 0 || ret >= MAXPATHLEN) // can't happen
- goto fail;
-
- retpath = strdup(path);
- free(allcgroups);
- if (visited)
- free(visited);
-
- return retpath;
-
-next:
- endmntent(file);
- i++;
- goto again;
+ free(all_subsystems);
+ if (cgroup_uselist)
+ free(cgroup_uselist);
+ return retdesc;
fail:
endmntent(file);
-err1:
- free(allcgroups);
- if (visited)
- free(visited);
+ free(all_subsystems);
+ if (cgroup_uselist)
+ free(cgroup_uselist);
+ if (newdesc) {
+ if (newdesc->mntpt)
+ free(newdesc->mntpt);
+ if (newdesc->subsystems)
+ free(newdesc->subsystems);
+ if (newdesc->curcgroup)
+ free(newdesc->curcgroup);
+ if (newdesc->realcgroup)
+ free(newdesc->realcgroup);
+ free(newdesc);
+ }
+ while (retdesc) {
+ struct cgroup_desc *t = retdesc;
+ retdesc = retdesc->next;
+ if (t->mntpt)
+ free(t->mntpt);
+ if (t->subsystems)
+ free(t->subsystems);
+ if (t->curcgroup)
+ free(t->curcgroup);
+ if (t->realcgroup)
+ free(t->realcgroup);
+ free(t);
+
+ }
return NULL;
}
-int lxc_cgroup_enter(const char *cgpath, pid_t pid)
+static bool lxc_cgroup_enter_one(const char *dir, int pid)
{
char path[MAXPATHLEN];
- FILE *file = NULL, *fout;
- struct mntent mntent_r;
- int ret, retv = -1;
- char buf[LARGE_MAXPATHLEN] = {0};
+ int ret;
+ FILE *fout;
- file = setmntent(MTAB, "r");
- if (!file) {
- SYSERROR("failed to open %s", MTAB);
- return -1;
+ ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir);
+ if (ret < 0 || ret >= MAXPATHLEN) {
+ ERROR("Error entering cgroup");
+ return false;
}
-
- while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
- if (strcmp(mntent_r.mnt_type, "cgroup"))
- continue;
- if (!mount_has_subsystem(&mntent_r))
- continue;
- ret = snprintf(path, MAXPATHLEN, "%s/%s/tasks",
- mntent_r.mnt_dir, cgpath);
- if (ret < 0 || ret >= MAXPATHLEN) {
- ERROR("Error entering cgroup");
- goto out;
- }
- fout = fopen(path, "w");
- if (!fout) {
- SYSERROR("Error entering cgroup");
- goto out;
- }
- if (fprintf(fout, "%d\n", (int)pid) < 0) {
- ERROR("Error writing pid to %s", path);
- fclose(fout);
- goto out;
- }
- if (fclose(fout) < 0) {
- SYSERROR("Error writing pid to %s", path);
- goto out;
- }
+ fout = fopen(path, "w");
+ if (!fout) {
+ SYSERROR("Error entering cgroup");
+ return false;
+ }
+ if (fprintf(fout, "%d\n", (int)pid) < 0) {
+ ERROR("Error writing pid to %s to enter cgroup", path);
+ fclose(fout);
+ return false;
+ }
+ if (fclose(fout) < 0) {
+ SYSERROR("Error writing pid to %s to enter cgroup", path);
+ return false;
}
- retv = 0;
-out:
- endmntent(file);
- return retv;
+ return true;
+}
+
+int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid)
+{
+ while (cgroups) {
+ if (!cgroups->subsystems)
+ goto next;
+
+ if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid))
+ return -1;
+next:
+ cgroups = cgroups->next;
+ }
+ return 0;
}
static int cgroup_rmdir(char *dirname)
break;
if (!strcmp(direntp->d_name, ".") ||
- !strcmp(direntp->d_name, ".."))
+ !strcmp(direntp->d_name, ".."))
continue;
rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
return ret;
}
-static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath)
-{
- char cgname[MAXPATHLEN];
- char *cgmnt = mntent->mnt_dir;
- int rc;
-
- rc = snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, cgpath);
- if (rc < 0 || rc >= MAXPATHLEN) {
- ERROR("name too long");
- return -1;
- }
- DEBUG("destroying %s\n", cgname);
- if (cgroup_rmdir(cgname)) {
- SYSERROR("failed to remove cgroup '%s'", cgname);
- return -1;
- }
-
- DEBUG("'%s' unlinked", cgname);
-
- return 0;
-}
-
/*
* for each mounted cgroup, destroy the cgroup for the container
*/
-int lxc_cgroup_destroy(const char *cgpath)
+void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups)
{
- struct mntent mntent_r;
- FILE *file = NULL;
- int err, retv = 0;
-
- char buf[LARGE_MAXPATHLEN] = {0};
-
- file = setmntent(MTAB, "r");
- if (!file) {
- SYSERROR("failed to open %s", MTAB);
- return -1;
+ while (cgroups) {
+ struct cgroup_desc *next = cgroups->next;
+ if (cgroup_rmdir(cgroups->curcgroup) < 0)
+ SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup);
+ free(cgroups->mntpt);
+ free(cgroups->subsystems);
+ free(cgroups->curcgroup);
+ free(cgroups->realcgroup);
+ free(cgroups);
+ cgroups = next;
}
-
- while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
- if (strcmp(mntent_r.mnt_type, "cgroup"))
- continue;
- if (!mount_has_subsystem(&mntent_r))
- continue;
-
- err = lxc_one_cgroup_destroy(&mntent_r, cgpath);
- if (err) // keep trying to clean up the others
- retv = -1;
- }
-
- endmntent(file);
- return retv;
}
int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
{
- int ret;
+ FILE *f;
+ char *line = NULL, ret = -1;
+ size_t len = 0;
+ int first = 1;
char *dirpath;
- dirpath = lxc_cmd_get_cgroup_path(name, lxcpath);
- if (!dirpath) {
- ERROR("Error getting cgroup for container %s: %s", lxcpath, name);
- return -1;
+ /* read the list of subsystems from the kernel */
+ f = fopen("/proc/cgroups", "r");
+ if (!f)
+ return ret;
+
+ while (getline(&line, &len, f) != -1) {
+ char *c;
+
+ /* skip the first line */
+ if (first) {
+ first=0;
+ continue;
+ }
+
+ c = strchr(line, '\t');
+ if (!c)
+ continue;
+ *c = '\0';
+ dirpath = lxc_cgroup_path_get(line, name, lxcpath);
+ if (!dirpath)
+ continue;
+
+ INFO("joining pid %d to cgroup %s", pid, dirpath);
+ if (lxc_cgroup_enter_one(dirpath, pid)) {
+ ERROR("Failed joining %d to %s\n", pid, dirpath);
+ continue;
+ }
}
- INFO("joining pid %d to cgroup %s", pid, dirpath);
- ret = lxc_cgroup_enter(dirpath, pid);
- free(dirpath);
+ if (line)
+ free(line);
+ fclose(f);
return ret;
}
-bool is_in_subcgroup(int pid, const char *subsystem, const char *cgpath)
+bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d)
{
char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
FILE *f;
int ret, junk;
- size_t sz = 0, l1 = strlen(cgpath), l2;
+ size_t sz = 0, l1, l2;
char *end = index(subsystem, '.');
int len = end ? (end - subsystem) : strlen(subsystem);
+ const char *cgpath = NULL;
+
+ while (d) {
+ if (in_subsys_list("devices", d->subsystems)) {
+ cgpath = d->realcgroup;
+ l1 = strlen(cgpath);
+ break;
+ }
+ d = d->next;
+ }
+ if (!d)
+ return false;
ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
if (ret < 0 || ret >= MAXPATHLEN)
ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
if (ret != 3) {
fclose(f);
+ free(line);
return false;
}
len = end ? end - subsystem : strlen(subsystem);
l2 = strlen(v2+1);
if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
fclose(f);
+ free(line);
return true;
}
}
fclose(f);
+ if (line)
+ free(line);
return false;
}
-/*
- * Return cgroup of current task.
- * This assumes that task is in the same cgroup for each controller. This
- * may or may not *always* be a reasonable assumption - it generally is,
- * and handling or at least checking for this condition is a TODO.
- */
-int lxc_curcgroup(char *cgroup, int inlen)
+char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys)
{
- FILE *f;
- char *line = NULL, *p, *p2;
- int ret = 0;
- size_t len;
+ struct cgroup_desc *d;
- f = fopen("/proc/self/cgroup", "r");
- if (!f)
- return -1;
+ for (d = handler->cgroup; d; d = d->next) {
+ if (in_subsys_list(subsys, d->subsystems))
+ return d->realcgroup;
+ }
- while (getline(&line, &len, f) != -1) {
- if (strstr(line, ":freezer:") == NULL && strstr(line, ":devices:") == NULL)
- continue;
- p = rindex(line, ':');
- if (!p)
- continue;
- p++;
- len = strlen(p) + 1;
- p2 = p + len - 2;
- while (*p2 == '\n') { len--; *p2 = '\0'; p2--; }
- if (!cgroup || inlen <= 0) {
- ret = len;
- break;
- }
- if (cgroup && len > inlen) {
- ret = -1;
- break;
+ return NULL;
+}
+
+static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups,
+ int devices)
+{
+ struct lxc_list *iterator;
+ struct lxc_cgroup *cg;
+ int ret = -1;
+
+ if (lxc_list_empty(cgroups))
+ return 0;
+
+ lxc_list_for_each(iterator, cgroups) {
+ cg = iterator->elem;
+
+ if (devices == !strncmp("devices", cg->subsystem, 7)) {
+ if (strcmp(cg->subsystem, "devices.deny") == 0 &&
+ cgroup_devices_has_deny(h, cg->value))
+ continue;
+ if (strcmp(cg->subsystem, "devices.allow") == 0 &&
+ cgroup_devices_has_allow(h, cg->value))
+ continue;
+ if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) {
+ ERROR("Error setting %s to %s for %s\n",
+ cg->subsystem, cg->value, h->name);
+ goto out;
+ }
}
- strncpy(cgroup, p, len);
- ret = len;
- cgroup[len-1] = '\0';
- break;
+
+ DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
}
- if (line)
- free(line);
+ ret = 0;
+ INFO("cgroup has been setup");
+out:
return ret;
}
+
+int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups)
+{
+ return _setup_cgroup(h, cgroups, 1);
+}
+
+int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups)
+{
+ return _setup_cgroup(h, cgroups, 0);
+}