return ret;
}
-static int lxc_one_cgroup_attach(const char *name,
- struct mntent *mntent, pid_t pid)
+/*
+ * Write 'pid' as decimal text to the already opened descriptor 'fd'
+ * and close the descriptor afterwards, whether or not the write
+ * succeeded.
+ *
+ * Returns 0 if write() reported at least one byte written, -1 otherwise.
+ */
+static int lxc_one_cgroup_finish_attach(int fd, pid_t pid)
{
- FILE *f;
+	char pidstr[32];
+	int status = 0;
+
+	snprintf(pidstr, 32, "%ld", (long)pid);
+
+	if (write(fd, pidstr, strlen(pidstr)) <= 0) {
+		SYSERROR("failed to write pid '%ld' to fd '%d'", (long)pid, fd);
+		status = -1;
+	}
+
+	/* the descriptor is consumed by this call on every path */
+	close(fd);
+	return status;
+}
+
+/*
+ * Drop a descriptor obtained for a pending attach without writing
+ * anything to it. The result of close() is not inspected; this
+ * always returns 0.
+ */
+static int lxc_one_cgroup_dispose_attach(int fd)
+{
+	(void)close(fd);
+
+	return 0;
+}
+
+static int lxc_one_cgroup_prepare_attach(const char *name,
+ struct mntent *mntent)
+{
+ int fd;
char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
char *cgmnt = mntent->mnt_dir;
- int flags, ret = 0;
+ int flags;
int rc;
flags = get_cgroup_flags(mntent);
return -1;
}
- f = fopen(tasks, "w");
- if (!f) {
+ fd = open(tasks, O_WRONLY);
+ if (fd < 0) {
SYSERROR("failed to open '%s'", tasks);
return -1;
}
- if (fprintf(f, "%d", pid) <= 0) {
- SYSERROR("failed to write pid '%d' to '%s'", pid, tasks);
- ret = -1;
+ return fd;
+}
+
+/*
+ * Attach 'pid' for one cgroup mount entry in a single step: obtain the
+ * descriptor via lxc_one_cgroup_prepare_attach() and hand it straight
+ * to lxc_one_cgroup_finish_attach().
+ *
+ * Returns -1 if no descriptor could be obtained, otherwise the result
+ * of the finish step.
+ */
+static int lxc_one_cgroup_attach(const char *name, struct mntent *mntent, pid_t pid)
+{
+	int tasks_fd = lxc_one_cgroup_prepare_attach(name, mntent);
+
+	if (tasks_fd < 0) {
+		return -1;
}
- fclose(f);
+	return lxc_one_cgroup_finish_attach(tasks_fd, pid);
+}
+
+/*
+ * Abandon a prepared attach: dispose of every descriptor stored in
+ * 'data' (an int array that ends at the first negative entry) and free
+ * the array itself. A NULL 'data' is accepted and does nothing.
+ *
+ * Returns 0, or the last non-zero value reported while disposing of an
+ * individual descriptor.
+ */
+int lxc_cgroup_dispose_attach(void *data)
+{
+	int *cursor = data;
+	int ret = 0;
+
+	if (!cursor) {
+		return 0;
+	}
+
+	while (*cursor >= 0) {
+		int err = lxc_one_cgroup_dispose_attach(*cursor);
+
+		if (err) {
+			ret = err;
+		}
+		cursor++;
+	}
+
+	free(data);
return ret;
}
-/*
- * for each mounted cgroup, attach a pid to the cgroup for the container
- */
-int lxc_cgroup_attach(const char *name, pid_t pid)
+/*
+ * Complete a prepared attach: write 'pid' to every descriptor stored in
+ * 'data' (an int array that ends at the first negative entry) and free
+ * the array. A NULL 'data' is accepted and does nothing.
+ *
+ * Every descriptor in the array is closed exactly once and 'data' is
+ * freed on all paths, so the caller must not use 'data' afterwards.
+ *
+ * Returns 0 if all writes succeeded, -1 as soon as one of them fails.
+ */
+int lxc_cgroup_finish_attach(void *data, pid_t pid)
+{
+	int *fds = data;
+	int ret = 0;
+
+	if (!fds) {
+		return 0;
+	}
+
+	for (; *fds >= 0; fds++) {
+		int err = lxc_one_cgroup_finish_attach(*fds, pid);
+
+		/* the helper closed the fd even if the write failed, so
+		 * the slot must never be closed again
+		 */
+		*fds = -1;
+
+		if (err) {
+			ret = -1;
+			fds++;
+			break;
+		}
+	}
+
+	/* after a failure, get rid of the descriptors we never reached;
+	 * rescanning from the start of the array would stop at the first
+	 * slot already set to -1 and leak everything behind it
+	 */
+	for (; *fds >= 0; fds++) {
+		lxc_one_cgroup_dispose_attach(*fds);
+		*fds = -1;
+	}
+
+	free(data);
+
+	return ret;
+}
+
+int lxc_cgroup_prepare_attach(const char *name, void **data)
{
struct mntent *mntent;
FILE *file = NULL;
int err = -1;
int found = 0;
+ int *fds;
+ int i;
+ static const int MAXFDS = 256;
file = setmntent(MTAB, "r");
if (!file) {
return -1;
}
+ /* create a large enough buffer for all practical
+ * use cases
+ */
+ fds = malloc(sizeof(int) * MAXFDS);
+ if (!fds) {
+ err = -1;
+ goto out;
+ }
+ for (i = 0; i < MAXFDS; i++) {
+ fds[i] = -1;
+ }
+
+ err = 0;
+ i = 0;
while ((mntent = getmntent(file))) {
+ if (i >= MAXFDS - 1) {
+ ERROR("too many cgroups to attach to, aborting");
+ lxc_cgroup_dispose_attach(fds);
+ errno = ENOMEM;
+ err = -1;
+ goto out;
+ }
+
DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
if (strcmp(mntent->mnt_type, "cgroup"))
INFO("[%d] found cgroup mounted at '%s',opts='%s'",
++found, mntent->mnt_dir, mntent->mnt_opts);
- err = lxc_one_cgroup_attach(name, mntent, pid);
- if (err)
+ fds[i] = lxc_one_cgroup_prepare_attach(name, mntent);
+ if (fds[i] < 0) {
+ err = fds[i];
+ lxc_cgroup_dispose_attach(fds);
goto out;
+ }
+ i++;
};
if (!found)
ERROR("No cgroup mounted on the system");
+ *data = fds;
+
out:
endmntent(file);
return err;
}
+/*
+ * for each mounted cgroup, attach a pid to the cgroup for the container.
+ *
+ * This is the one-shot variant: it runs the prepare step and, if that
+ * did not fail, immediately runs the finish step with the state the
+ * prepare step handed back. A negative result from the prepare step is
+ * returned unchanged.
+ */
+int lxc_cgroup_attach(const char *name, pid_t pid)
+{
+	void *attach_state = NULL;
+	int rc = lxc_cgroup_prepare_attach(name, &attach_state);
+
+	return rc < 0 ? rc : lxc_cgroup_finish_attach(attach_state, pid);
+}
+
/*
* rename cgname, which is under cgparent, to a new name starting
* with 'cgparent/dead'. That way cgname can be reused. Return
struct passwd *passwd;
struct lxc_proc_context_info *init_ctx;
struct lxc_handler *handler;
+ void *cgroup_data = NULL;
uid_t uid;
char *curdir;
return -1;
}
+ if (!elevated_privileges) {
+ /* we have to do this now since /sys/fs/cgroup may not
+ * be available inside the container or we may not have
+ * the required permissions anymore
+ */
+ ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
+ if (ret < 0) {
+ ERROR("failed to prepare attaching to cgroup");
+ return -1;
+ }
+ }
+
+ curdir = get_current_dir_name();
+
+ /* we need to attach before we fork since certain namespaces
+ * (such as pid namespaces) only really affect children of the
+ * current process and not the process itself
+ */
+ ret = lxc_attach_to_ns(init_pid);
+ if (ret < 0) {
+ ERROR("failed to enter the namespace");
+ return -1;
+ }
+
+ if (curdir && chdir(curdir))
+ WARN("could not change directory to '%s'", curdir);
+
+ free(curdir);
+
/* hack: we need sync.h infrastructure - and that needs a handler */
handler = calloc(1, sizeof(*handler));
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
return -1;
- if (!elevated_privileges && lxc_cgroup_attach(my_args.name, pid))
- return -1;
+ /* now that we are done with all privileged operations,
+ * we can add ourselves to the cgroup. Since we smuggled in
+ * the fds earlier, we still have write permission
+ */
+ if (!elevated_privileges) {
+ /* since setns() for pid namespaces only really
+ * affects child processes, the pid we have is
+ * still valid outside the container, so this is
+ * fine
+ */
+ ret = lxc_cgroup_finish_attach(cgroup_data, pid);
+ if (ret < 0) {
+ ERROR("failed to attach process to cgroup");
+ return -1;
+ }
+ }
/* tell the child we are done initializing */
if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
if (!pid) {
lxc_sync_fini_parent(handler);
-
- curdir = get_current_dir_name();
-
- ret = lxc_attach_to_ns(init_pid);
- if (ret < 0) {
- ERROR("failed to enter the namespace");
- return -1;
- }
-
- if (curdir && chdir(curdir))
- WARN("could not change directory to '%s'", curdir);
-
- free(curdir);
+ lxc_cgroup_dispose_attach(cgroup_data);
if (new_personality < 0)
new_personality = init_ctx->personality;