lxc-attach: Remodel cgroup attach logic and attach to namespaces again in parent...

author Christian Seiler <christian@iwakd.de>

Tue, 21 Aug 2012 22:03:12 +0000 (00:03 +0200)

committer Stéphane Graber <stgraber@ubuntu.com>

Mon, 12 Nov 2012 18:13:52 +0000 (13:13 -0500)
author Christian Seiler <christian@iwakd.de>
Tue, 21 Aug 2012 22:03:12 +0000 (00:03 +0200)
committer Stéphane Graber <stgraber@ubuntu.com>
Mon, 12 Nov 2012 18:13:52 +0000 (13:13 -0500)
diff --git a/src/lxc/cgroup.c b/src/lxc/cgroup.c

index 69ba4e5d1ddcb970473c8a9a7ae366fab9cf4c3e..a02ebc2fa65aa604b2016e43990e5f8c562cb629 100644 (file)
--- a/src/lxc/cgroup.c
+++ b/src/lxc/cgroup.c
@@ -254,13 +254,38 @@ static int cgroup_enable_clone_children(const char *path)
         return ret;
  }
  
-static int lxc_one_cgroup_attach(const char *name,
-                                struct mntent *mntent, pid_t pid)
+static int lxc_one_cgroup_finish_attach(int fd, pid_t pid)
  {
-       FILE *f;
+       char buf[32];
+       int ret;
+
+       snprintf(buf, 32, "%ld", (long)pid);
+
+       ret = write(fd, buf, strlen(buf));
+       if (ret <= 0) {
+               SYSERROR("failed to write pid '%ld' to fd '%d'", (long)pid, fd);
+               ret = -1;
+       } else {
+               ret = 0;
+       }
+
+       close(fd);
+       return ret;
+}
+
+static int lxc_one_cgroup_dispose_attach(int fd)
+{
+       close(fd);
+       return 0;
+}
+
+static int lxc_one_cgroup_prepare_attach(const char *name,
+                                        struct mntent *mntent)
+{
+       int fd;
         char tasks[MAXPATHLEN], initcgroup[MAXPATHLEN];
         char *cgmnt = mntent->mnt_dir;
-       int flags, ret = 0;
+       int flags;
         int rc;
  
         flags = get_cgroup_flags(mntent);
@@ -274,31 +299,83 @@ static int lxc_one_cgroup_attach(const char *name,
                 return -1;
         }
  
-       f = fopen(tasks, "w");
-       if (!f) {
+       fd = open(tasks, O_WRONLY);
+       if (fd < 0) {
                 SYSERROR("failed to open '%s'", tasks);
                 return -1;
         }
  
-       if (fprintf(f, "%d", pid) <= 0) {
-               SYSERROR("failed to write pid '%d' to '%s'", pid, tasks);
-               ret = -1;
+       return fd;
+}
+
+static int lxc_one_cgroup_attach(const char *name, struct mntent *mntent, pid_t pid)
+{
+       int fd;
+
+       fd = lxc_one_cgroup_prepare_attach(name, mntent);
+       if (fd < 0) {
+               return -1;
         }
  
-       fclose(f);
+       return lxc_one_cgroup_finish_attach(fd, pid);
+}
+
+int lxc_cgroup_dispose_attach(void *data)
+{
+       int *fds = data;
+       int ret, err;
+
+       if (!fds) {
+               return 0;
+       }
+
+       ret = 0;
+
+       for (; *fds >= 0; fds++) {
+               err = lxc_one_cgroup_dispose_attach(*fds);
+               if (err) {
+                       ret = err;
+               }
+       }
+
+       free(data);
  
         return ret;
  }
  
-/*
- * for each mounted cgroup, attach a pid to the cgroup for the container
- */
-int lxc_cgroup_attach(const char *name, pid_t pid)
+/* Write pid to each fd of the -1-terminated array data, then free data.
+ * Returns 0 on success or when data is NULL, -1 if any write failed. */
+int lxc_cgroup_finish_attach(void *data, pid_t pid)
+{
+       int *fds = data;
+       int ret = 0;
+
+       if (!fds) {
+               return 0;
+       }
+       /* the helper always closes its fd; once one write fails we
+        * only close the remaining fds instead of writing to them
+        */
+       for (; *fds >= 0; fds++) {
+               if (ret < 0) {
+                       lxc_one_cgroup_dispose_attach(*fds);
+               } else if (lxc_one_cgroup_finish_attach(*fds, pid)) {
+                       ret = -1;
+               }
+       }
+       free(data);
+       return ret;
+}
+
+int lxc_cgroup_prepare_attach(const char *name, void **data)
  {
         struct mntent *mntent;
         FILE *file = NULL;
         int err = -1;
         int found = 0;
+       int *fds;
+       int i;
+       static const int MAXFDS = 256;
  
         file = setmntent(MTAB, "r");
         if (!file) {
@@ -306,7 +383,29 @@ int lxc_cgroup_attach(const char *name, pid_t pid)
                 return -1;
         }
  
+       /* create a large enough buffer for all practical
+        * use cases
+        */
+       fds = malloc(sizeof(int) * MAXFDS);
+       if (!fds) {
+               err = -1;
+               goto out;
+       }
+       for (i = 0; i < MAXFDS; i++) {
+               fds[i] = -1;
+       }
+
+       err = 0;
+       i = 0;
         while ((mntent = getmntent(file))) {
+               if (i >= MAXFDS - 1) {
+                       ERROR("too many cgroups to attach to, aborting");
+                       lxc_cgroup_dispose_attach(fds);
+                       errno = ENOMEM;
+                       err = -1;
+                       goto out;
+               }
+
                 DEBUG("checking '%s' (%s)", mntent->mnt_dir, mntent->mnt_type);
  
                 if (strcmp(mntent->mnt_type, "cgroup"))
@@ -317,19 +416,41 @@ int lxc_cgroup_attach(const char *name, pid_t pid)
                 INFO("[%d] found cgroup mounted at '%s',opts='%s'",
                      ++found, mntent->mnt_dir, mntent->mnt_opts);
  
-               err = lxc_one_cgroup_attach(name, mntent, pid);
-               if (err)
+               fds[i] = lxc_one_cgroup_prepare_attach(name, mntent);
+               if (fds[i] < 0) {
+                       err = fds[i];
+                       lxc_cgroup_dispose_attach(fds);
                         goto out;
+               }
+               i++;
         };
  
         if (!found)
                 ERROR("No cgroup mounted on the system");
  
+       *data = fds;
+
  out:
         endmntent(file);
         return err;
  }
  
+/*
+ * for each mounted cgroup, attach a pid to the cgroup for the container
+ */
+int lxc_cgroup_attach(const char *name, pid_t pid)
+{
+       void *data = NULL;
+       int ret;
+
+       ret = lxc_cgroup_prepare_attach(name, &data);
+       if (ret < 0) {
+               return ret;
+       }
+
+       return lxc_cgroup_finish_attach(data, pid);
+}
+
  /*
   * rename cgname, which is under cgparent, to a new name starting
   * with 'cgparent/dead'.  That way cgname can be reused.  Return
diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h

index 3c90696dc16d45b98302e1c7335146a10672330e..8167f3920d057f5e6e98e8977f138c04941781fb 100644 (file)
--- a/src/lxc/cgroup.h
+++ b/src/lxc/cgroup.h
@@ -31,5 +31,8 @@ extern int lxc_cgroup_destroy(const char *name);
  extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name);
  extern int lxc_cgroup_nrtasks(const char *name);
  extern int lxc_cgroup_attach(const char *name, pid_t pid);
+extern int lxc_cgroup_prepare_attach(const char *name, void **data);
+extern int lxc_cgroup_finish_attach(void *data, pid_t pid);
+extern int lxc_cgroup_dispose_attach(void *data);
  extern int lxc_ns_is_mounted(void);
  #endif
diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c

index 955e9f445b359b9d94e6c9097e42dd254d4b951f..e4f604ba329fc83ff3db66294e522081fde3659b 100644 (file)
--- a/src/lxc/lxc_attach.c
+++ b/src/lxc/lxc_attach.c
@@ -96,6 +96,7 @@ int main(int argc, char *argv[])
         struct passwd *passwd;
         struct lxc_proc_context_info *init_ctx;
         struct lxc_handler *handler;
+       void *cgroup_data = NULL;
         uid_t uid;
         char *curdir;
  
@@ -124,6 +125,35 @@ int main(int argc, char *argv[])
                 return -1;
         }
  
+       if (!elevated_privileges) {
+               /* we have to do this now since /sys/fs/cgroup may not
+                * be available inside the container or we may not have
+                * the required permissions anymore
+                */
+               ret = lxc_cgroup_prepare_attach(my_args.name, &cgroup_data);
+               if (ret < 0) {
+                       ERROR("failed to prepare attaching to cgroup");
+                       return -1;
+               }
+       }
+
+       curdir = get_current_dir_name();
+
+       /* we need to attach before we fork since certain namespaces
+        * (such as pid namespaces) only really affect children of the
+        * current process and not the process itself
+        */
+       ret = lxc_attach_to_ns(init_pid);
+       if (ret < 0) {
+               ERROR("failed to enter the namespace");
+               return -1;
+       }
+
+       if (curdir && chdir(curdir))
+               WARN("could not change directory to '%s'", curdir);
+
+       free(curdir);
+
         /* hack: we need sync.h infrastructure - and that needs a handler */
         handler = calloc(1, sizeof(*handler));
  
@@ -150,8 +180,22 @@ int main(int argc, char *argv[])
                 if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
                         return -1;
  
-               if (!elevated_privileges && lxc_cgroup_attach(my_args.name, pid))
-                       return -1;
+               /* now that we are done with all privileged operations,
+                * we can add ourselves to the cgroup. Since we smuggled in
+                * the fds earlier, we still have write permission
+                */
+               if (!elevated_privileges) {
+                       /* since setns() for pid namespaces only really
+                        * affects child processes, the pid we have is
+                        * still valid outside the container, so this is
+                        * fine
+                        */
+                       ret = lxc_cgroup_finish_attach(cgroup_data, pid);
+                       if (ret < 0) {
+                               ERROR("failed to attach process to cgroup");
+                               return -1;
+                       }
+               }
  
                 /* tell the child we are done initializing */
                 if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE))
@@ -175,19 +219,7 @@ int main(int argc, char *argv[])
  
         if (!pid) {
                 lxc_sync_fini_parent(handler);
-
-               curdir = get_current_dir_name();
-
-               ret = lxc_attach_to_ns(init_pid);
-               if (ret < 0) {
-                       ERROR("failed to enter the namespace");
-                       return -1;
-               }
-
-               if (curdir && chdir(curdir))
-                       WARN("could not change directory to '%s'", curdir);
-
-               free(curdir);
+               lxc_cgroup_dispose_attach(cgroup_data);
  
                 if (new_personality < 0)
                         new_personality = init_ctx->personality;
author	Christian Seiler <christian@iwakd.de>
	Tue, 21 Aug 2012 22:03:12 +0000 (00:03 +0200)
committer	Stéphane Graber <stgraber@ubuntu.com>
	Mon, 12 Nov 2012 18:13:52 +0000 (13:13 -0500)
src/lxc/cgroup.c		patch \| blob \| blame \| history
src/lxc/cgroup.h		patch \| blob \| blame \| history
src/lxc/lxc_attach.c		patch \| blob \| blame \| history