confile: add "force" to cgroup:{mixed,ro,rw}

author Shukui Yang <yangshukui@huawei.com>

Fri, 16 Feb 2018 04:16:40 +0000 (23:16 -0500)

committer Christian Brauner <christian.brauner@ubuntu.com>

Wed, 21 Feb 2018 14:45:55 +0000 (15:45 +0100)
author Shukui Yang <yangshukui@huawei.com>
Fri, 16 Feb 2018 04:16:40 +0000 (23:16 -0500)
committer Christian Brauner <christian.brauner@ubuntu.com>
Wed, 21 Feb 2018 14:45:55 +0000 (15:45 +0100)
diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in

index bbf0c681cd4a516b4978c7043084d26aa8718b16..a75bdba24c541dc3b6fa3a9bdbc43426127da06f 100644 (file)
--- a/doc/lxc.container.conf.sgml.in
+++ b/doc/lxc.container.conf.sgml.in
@@ -930,36 +930,75 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                    <filename>/sys</filename> as read-write
                  </para>
                </listitem>
+
                <listitem>
                  <para>
                    <option>cgroup:mixed</option>:
-                  mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
-                  create directories for all hierarchies to which
-                  the container is added, create subdirectories
-                  there with the name of the cgroup, and bind-mount
-                  the container's own cgroup into that directory.
-                  The container will be able to write to its own
-                  cgroup directory, but not the parents, since they
-                  will be remounted read-only.
+                  Mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
+                  create directories for all hierarchies to which the container
+                  is added, create subdirectories in those hierarchies with the
+                  name of the cgroup, and bind-mount the container's own cgroup
+                  into that directory. The container will be able to write to
+                  its own cgroup directory, but not the parents, since they will
+                  be remounted read-only.
                  </para>
                </listitem>
+
                <listitem>
                  <para>
-                  <option>cgroup:ro</option>: similar to
-                  <option>cgroup:mixed</option>, but everything will
-                be mounted read-only.
+                  <option>cgroup:mixed:force</option>:
+                  The <option>force</option> option will cause LXC to perform
+                  the cgroup mounts for the container under all circumstances.
+                  Otherwise it is similar to <option>cgroup:mixed</option>.
+                  This is mainly useful when cgroup namespaces are enabled, in
+                  which case LXC normally leaves mounting cgroups to the init
+                  binary of the container since it is perfectly safe to do so.
                  </para>
                </listitem>
+
+              <listitem>
+                <para>
+                  <option>cgroup:ro</option>:
+                  similar to <option>cgroup:mixed</option>, but everything will
+                  be mounted read-only.
+                </para>
+              </listitem>
+
+              <listitem>
+                <para>
+                  <option>cgroup:ro:force</option>:
+                  The <option>force</option> option will cause LXC to perform
+                  the cgroup mounts for the container under all circumstances.
+                  Otherwise it is similar to <option>cgroup:ro</option>.
+                  This is mainly useful when cgroup namespaces are enabled, in
+                  which case LXC normally leaves mounting cgroups to the init
+                  binary of the container since it is perfectly safe to do so.
+                </para>
+              </listitem>
+
                <listitem>
                  <para>
                    <option>cgroup:rw</option>: similar to
-                  <option>cgroup:mixed</option>, but everything will
-                  be mounted read-write. Note that the paths leading
-                  up to the container's own cgroup will be writable,
-                  but will not be a cgroup filesystem but just part
-                  of the tmpfs of <filename>/sys/fs/cgroup</filename>
+                  <option>cgroup:mixed</option>, but everything will be mounted
+                  read-write. Note that the paths leading up to the container's
+                  own cgroup will be writable, but will not be a cgroup
+                  filesystem but just part of the tmpfs of
+                  <filename>/sys/fs/cgroup</filename>.
+                </para>
+              </listitem>
+
+              <listitem>
+                <para>
+                  <option>cgroup:rw:force</option>:
+                  The <option>force</option> option will cause LXC to perform
+                  the cgroup mounts for the container under all circumstances.
+                  Otherwise it is similar to <option>cgroup:rw</option>.
+                  This is mainly useful when cgroup namespaces are enabled, in
+                  which case LXC normally leaves mounting cgroups to the init
+                  binary of the container since it is perfectly safe to do so.
                  </para>
                </listitem>
+
                <listitem>
                  <para>
                    <option>cgroup</option> (without specifier):
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 380fb22f503c9061b202c38fc1a5cd1e6a74aabd..1d2d4e317cc27f3b55c6bf3b26e4963c436b47c9 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -2023,26 +2023,31 @@ static int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
  
  static bool cgfsng_mount(void *hdata, const char *root, int type)
  {
-       int i;
+       int i, ret;
         char *tmpfspath = NULL;
         bool retval = false;
         struct lxc_handler *handler = hdata;
         struct cgfsng_handler_data *d = handler->cgroup_data;
-       bool has_cgns = false, has_sys_admin = true;
+       bool has_cgns = false, wants_force_mount = false;
  
         if ((type & LXC_AUTO_CGROUP_MASK) == 0)
                 return true;
  
-       has_cgns = cgns_supported();
-       if (!lxc_list_empty(&handler->conf->keepcaps))
-               has_sys_admin = in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
-       else
-               has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+       if (type & LXC_AUTO_CGROUP_FORCE) {
+               type &= ~LXC_AUTO_CGROUP_FORCE;
+               wants_force_mount = true;
+       }
  
-       if (has_cgns && has_sys_admin)
-               return true;
+       if (!wants_force_mount){
+               if (!lxc_list_empty(&handler->conf->keepcaps))
+                       wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+               else
+                       wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+       }
  
-       tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+       has_cgns = cgns_supported();
+       if (has_cgns && !wants_force_mount)
+               return true;
  
         if (type == LXC_AUTO_CGROUP_NOSPEC)
                 type = LXC_AUTO_CGROUP_MIXED;
@@ -2050,17 +2055,17 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                 type = LXC_AUTO_CGROUP_FULL_MIXED;
  
         /* Mount tmpfs */
-       if (safe_mount("cgroup_root", tmpfspath, "tmpfs",
-                       MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME,
-                       "size=10240k,mode=755",
-                       root) < 0)
-               goto  bad;
+       tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+       ret = safe_mount("cgroup_root", tmpfspath, "tmpfs",
+                        MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+                        "size=10240k,mode=755", root);
+       if (ret < 0)
+               goto on_error;
  
         for (i = 0; hierarchies[i]; i++) {
                 char *controllerpath, *path2;
                 struct hierarchy *h = hierarchies[i];
                 char *controller = strrchr(h->mountpoint, '/');
-               int r;
  
                 if (!controller)
                         continue;
@@ -2070,49 +2075,56 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                         free(controllerpath);
                         continue;
                 }
-               if (mkdir(controllerpath, 0755) < 0) {
+               ret = mkdir(controllerpath, 0755);
+               if (ret < 0) {
                         SYSERROR("Error creating cgroup path: %s", controllerpath);
                         free(controllerpath);
-                       goto bad;
+                       goto on_error;
                 }
  
-               if (has_cgns && !has_sys_admin) {
+               if (has_cgns && wants_force_mount) {
                         /* If cgroup namespaces are supported but the mount was
                          * forced or the container will not have CAP_SYS_ADMIN
                          * after it has started we need to mount the cgroups manually.
                          */
-                       r = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+                       ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
                         free(controllerpath);
-                       if (r < 0)
-                               goto bad;
+                       if (ret < 0)
+                               goto on_error;
+
                         continue;
                 }
  
-               if (mount_cgroup_full(type, h, controllerpath, d->container_cgroup) < 0) {
+               ret = mount_cgroup_full(type, h, controllerpath, d->container_cgroup);
+               if (ret < 0) {
                         free(controllerpath);
-                       goto bad;
+                       goto on_error;
                 }
+
                 if (!cg_mount_needs_subdirs(type)) {
                         free(controllerpath);
                         continue;
                 }
-               path2 = must_make_path(controllerpath, h->base_cgroup, d->container_cgroup, NULL);
-               if (mkdir_p(path2, 0755) < 0) {
+
+               path2 = must_make_path(controllerpath, h->base_cgroup,
+                                      d->container_cgroup, NULL);
+               ret = mkdir_p(path2, 0755);
+               if (ret < 0) {
                         free(controllerpath);
                         free(path2);
-                       goto bad;
+                       goto on_error;
                 }
  
-               r = do_secondstage_mounts_if_needed(type, h, controllerpath, path2,
-                                                   d->container_cgroup);
+               ret = do_secondstage_mounts_if_needed(
+                   type, h, controllerpath, path2, d->container_cgroup);
                 free(controllerpath);
                 free(path2);
-               if (r < 0)
-                       goto bad;
+               if (ret < 0)
+                       goto on_error;
         }
         retval = true;
  
-bad:
+on_error:
         free(tmpfspath);
         return retval;
  }
diff --git a/src/lxc/conf.c b/src/lxc/conf.c

index f2f326c46ef3a390984a0a0f8ab9b5e6efee9078..28d27878c6d5b2ef0a5578d5739fff92d59e450a 100644 (file)
--- a/src/lxc/conf.c
+++ b/src/lxc/conf.c
@@ -570,7 +570,7 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
         if (flags & LXC_AUTO_CGROUP_MASK) {
                 int cg_flags;
  
-               cg_flags = flags & LXC_AUTO_CGROUP_MASK;
+               cg_flags = flags & (LXC_AUTO_CGROUP_MASK & ~LXC_AUTO_CGROUP_FORCE);
                 /* If the type of cgroup mount was not specified, it depends on the
                  * container's capabilities as to what makes sense: if we have
                  * CAP_SYS_ADMIN, the read-only part can be remounted read-write
@@ -592,7 +592,8 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
                         else
                                 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
                 }
-
+               if (flags & LXC_AUTO_CGROUP_FORCE)
+                               cg_flags |= LXC_AUTO_CGROUP_FORCE;
                 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
                         SYSERROR("error mounting /sys/fs/cgroup");
                         return -1;
@@ -3168,7 +3169,7 @@ int lxc_setup(struct lxc_handler *handler)
          * before, /sys could not have been mounted
          * (is either mounted automatically or via fstab entries)
          */
-       if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
+       if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & (LXC_AUTO_CGROUP_MASK), handler) < 0) {
                 ERROR("failed to setup the automatic mounts for '%s'", name);
                 return -1;
         }
diff --git a/src/lxc/conf.h b/src/lxc/conf.h

index 71df182d6e08f781e6ba227f33e401b7d7a09237..8f814895f19ca466f49299953f76cb0927ddad4b 100644 (file)
--- a/src/lxc/conf.h
+++ b/src/lxc/conf.h
@@ -160,9 +160,9 @@ enum {
          * variants, which is safe. */
         LXC_AUTO_CGROUP_NOSPEC        = 0x0B0,   /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
         LXC_AUTO_CGROUP_FULL_NOSPEC   = 0x0E0,   /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
-       LXC_AUTO_CGROUP_MASK          = 0x0F0,
-
-       LXC_AUTO_ALL_MASK             = 0x0FF,   /* all known settings */
+       LXC_AUTO_CGROUP_FORCE         = 0x100,   /* mount cgroups even when cgroup namespaces are supported */
+       LXC_AUTO_CGROUP_MASK          = 0x1F0,   /* all known cgroup options, including LXC_AUTO_CGROUP_FORCE */
+       LXC_AUTO_ALL_MASK             = 0x1FF,   /* all known settings */
  };
  
  /*
diff --git a/src/lxc/confile.c b/src/lxc/confile.c

index ccd45a2fa6f3ba0b17ae537f4236884800f34d68..f6253114f34a748f5ace1ed2aaff8bb80acf8435 100644 (file)
--- a/src/lxc/confile.c
+++ b/src/lxc/confile.c
@@ -1914,26 +1914,30 @@ static int set_config_mount_auto(const char *key, const char *value,
                 int mask;
                 int flag;
         } allowed_auto_mounts[] = {
-           { "proc",              LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_MIXED         },
-           { "proc:mixed",        LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_MIXED         },
-           { "proc:rw",           LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_RW            },
-           { "sys",               LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_MIXED          },
-           { "sys:ro",            LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_RO             },
-           { "sys:mixed",         LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_MIXED          },
-           { "sys:rw",            LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_RW             },
-           { "cgroup",            LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC      },
-           { "cgroup:mixed",      LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED       },
-           { "cgroup:ro",         LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO          },
-           { "cgroup:rw",         LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW          },
-           { "cgroup-full",       LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC },
-           { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED  },
-           { "cgroup-full:ro",    LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO     },
-           { "cgroup-full:rw",    LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW     },
-           /* NB: For adding anything that is just a single on/off, but has
-            *     no options: keep mask and flag identical and just define the
-            *     enum value as an unused bit so far
+           { "proc",                    LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_MIXED                            },
+           { "proc:mixed",              LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_MIXED                            },
+           { "proc:rw",                 LXC_AUTO_PROC_MASK,   LXC_AUTO_PROC_RW                               },
+           { "sys",                     LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_MIXED                             },
+           { "sys:ro",                  LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_RO                                },
+           { "sys:mixed",               LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_MIXED                             },
+           { "sys:rw",                  LXC_AUTO_SYS_MASK,    LXC_AUTO_SYS_RW                                },
+           { "cgroup",                  LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC                         },
+           { "cgroup:mixed",            LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED                          },
+           { "cgroup:ro",               LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO                             },
+           { "cgroup:rw",               LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW                             },
+           { "cgroup:force",            LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC | LXC_AUTO_CGROUP_FORCE },
+           { "cgroup:mixed:force",      LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED | LXC_AUTO_CGROUP_FORCE  },
+           { "cgroup:ro:force",         LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO | LXC_AUTO_CGROUP_FORCE     },
+           { "cgroup:rw:force",         LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW | LXC_AUTO_CGROUP_FORCE     },
+           { "cgroup-full",             LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC                    },
+           { "cgroup-full:mixed",       LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED                     },
+           { "cgroup-full:ro",          LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO                        },
+           { "cgroup-full:rw",          LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW                        },
+           /* For adding anything that is just a single on/off, but has no
+            * options: keep mask and flag identical and just define the enum
+            * value as an unused bit so far
              */
-           { NULL,                0,                    0                           }
+           { NULL,                      0,                     0                                             }
         };
  
         if (lxc_config_value_empty(value)) {
author	Shukui Yang <yangshukui@huawei.com>
	Fri, 16 Feb 2018 04:16:40 +0000 (23:16 -0500)
committer	Christian Brauner <christian.brauner@ubuntu.com>
	Wed, 21 Feb 2018 14:45:55 +0000 (15:45 +0100)
doc/lxc.container.conf.sgml.in		patch \| blob \| blame \| history
src/lxc/cgroups/cgfsng.c		patch \| blob \| blame \| history
src/lxc/conf.c		patch \| blob \| blame \| history
src/lxc/conf.h		patch \| blob \| blame \| history
src/lxc/confile.c		patch \| blob \| blame \| history