]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
lxc.mount.auto: improve defaults for cgroup and cgroup-full
authorChristian Seiler <christian@iwakd.de>
Sat, 3 May 2014 18:57:46 +0000 (20:57 +0200)
committerSerge Hallyn <serge.hallyn@ubuntu.com>
Tue, 6 May 2014 15:20:10 +0000 (10:20 -0500)
If the user specifies cgroup or cgroup-full without a specifier (:ro,
:rw or :mixed), this changes the behavior. Previously, these were
simple aliases for the :mixed variants; now they depend on whether the
container also has CAP_SYS_ADMIN; if it does they resolve to the :rw
variants, if it doesn't to the :mixed variants (as before).

If a container has CAP_SYS_ADMIN privileges, any filesystem can be
remounted read-write from within, so initially mounting the cgroup
filesystems partially read-only as a default creates a false sense of
security. It is better to default to full read-write mounts to show the
administrator what keeping CAP_SYS_ADMIN entails.

If an administrator really wants both CAP_SYS_ADMIN and the :mixed
variant of cgroup or cgroup-full automatic mounts, they can still
specify that explicitly; this commit just changes the default without
specifier.

Signed-off-by: Christian Seiler <christian@iwakd.de>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
doc/lxc.container.conf.sgml.in
src/lxc/cgfs.c
src/lxc/conf.c
src/lxc/conf.h
src/lxc/confile.c

index d3e3ef80fb761dc432a2c50997093d35ccb1c9c8..6e9688971f7a1096f9194ace3988ded27cdcb398 100644 (file)
@@ -743,8 +743,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
              </listitem>
              <listitem>
                <para>
-                 <option>cgroup:mixed</option> (or
-                 <option>cgroup</option>):
+                 <option>cgroup:mixed</option>:
                  mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
                  create directories for all hierarchies to which
                  the container is added, create subdirectories
@@ -774,8 +773,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
              </listitem>
              <listitem>
                <para>
-                 <option>cgroup-full:mixed</option> (or
-                 <option>cgroup-full</option>):
+                 <option>cgroup</option> (without specifier):
+                 defaults to <option>cgroup:rw</option> if the
+                 container retains the CAP_SYS_ADMIN capability,
+                 <option>cgroup:mixed</option> otherwise.
+               </para>
+             </listitem>
+             <listitem>
+               <para>
+                 <option>cgroup-full:mixed</option>:
                  mount a tmpfs to <filename>/sys/fs/cgroup</filename>,
                  create directories for all hierarchies to which
                  the container is added, bind-mount the hierarchies
@@ -810,6 +816,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                  do so anyway.)
                </para>
              </listitem>
+             <listitem>
+               <para>
+                 <option>cgroup-full</option> (without specifier):
+                 defaults to <option>cgroup-full:rw</option> if the
+                 container retains the CAP_SYS_ADMIN capability,
+                 <option>cgroup-full:mixed</option> otherwise.
+               </para>
+             </listitem>
            </itemizedlist>
            <para>
              Note that if automatic mounting of the cgroup filesystem
index d75037a6065666fab2b30f4456463a6ccd003f8b..d59a305057f3938aa95a59b844770ff832227ba4 100644 (file)
@@ -1344,6 +1344,15 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
                return false;
        base_info = cgfs_d->info;
 
+       /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't
+        * have access to the lxc_conf object at this point. It really should be up
+        * to the caller to fix this, but this doesn't really hurt.
+        */
+       if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+               type = LXC_AUTO_CGROUP_FULL_MIXED;
+       else if (type == LXC_AUTO_CGROUP_NOSPEC)
+               type = LXC_AUTO_CGROUP_MIXED;
+
        if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) {
                ERROR("could not mount cgroups into container: invalid type specified internally");
                errno = EINVAL;
index 716fcad09b56692a3eb34c28c85651d991d90ac7..78d9de2ce987bfa2037c3c08682f4a96b265209b 100644 (file)
@@ -115,6 +115,12 @@ lxc_log_define(lxc_conf, lxc);
 #define LO_FLAGS_AUTOCLEAR 4
 #endif
 
+/* needed for cgroup automount checks, regardless of whether we
+ * have included linux/capability.h or not */
+#ifndef CAP_SYS_ADMIN
+#define CAP_SYS_ADMIN 21
+#endif
+
 /* Define pivot_root() if missing from the C library */
 #ifndef HAVE_PIVOT_ROOT
 static int pivot_root(const char * new_root, const char * put_old)
@@ -164,6 +170,9 @@ struct caps_opt {
        int value;
 };
 
+/* Declare this here, since we don't want to reshuffle the whole file. */
+static int in_caplist(int cap, struct lxc_list *caps);
+
 static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
 static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
 static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
@@ -743,8 +752,32 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha
        }
 
        if (flags & LXC_AUTO_CGROUP_MASK) {
-               if (!cgroup_mount(conf->rootfs.mount, handler,
-                                 flags & LXC_AUTO_CGROUP_MASK)) {
+               int cg_flags;
+
+               cg_flags = flags & LXC_AUTO_CGROUP_MASK;
+               /* If the type of cgroup mount was not specified, it depends on the
+                * container's capabilities as to what makes sense: if we have
+                * CAP_SYS_ADMIN, the read-only part can be remounted read-write
+                * anyway, so we may as well default to read-write; then the admin
+                * will not be given a false sense of security. (And if they really
+                * want mixed r/o r/w, then they can explicitly specify :mixed.)
+                * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
+                * :mixed, because then the container can't remount it read-write. */
+               if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
+                       int has_sys_admin = 0;
+                       if (!lxc_list_empty(&conf->keepcaps)) {
+                               has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
+                       } else {
+                               has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
+                       }
+                       if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
+                               cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
+                       } else {
+                               cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
+                       }
+               }
+
+               if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
                        SYSERROR("error mounting /sys/fs/cgroup");
                        return -1;
                }
@@ -2192,6 +2225,20 @@ static int parse_cap(const char *cap)
        return capid;
 }
 
+int in_caplist(int cap, struct lxc_list *caps)
+{
+       struct lxc_list *iterator;
+       int capid;
+
+       lxc_list_for_each(iterator, caps) {
+               capid = parse_cap(iterator->elem);
+               if (capid == cap)
+                       return 1;
+       }
+
+       return 0;
+}
+
 static int setup_caps(struct lxc_list *caps)
 {
        struct lxc_list *iterator;
index c3705873ce6ad067225a110610f2dcb8505b3b75..865b87ac9062f36709135c8dd1d7bba0776fe166 100644 (file)
@@ -240,9 +240,16 @@ enum {
        LXC_AUTO_CGROUP_FULL_RO       = 0x040,   /* /sys/fs/cgroup (full mount, read-only) */
        LXC_AUTO_CGROUP_FULL_RW       = 0x050,   /* /sys/fs/cgroup (full mount, read-write) */
        LXC_AUTO_CGROUP_FULL_MIXED    = 0x060,   /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */
-       LXC_AUTO_CGROUP_MASK          = 0x070,
-
-       LXC_AUTO_ALL_MASK             = 0x07F,   /* all known settings */
+       /* These are defined in such a way as to retain
+        * binary compatibility with earlier versions of
+        * this code. If the previous mask is applied,
+        * both of these will default back to the _MIXED
+        * variants, which is safe. */
+       LXC_AUTO_CGROUP_NOSPEC        = 0x0B0,   /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */
+       LXC_AUTO_CGROUP_FULL_NOSPEC   = 0x0E0,   /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */
+       LXC_AUTO_CGROUP_MASK          = 0x0F0,
+
+       LXC_AUTO_ALL_MASK             = 0x0FF,   /* all known settings */
 };
 
 /*
index 04bd03c712b9f8793cdd064c6e44b92003fb8610..ac05b7562dbe9f7f15d481ee909222d4707b7b02 100644 (file)
@@ -1347,20 +1347,20 @@ static int config_mount_auto(const char *key, const char *value,
 {
        char *autos, *autoptr, *sptr, *token;
        static struct { const char *token; int mask; int flag; } allowed_auto_mounts[] = {
-               { "proc",               LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_MIXED        },
-               { "proc:mixed",         LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_MIXED        },
-               { "proc:rw",            LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_RW           },
-               { "sys",                LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RO            },
-               { "sys:ro",             LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RO            },
-               { "sys:rw",             LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RW            },
-               { "cgroup",             LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_MIXED      },
-               { "cgroup:mixed",       LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_MIXED      },
-               { "cgroup:ro",          LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_RO         },
-               { "cgroup:rw",          LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_RW         },
-               { "cgroup-full",        LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_MIXED },
-               { "cgroup-full:mixed",  LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_MIXED },
-               { "cgroup-full:ro",     LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_RO    },
-               { "cgroup-full:rw",     LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_RW    },
+               { "proc",               LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_MIXED         },
+               { "proc:mixed",         LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_MIXED         },
+               { "proc:rw",            LXC_AUTO_PROC_MASK,      LXC_AUTO_PROC_RW            },
+               { "sys",                LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RO             },
+               { "sys:ro",             LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RO             },
+               { "sys:rw",             LXC_AUTO_SYS_MASK,       LXC_AUTO_SYS_RW             },
+               { "cgroup",             LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_NOSPEC      },
+               { "cgroup:mixed",       LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_MIXED       },
+               { "cgroup:ro",          LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_RO          },
+               { "cgroup:rw",          LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_RW          },
+               { "cgroup-full",        LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_NOSPEC },
+               { "cgroup-full:mixed",  LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_MIXED  },
+               { "cgroup-full:ro",     LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_RO     },
+               { "cgroup-full:rw",     LXC_AUTO_CGROUP_MASK,    LXC_AUTO_CGROUP_FULL_RW     },
                /* NB: For adding anything that ist just a single on/off, but has
                 *     no options: keep mask and flag identical and just define the
                 *     enum value as an unused bit so far
@@ -1954,6 +1954,7 @@ static int lxc_get_mount_entries(struct lxc_conf *c, char *retv, int inlen)
 static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen)
 {
        int len, fulllen = 0;
+       const char *sep = "";
 
        if (!retv)
                inlen = 0;
@@ -1964,24 +1965,27 @@ static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen)
                return 0;
 
        switch (c->auto_mounts & LXC_AUTO_PROC_MASK) {
-               case LXC_AUTO_PROC_MIXED:        strprint(retv, inlen, "proc:mixed\n"); break;
-               case LXC_AUTO_PROC_RW:           strprint(retv, inlen, "proc:rw");      break;
+               case LXC_AUTO_PROC_MIXED:         strprint(retv, inlen, "%sproc:mixed", sep);        sep = " "; break;
+               case LXC_AUTO_PROC_RW:            strprint(retv, inlen, "%sproc:rw", sep);           sep = " "; break;
                default: break;
        }
        switch (c->auto_mounts & LXC_AUTO_SYS_MASK) {
-               case LXC_AUTO_SYS_RO:            strprint(retv, inlen, "sys:ro");            break;
-               case LXC_AUTO_SYS_RW:            strprint(retv, inlen, "sys:rw");            break;
+               case LXC_AUTO_SYS_RO:             strprint(retv, inlen, "%ssys:ro", sep);            sep = " "; break;
+               case LXC_AUTO_SYS_RW:             strprint(retv, inlen, "%ssys:rw", sep);            sep = " "; break;
                default: break;
        }
        switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) {
-               case LXC_AUTO_CGROUP_MIXED:      strprint(retv, inlen, "cgroup:mixed");      break;
-               case LXC_AUTO_CGROUP_RO:         strprint(retv, inlen, "cgroup:ro");         break;
-               case LXC_AUTO_CGROUP_RW:         strprint(retv, inlen, "cgroup:rw");         break;
-               case LXC_AUTO_CGROUP_FULL_MIXED: strprint(retv, inlen, "cgroup-full:mixed"); break;
-               case LXC_AUTO_CGROUP_FULL_RO:    strprint(retv, inlen, "cgroup-full:ro");    break;
-               case LXC_AUTO_CGROUP_FULL_RW:    strprint(retv, inlen, "cgroup-full:rw");    break;
+               case LXC_AUTO_CGROUP_NOSPEC:      strprint(retv, inlen, "%scgroup", sep);            sep = " "; break;
+               case LXC_AUTO_CGROUP_MIXED:       strprint(retv, inlen, "%scgroup:mixed", sep);      sep = " "; break;
+               case LXC_AUTO_CGROUP_RO:          strprint(retv, inlen, "%scgroup:ro", sep);         sep = " "; break;
+               case LXC_AUTO_CGROUP_RW:          strprint(retv, inlen, "%scgroup:rw", sep);         sep = " "; break;
+               case LXC_AUTO_CGROUP_FULL_NOSPEC: strprint(retv, inlen, "%scgroup-full", sep);       sep = " "; break;
+               case LXC_AUTO_CGROUP_FULL_MIXED:  strprint(retv, inlen, "%scgroup-full:mixed", sep); sep = " "; break;
+               case LXC_AUTO_CGROUP_FULL_RO:     strprint(retv, inlen, "%scgroup-full:ro", sep);    sep = " "; break;
+               case LXC_AUTO_CGROUP_FULL_RW:     strprint(retv, inlen, "%scgroup-full:rw", sep);    sep = " "; break;
                default: break;
        }
+
        return fulllen;
 }
 
@@ -2227,22 +2231,24 @@ void write_config(FILE *fout, struct lxc_conf *c)
        if (c->auto_mounts & LXC_AUTO_ALL_MASK) {
                fprintf(fout, "lxc.mount.auto =");
                switch (c->auto_mounts & LXC_AUTO_PROC_MASK) {
-                       case LXC_AUTO_PROC_MIXED:        fprintf(fout, " proc:mixed");        break;
-                       case LXC_AUTO_PROC_RW:           fprintf(fout, " proc:rw");           break;
+                       case LXC_AUTO_PROC_MIXED:         fprintf(fout, " proc:mixed");        break;
+                       case LXC_AUTO_PROC_RW:            fprintf(fout, " proc:rw");           break;
                        default: break;
                }
                switch (c->auto_mounts & LXC_AUTO_SYS_MASK) {
-                       case LXC_AUTO_SYS_RO:            fprintf(fout, " sys:ro");            break;
-                       case LXC_AUTO_SYS_RW:            fprintf(fout, " sys:rw");            break;
+                       case LXC_AUTO_SYS_RO:             fprintf(fout, " sys:ro");            break;
+                       case LXC_AUTO_SYS_RW:             fprintf(fout, " sys:rw");            break;
                        default: break;
                }
                switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) {
-                       case LXC_AUTO_CGROUP_MIXED:      fprintf(fout, " cgroup:mixed");      break;
-                       case LXC_AUTO_CGROUP_RO:         fprintf(fout, " cgroup:ro");         break;
-                       case LXC_AUTO_CGROUP_RW:         fprintf(fout, " cgroup:rw");         break;
-                       case LXC_AUTO_CGROUP_FULL_MIXED: fprintf(fout, " cgroup-full:mixed"); break;
-                       case LXC_AUTO_CGROUP_FULL_RO:    fprintf(fout, " cgroup-full:ro");    break;
-                       case LXC_AUTO_CGROUP_FULL_RW:    fprintf(fout, " cgroup-full:rw");    break;
+                       case LXC_AUTO_CGROUP_NOSPEC:      fprintf(fout, " cgroup");            break;
+                       case LXC_AUTO_CGROUP_MIXED:       fprintf(fout, " cgroup:mixed");      break;
+                       case LXC_AUTO_CGROUP_RO:          fprintf(fout, " cgroup:ro");         break;
+                       case LXC_AUTO_CGROUP_RW:          fprintf(fout, " cgroup:rw");         break;
+                       case LXC_AUTO_CGROUP_FULL_NOSPEC: fprintf(fout, " cgroup-full");       break;
+                       case LXC_AUTO_CGROUP_FULL_MIXED:  fprintf(fout, " cgroup-full:mixed"); break;
+                       case LXC_AUTO_CGROUP_FULL_RO:     fprintf(fout, " cgroup-full:ro");    break;
+                       case LXC_AUTO_CGROUP_FULL_RW:     fprintf(fout, " cgroup-full:rw");    break;
                        default: break;
                }
                fprintf(fout, "\n");