From: Christian Seiler Date: Sat, 3 May 2014 18:57:46 +0000 (+0200) Subject: lxc.mount.auto: improve defaults for cgroup and cgroup-full X-Git-Tag: lxc-1.1.0.alpha1~113 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0769b82;p=thirdparty%2Flxc.git lxc.mount.auto: improve defaults for cgroup and cgroup-full If the user specifies cgroup or cgroup-full without a specifier (:ro, :rw or :mixed), this changes the behavior. Previously, these were simple aliases for the :mixed variants; now they depend on whether the container also has CAP_SYS_ADMIN; if it does they resolve to the :rw variants, if it doesn't to the :mixed variants (as before). If a container has CAP_SYS_ADMIN privileges, any filesystem can be remounted read-write from within, so initially mounting the cgroup filesystems partially read-only as a default creates a false sense of security. It is better to default to full read-write mounts to show the administrator what keeping CAP_SYS_ADMIN entails. If an administrator really wants both CAP_SYS_ADMIN and the :mixed variant of cgroup or cgroup-full automatic mounts, they can still specify that explicitly; this commit just changes the default without specifier. Signed-off-by: Christian Seiler Cc: Serge Hallyn Signed-off-by: Serge Hallyn --- diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index d3e3ef80f..6e9688971 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -743,8 +743,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - (or - ): + : mount a tmpfs to /sys/fs/cgroup, create directories for all hierarchies to which the container is added, create subdirectories @@ -774,8 +773,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - (or - ): + (without specifier): + defaults to if the + container retains the CAP_SYS_ADMIN capability, + otherwise. + + + + + : mount a tmpfs to /sys/fs/cgroup, create directories for all hierarchies to which the container is added, bind-mount the hierarchies @@ -810,6 +816,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA do so anyway.) + + + (without specifier): + defaults to if the + container retains the CAP_SYS_ADMIN capability, + otherwise. + + Note that if automatic mounting of the cgroup filesystem diff --git a/src/lxc/cgfs.c b/src/lxc/cgfs.c index d75037a60..d59a30505 100644 --- a/src/lxc/cgfs.c +++ b/src/lxc/cgfs.c @@ -1344,6 +1344,15 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type) return false; base_info = cgfs_d->info; + /* If we get passed the _NOSPEC types, we default to _MIXED, since we don't + * have access to the lxc_conf object at this point. It really should be up + * to the caller to fix this, but this doesn't really hurt. + */ + if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) + type = LXC_AUTO_CGROUP_FULL_MIXED; + else if (type == LXC_AUTO_CGROUP_NOSPEC) + type = LXC_AUTO_CGROUP_MIXED; + if (type < LXC_AUTO_CGROUP_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) { ERROR("could not mount cgroups into container: invalid type specified internally"); errno = EINVAL; diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 716fcad09..78d9de2ce 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -115,6 +115,12 @@ lxc_log_define(lxc_conf, lxc); #define LO_FLAGS_AUTOCLEAR 4 #endif +/* needed for cgroup automount checks, regardless of whether we + * have included linux/capability.h or not */ +#ifndef CAP_SYS_ADMIN +#define CAP_SYS_ADMIN 21 +#endif + /* Define pivot_root() if missing from the C library */ #ifndef HAVE_PIVOT_ROOT static int pivot_root(const char * new_root, const char * put_old) @@ -164,6 +170,9 @@ struct caps_opt { int value; }; +/* Declare this here, since we don't want to reshuffle the whole file. */ +static int in_caplist(int cap, struct lxc_list *caps); + static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *); static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *); static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *); @@ -743,8 +752,32 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha } if (flags & LXC_AUTO_CGROUP_MASK) { - if (!cgroup_mount(conf->rootfs.mount, handler, - flags & LXC_AUTO_CGROUP_MASK)) { + int cg_flags; + + cg_flags = flags & LXC_AUTO_CGROUP_MASK; + /* If the type of cgroup mount was not specified, it depends on the + * container's capabilities as to what makes sense: if we have + * CAP_SYS_ADMIN, the read-only part can be remounted read-write + * anyway, so we may as well default to read-write; then the admin + * will not be given a false sense of security. (And if they really + * want mixed r/o r/w, then they can explicitly specify :mixed.) + * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to + * :mixed, because then the container can't remount it read-write. */ + if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) { + int has_sys_admin = 0; + if (!lxc_list_empty(&conf->keepcaps)) { + has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps); + } else { + has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps); + } + if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) { + cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED; + } else { + cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED; + } + } + + if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) { SYSERROR("error mounting /sys/fs/cgroup"); return -1; } @@ -2192,6 +2225,20 @@ static int parse_cap(const char *cap) return capid; } +int in_caplist(int cap, struct lxc_list *caps) +{ + struct lxc_list *iterator; + int capid; + + lxc_list_for_each(iterator, caps) { + capid = parse_cap(iterator->elem); + if (capid == cap) + return 1; + } + + return 0; +} + static int setup_caps(struct lxc_list *caps) { struct lxc_list *iterator; diff --git a/src/lxc/conf.h b/src/lxc/conf.h index c3705873c..865b87ac9 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -240,9 +240,16 @@ enum { LXC_AUTO_CGROUP_FULL_RO = 0x040, /* /sys/fs/cgroup (full mount, read-only) */ LXC_AUTO_CGROUP_FULL_RW = 0x050, /* /sys/fs/cgroup (full mount, read-write) */ LXC_AUTO_CGROUP_FULL_MIXED = 0x060, /* /sys/fs/cgroup (full mount, parent r/o, own r/w) */ - LXC_AUTO_CGROUP_MASK = 0x070, - - LXC_AUTO_ALL_MASK = 0x07F, /* all known settings */ + /* These are defined in such a way as to retain + * binary compatibility with earlier versions of + * this code. If the previous mask is applied, + * both of these will default back to the _MIXED + * variants, which is safe. */ + LXC_AUTO_CGROUP_NOSPEC = 0x0B0, /* /sys/fs/cgroup (partial mount, r/w or mixed, depending on caps) */ + LXC_AUTO_CGROUP_FULL_NOSPEC = 0x0E0, /* /sys/fs/cgroup (full mount, r/w or mixed, depending on caps) */ + LXC_AUTO_CGROUP_MASK = 0x0F0, + + LXC_AUTO_ALL_MASK = 0x0FF, /* all known settings */ }; /* diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 04bd03c71..ac05b7562 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -1347,20 +1347,20 @@ static int config_mount_auto(const char *key, const char *value, { char *autos, *autoptr, *sptr, *token; static struct { const char *token; int mask; int flag; } allowed_auto_mounts[] = { - { "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, - { "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, - { "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW }, - { "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, - { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, - { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW }, - { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, - { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, - { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO }, - { "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW }, - { "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED }, - { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED }, - { "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO }, - { "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW }, + { "proc", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, + { "proc:mixed", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED }, + { "proc:rw", LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW }, + { "sys", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, + { "sys:ro", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO }, + { "sys:rw", LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW }, + { "cgroup", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_NOSPEC }, + { "cgroup:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_MIXED }, + { "cgroup:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RO }, + { "cgroup:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_RW }, + { "cgroup-full", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_NOSPEC }, + { "cgroup-full:mixed", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_MIXED }, + { "cgroup-full:ro", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RO }, + { "cgroup-full:rw", LXC_AUTO_CGROUP_MASK, LXC_AUTO_CGROUP_FULL_RW }, /* NB: For adding anything that ist just a single on/off, but has * no options: keep mask and flag identical and just define the * enum value as an unused bit so far @@ -1954,6 +1954,7 @@ static int lxc_get_mount_entries(struct lxc_conf *c, char *retv, int inlen) static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen) { int len, fulllen = 0; + const char *sep = ""; if (!retv) inlen = 0; @@ -1964,24 +1965,27 @@ static int lxc_get_auto_mounts(struct lxc_conf *c, char *retv, int inlen) return 0; switch (c->auto_mounts & LXC_AUTO_PROC_MASK) { - case LXC_AUTO_PROC_MIXED: strprint(retv, inlen, "proc:mixed\n"); break; - case LXC_AUTO_PROC_RW: strprint(retv, inlen, "proc:rw"); break; + case LXC_AUTO_PROC_MIXED: strprint(retv, inlen, "%sproc:mixed", sep); sep = " "; break; + case LXC_AUTO_PROC_RW: strprint(retv, inlen, "%sproc:rw", sep); sep = " "; break; default: break; } switch (c->auto_mounts & LXC_AUTO_SYS_MASK) { - case LXC_AUTO_SYS_RO: strprint(retv, inlen, "sys:ro"); break; - case LXC_AUTO_SYS_RW: strprint(retv, inlen, "sys:rw"); break; + case LXC_AUTO_SYS_RO: strprint(retv, inlen, "%ssys:ro", sep); sep = " "; break; + case LXC_AUTO_SYS_RW: strprint(retv, inlen, "%ssys:rw", sep); sep = " "; break; default: break; } switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) { - case LXC_AUTO_CGROUP_MIXED: strprint(retv, inlen, "cgroup:mixed"); break; - case LXC_AUTO_CGROUP_RO: strprint(retv, inlen, "cgroup:ro"); break; - case LXC_AUTO_CGROUP_RW: strprint(retv, inlen, "cgroup:rw"); break; - case LXC_AUTO_CGROUP_FULL_MIXED: strprint(retv, inlen, "cgroup-full:mixed"); break; - case LXC_AUTO_CGROUP_FULL_RO: strprint(retv, inlen, "cgroup-full:ro"); break; - case LXC_AUTO_CGROUP_FULL_RW: strprint(retv, inlen, "cgroup-full:rw"); break; + case LXC_AUTO_CGROUP_NOSPEC: strprint(retv, inlen, "%scgroup", sep); sep = " "; break; + case LXC_AUTO_CGROUP_MIXED: strprint(retv, inlen, "%scgroup:mixed", sep); sep = " "; break; + case LXC_AUTO_CGROUP_RO: strprint(retv, inlen, "%scgroup:ro", sep); sep = " "; break; + case LXC_AUTO_CGROUP_RW: strprint(retv, inlen, "%scgroup:rw", sep); sep = " "; break; + case LXC_AUTO_CGROUP_FULL_NOSPEC: strprint(retv, inlen, "%scgroup-full", sep); sep = " "; break; + case LXC_AUTO_CGROUP_FULL_MIXED: strprint(retv, inlen, "%scgroup-full:mixed", sep); sep = " "; break; + case LXC_AUTO_CGROUP_FULL_RO: strprint(retv, inlen, "%scgroup-full:ro", sep); sep = " "; break; + case LXC_AUTO_CGROUP_FULL_RW: strprint(retv, inlen, "%scgroup-full:rw", sep); sep = " "; break; default: break; } + return fulllen; } @@ -2227,22 +2231,24 @@ void write_config(FILE *fout, struct lxc_conf *c) if (c->auto_mounts & LXC_AUTO_ALL_MASK) { fprintf(fout, "lxc.mount.auto ="); switch (c->auto_mounts & LXC_AUTO_PROC_MASK) { - case LXC_AUTO_PROC_MIXED: fprintf(fout, " proc:mixed"); break; - case LXC_AUTO_PROC_RW: fprintf(fout, " proc:rw"); break; + case LXC_AUTO_PROC_MIXED: fprintf(fout, " proc:mixed"); break; + case LXC_AUTO_PROC_RW: fprintf(fout, " proc:rw"); break; default: break; } switch (c->auto_mounts & LXC_AUTO_SYS_MASK) { - case LXC_AUTO_SYS_RO: fprintf(fout, " sys:ro"); break; - case LXC_AUTO_SYS_RW: fprintf(fout, " sys:rw"); break; + case LXC_AUTO_SYS_RO: fprintf(fout, " sys:ro"); break; + case LXC_AUTO_SYS_RW: fprintf(fout, " sys:rw"); break; default: break; } switch (c->auto_mounts & LXC_AUTO_CGROUP_MASK) { - case LXC_AUTO_CGROUP_MIXED: fprintf(fout, " cgroup:mixed"); break; - case LXC_AUTO_CGROUP_RO: fprintf(fout, " cgroup:ro"); break; - case LXC_AUTO_CGROUP_RW: fprintf(fout, " cgroup:rw"); break; - case LXC_AUTO_CGROUP_FULL_MIXED: fprintf(fout, " cgroup-full:mixed"); break; - case LXC_AUTO_CGROUP_FULL_RO: fprintf(fout, " cgroup-full:ro"); break; - case LXC_AUTO_CGROUP_FULL_RW: fprintf(fout, " cgroup-full:rw"); break; + case LXC_AUTO_CGROUP_NOSPEC: fprintf(fout, " cgroup"); break; + case LXC_AUTO_CGROUP_MIXED: fprintf(fout, " cgroup:mixed"); break; + case LXC_AUTO_CGROUP_RO: fprintf(fout, " cgroup:ro"); break; + case LXC_AUTO_CGROUP_RW: fprintf(fout, " cgroup:rw"); break; + case LXC_AUTO_CGROUP_FULL_NOSPEC: fprintf(fout, " cgroup-full"); break; + case LXC_AUTO_CGROUP_FULL_MIXED: fprintf(fout, " cgroup-full:mixed"); break; + case LXC_AUTO_CGROUP_FULL_RO: fprintf(fout, " cgroup-full:ro"); break; + case LXC_AUTO_CGROUP_FULL_RW: fprintf(fout, " cgroup-full:rw"); break; default: break; } fprintf(fout, "\n");