git.ipfire.org Git - thirdparty/lxc.git/commitdiff
cgfs: don't mount /sys/fs/cgroup readonly
author Christian Seiler <christian@iwakd.de>
Sat, 3 May 2014 18:57:44 +0000 (20:57 +0200)
committer Serge Hallyn <serge.hallyn@ubuntu.com>
Tue, 6 May 2014 15:20:08 +0000 (10:20 -0500)
Ubuntu containers have had trouble with automatic cgroup mounting that
was not read-write (i.e. lxc.mount.auto = cgroup{,-full}:{ro,mixed}) in
containers without CAP_SYS_ADMIN. Ubuntu's mountall program reads
/lib/init/fstab, which contains an entry for /sys/fs/cgroup. Since
there is no ro option specified for that filesystem, mountall will try
to remount it readwrite if it is already mounted. Without
CAP_SYS_ADMIN, that fails and mountall will interrupt boot and wait for
user input on whether to proceed anyway or to manually fix it,
effectively hanging container bootup.

This patch makes sure that /sys/fs/cgroup is always a readwrite tmpfs,
but that the actual cgroup hierarchy paths (/sys/fs/cgroup/$subsystem)
are readonly if :ro or :mixed is used. This still has the desired
effect within the container (no cgroup escalation possible and programs
get errors if they try to do so anyway), while keeping Ubuntu
containers happy.

Signed-off-by: Christian Seiler <christian@iwakd.de>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
doc/lxc.container.conf.sgml.in
src/lxc/cgfs.c

index 7bd2c9e84cd78e05cb26a5be1a50414e81aab5d6..d3e3ef80fb761dc432a2c50997093d35ccb1c9c8 100644 (file)
@@ -811,6 +811,26 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                </para>
              </listitem>
            </itemizedlist>
+           <para>
+             Note that if automatic mounting of the cgroup filesystem
+             is enabled, the tmpfs under
+             <filename>/sys/fs/cgroup</filename> will always be
+             mounted read-write (but for the <option>:mixed</option>
+             and <option>:ro</option> cases, the individual
+             hierarchies,
+             <filename>/sys/fs/cgroup/$hierarchy</filename>, will be
+             read-only). This is in order to work around a quirk in
+             Ubuntu's
+              <citerefentry>
+               <refentrytitle>mountall</refentrytitle>
+                <manvolnum>8</manvolnum>
+              </citerefentry>
+             command that will cause containers to wait for user
+             input at boot if
+             <filename>/sys/fs/cgroup</filename> is mounted read-only
+             and the container can't remount it read-write due to a
+             lack of CAP_SYS_ADMIN.
+           </para>
            <para>
              Examples:
            </para>
index db2a973ce210741c8c6ba18cb950d45ded785d0b..d75037a6065666fab2b30f4456463a6ccd003f8b 100644 (file)
@@ -1442,6 +1442,24 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
                                goto out_error;
                        }
 
+                       /* for read-only and mixed cases, we have to bind-mount the tmpfs directory
+                        * that points to the hierarchy itself (i.e. /sys/fs/cgroup/cpu etc.) onto
+                        * itself and then bind-mount it read-only, since we keep the tmpfs itself
+                        * read-write (see comment below)
+                        */
+                       if (type == LXC_AUTO_CGROUP_MIXED || type == LXC_AUTO_CGROUP_RO) {
+                               r = mount(abs_path, abs_path, NULL, MS_BIND, NULL);
+                               if (r < 0) {
+                                       SYSERROR("error bind-mounting %s onto itself", abs_path);
+                                       goto out_error;
+                               }
+                               r = mount(NULL, abs_path, NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL);
+                               if (r < 0) {
+                                       SYSERROR("error re-mounting %s readonly", abs_path);
+                                       goto out_error;
+                               }
+                       }
+
                        free(abs_path);
                        abs_path = NULL;
 
@@ -1487,13 +1505,21 @@ static bool cgroupfs_mount_cgroup(void *hdata, const char *root, int type)
                parts = NULL;
        }
 
-       /* try to remount the tmpfs readonly, since the container shouldn't
-        * change anything (this will also make sure that trying to create
-        * new cgroups outside the allowed area fails with an error instead
-        * of simply causing this to create directories in the tmpfs itself)
+       /* We used to remount the entire tmpfs readonly if any :ro or
+        * :mixed mode was specified. However, Ubuntu's mountall has the
+        * unfortunate behavior of blocking bootup if /sys/fs/cgroup is
+        * mounted read-only and cannot be remounted read-write.
+        * (mountall reads /lib/init/fstab and tries to (re-)mount all of
+        * these if they are not already mounted with the right options;
+        * it contains an entry for /sys/fs/cgroup. In case it can't do
+        * that, it prompts for the user to either manually fix it or
+        * boot anyway. But without user input, booting of the container
+        * hangs.)
+        *
+        * Instead of remounting the entire tmpfs readonly, we only
+        * remount the paths readonly that are part of the cgroup
+        * hierarchy.
         */
-       if (type != LXC_AUTO_CGROUP_RW && type != LXC_AUTO_CGROUP_FULL_RW)
-               mount(NULL, path, NULL, MS_REMOUNT|MS_RDONLY, NULL);
 
        free(path);