From: Alan Jenkins Date: Wed, 29 Aug 2018 23:32:54 +0000 (+0100) Subject: namespace: don't try to remount superblocks X-Git-Tag: v240~778^2~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=69338c3dfb13d5f5e2d8b7f66f785daab8cbe190;p=thirdparty%2Fsystemd.git namespace: don't try to remount superblocks We can't remount the underlying superblocks, if we are inside a user namespace and running Linux <= 4.17. We can only change the per-mount flags (MS_REMOUNT | MS_BIND). This type of mount() call can only change the per-mount flags, so we don't have to worry about passing the right string options now. Fixes #9914 ("Since 1beab8b was merged, systemd has been failing to start systemd-resolved inside unprivileged containers" ... "Failed to re-mount '/run/systemd/unit-root/dev' read-only: Operation not permitted"). > It's basically my fault :-). I pointed out we could remount read-only > without MS_BIND when reviewing the PR that added TemporaryFilesystem=, > and poettering suggested to change PrivateDevices= at the same time. > I think it's safe to change back, and I don't expect anyone will notice > a difference in behaviour. > > It just surprised me to realize that > `TemporaryFilesystem=/tmp:size=10M,ro,nosuid` would not apply `ro` to the > superblock (underlying filesystem), like mount -osize=10M,ro,nosuid does. > Maybe a comment could note the kernel version (v4.18), that lets you > remount without MS_BIND inside a user namespace. This makes the code longer and I guess this function is still ugly, sorry. One obstacle to cleaning it up is the interaction between `PrivateDevices=yes` and `ReadOnlyPaths=/dev`. I've added a test for the existing behaviour, which I think is now the correct behaviour. --- diff --git a/src/core/namespace.c b/src/core/namespace.c index c164bc5793a..c3bbb406800 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1026,6 +1026,15 @@ static int apply_mount( return 0; } +/* Change the per-mount readonly flag on an existing mount */ +static int remount_bind_readonly(const char *path, unsigned long orig_flags) { + int r; + + r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL); + + return r < 0 ? -errno : 0; +} + static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) { bool submounts = false; int r = 0; @@ -1035,17 +1044,15 @@ static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self if (mount_entry_read_only(m)) { if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) { - /* Make superblock readonly */ - if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0) - r = -errno; + r = remount_bind_readonly(mount_entry_path(m), m->flags); } else { submounts = true; r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo); } } else if (m->mode == PRIVATE_DEV) { - /* Superblock can be readonly but the submounts can't */ - if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0) - r = -errno; + /* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag. + * We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */ + r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS); } else return 0; diff --git a/test/test-execute/exec-readonlypaths.service b/test/test-execute/exec-readonlypaths.service index 6866fdc7002..a0ca68f67da 100644 --- a/test/test-execute/exec-readonlypaths.service +++ b/test/test-execute/exec-readonlypaths.service @@ -2,6 +2,8 @@ Description=Test for ReadOnlyPaths= [Service] -ReadOnlyPaths=/etc -/i-dont-exist /usr -ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var' +ReadOnlyPaths=/usr /etc /sys /dev -/i-dont-exist +PrivateDevices=yes +ExecStart=/bin/sh -x -c 'test ! -w /usr && test ! -w /etc && test ! -w /sys && test ! -w /sys/fs/cgroup' +ExecStart=/bin/sh -x -c 'test ! -w /dev && test ! -w /dev/shm && test ! -e /i-dont-exist && test -w /var' Type=oneshot diff --git a/test/test-execute/exec-temporaryfilesystem-options.service b/test/test-execute/exec-temporaryfilesystem-options.service index b7a5baf93ad..371e5674b1c 100644 --- a/test/test-execute/exec-temporaryfilesystem-options.service +++ b/test/test-execute/exec-temporaryfilesystem-options.service @@ -5,11 +5,10 @@ Description=Test for TemporaryFileSystem with mount options Type=oneshot # The mount options default to "mode=0755,nodev,strictatime". -# Let's override some of them, and test the behaviour of "ro". +# Let's override some of them, and test "ro". TemporaryFileSystem=/var:ro,mode=0700,nostrictatime # Check /proc/self/mountinfo -ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""' ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)mode=700(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""' ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$6 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'