We can't remount the underlying superblocks, if we are inside a user
namespace and running Linux <= 4.17. We can only change the per-mount
flags (MS_REMOUNT | MS_BIND).
This type of mount() call can only change the per-mount flags, so we
don't have to worry about passing the right string options now.
Fixes #9914 ("Since
1beab8b was merged, systemd has been failing to start
systemd-resolved inside unprivileged containers" ... "Failed to re-mount
'/run/systemd/unit-root/dev' read-only: Operation not permitted").
> It's basically my fault :-). I pointed out we could remount read-only
> without MS_BIND when reviewing the PR that added TemporaryFilesystem=,
> and poettering suggested to change PrivateDevices= at the same time.
> I think it's safe to change back, and I don't expect anyone will notice
> a difference in behaviour.
>
> It just surprised me to realize that
> `TemporaryFilesystem=/tmp:size=10M,ro,nosuid` would not apply `ro` to the
> superblock (underlying filesystem), like mount -osize=10M,ro,nosuid does.
> Maybe a comment could note the kernel version (v4.18), that lets you
> remount without MS_BIND inside a user namespace.
This makes the code longer and I guess this function is still ugly, sorry.
One obstacle to cleaning it up is the interaction between
`PrivateDevices=yes` and `ReadOnlyPaths=/dev`. I've added a test for the
existing behaviour, which I think is now the correct behaviour.
return 0;
}
+/* Change the per-mount readonly flag on an existing mount */
+static int remount_bind_readonly(const char *path, unsigned long orig_flags) {
+ int r;
+
+ r = mount(NULL, path, NULL, MS_REMOUNT | MS_BIND | MS_RDONLY | orig_flags, NULL);
+
+ return r < 0 ? -errno : 0;
+}
+
static int make_read_only(const MountEntry *m, char **blacklist, FILE *proc_self_mountinfo) {
bool submounts = false;
int r = 0;
if (mount_entry_read_only(m)) {
if (IN_SET(m->mode, EMPTY_DIR, TMPFS)) {
- /* Make superblock readonly */
- if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT | MS_RDONLY | m->flags, mount_entry_options(m)) < 0)
- r = -errno;
+ r = remount_bind_readonly(mount_entry_path(m), m->flags);
} else {
submounts = true;
r = bind_remount_recursive_with_mountinfo(mount_entry_path(m), true, blacklist, proc_self_mountinfo);
}
} else if (m->mode == PRIVATE_DEV) {
- /* Superblock can be readonly but the submounts can't */
- if (mount(NULL, mount_entry_path(m), NULL, MS_REMOUNT|DEV_MOUNT_OPTIONS|MS_RDONLY, NULL) < 0)
- r = -errno;
+ /* Set /dev readonly, but not submounts like /dev/shm. Also, we only set the per-mount read-only flag.
+ * We can't set it on the superblock, if we are inside a user namespace and running Linux <= 4.17. */
+ r = remount_bind_readonly(mount_entry_path(m), DEV_MOUNT_OPTIONS);
} else
return 0;
Description=Test for ReadOnlyPaths=
[Service]
-ReadOnlyPaths=/etc -/i-dont-exist /usr
-ExecStart=/bin/sh -x -c 'test ! -w /etc && test ! -w /usr && test ! -e /i-dont-exist && test -w /var'
+ReadOnlyPaths=/usr /etc /sys /dev -/i-dont-exist
+PrivateDevices=yes
+ExecStart=/bin/sh -x -c 'test ! -w /usr && test ! -w /etc && test ! -w /sys && test ! -w /sys/fs/cgroup'
+ExecStart=/bin/sh -x -c 'test ! -w /dev && test ! -w /dev/shm && test ! -e /i-dont-exist && test -w /var'
Type=oneshot
Type=oneshot
# The mount options default to "mode=0755,nodev,strictatime".
-# Let's override some of them, and test the behaviour of "ro".
+# Let's override some of them, and test "ro".
TemporaryFileSystem=/var:ro,mode=0700,nostrictatime
# Check /proc/self/mountinfo
-ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'
ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$11 !~ /(^|,)mode=700(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'
ExecStart=/bin/sh -x -c 'test "$$(awk \'$$5 == "/var" && $$6 !~ /(^|,)ro(,|$$)/ { print $$6 }\' /proc/self/mountinfo)" = ""'