From: Luca Boccassi Date: Thu, 26 Oct 2023 13:56:58 +0000 (+0100) Subject: core: do not post-process skipped mounts X-Git-Tag: v255-rc1~125 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=63862de4b7643ac8703b9ecac59c08b0b2ba0a8e;p=thirdparty%2Fsystemd.git core: do not post-process skipped mounts When a mount is gracefully skipped (e.g.: BindReadOnlyPaths=-/nonexistent) we still post-process it, like making it read-only. Except if nothing has been mounted, the mount point will be made read-only for no reason. Track when mounts are skipped and avoid post-processing. One day we'll switch all of this to the new mount api and do these operations atomically or not at all. Fixes https://github.com/systemd/systemd/issues/29725 --- diff --git a/src/core/namespace.c b/src/core/namespace.c index 9ed253258df..2ed2a0967da 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -78,6 +78,14 @@ typedef enum MountMode { _MOUNT_MODE_MAX, } MountMode; +typedef enum MountEntryState { + MOUNT_PENDING, + MOUNT_APPLIED, + MOUNT_SKIPPED, + _MOUNT_ENTRY_STATE_MAX, + _MOUNT_ENTRY_STATE_INVALID = -EINVAL, +} MountEntryState; + typedef struct MountEntry { const char *path_const; /* Memory allocated on stack or static */ MountMode mode:5; @@ -87,7 +95,7 @@ typedef struct MountEntry { bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */ bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */ bool exec:1; /* Shall clear MS_NOEXEC on the mount itself */ - bool applied:1; /* Already applied */ + MountEntryState state; /* Whether it was already processed or skipped */ char *path_malloc; /* Use this instead of 'path_const' if we had to allocate memory */ const char *unprefixed_path_const; /* If the path was amended with a prefix, these will save the original */ char *unprefixed_path_malloc; @@ -775,7 +783,7 @@ static void drop_duplicates(MountList *ml) { * above. Note that we only drop duplicates that haven't been mounted yet. */ if (previous && path_equal(mount_entry_path(f), mount_entry_path(previous)) && - !f->applied && !previous->applied) { + f->state == MOUNT_PENDING && previous->state == MOUNT_PENDING) { log_debug("%s (%s) is duplicate.", mount_entry_path(f), mount_mode_to_string(f->mode)); /* Propagate the flags to the remaining entry */ previous->read_only = previous->read_only || mount_entry_read_only(f); @@ -1101,7 +1109,7 @@ static int mount_private_dev(MountEntry *m, RuntimeScope scope) { (void) rmdir(dev); (void) rmdir(temporary_mount); - return 0; + return 1; fail: if (devpts) @@ -1139,7 +1147,11 @@ static int mount_bind_dev(const MountEntry *m) { if (r > 0) /* make this a NOP if /dev is already a mount point */ return 0; - return mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); + r = mount_nofollow_verbose(LOG_DEBUG, "/dev", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + + return 1; } static int mount_bind_sysfs(const MountEntry *m) { @@ -1156,7 +1168,11 @@ static int mount_bind_sysfs(const MountEntry *m) { return 0; /* Bind mount the host's version so that we get all child mounts of it, too. */ - return mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); + r = mount_nofollow_verbose(LOG_DEBUG, "/sys", mount_entry_path(m), NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return r; + + return 1; } static int mount_private_apivfs( @@ -1201,7 +1217,11 @@ static int mount_private_apivfs( /* We lack permissions to mount a new instance, and it is not already mounted. But we can * access the host's, so as a final fallback bind-mount it to the destination, as most likely * we are inside a user manager in an unprivileged user namespace. */ - return mount_nofollow_verbose(LOG_DEBUG, bind_source, entry_path, /* fstype = */ NULL, MS_BIND|MS_REC, /* opts = */ NULL); + r = mount_nofollow_verbose(LOG_DEBUG, bind_source, entry_path, /* fstype = */ NULL, MS_BIND|MS_REC, /* opts = */ NULL); + if (r < 0) + return r; + + return 1; } else if (r < 0) return r; @@ -1219,7 +1239,7 @@ static int mount_private_apivfs( /* We mounted a new instance now. Let's bind mount the children over now. This matters for nspawn * where a bunch of files are overmounted, in particular the boot id. */ (void) bind_mount_submounts(bind_source, entry_path); - return 0; + return 1; } static int mount_private_sysfs(const MountEntry *m, const NamespaceParameters *p) { @@ -1295,7 +1315,7 @@ static int mount_tmpfs(const MountEntry *m) { if (r < 0) return log_debug_errno(r, "Failed to fix label of '%s' as '%s': %m", entry_path, inner_path); - return 0; + return 1; } static int mount_run(const MountEntry *m) { @@ -1327,7 +1347,7 @@ static int mount_mqueuefs(const MountEntry *m) { if (r < 0) return r; - return 0; + return 1; } static int mount_image( @@ -1388,7 +1408,7 @@ static int mount_image( if (r < 0) return log_debug_errno(r, "Failed to mount image %s on %s: %m", mount_entry_source(m), mount_entry_path(m)); - return 0; + return 1; } static int mount_overlay(const MountEntry *m) { @@ -1404,8 +1424,10 @@ static int mount_overlay(const MountEntry *m) { r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options); if (r == -ENOENT && m->ignore) return 0; + if (r < 0) + return r; - return r; + return 1; } static int follow_symlink( @@ -1451,6 +1473,10 @@ static int apply_one_mount( const char *what; int r; + /* Return 1 when the mount should be post-processed (remounted r/o, etc.), 0 otherwise. In most + * cases post-processing is the right thing, the typical exception is when the mount is gracefully + * skipped. */ + assert(m); assert(p); @@ -1505,7 +1531,7 @@ static int apply_one_mount( mount_entry_path(m)); if (r > 0) /* Nothing to do here, it is already a mount. We just later toggle the MS_RDONLY * and MS_NOEXEC bits for the mount point if needed. */ - return 0; + return 1; /* This isn't a mount point yet, let's make it one. */ what = mount_entry_path(m); break; @@ -1665,7 +1691,7 @@ static int apply_one_mount( } log_debug("Successfully mounted %s to %s", what, mount_entry_path(m)); - return 0; + return 1; } static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self_mountinfo) { @@ -1676,6 +1702,9 @@ static int make_read_only(const MountEntry *m, char **deny_list, FILE *proc_self assert(m); assert(proc_self_mountinfo); + if (m->state != MOUNT_APPLIED) + return 0; + if (mount_entry_read_only(m) || m->mode == PRIVATE_DEV) { new_flags |= MS_RDONLY; flags_mask |= MS_RDONLY; @@ -1721,6 +1750,9 @@ static int make_noexec(const MountEntry *m, char **deny_list, FILE *proc_self_mo assert(m); assert(proc_self_mountinfo); + if (m->state != MOUNT_APPLIED) + return 0; + if (mount_entry_noexec(m)) { new_flags |= MS_NOEXEC; flags_mask |= MS_NOEXEC; @@ -1754,6 +1786,9 @@ static int make_nosuid(const MountEntry *m, FILE *proc_self_mountinfo) { assert(m); assert(proc_self_mountinfo); + if (m->state != MOUNT_APPLIED) + return 0; + submounts = !IN_SET(m->mode, EMPTY_DIR, TMPFS); if (submounts) @@ -1903,7 +1938,7 @@ static int apply_mounts( FOREACH_ARRAY(m, ml->mounts, ml->n_mounts) { - if (m->applied) + if (m->state != MOUNT_PENDING) continue; /* ExtensionImages/Directories are first opened in the propagate directory, not in the root_directory */ @@ -1921,13 +1956,13 @@ static int apply_mounts( break; } + /* Returns 1 if the mount should be post-processed, 0 otherwise */ r = apply_one_mount(root, m, p); if (r < 0) { mount_entry_path_debug_string(root, m, error_path); return r; } - - m->applied = true; + m->state = r == 0 ? MOUNT_SKIPPED : MOUNT_APPLIED; } if (!again) diff --git a/test/units/testsuite-07.exec-context.sh b/test/units/testsuite-07.exec-context.sh index de9edd7640b..c6a0f218284 100755 --- a/test/units/testsuite-07.exec-context.sh +++ b/test/units/testsuite-07.exec-context.sh @@ -75,3 +75,8 @@ if ! systemd-detect-virt -cq; then systemd-run --wait --pipe -p ProtectKernelLogs=no -p User=testuser \ bash -xec "test -r /dev/kmsg" fi + +systemd-run --wait --pipe -p BindPaths="/etc /home:/mnt:norbind -/foo/bar/baz:/usr:rbind" \ + bash -xec "mountpoint /etc; test -d /etc/systemd; mountpoint /mnt; ! mountpoint /usr" +systemd-run --wait --pipe -p BindReadOnlyPaths="/etc /home:/mnt:norbind -/foo/bar/baz:/usr:rbind" \ + bash -xec "test ! -w /etc; test ! -w /mnt; ! mountpoint /usr"