From: Luca Boccassi Date: Wed, 9 Mar 2022 02:07:34 +0000 (+0000) Subject: core: support MountAPIVFS and RootDirectory in user manager X-Git-Tag: v251-rc1~178^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F22685%2Fhead;p=thirdparty%2Fsystemd.git core: support MountAPIVFS and RootDirectory in user manager The only piece missing was to somehow make /proc appear in the new user+mount namespace. It is not possible to mount a new /proc instance, not even with hidepid=invisible,subset=pid, in a user namespace unless a PID namespace is created too (and created at the same time as the other namespaces: it is not possible to mount a new /proc in a child process that creates a PID namespace if that child was forked from a parent that created a user+mount namespace; it all has to happen at the same time). Use the host's /proc with a bind-mount as a fallback for this case. User session services would already run with it, so nothing is lost. --- diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 38220958b42..3b57f8d2f11 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -143,9 +143,7 @@ Mounting logging sockets into root environment BindReadOnlyPaths=/dev/log /run/systemd/journal/socket /run/systemd/journal/stdout - - - + @@ -276,9 +274,7 @@ In order to allow propagating mounts at runtime in a safe manner, /run/systemd/propagate on the host will be used to set up new mounts, and /run/host/incoming/ in the private namespace - will be used as an intermediate step to store them before being moved to the final mount point. - - + will be used as an intermediate step to store them before being moved to the final mount point. @@ -364,9 +360,7 @@ InaccessiblePaths=, or under /home/ and other protected directories if ProtectHome=yes is specified. TemporaryFileSystem= with :ro or - ProtectHome=tmpfs should be used instead. - - + ProtectHome=tmpfs should be used instead. 
diff --git a/src/core/namespace.c b/src/core/namespace.c index e74e6ea778c..77dd473a48d 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1128,9 +1128,15 @@ static int mount_procfs(const MountEntry *m, const NamespaceInfo *ns_info) { r = path_is_mount_point(entry_path, NULL, 0); if (r < 0) return log_debug_errno(r, "Unable to determine whether /proc is already mounted: %m"); - if (r == 0) - /* /proc is not mounted. Propagate the original error code. */ - return -EPERM; + if (r == 0) { + /* We lack permissions to mount a new instance of /proc, and it is not already + * mounted. But we can access the host's, so as a final fallback bind-mount it to + * the destination, as most likely we are inside a user manager in an unprivileged + * user namespace. */ + r = mount_nofollow_verbose(LOG_DEBUG, "/proc", entry_path, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + return -EPERM; + } } else if (r < 0) return r; @@ -2446,6 +2452,17 @@ int setup_namespace( /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root); + if (r == -EINVAL && root_directory) { + /* If we are using root_directory and we don't have privileges (ie: user manager in a user + * namespace) and the root_directory is already a mount point in the parent namespace, + * MS_MOVE will fail as we don't have permission to change it (with EINVAL rather than + * EPERM). Attempt to bind-mount it over itself (like we do above if it's not already a + * mount point) and try again. 
*/ + r = mount_nofollow_verbose(LOG_DEBUG, root, root, NULL, MS_BIND|MS_REC, NULL); + if (r < 0) + goto finish; + r = mount_move_root(root); + } if (r < 0) { log_debug_errno(r, "Failed to mount root with MS_MOVE: %m"); goto finish; diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh index bb8bc186970..dafcdb58fc9 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh @@ -3,10 +3,19 @@ set -e TEST_DESCRIPTION="Test PrivateUsers=yes on user manager" +IMAGE_NAME="private-users" # shellcheck source=test/test-functions . "${TEST_BASE_DIR:?}/test-functions" has_user_dbus_socket || exit 0 +command -v mksquashfs >/dev/null 2>&1 || exit 0 + +test_append_files() { + ( + inst_binary unsquashfs + install_verity_minimal + ) +} do_test "$@" diff --git a/test/units/testsuite-43.sh b/test/units/testsuite-43.sh index 3efe4193776..cda1fe1fda4 100755 --- a/test/units/testsuite-43.sh +++ b/test/units/testsuite-43.sh @@ -68,6 +68,24 @@ runas testuser systemd-run --wait --user --unit=test-group-fail \ -P true \ && { echo 'unexpected success'; exit 1; } +# Check that with a new user namespace we can bind mount +# files and use a different root directory +runas testuser systemd-run --wait --user --unit=test-bind-mount \ + -p PrivateUsers=yes -p BindPaths=/dev/null:/etc/os-release \ + test ! -s /etc/os-release + +unsquashfs -no-xattrs -d /tmp/img /usr/share/minimal_0.raw +runas testuser systemd-run --wait --user --unit=test-root-dir \ + -p PrivateUsers=yes -p RootDirectory=/tmp/img \ + grep MARKER=1 /etc/os-release + +mkdir /tmp/img_bind +mount --bind /tmp/img /tmp/img_bind +runas testuser systemd-run --wait --user --unit=test-root-dir-bind \ + -p PrivateUsers=yes -p RootDirectory=/tmp/img_bind \ + grep MARKER=1 /etc/os-release +umount /tmp/img_bind + systemd-analyze log-level info echo OK >/testok