From: Mike Yuan Date: Sat, 29 Mar 2025 20:02:04 +0000 (+0100) Subject: core: delegate mountns implicitly when any of pidns/cgns/netns is in use X-Git-Tag: v258-rc1~952^2~2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=32b69b190b74c0e03416572dffa31b598511e33f;p=thirdparty%2Fsystemd.git core: delegate mountns implicitly when any of pidns/cgns/netns is in use --- diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index fa6b9651017..bf4f223a431 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -2411,6 +2411,11 @@ RestrictNamespaces=~cgroup net done with the namespace specific unit setting such as PrivateNetwork= or PrivateMounts=. + Note that some namespace sandboxing options might entail a mount namespace for private API VFS instances, + such as PrivatePIDs=, ProtectControlGroups=private/strict, or + PrivateNetwork=. If any of the mentioned options are enabled and the corresponding namespace is delegated, the mount namespace + is implicitly delegated as well. + diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index c926a808e0e..d9878e60886 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -4275,7 +4275,17 @@ static bool exec_namespace_is_delegated( if (context->delegate_namespaces == NAMESPACE_FLAGS_INITIAL) return params->runtime_scope == RUNTIME_SCOPE_USER; - return FLAGS_SET(context->delegate_namespaces, namespace); + if (FLAGS_SET(context->delegate_namespaces, namespace)) + return true; + + /* Various namespaces imply mountns for private procfs/sysfs/cgroupfs instances, which means when + * those are delegated mountns must be deferred too. + * + * The list should stay in sync with exec_needs_mount_namespace(). 
*/ + if (namespace == CLONE_NEWNS) + return context->delegate_namespaces & (CLONE_NEWPID|CLONE_NEWCGROUP|CLONE_NEWNET); + + return false; } static int setup_delegated_namespaces( diff --git a/test/units/TEST-07-PID1.delegate-namespaces.sh b/test/units/TEST-07-PID1.delegate-namespaces.sh index 210635ebbc5..6d8d51caffe 100755 --- a/test/units/TEST-07-PID1.delegate-namespaces.sh +++ b/test/units/TEST-07-PID1.delegate-namespaces.sh @@ -40,7 +40,7 @@ testcase_pid() { # so we can't write to it when running in a container. if ! systemd-detect-virt --container; then (! systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid') - systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces="mnt pid" --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid' + systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces=pid --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid' fi }