]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: delegate mountns implicitly when any of pidns/cgns/netns is in use
authorMike Yuan <me@yhndnzj.com>
Sat, 29 Mar 2025 20:02:04 +0000 (21:02 +0100)
committerMike Yuan <me@yhndnzj.com>
Sun, 30 Mar 2025 16:57:18 +0000 (18:57 +0200)
man/systemd.exec.xml
src/core/exec-invoke.c
test/units/TEST-07-PID1.delegate-namespaces.sh

index fa6b9651017516b2a90628d160693708f260f8f4..bf4f223a431c0553da266b4a1dac147ee5162be4 100644 (file)
@@ -2411,6 +2411,11 @@ RestrictNamespaces=~cgroup net</programlisting>
         done with the namespace specific unit setting such as <varname>PrivateNetwork=</varname> or
         <varname>PrivateMounts=</varname>.</para>
 
+        <para>Note that some namespace sandboxing options, such as <varname>PrivatePIDs=</varname>,
+        <varname>ProtectControlGroups=private/strict</varname>, or <varname>PrivateNetwork=</varname>,
+        might entail a mount namespace for private API VFS instances. If any of these options is enabled,
+        the mount namespace is implicitly delegated.</para>
+
         <xi:include href="version-info.xml" xpointer="v258"/></listitem>
       </varlistentry>
 
index c926a808e0e5deac78742aa139d64deb3c6c9bc7..d9878e608867216c0c8af2c94d4019d225599d34 100644 (file)
@@ -4275,7 +4275,17 @@ static bool exec_namespace_is_delegated(
         if (context->delegate_namespaces == NAMESPACE_FLAGS_INITIAL)
                 return params->runtime_scope == RUNTIME_SCOPE_USER;
 
-        return FLAGS_SET(context->delegate_namespaces, namespace);
+        if (FLAGS_SET(context->delegate_namespaces, namespace))
+                return true;
+
+        /* Various namespaces imply mountns for private procfs/sysfs/cgroupfs instances, which means that
+         * when those are delegated, mountns must be deferred too.
+         *
+         * The list should stay in sync with exec_needs_mount_namespace(). */
+        if (namespace == CLONE_NEWNS)
+                return context->delegate_namespaces & (CLONE_NEWPID|CLONE_NEWCGROUP|CLONE_NEWNET);
+
+        return false;
 }
 
 static int setup_delegated_namespaces(
index 210635ebbc566d1b17befebc7004c69361df2536..6d8d51caffeb8cb3c7e803c74a6aeb4863be9e1f 100755 (executable)
@@ -40,7 +40,7 @@ testcase_pid() {
     # so we can't write to it when running in a container.
     if ! systemd-detect-virt --container; then
         (! systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid')
-        systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces="mnt pid" --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid'
+        systemd-run -p PrivateUsersEx=self -p PrivatePIDs=yes -p MountAPIVFS=yes -p DelegateNamespaces=pid --wait --pipe -- sh -c 'echo 5 >/proc/sys/kernel/ns_last_pid'
     fi
 }