]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: Add UserNamespacePath=
authorQuentin Deslandes <qde@naccy.de>
Fri, 17 Oct 2025 13:37:02 +0000 (15:37 +0200)
committerLennart Poettering <lennart@poettering.net>
Tue, 4 Nov 2025 09:55:04 +0000 (10:55 +0100)
This allows a service to reuse the user namespace created for an
existing service, similarly to NetworkNamespacePath=. The configuration
of the initial user namespace (e.g. ID mapping) is preserved.

16 files changed:
man/org.freedesktop.systemd1.xml
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/exec-invoke.c
src/core/execute-serialize.c
src/core/execute.c
src/core/execute.h
src/core/executor.c
src/core/fuzz-execute-serialize.c
src/core/load-fragment-gperf.gperf.in
src/core/socket.c
src/core/varlink-execute.c
src/shared/bus-unit-util.c
src/shared/varlink-io.systemd.Unit.c
test/fuzz/fuzz-unit-file/directives-all.service
test/units/TEST-07-PID1.user-namespace-path.sh [new file with mode: 0755]

index d4dec82c70e92f648abb6fe3a4e538bcc1fd5d40..cb56d664c44dbf10f92227b95efc62f92d448341 100644 (file)
@@ -3408,6 +3408,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s UserNamespacePath = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s IPCNamespacePath = '...';
@@ -4019,6 +4021,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property UserNamespacePath is not documented!-->
+
     <!--property NetworkNamespacePath is not documented!-->
 
     <!--property IPCNamespacePath is not documented!-->
@@ -4759,6 +4763,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@@ -5655,6 +5661,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s UserNamespacePath = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s IPCNamespacePath = '...';
@@ -6286,6 +6294,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property UserNamespacePath is not documented!-->
+
     <!--property NetworkNamespacePath is not documented!-->
 
     <!--property IPCNamespacePath is not documented!-->
@@ -7006,6 +7016,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@@ -7726,6 +7738,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s UserNamespacePath = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s IPCNamespacePath = '...';
@@ -8279,6 +8293,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property UserNamespacePath is not documented!-->
+
     <!--property NetworkNamespacePath is not documented!-->
 
     <!--property IPCNamespacePath is not documented!-->
@@ -8907,6 +8923,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@@ -9760,6 +9778,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MemoryKSM = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly s UserNamespacePath = '...';
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s NetworkNamespacePath = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s IPCNamespacePath = '...';
@@ -10295,6 +10315,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <!--property MemoryKSM is not documented!-->
 
+    <!--property UserNamespacePath is not documented!-->
+
     <!--property NetworkNamespacePath is not documented!-->
 
     <!--property IPCNamespacePath is not documented!-->
@@ -10905,6 +10927,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MemoryKSM"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="UserNamespacePath"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="NetworkNamespacePath"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="IPCNamespacePath"/>
@@ -12519,7 +12543,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>LogsDirectoryQuotaUsage</varname>,
       <varname>LogsDirectoryAccounting</varname>, and
       <function>KillSubgroup()</function> were added in version 258.</para>
-      <para><varname>OOMKills</varname>, and
+      <para><varname>UserNamespacePath</varname>,
+      <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
     </refsect2>
     <refsect2>
@@ -12587,6 +12612,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>LogsDirectoryAccounting</varname>, and
       <function>KillSubgroup()</function> were added in version 258.</para>
-      <para><varname>OOMKills</varname>, and
+      <para><varname>UserNamespacePath</varname>,
+      <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
     </refsect2>
     <refsect2>
@@ -12648,7 +12674,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>LogsDirectoryQuotaUsage</varname>,
       <varname>LogsDirectoryAccounting</varname>, and
       <function>KillSubgroup()</function> were added in version 258.</para>
-      <para><varname>OOMKills</varname>, and
+      <para><varname>UserNamespacePath</varname>,
+      <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
     </refsect2>
     <refsect2>
@@ -12708,7 +12735,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>LogsDirectoryQuotaUsage</varname>,
       <varname>LogsDirectoryAccounting</varname>, and
       <function>KillSubgroup()</function> were added in version 258.</para>
-      <para><varname>OOMKills</varname>, and
+      <para><varname>UserNamespacePath</varname>,
+      <varname>OOMKills</varname>, and
       <varname>ManagedOOMKills</varname> were added in 259.</para>
     </refsect2>
     <refsect2>
index 67eba1831a98e0a49620dd304f60c6e9e0b35468..c204873c39fdedf97e3425402db251f591fda407 100644 (file)
@@ -2026,6 +2026,20 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
         <xi:include href="system-or-user-ns.xml" xpointer="singular"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>UserNamespacePath=</varname></term>
+
+        <listitem><para>Takes an absolute file system path referring to a Linux user namespace
+        pseudo-file (i.e. a file like <filename>/proc/$PID/ns/user</filename> or a bind mount or symlink to
+        one). When set the invoked processes are added to the user namespace referenced by that path. The
+        path has to point to a valid namespace file at the moment the processes are forked off. If this
+        option is used <varname>PrivateUsers=</varname> has no effect.</para>
+
+        <para>This option is only available for system services.</para>
+
+        <xi:include href="version-info.xml" xpointer="v259"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>NetworkNamespacePath=</varname></term>
 
index 9b7eb5d76b59b03f3ef66fabed8e373a00199af1..7c5e70d40776ffd18f9aaf1e5b5cfd96b125401c 100644 (file)
@@ -1407,6 +1407,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("BPFDelegatePrograms", "s", property_get_bpf_delegate_programs, offsetof(ExecContext, bpf_delegate_programs), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("BPFDelegateAttachments", "s", property_get_bpf_delegate_attachments, offsetof(ExecContext, bpf_delegate_attachments), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("UserNamespacePath", "s", NULL, offsetof(ExecContext, user_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("RootImagePolicy", "s", property_get_image_policy, offsetof(ExecContext, root_image_policy), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -2500,6 +2501,9 @@ int bus_exec_context_set_transient_property(
         if (streq(name, "NetworkNamespacePath"))
                 return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error);
 
+        if (streq(name, "UserNamespacePath"))
+                return bus_set_transient_path(u, name, &c->user_namespace_path, message, flags, error);
+
         if (streq(name, "IPCNamespacePath"))
                 return bus_set_transient_path(u, name, &c->ipc_namespace_path, message, flags, error);
 
index 9d331850667138923d088c639857a312d74d4ae1..aa3b2441953193a00bd0700d1d8f64d982105809 100644 (file)
@@ -4159,6 +4159,7 @@ static int close_remaining_fds(
         append_socket_pair(dont_close, &n_dont_close, runtime->ephemeral_storage_socket);
 
         if (runtime->shared) {
+                append_socket_pair(dont_close, &n_dont_close, runtime->shared->userns_storage_socket);
                 append_socket_pair(dont_close, &n_dont_close, runtime->shared->netns_storage_socket);
                 append_socket_pair(dont_close, &n_dont_close, runtime->shared->ipcns_storage_socket);
         }
@@ -4464,6 +4465,7 @@ static bool exec_needs_cap_sys_admin(const ExecContext *context, const ExecParam
                context->private_tmp != PRIVATE_TMP_NO ||
                context->private_devices ||
                context->private_network ||
+               context->user_namespace_path ||
                context->network_namespace_path ||
                context->private_ipc ||
                context->ipc_namespace_path ||
@@ -4768,6 +4770,7 @@ static void exec_shared_runtime_close(ExecSharedRuntime *shared) {
         if (!shared)
                 return;
 
+        safe_close_pair(shared->userns_storage_socket);
         safe_close_pair(shared->netns_storage_socket);
         safe_close_pair(shared->ipcns_storage_socket);
 }
@@ -5322,6 +5325,14 @@ int exec_invoke(
                 }
         }
 
+        if (context->user_namespace_path && runtime->shared && runtime->shared->userns_storage_socket[0] >= 0) {
+                r = open_shareable_ns_path(runtime->shared->userns_storage_socket, context->user_namespace_path, CLONE_NEWUSER);
+                if (r < 0) {
+                        *exit_status = EXIT_NAMESPACE;
+                        return log_error_errno(r, "Failed to open user namespace path %s: %m", context->user_namespace_path);
+                }
+        }
+
         if (context->network_namespace_path && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) {
                 r = open_shareable_ns_path(runtime->shared->netns_storage_socket, context->network_namespace_path, CLONE_NEWNET);
                 if (r < 0) {
@@ -5758,6 +5769,10 @@ int exec_invoke(
                 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
                  * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
                  * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
+
+                if (context->user_namespace_path && runtime->shared->userns_storage_socket[0] >= 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(EPERM), "UserNamespacePath= is configured, but user namespace setup not permitted");
+
                 PrivateUsers pu = exec_context_get_effective_private_users(context, params);
                 if (pu == PRIVATE_USERS_NO)
                         pu = PRIVATE_USERS_SELF;
@@ -5828,8 +5843,20 @@ int exec_invoke(
          * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
          * case of mount namespaces being less privileged when the mount point list is copied from a
          * different user namespace). */
+        if (needs_sandboxing && context->user_namespace_path && runtime->shared && runtime->shared->userns_storage_socket[0] >= 0) {
+                if (!namespace_type_supported(NAMESPACE_USER))
+                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "UserNamespacePath= is not supported, refusing.");
+
+                r = setup_shareable_ns(runtime->shared->userns_storage_socket, CLONE_NEWUSER);
+                if (ERRNO_IS_NEG_PRIVILEGE(r))
+                        return log_notice_errno(r, "PrivateUsers= is configured, but user namespace setup not permitted, refusing.");
+                if (r < 0) {
+                        *exit_status = EXIT_USER;
+                        return log_error_errno(r, "Failed to set up user namespacing: %m");
+                }
 
-        if (needs_sandboxing && !userns_set_up) {
+                log_debug("Set up existing user namespace");
+        } else if (needs_sandboxing && !userns_set_up) {
                 PrivateUsers pu = exec_context_get_effective_private_users(context, params);
 
                 r = setup_private_users(pu, saved_uid, saved_gid, uid, gid,
index 575d34ff24cf3c2c10b5f7761b666b562ef24209..d046a8a6945d81bac6ed59d37db410e479a2c08c 100644 (file)
@@ -938,6 +938,12 @@ static int exec_runtime_serialize(const ExecRuntime *rt, FILE *f, FDSet *fds) {
                 if (r < 0)
                         return r;
 
+                if (rt->shared->userns_storage_socket[0] >= 0 && rt->shared->userns_storage_socket[1] >= 0) {
+                        r = serialize_fd_many(f, fds, "exec-runtime-userns-storage-socket", rt->shared->userns_storage_socket, 2);
+                        if (r < 0)
+                                return r;
+                }
+
                 if (rt->shared->netns_storage_socket[0] >= 0 && rt->shared->netns_storage_socket[1] >= 0) {
                         r = serialize_fd_many(f, fds, "exec-runtime-netns-storage-socket", rt->shared->netns_storage_socket, 2);
                         if (r < 0)
@@ -1013,6 +1019,12 @@ static int exec_runtime_deserialize(ExecRuntime *rt, FILE *f, FDSet *fds) {
                         r = free_and_strdup(&rt->shared->var_tmp_dir, val);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-runtime-userns-storage-socket="))) {
+
+                        r = deserialize_fd_many(fds, val, 2, rt->shared->userns_storage_socket);
+                        if (r < 0)
+                                continue;
+
                 } else if ((val = startswith(l, "exec-runtime-netns-storage-socket="))) {
 
                         r = deserialize_fd_many(fds, val, 2, rt->shared->netns_storage_socket);
@@ -2362,6 +2374,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
+        r = serialize_item(f, "exec-context-user-namespace-path", c->user_namespace_path);
+        if (r < 0)
+                return r;
+
         r = serialize_item(f, "exec-context-network-namespace-path", c->network_namespace_path);
         if (r < 0)
                 return r;
@@ -3493,6 +3509,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         r = free_and_strdup(&c->network_namespace_path, val);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-context-user-namespace-path="))) {
+                        r = free_and_strdup(&c->user_namespace_path, val);
+                        if (r < 0)
+                                return r;
                 } else if ((val = startswith(l, "exec-context-ipc-namespace-path="))) {
                         r = free_and_strdup(&c->ipc_namespace_path, val);
                         if (r < 0)
index abf67a4ed4a8846a809d2bf32dc4ad7bc2996029..f93c21124cff58d8d68a04b364b4b5b62cc658a7 100644 (file)
@@ -742,6 +742,7 @@ void exec_context_done(ExecContext *c) {
         c->stdin_data = mfree(c->stdin_data);
         c->stdin_data_size = 0;
 
+        c->user_namespace_path = mfree(c->user_namespace_path);
         c->network_namespace_path = mfree(c->network_namespace_path);
         c->ipc_namespace_path = mfree(c->ipc_namespace_path);
 
@@ -1554,6 +1555,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
         }
 #endif
 
+        if (c->user_namespace_path)
+                fprintf(f,
+                        "%sUserNamespacePath: %s\n",
+                        prefix, c->user_namespace_path);
+
         if (c->network_namespace_path)
                 fprintf(f,
                         "%sNetworkNamespacePath: %s\n",
@@ -2286,6 +2292,7 @@ void exec_shared_runtime_done(ExecSharedRuntime *rt) {
         rt->id = mfree(rt->id);
         rt->tmp_dir = mfree(rt->tmp_dir);
         rt->var_tmp_dir = mfree(rt->var_tmp_dir);
+        safe_close_pair(rt->userns_storage_socket);
         safe_close_pair(rt->netns_storage_socket);
         safe_close_pair(rt->ipcns_storage_socket);
 }
@@ -2333,6 +2340,7 @@ static int exec_shared_runtime_allocate(ExecSharedRuntime **ret, const char *id)
 
         *n = (ExecSharedRuntime) {
                 .id = TAKE_PTR(id_copy),
+                .userns_storage_socket = EBADF_PAIR,
                 .netns_storage_socket = EBADF_PAIR,
                 .ipcns_storage_socket = EBADF_PAIR,
         };
@@ -2346,6 +2354,7 @@ static int exec_shared_runtime_add(
                 const char *id,
                 char **tmp_dir,
                 char **var_tmp_dir,
+                int userns_storage_socket[2],
                 int netns_storage_socket[2],
                 int ipcns_storage_socket[2],
                 ExecSharedRuntime **ret) {
@@ -2370,6 +2379,11 @@ static int exec_shared_runtime_add(
         rt->tmp_dir = TAKE_PTR(*tmp_dir);
         rt->var_tmp_dir = TAKE_PTR(*var_tmp_dir);
 
+        if (userns_storage_socket) {
+                rt->userns_storage_socket[0] = TAKE_FD(userns_storage_socket[0]);
+                rt->userns_storage_socket[1] = TAKE_FD(userns_storage_socket[1]);
+        }
+
         if (netns_storage_socket) {
                 rt->netns_storage_socket[0] = TAKE_FD(netns_storage_socket[0]);
                 rt->netns_storage_socket[1] = TAKE_FD(netns_storage_socket[1]);
@@ -2396,7 +2410,7 @@ static int exec_shared_runtime_make(
                 ExecSharedRuntime **ret) {
 
         _cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL;
-        _cleanup_close_pair_ int netns_storage_socket[2] = EBADF_PAIR, ipcns_storage_socket[2] = EBADF_PAIR;
+        _cleanup_close_pair_ int userns_storage_socket[2] = EBADF_PAIR, netns_storage_socket[2] = EBADF_PAIR, ipcns_storage_socket[2] = EBADF_PAIR;
         int r;
 
         assert(m);
@@ -2418,6 +2432,10 @@ static int exec_shared_runtime_make(
                         return r;
         }
 
+        if (c->user_namespace_path)
+                if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, userns_storage_socket) < 0)
+                        return -errno;
+
         if (exec_needs_network_namespace(c))
                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
                         return -errno;
@@ -2426,7 +2444,7 @@ static int exec_shared_runtime_make(
                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ipcns_storage_socket) < 0)
                         return -errno;
 
-        r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ipcns_storage_socket, ret);
+        r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, userns_storage_socket, netns_storage_socket, ipcns_storage_socket, ret);
         if (r < 0)
                 return r;
 
@@ -2484,6 +2502,26 @@ int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
                 if (rt->var_tmp_dir)
                         fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
 
+                if (rt->userns_storage_socket[0] >= 0) {
+                        int copy;
+
+                        copy = fdset_put_dup(fds, rt->userns_storage_socket[0]);
+                        if (copy < 0)
+                                return copy;
+
+                        fprintf(f, " userns-socket-0=%i", copy);
+                }
+
+                if (rt->userns_storage_socket[1] >= 0) {
+                        int copy;
+
+                        copy = fdset_put_dup(fds, rt->userns_storage_socket[1]);
+                        if (copy < 0)
+                                return copy;
+
+                        fprintf(f, " userns-socket-1=%i", copy);
+                }
+
                 if (rt->netns_storage_socket[0] >= 0) {
                         int copy;
 
@@ -2608,7 +2646,7 @@ int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char
 int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
         _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
         char *id = NULL;
-        int r, netns_fdpair[] = {-1, -1}, ipcns_fdpair[] = {-1, -1};
+        int r, userns_fdpair[] = {-1, -1}, netns_fdpair[] = {-1, -1}, ipcns_fdpair[] = {-1, -1};
         const char *p, *v = ASSERT_PTR(value);
         size_t n;
 
@@ -2643,6 +2681,36 @@ int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fd
                 p = v + n + 1;
         }
 
+        v = startswith(p, "userns-socket-0=");
+        if (v) {
+                char *buf;
+
+                n = strcspn(v, " ");
+                buf = strndupa_safe(v, n);
+
+                userns_fdpair[0] = deserialize_fd(fds, buf);
+                if (userns_fdpair[0] < 0)
+                        return userns_fdpair[0];
+                if (v[n] != ' ')
+                        goto finalize;
+                p = v + n + 1;
+        }
+
+        v = startswith(p, "userns-socket-1=");
+        if (v) {
+                char *buf;
+
+                n = strcspn(v, " ");
+                buf = strndupa_safe(v, n);
+
+                userns_fdpair[1] = deserialize_fd(fds, buf);
+                if (userns_fdpair[1] < 0)
+                        return userns_fdpair[1];
+                if (v[n] != ' ')
+                        goto finalize;
+                p = v + n + 1;
+        }
+
         v = startswith(p, "netns-socket-0=");
         if (v) {
                 char *buf;
@@ -2701,7 +2769,7 @@ int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fd
         }
 
 finalize:
-        r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_fdpair, ipcns_fdpair, NULL);
+        r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, userns_fdpair, netns_fdpair, ipcns_fdpair, NULL);
         if (r < 0)
                 return log_debug_errno(r, "Failed to add exec-runtime: %m");
         return 0;
index 1ce78af6afe77ab713e5f7832e84ea27dd8d094c..8b2750cfb26f1162b97b782d351a33e7d0129f56 100644 (file)
@@ -111,6 +111,9 @@ typedef struct ExecSharedRuntime {
 
         /* Like netns_storage_socket, but the file descriptor is referring to the IPC namespace. */
         int ipcns_storage_socket[2];
+
+        /* Like netns_storage_socket, but the file descriptor is referring to the user namespace. */
+        int userns_storage_socket[2];
 } ExecSharedRuntime;
 
 typedef struct ExecRuntime {
@@ -353,6 +356,7 @@ typedef struct ExecContext {
         bool address_families_allow_list:1;
         Set *address_families;
 
+        char *user_namespace_path;
         char *network_namespace_path;
         char *ipc_namespace_path;
 
index 68695986a31c47cdba866e0c501fcb8a30fe40bd..9ca15cf35155fbaf9ee331a1b72af572470ba431 100644 (file)
@@ -183,6 +183,7 @@ static int run(int argc, char *argv[]) {
         _cleanup_(exec_command_done) ExecCommand command = {};
         _cleanup_(exec_params_deep_clear) ExecParameters params = EXEC_PARAMETERS_INIT(/* flags= */ 0);
         _cleanup_(exec_shared_runtime_done) ExecSharedRuntime shared = {
+                .userns_storage_socket = EBADF_PAIR,
                 .netns_storage_socket = EBADF_PAIR,
                 .ipcns_storage_socket = EBADF_PAIR,
         };
index 8b8267f3c75cad2c5c26884e09187d21a7e43b99..f03ead314a91a3b6673670481f8ec02d4cca71b1 100644 (file)
@@ -35,6 +35,7 @@ static void exec_fuzz_one(FILE *f, FDSet *fdset) {
         DynamicCreds dynamic_creds = {};
         ExecCommand command = {};
         ExecSharedRuntime shared = {
+                .userns_storage_socket = EBADF_PAIR,
                 .netns_storage_socket = EBADF_PAIR,
                 .ipcns_storage_socket = EBADF_PAIR,
         };
index 1977e882b51bf4a87cb05d8300208ff3c8fc0076..5b83c95e4c9daed2f6d137e3e343b28aabf500c3 100644 (file)
 {{type}}.ProtectKernelLogs,                   config_parse_bool,                                  0,                                  offsetof({{type}}, exec_context.protect_kernel_logs)
 {{type}}.ProtectClock,                        config_parse_bool,                                  0,                                  offsetof({{type}}, exec_context.protect_clock)
 {{type}}.ProtectControlGroups,                config_parse_protect_control_groups,                0,                                  offsetof({{type}}, exec_context.protect_control_groups)
+{{type}}.UserNamespacePath,                   config_parse_unit_path_printf,                      0,                                  offsetof({{type}}, exec_context.user_namespace_path)
 {{type}}.NetworkNamespacePath,                config_parse_unit_path_printf,                      0,                                  offsetof({{type}}, exec_context.network_namespace_path)
 {{type}}.IPCNamespacePath,                    config_parse_unit_path_printf,                      0,                                  offsetof({{type}}, exec_context.ipc_namespace_path)
 {{type}}.LogNamespace,                        config_parse_log_namespace,                         0,                                  offsetof({{type}}, exec_context)
index c946d2572465f31b39cbf657b5bdad29107d98b5..d57e2302de38c17f96fefb5bbd4a1ebf74afa6af 100644 (file)
@@ -1580,6 +1580,15 @@ static int socket_address_listen_in_cgroup(
         if (r < 0)
                 return log_unit_error_errno(UNIT(s), r, "Failed to acquire runtime: %m");
 
+        if (s->exec_context.user_namespace_path &&
+                s->exec_runtime &&
+                s->exec_runtime->shared &&
+                s->exec_runtime->shared->userns_storage_socket[0] >= 0) {
+                r = open_shareable_ns_path(s->exec_runtime->shared->userns_storage_socket, s->exec_context.user_namespace_path, CLONE_NEWUSER);
+                if (r < 0)
+                        return log_unit_error_errno(UNIT(s), r, "Failed to open user namespace path %s: %m", s->exec_context.user_namespace_path);
+        }
+
         if (s->exec_context.network_namespace_path &&
             s->exec_runtime &&
             s->exec_runtime->shared &&
index 2493c1bbf1d641a32cf92e47ea9128c48588a90e..c9c81152283418e4643a4e369c24fc753cc1d944 100644 (file)
@@ -885,6 +885,7 @@ int unit_exec_context_build_json(sd_json_variant **ret, const char *name, void *
                         JSON_BUILD_PAIR_TRISTATE_NON_NULL("MemoryKSM", c->memory_ksm),
                         SD_JSON_BUILD_PAIR_STRING("PrivatePIDs", private_pids_to_string(c->private_pids)),
                         SD_JSON_BUILD_PAIR_STRING("PrivateUsers", private_users_to_string(c->private_users)),
+                        JSON_BUILD_PAIR_STRING_NON_EMPTY("UserNamespacePath", c->user_namespace_path),
                         SD_JSON_BUILD_PAIR_STRING("ProtectHostname", protect_hostname_to_string(c->protect_hostname)),
                         JSON_BUILD_PAIR_YES_NO("ProtectClock", c->protect_clock),
                         JSON_BUILD_PAIR_YES_NO("ProtectKernelTunables", c->protect_kernel_tunables),
index d3c9840f5a2d38d48fa92c4f0a6d4f33d9cbdb6e..30a029716dcd50b9af1f2229739f35c4936101ff 100644 (file)
@@ -2419,6 +2419,7 @@ static const BusProperty execute_properties[] = {
         { "ProtectProc",                           bus_append_string                             },
         { "ProcSubset",                            bus_append_string                             },
         { "NetworkNamespacePath",                  bus_append_string                             },
+        { "UserNamespacePath",                     bus_append_string                             },
         { "IPCNamespacePath",                      bus_append_string                             },
         { "LogNamespace",                          bus_append_string                             },
         { "RootImagePolicy",                       bus_append_string                             },
index eacc960792643871efe8d7dce98fb4dd478a1ec9..b93112479a549349425e29e1358a48f52d3f6c50 100644 (file)
@@ -582,6 +582,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE(
                 SD_VARLINK_DEFINE_FIELD(PrivatePIDs, SD_VARLINK_STRING, 0),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#PrivateUsers="),
                 SD_VARLINK_DEFINE_FIELD(PrivateUsers, SD_VARLINK_STRING, 0),
+                SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#UserNamespacePath="),
+                SD_VARLINK_DEFINE_FIELD(UserNamespacePath, SD_VARLINK_STRING, SD_VARLINK_NULLABLE),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#ProtectHostname="),
                 SD_VARLINK_DEFINE_FIELD(ProtectHostname, SD_VARLINK_STRING, 0),
                 SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#ProtectClock="),
index 360ddd3dd969e5b74cee14bd2af778158250c29f..9fedef5a63a9ec7ec81f19590e3ded90f1b139e8 100644 (file)
@@ -276,6 +276,7 @@ Unit=
 UpheldBy=
 Upholds=
 User=
+UserNamespacePath=
 WakeSystem=
 WantedBy=
 Wants=
diff --git a/test/units/TEST-07-PID1.user-namespace-path.sh b/test/units/TEST-07-PID1.user-namespace-path.sh
new file mode 100755 (executable)
index 0000000..ebc8f00
--- /dev/null
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: LGPL-2.1-or-later
+set -eux
+set -o pipefail
+
+# shellcheck source=test/units/util.sh
+. "$(dirname "$0")"/util.sh
+
+# When sanitizers are used, export LD_PRELOAD with the sanitizers path,
+# lsns doesn't work otherwise.
+if [ -f /usr/lib/systemd/systemd-asan-env ]; then
+    # shellcheck source=/dev/null
+    . /usr/lib/systemd/systemd-asan-env
+    export LD_PRELOAD
+    export ASAN_OPTIONS
+fi
+
+# Only reuse the user namespace
+systemd-run --unit=oldservice --property=PrivateUsers=true sleep 3600
+sleep .2
+OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}')
+
+systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=PrivateNetwork=true sleep 3600
+sleep .2
+NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}')
+
+assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)"
+assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)"
+
+systemctl stop oldservice newservice
+
+# Reuse the user and network namespaces
+systemd-run --unit=oldservice --property=PrivateUsers=true --property=PrivateNetwork=true sleep 3600
+sleep .2
+OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}')
+
+systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=NetworkNamespacePath=/proc/"$OLD_PID"/ns/net sleep 3600
+sleep .2
+NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}')
+
+assert_eq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)"
+assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)"
+
+systemctl stop oldservice newservice
+
+# Delegate the network namespace
+systemd-run --unit=oldservice --property=PrivateUsers=true sleep 3600
+sleep .2
+OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}')
+
+systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=DelegateNamespaces=net --property=PrivateNetwork=true sleep 3600
+sleep .2
+NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}')
+
+assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)"
+assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)"
+
+systemctl stop oldservice newservice