From 79dd24cf14adc809620479d45a7b469cf3e82892 Mon Sep 17 00:00:00 2001 From: Quentin Deslandes Date: Fri, 17 Oct 2025 15:37:02 +0200 Subject: [PATCH] core: Add UserNamespacePath= This allows a service to reuse the user namespace created for an existing service, similarly to NetworkNamespacePath=. The configuration of the initial user namespace (e.g. ID mapping) is preserved. --- man/org.freedesktop.systemd1.xml | 34 ++++++++- man/systemd.exec.xml | 14 ++++ src/core/dbus-execute.c | 4 + src/core/exec-invoke.c | 29 ++++++- src/core/execute-serialize.c | 20 +++++ src/core/execute.c | 76 ++++++++++++++++++- src/core/execute.h | 4 + src/core/executor.c | 1 + src/core/fuzz-execute-serialize.c | 1 + src/core/load-fragment-gperf.gperf.in | 1 + src/core/socket.c | 9 +++ src/core/varlink-execute.c | 1 + src/shared/bus-unit-util.c | 1 + src/shared/varlink-io.systemd.Unit.c | 2 + .../fuzz-unit-file/directives-all.service | 1 + .../units/TEST-07-PID1.user-namespace-path.sh | 58 ++++++++++++++ 16 files changed, 248 insertions(+), 8 deletions(-) create mode 100755 test/units/TEST-07-PID1.user-namespace-path.sh diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index d4dec82c70e..cb56d664c44 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -3408,6 +3408,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s UserNamespacePath = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s IPCNamespacePath = '...'; @@ -4019,6 +4021,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4759,6 +4763,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -5655,6 +5661,8 @@ node 
/org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s UserNamespacePath = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s IPCNamespacePath = '...'; @@ -6286,6 +6294,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -7006,6 +7016,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -7726,6 +7738,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s UserNamespacePath = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s IPCNamespacePath = '...'; @@ -8279,6 +8293,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8907,6 +8923,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -9760,6 +9778,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MemoryKSM = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s UserNamespacePath = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s NetworkNamespacePath = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s IPCNamespacePath = '...'; @@ -10295,6 +10315,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10905,6 +10927,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -12519,7 +12543,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ LogsDirectoryQuotaUsage, LogsDirectoryAccounting, and 
KillSubgroup() were added in version 258. - OOMKills, and + UserNamespacePath, + OOMKills, and ManagedOOMKills were added in 259. @@ -12587,6 +12612,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ LogsDirectoryAccounting, and KillSubgroup() were added in version 258. - OOMKills, and + UserNamespacePath, + OOMKills, and ManagedOOMKills were added in 259. @@ -12648,7 +12674,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ LogsDirectoryQuotaUsage, LogsDirectoryAccounting, and KillSubgroup() were added in version 258. - OOMKills, and + UserNamespacePath, + OOMKills, and ManagedOOMKills were added in 259. @@ -12708,7 +12735,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ LogsDirectoryQuotaUsage, LogsDirectoryAccounting, and KillSubgroup() were added in version 258. - OOMKills, and + UserNamespacePath, + OOMKills, and ManagedOOMKills were added in 259. diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 67eba1831a9..c204873c39f 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -2026,6 +2026,20 @@ BindReadOnlyPaths=/var/lib/systemd + + UserNamespacePath= + + Takes an absolute file system path referring to a Linux user namespace + pseudo-file (i.e. a file like /proc/$PID/ns/user or a bind mount or symlink to + one). When set the invoked processes are added to the user namespace referenced by that path. The + path has to point to a valid namespace file at the moment the processes are forked off. If this + option is used PrivateUsers= has no effect. + + This option is only available for system services. 
+ + + + NetworkNamespacePath= diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 9b7eb5d76b5..7c5e70d4077 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -1407,6 +1407,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("BPFDelegatePrograms", "s", property_get_bpf_delegate_programs, offsetof(ExecContext, bpf_delegate_programs), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("BPFDelegateAttachments", "s", property_get_bpf_delegate_attachments, offsetof(ExecContext, bpf_delegate_attachments), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MemoryKSM", "b", bus_property_get_tristate, offsetof(ExecContext, memory_ksm), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("UserNamespacePath", "s", NULL, offsetof(ExecContext, user_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("IPCNamespacePath", "s", NULL, offsetof(ExecContext, ipc_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootImagePolicy", "s", property_get_image_policy, offsetof(ExecContext, root_image_policy), SD_BUS_VTABLE_PROPERTY_CONST), @@ -2500,6 +2501,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "NetworkNamespacePath")) return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error); + if (streq(name, "UserNamespacePath")) + return bus_set_transient_path(u, name, &c->user_namespace_path, message, flags, error); + if (streq(name, "IPCNamespacePath")) return bus_set_transient_path(u, name, &c->ipc_namespace_path, message, flags, error); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 9d331850667..aa3b2441953 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -4159,6 +4159,7 @@ static int close_remaining_fds( append_socket_pair(dont_close, &n_dont_close, runtime->ephemeral_storage_socket); if (runtime->shared) { + 
append_socket_pair(dont_close, &n_dont_close, runtime->shared->userns_storage_socket); append_socket_pair(dont_close, &n_dont_close, runtime->shared->netns_storage_socket); append_socket_pair(dont_close, &n_dont_close, runtime->shared->ipcns_storage_socket); } @@ -4464,6 +4465,7 @@ static bool exec_needs_cap_sys_admin(const ExecContext *context, const ExecParam context->private_tmp != PRIVATE_TMP_NO || context->private_devices || context->private_network || + context->user_namespace_path || context->network_namespace_path || context->private_ipc || context->ipc_namespace_path || @@ -4768,6 +4770,7 @@ static void exec_shared_runtime_close(ExecSharedRuntime *shared) { if (!shared) return; + safe_close_pair(shared->userns_storage_socket); safe_close_pair(shared->netns_storage_socket); safe_close_pair(shared->ipcns_storage_socket); } @@ -5322,6 +5325,14 @@ int exec_invoke( } } + if (context->user_namespace_path && runtime->shared && runtime->shared->userns_storage_socket[0] >= 0) { + r = open_shareable_ns_path(runtime->shared->userns_storage_socket, context->user_namespace_path, CLONE_NEWUSER); + if (r < 0) { + *exit_status = EXIT_NAMESPACE; + return log_error_errno(r, "Failed to open user namespace path %s: %m", context->user_namespace_path); + } + } + if (context->network_namespace_path && runtime->shared && runtime->shared->netns_storage_socket[0] >= 0) { r = open_shareable_ns_path(runtime->shared->netns_storage_socket, context->network_namespace_path, CLONE_NEWNET); if (r < 0) { @@ -5758,6 +5769,10 @@ int exec_invoke( /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces. * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. 
*/ + + if (context->user_namespace_path && runtime->shared && runtime->shared->userns_storage_socket[0] >= 0) /* runtime->shared is NULL-checked here as in the other namespace path checks of this function */ + return log_error_errno(SYNTHETIC_ERRNO(EPERM), "UserNamespacePath= is configured, but user namespace setup not permitted"); + PrivateUsers pu = exec_context_get_effective_private_users(context, params); if (pu == PRIVATE_USERS_NO) pu = PRIVATE_USERS_SELF; @@ -5828,8 +5843,20 @@ int exec_invoke( * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the * case of mount namespaces being less privileged when the mount point list is copied from a * different user namespace). */ + if (needs_sandboxing && context->user_namespace_path && runtime->shared && runtime->shared->userns_storage_socket[0] >= 0) { + if (!namespace_type_supported(NAMESPACE_USER)) + return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "UserNamespacePath= is not supported, refusing."); + + r = setup_shareable_ns(runtime->shared->userns_storage_socket, CLONE_NEWUSER); + if (ERRNO_IS_NEG_PRIVILEGE(r)) /* this branch joins the namespace named by UserNamespacePath=, so the message names that option */ + return log_notice_errno(r, "UserNamespacePath= is configured, but user namespace setup not permitted, refusing."); + if (r < 0) { + *exit_status = EXIT_USER; + return log_error_errno(r, "Failed to set up user namespacing: %m"); + } - if (needs_sandboxing && !userns_set_up) { + log_debug("Set up existing user namespace"); + } else if (needs_sandboxing && !userns_set_up) { PrivateUsers pu = exec_context_get_effective_private_users(context, params); r = setup_private_users(pu, saved_uid, saved_gid, uid, gid, diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 575d34ff24c..d046a8a6945 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -938,6 +938,12 @@ static int exec_runtime_serialize(const ExecRuntime *rt, FILE *f, FDSet *fds) { if (r < 0) return r; + if (rt->shared->userns_storage_socket[0] >= 0 && rt->shared->userns_storage_socket[1] >= 0) { + r = serialize_fd_many(f, fds, "exec-runtime-userns-storage-socket", 
rt->shared->userns_storage_socket, 2); + if (r < 0) + return r; + } + if (rt->shared->netns_storage_socket[0] >= 0 && rt->shared->netns_storage_socket[1] >= 0) { r = serialize_fd_many(f, fds, "exec-runtime-netns-storage-socket", rt->shared->netns_storage_socket, 2); if (r < 0) @@ -1013,6 +1019,12 @@ static int exec_runtime_deserialize(ExecRuntime *rt, FILE *f, FDSet *fds) { r = free_and_strdup(&rt->shared->var_tmp_dir, val); if (r < 0) return r; + } else if ((val = startswith(l, "exec-runtime-userns-storage-socket="))) { + + r = deserialize_fd_many(fds, val, 2, rt->shared->userns_storage_socket); + if (r < 0) + continue; + } else if ((val = startswith(l, "exec-runtime-netns-storage-socket="))) { r = deserialize_fd_many(fds, val, 2, rt->shared->netns_storage_socket); @@ -2362,6 +2374,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { if (r < 0) return r; + r = serialize_item(f, "exec-context-user-namespace-path", c->user_namespace_path); + if (r < 0) + return r; + r = serialize_item(f, "exec-context-network-namespace-path", c->network_namespace_path); if (r < 0) return r; @@ -3493,6 +3509,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { r = free_and_strdup(&c->network_namespace_path, val); if (r < 0) return r; + } else if ((val = startswith(l, "exec-context-user-namespace-path="))) { + r = free_and_strdup(&c->user_namespace_path, val); + if (r < 0) + return r; } else if ((val = startswith(l, "exec-context-ipc-namespace-path="))) { r = free_and_strdup(&c->ipc_namespace_path, val); if (r < 0) diff --git a/src/core/execute.c b/src/core/execute.c index abf67a4ed4a..f93c21124cf 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -742,6 +742,7 @@ void exec_context_done(ExecContext *c) { c->stdin_data = mfree(c->stdin_data); c->stdin_data_size = 0; + c->user_namespace_path = mfree(c->user_namespace_path); c->network_namespace_path = mfree(c->network_namespace_path); c->ipc_namespace_path = mfree(c->ipc_namespace_path); @@ 
-1554,6 +1555,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { } #endif + if (c->user_namespace_path) + fprintf(f, + "%sUserNamespacePath: %s\n", + prefix, c->user_namespace_path); + if (c->network_namespace_path) fprintf(f, "%sNetworkNamespacePath: %s\n", @@ -2286,6 +2292,7 @@ void exec_shared_runtime_done(ExecSharedRuntime *rt) { rt->id = mfree(rt->id); rt->tmp_dir = mfree(rt->tmp_dir); rt->var_tmp_dir = mfree(rt->var_tmp_dir); + safe_close_pair(rt->userns_storage_socket); safe_close_pair(rt->netns_storage_socket); safe_close_pair(rt->ipcns_storage_socket); } @@ -2333,6 +2340,7 @@ static int exec_shared_runtime_allocate(ExecSharedRuntime **ret, const char *id) *n = (ExecSharedRuntime) { .id = TAKE_PTR(id_copy), + .userns_storage_socket = EBADF_PAIR, .netns_storage_socket = EBADF_PAIR, .ipcns_storage_socket = EBADF_PAIR, }; @@ -2346,6 +2354,7 @@ static int exec_shared_runtime_add( const char *id, char **tmp_dir, char **var_tmp_dir, + int userns_storage_socket[2], int netns_storage_socket[2], int ipcns_storage_socket[2], ExecSharedRuntime **ret) { @@ -2370,6 +2379,11 @@ static int exec_shared_runtime_add( rt->tmp_dir = TAKE_PTR(*tmp_dir); rt->var_tmp_dir = TAKE_PTR(*var_tmp_dir); + if (userns_storage_socket) { + rt->userns_storage_socket[0] = TAKE_FD(userns_storage_socket[0]); + rt->userns_storage_socket[1] = TAKE_FD(userns_storage_socket[1]); + } + if (netns_storage_socket) { rt->netns_storage_socket[0] = TAKE_FD(netns_storage_socket[0]); rt->netns_storage_socket[1] = TAKE_FD(netns_storage_socket[1]); @@ -2396,7 +2410,7 @@ static int exec_shared_runtime_make( ExecSharedRuntime **ret) { _cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL; - _cleanup_close_pair_ int netns_storage_socket[2] = EBADF_PAIR, ipcns_storage_socket[2] = EBADF_PAIR; + _cleanup_close_pair_ int userns_storage_socket[2] = EBADF_PAIR, netns_storage_socket[2] = EBADF_PAIR, ipcns_storage_socket[2] = EBADF_PAIR; int r; assert(m); @@ 
-2418,6 +2432,10 @@ static int exec_shared_runtime_make( return r; } + if (c->user_namespace_path) + if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, userns_storage_socket) < 0) + return -errno; + if (exec_needs_network_namespace(c)) if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0) return -errno; @@ -2426,7 +2444,7 @@ static int exec_shared_runtime_make( if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, ipcns_storage_socket) < 0) return -errno; - r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ipcns_storage_socket, ret); + r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, userns_storage_socket, netns_storage_socket, ipcns_storage_socket, ret); if (r < 0) return r; @@ -2484,6 +2502,26 @@ int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) { if (rt->var_tmp_dir) fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir); + if (rt->userns_storage_socket[0] >= 0) { + int copy; + + copy = fdset_put_dup(fds, rt->userns_storage_socket[0]); + if (copy < 0) + return copy; + + fprintf(f, " userns-socket-0=%i", copy); + } + + if (rt->userns_storage_socket[1] >= 0) { + int copy; + + copy = fdset_put_dup(fds, rt->userns_storage_socket[1]); + if (copy < 0) + return copy; + + fprintf(f, " userns-socket-1=%i", copy); + } + if (rt->netns_storage_socket[0] >= 0) { int copy; @@ -2608,7 +2646,7 @@ int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) { _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL; char *id = NULL; - int r, netns_fdpair[] = {-1, -1}, ipcns_fdpair[] = {-1, -1}; + int r, userns_fdpair[] = {-1, -1}, netns_fdpair[] = {-1, -1}, ipcns_fdpair[] = {-1, -1}; const char *p, *v = ASSERT_PTR(value); size_t n; @@ -2643,6 +2681,36 @@ int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fd p = v + n + 1; } + v = startswith(p, "userns-socket-0="); + if (v) 
{ + char *buf; + + n = strcspn(v, " "); + buf = strndupa_safe(v, n); + + userns_fdpair[0] = deserialize_fd(fds, buf); + if (userns_fdpair[0] < 0) + return userns_fdpair[0]; + if (v[n] != ' ') + goto finalize; + p = v + n + 1; + } + + v = startswith(p, "userns-socket-1="); + if (v) { + char *buf; + + n = strcspn(v, " "); + buf = strndupa_safe(v, n); + + userns_fdpair[1] = deserialize_fd(fds, buf); + if (userns_fdpair[1] < 0) + return userns_fdpair[1]; + if (v[n] != ' ') + goto finalize; + p = v + n + 1; + } + v = startswith(p, "netns-socket-0="); if (v) { char *buf; @@ -2701,7 +2769,7 @@ int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fd } finalize: - r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_fdpair, ipcns_fdpair, NULL); + r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, userns_fdpair, netns_fdpair, ipcns_fdpair, NULL); if (r < 0) return log_debug_errno(r, "Failed to add exec-runtime: %m"); return 0; diff --git a/src/core/execute.h b/src/core/execute.h index 1ce78af6afe..8b2750cfb26 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -111,6 +111,9 @@ typedef struct ExecSharedRuntime { /* Like netns_storage_socket, but the file descriptor is referring to the IPC namespace. */ int ipcns_storage_socket[2]; + + /* Like netns_storage_socket, but the file descriptor is referring to the user namespace. 
*/ + int userns_storage_socket[2]; } ExecSharedRuntime; typedef struct ExecRuntime { @@ -353,6 +356,7 @@ typedef struct ExecContext { bool address_families_allow_list:1; Set *address_families; + char *user_namespace_path; char *network_namespace_path; char *ipc_namespace_path; diff --git a/src/core/executor.c b/src/core/executor.c index 68695986a31..9ca15cf3515 100644 --- a/src/core/executor.c +++ b/src/core/executor.c @@ -183,6 +183,7 @@ static int run(int argc, char *argv[]) { _cleanup_(exec_command_done) ExecCommand command = {}; _cleanup_(exec_params_deep_clear) ExecParameters params = EXEC_PARAMETERS_INIT(/* flags= */ 0); _cleanup_(exec_shared_runtime_done) ExecSharedRuntime shared = { + .userns_storage_socket = EBADF_PAIR, .netns_storage_socket = EBADF_PAIR, .ipcns_storage_socket = EBADF_PAIR, }; diff --git a/src/core/fuzz-execute-serialize.c b/src/core/fuzz-execute-serialize.c index 8b8267f3c75..f03ead314a9 100644 --- a/src/core/fuzz-execute-serialize.c +++ b/src/core/fuzz-execute-serialize.c @@ -35,6 +35,7 @@ static void exec_fuzz_one(FILE *f, FDSet *fdset) { DynamicCreds dynamic_creds = {}; ExecCommand command = {}; ExecSharedRuntime shared = { + .userns_storage_socket = EBADF_PAIR, .netns_storage_socket = EBADF_PAIR, .ipcns_storage_socket = EBADF_PAIR, }; diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 1977e882b51..5b83c95e4c9 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -132,6 +132,7 @@ {{type}}.ProtectKernelLogs, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_kernel_logs) {{type}}.ProtectClock, config_parse_bool, 0, offsetof({{type}}, exec_context.protect_clock) {{type}}.ProtectControlGroups, config_parse_protect_control_groups, 0, offsetof({{type}}, exec_context.protect_control_groups) +{{type}}.UserNamespacePath, config_parse_unit_path_printf, 0, offsetof({{type}}, exec_context.user_namespace_path) {{type}}.NetworkNamespacePath, 
config_parse_unit_path_printf, 0, offsetof({{type}}, exec_context.network_namespace_path) {{type}}.IPCNamespacePath, config_parse_unit_path_printf, 0, offsetof({{type}}, exec_context.ipc_namespace_path) {{type}}.LogNamespace, config_parse_log_namespace, 0, offsetof({{type}}, exec_context) diff --git a/src/core/socket.c b/src/core/socket.c index c946d257246..d57e2302de3 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1580,6 +1580,15 @@ static int socket_address_listen_in_cgroup( if (r < 0) return log_unit_error_errno(UNIT(s), r, "Failed to acquire runtime: %m"); + if (s->exec_context.user_namespace_path && + s->exec_runtime && + s->exec_runtime->shared && + s->exec_runtime->shared->userns_storage_socket[0] >= 0) { + r = open_shareable_ns_path(s->exec_runtime->shared->userns_storage_socket, s->exec_context.user_namespace_path, CLONE_NEWUSER); + if (r < 0) + return log_unit_error_errno(UNIT(s), r, "Failed to open user namespace path %s: %m", s->exec_context.user_namespace_path); + } + if (s->exec_context.network_namespace_path && s->exec_runtime && s->exec_runtime->shared && diff --git a/src/core/varlink-execute.c b/src/core/varlink-execute.c index 2493c1bbf1d..c9c81152283 100644 --- a/src/core/varlink-execute.c +++ b/src/core/varlink-execute.c @@ -885,6 +885,7 @@ int unit_exec_context_build_json(sd_json_variant **ret, const char *name, void * JSON_BUILD_PAIR_TRISTATE_NON_NULL("MemoryKSM", c->memory_ksm), SD_JSON_BUILD_PAIR_STRING("PrivatePIDs", private_pids_to_string(c->private_pids)), SD_JSON_BUILD_PAIR_STRING("PrivateUsers", private_users_to_string(c->private_users)), + JSON_BUILD_PAIR_STRING_NON_EMPTY("UserNamespacePath", c->user_namespace_path), SD_JSON_BUILD_PAIR_STRING("ProtectHostname", protect_hostname_to_string(c->protect_hostname)), JSON_BUILD_PAIR_YES_NO("ProtectClock", c->protect_clock), JSON_BUILD_PAIR_YES_NO("ProtectKernelTunables", c->protect_kernel_tunables), diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 
d3c9840f5a2..30a029716dc 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2419,6 +2419,7 @@ static const BusProperty execute_properties[] = { { "ProtectProc", bus_append_string }, { "ProcSubset", bus_append_string }, { "NetworkNamespacePath", bus_append_string }, + { "UserNamespacePath", bus_append_string }, { "IPCNamespacePath", bus_append_string }, { "LogNamespace", bus_append_string }, { "RootImagePolicy", bus_append_string }, diff --git a/src/shared/varlink-io.systemd.Unit.c b/src/shared/varlink-io.systemd.Unit.c index eacc9607926..b93112479a5 100644 --- a/src/shared/varlink-io.systemd.Unit.c +++ b/src/shared/varlink-io.systemd.Unit.c @@ -582,6 +582,8 @@ static SD_VARLINK_DEFINE_STRUCT_TYPE( SD_VARLINK_DEFINE_FIELD(PrivatePIDs, SD_VARLINK_STRING, 0), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#PrivateUsers="), SD_VARLINK_DEFINE_FIELD(PrivateUsers, SD_VARLINK_STRING, 0), + SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#UserNamespacePath="), + SD_VARLINK_DEFINE_FIELD(UserNamespacePath, SD_VARLINK_STRING, SD_VARLINK_NULLABLE), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#ProtectHostname="), SD_VARLINK_DEFINE_FIELD(ProtectHostname, SD_VARLINK_STRING, 0), SD_VARLINK_FIELD_COMMENT("https://www.freedesktop.org/software/systemd/man"PROJECT_VERSION_STR"systemd.exec.html#ProtectClock="), diff --git a/test/fuzz/fuzz-unit-file/directives-all.service b/test/fuzz/fuzz-unit-file/directives-all.service index 360ddd3dd96..9fedef5a63a 100644 --- a/test/fuzz/fuzz-unit-file/directives-all.service +++ b/test/fuzz/fuzz-unit-file/directives-all.service @@ -276,6 +276,7 @@ Unit= UpheldBy= Upholds= User= +UserNamespacePath= WakeSystem= WantedBy= Wants= diff --git a/test/units/TEST-07-PID1.user-namespace-path.sh b/test/units/TEST-07-PID1.user-namespace-path.sh 
new file mode 100755 index 00000000000..ebc8f00baf9 --- /dev/null +++ b/test/units/TEST-07-PID1.user-namespace-path.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# shellcheck source=test/units/util.sh +. "$(dirname "$0")"/util.sh + +# When sanitizers are used, export LD_PRELOAD with the sanitizers path, +# lsns doesn't work otherwise. +if [ -f /usr/lib/systemd/systemd-asan-env ]; then + # shellcheck source=/dev/null + . /usr/lib/systemd/systemd-asan-env + export LD_PRELOAD + export ASAN_OPTIONS +fi + +# Only reuse the user namespace +systemd-run --unit=oldservice --property=PrivateUsers=true sleep 3600 +sleep .2 +OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') + +systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=PrivateNetwork=true sleep 3600 +sleep .2 +NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') + +assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" +assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)" + +systemctl stop oldservice newservice + +# Reuse the user and network namespaces +systemd-run --unit=oldservice --property=PrivateUsers=true --property=PrivateNetwork=true sleep 3600 +sleep .2 +OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') + +systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=NetworkNamespacePath=/proc/"$OLD_PID"/ns/net sleep 3600 +sleep .2 +NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') + +assert_eq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" +assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)" + +systemctl stop oldservice newservice + +# Delegate the network namespace +systemd-run --unit=oldservice --property=PrivateUsers=true sleep 3600 
+sleep .2 +OLD_PID=$(systemctl show oldservice -p MainPID | awk -F= '{print $2}') + +systemd-run --unit=newservice --property=UserNamespacePath=/proc/"$OLD_PID"/ns/user --property=DelegateNamespaces=net --property=PrivateNetwork=true sleep 3600 +sleep .2 +NEW_PID=$(systemctl show newservice -p MainPID | awk -F= '{print $2}') + +assert_neq "$(lsns -p "$OLD_PID" -o NS -t net -n)" "$(lsns -p "$NEW_PID" -o NS -t net -n)" +assert_eq "$(lsns -p "$OLD_PID" -o NS -t user -n)" "$(lsns -p "$NEW_PID" -o NS -t user -n)" + +systemctl stop oldservice newservice -- 2.47.3