From: Daan De Meyer Date: Sun, 15 Feb 2026 13:22:44 +0000 (+0100) Subject: namespace-util: Merge namespace_enter_delegated() into namespace_enter() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=211c3c9d41c18b63c9aba66d31d75ddf5448247a;p=thirdparty%2Fsystemd.git namespace-util: Merge namespace_enter_delegated() into namespace_enter() There's no need to pass in a boolean to decide whether we use namespace_enter_delegated() or not. Instead, we can just check if we have CAP_SYS_ADMIN in our own user namespace. If we don't, then we have to insist on a child user namespace being passed in and we have to enter it first to get CAP_SYS_ADMIN as without CAP_SYS_ADMIN we wouldn't be able to call setns() in the first place. If we do have CAP_SYS_ADMIN, we can always enter the other namespaces first before entering the user namespace. Additionally, we don't fail anymore if we can't reset the UID/GID since a root user might not always be available in every user namespace we might enter. --- diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 69bd4945dfb..6a73eed9130 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -8,6 +8,7 @@ #include #include +#include "capability-util.h" #include "dlfcn-util.h" #include "errno-util.h" #include "fd-util.h" @@ -215,53 +216,6 @@ int namespace_open( return pidref_namespace_open(&pidref, ret_pidns_fd, ret_mntns_fd, ret_netns_fd, ret_userns_fd, ret_root_fd); } -int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { - int r; - - /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */ - block_dlopen(); - - if (userns_fd >= 0) { - /* Can't setns to your own userns, since then you could escalate from non-root to root in - * your own namespace, so check if namespaces are equal before attempting to enter. */ - - r = is_our_namespace(userns_fd, NAMESPACE_USER); - if (r < 0) - return r; - if (r > 0) - userns_fd = -EBADF; - } - - if (pidns_fd >= 0) - if (setns(pidns_fd, CLONE_NEWPID) < 0) - return -errno; - - if (mntns_fd >= 0) - if (setns(mntns_fd, CLONE_NEWNS) < 0) - return -errno; - - if (netns_fd >= 0) - if (setns(netns_fd, CLONE_NEWNET) < 0) - return -errno; - - if (userns_fd >= 0) - if (setns(userns_fd, CLONE_NEWUSER) < 0) - return -errno; - - if (root_fd >= 0) { - if (fchdir(root_fd) < 0) - return -errno; - - if (chroot(".") < 0) - return -errno; - } - - if (userns_fd >= 0) - return reset_uid_gid(); - - return 0; -} - static int namespace_enter_one_idempotent(int nsfd, NamespaceType type) { int r; @@ -283,20 +237,43 @@ static int namespace_enter_one_idempotent(int nsfd, NamespaceType type) { return 1; } -int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int netns_fd, int root_fd) { +int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { int r; - /* Similar to namespace_enter(), but operates on a set of namespaces that are potentially owned - * by the userns ("delegated"), in which case we'll need to gain CAP_SYS_ADMIN by joining - * the userns first, and the rest later. */ - - assert(userns_fd >= 0); - /* Block dlopen() now, to avoid us inadvertently loading shared library from another namespace */ block_dlopen(); - if (setns(userns_fd, CLONE_NEWUSER) < 0) - return -errno; + if (userns_fd >= 0) { + /* Can't setns to your own userns, since then you could escalate from non-root to root in + * your own namespace, so check if namespaces are equal before attempting to enter. */ + + r = is_our_namespace(userns_fd, NAMESPACE_USER); + if (r < 0) + return r; + if (r > 0) + userns_fd = -EBADF; + } + + r = have_effective_cap(CAP_SYS_ADMIN); + if (r < 0) + return r; + + bool have_cap_sys_admin = r > 0; + + if (!have_cap_sys_admin) { + /* If we don't have CAP_SYS_ADMIN in our own user namespace, our best bet is to enter the + * user namespace first (if we got one) to get CAP_SYS_ADMIN within the child user namespace, + * and then hope the other namespaces are owned by the child user namespace. If they aren't, + * we'll just get an EPERM later on when trying to setns() to them. */ + + if (userns_fd < 0) + return log_debug_errno( + SYNTHETIC_ERRNO(EPERM), + "Need CAP_SYS_ADMIN or a child user namespace to enter namespaces."); + + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return -errno; + } if (pidns_fd >= 0) { r = namespace_enter_one_idempotent(pidns_fd, NAMESPACE_PID); @@ -316,6 +293,10 @@ int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int net return r; } + if (userns_fd >= 0 && have_cap_sys_admin) + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return -errno; + if (root_fd >= 0) { if (fchdir(root_fd) < 0) return -errno; @@ -324,7 +305,15 @@ int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int net return -errno; } - return maybe_setgroups(/* size = */ 0, NULL); + if (userns_fd >= 0) { + /* Try to become root in the user namespace but don't error out if we can't, since it's not + * uncommon to have user namespaces without a root user in them. */ + r = reset_uid_gid(); + if (r < 0) + log_debug_errno(r, "Unable to drop auxiliary groups or reset UID/GID, ignoring: %m"); + } + + return 0; } int fd_is_namespace(int fd, NamespaceType type) { diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 994125818f9..32e64fa9e68 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -47,7 +47,6 @@ int namespace_open( int *ret_root_fd); int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd); -int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int netns_fd, int root_fd); int fd_is_namespace(int fd, NamespaceType type); int is_our_namespace(int fd, NamespaceType type); diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 52af6a01c8a..9c44193b67c 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1778,7 +1778,6 @@ int namespace_fork_full( int netns_fd, int userns_fd, int root_fd, - bool delegated, PidRef *ret) { _cleanup_(pidref_done_sigkill_wait) PidRef pidref_outer = PIDREF_NULL; @@ -1824,10 +1823,7 @@ int namespace_fork_full( errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); - if (delegated) - r = namespace_enter_delegated(userns_fd, pidns_fd, mntns_fd, netns_fd, root_fd); - else - r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd); + r = namespace_enter(pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd); if (r < 0) { log_full_errno(prio, r, "Failed to join namespace: %m"); report_errno_and_exit(errno_pipe_fd[1], r); diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 66bb194bac0..46a5612048f 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -201,7 +201,6 @@ int namespace_fork_full( int netns_fd, int userns_fd, int root_fd, - bool delegated, PidRef *ret); static inline int namespace_fork( @@ -216,7 +215,7 @@ static inline int namespace_fork( PidRef *ret) { return namespace_fork_full(outer_name, inner_name, NULL, 0, flags, - pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd, false, + pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd, ret); } diff --git a/src/core/exec-credential.c b/src/core/exec-credential.c index 07d83ac7a6a..c1b2fcda85c 100644 --- a/src/core/exec-credential.c +++ b/src/core/exec-credential.c @@ -1255,7 +1255,6 @@ int unit_refresh_credentials(Unit *u) { (int[]) { tunnel_fds[1] }, 1, FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG, pidns_fd, mntns_fd, /* netns_fd = */ -EBADF, userns_fd, root_fd, - /* delegated = */ MANAGER_IS_USER(u->manager), &child); if (r < 0) return log_full_errno(ERRNO_IS_NEG_PRIVILEGE(r) ? LOG_WARNING : LOG_ERR, r, diff --git a/src/test/meson.build b/src/test/meson.build index c68477dc60c..cd563654dff 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -149,6 +149,7 @@ simple_tests += files( 'test-mkdir.c', 'test-modhex.c', 'test-mountpoint-util.c', + 'test-namespace-util.c', 'test-net-naming-scheme.c', 'test-notify-recv.c', 'test-nsresource.c', diff --git a/src/test/test-namespace-util.c b/src/test/test-namespace-util.c new file mode 100644 index 00000000000..ccc8e17cf99 --- /dev/null +++ b/src/test/test-namespace-util.c @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include "capability-util.h" +#include "errno-util.h" +#include "pidref.h" +#include "process-util.h" +#include "fd-util.h" +#include "namespace-util.h" +#include "tests.h" + +TEST(namespace_enter) { + _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL; + int r; + + r = pidref_safe_fork( + "test-ns-enter-1", + FORK_NEW_USERNS|FORK_NEW_MOUNTNS|FORK_LOG|FORK_FREEZE|FORK_DEATHSIG_SIGKILL, + &pidref); + if (ERRNO_IS_NEG_PRIVILEGE(r)) + return (void) log_tests_skipped_errno(r, "Unable to unshare user namespace"); + + ASSERT_OK(r); + + _cleanup_close_ int mntns_fd = -EBADF, userns_fd = -EBADF, root_fd = -EBADF; + ASSERT_OK(pidref_namespace_open(&pidref, NULL, &mntns_fd, NULL, &userns_fd, &root_fd)); + + r = ASSERT_OK(pidref_safe_fork( + "test-ns-enter-2", + FORK_LOG|FORK_WAIT|FORK_DEATHSIG_SIGKILL, + NULL)); + if (r == 0) { + ASSERT_OK(namespace_enter(-EBADF, mntns_fd, -EBADF, userns_fd, root_fd)); + _exit(EXIT_SUCCESS); + } + + /* Make sure we can enter the namespaces as well if we don't have CAP_SYS_ADMIN. */ + r = ASSERT_OK(pidref_safe_fork( + "test-ns-enter-3", + FORK_LOG|FORK_WAIT|FORK_DEATHSIG_SIGKILL, + NULL)); + if (r == 0) { + ASSERT_OK(drop_capability(CAP_SYS_ADMIN)); + ASSERT_OK(namespace_enter(-EBADF, mntns_fd, -EBADF, userns_fd, root_fd)); + _exit(EXIT_SUCCESS); + } +} + +DEFINE_TEST_MAIN(LOG_DEBUG);