From 1a1c5ab758eca2d94130a7a4d1ba2bfa424f706e Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Wed, 17 Dec 2025 12:32:14 +0100 Subject: [PATCH] namespace-util: introduce namespace_enter_delegated() Typically, when entering a set of namespaces, the userns is handled last, because we assume our process is more privileged than the userns. However, that assumption no longer holds for user managers, which have no privileges over the initial userns, and for which all other namespaces are actually owned by the userns that was unshared first (in the executor). Hence, let's add another flavor, namespace_enter_delegated(), to accommodate that use case. --- src/basic/namespace-util.c | 65 ++++++++++++++++++++++++++++++++++++++ src/basic/namespace-util.h | 1 + 2 files changed, 66 insertions(+) diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index c3b84ea7840..69bd4945dfb 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -262,6 +262,71 @@ int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int return 0; } +static int namespace_enter_one_idempotent(int nsfd, NamespaceType type) { + int r; + + /* Join a namespace, but only if we're not part of it already. This is important if we don't necessarily + * own the namespace in question, as the kernel would unconditionally return EPERM otherwise. */ + + assert(nsfd >= 0); + assert(type >= 0 && type < _NAMESPACE_TYPE_MAX); + + r = is_our_namespace(nsfd, type); + if (r < 0) + return r; + if (r > 0) + return 0; + + if (setns(nsfd, namespace_info[type].clone_flag) < 0) + return -errno; + + return 1; +} + +int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int netns_fd, int root_fd) { + int r; + + /* Similar to namespace_enter(), but operates on a set of namespaces that are potentially owned + * by the userns ("delegated"), in which case we'll need to gain CAP_SYS_ADMIN by joining + * the userns first, and the rest later. 
*/ + + assert(userns_fd >= 0); + + /* Block dlopen() now, to avoid us inadvertently loading a shared library from another namespace */ + block_dlopen(); + + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return -errno; + + if (pidns_fd >= 0) { + r = namespace_enter_one_idempotent(pidns_fd, NAMESPACE_PID); + if (r < 0) + return r; + } + + if (mntns_fd >= 0) { + r = namespace_enter_one_idempotent(mntns_fd, NAMESPACE_MOUNT); + if (r < 0) + return r; + } + + if (netns_fd >= 0) { + r = namespace_enter_one_idempotent(netns_fd, NAMESPACE_NET); + if (r < 0) + return r; + } + + if (root_fd >= 0) { + if (fchdir(root_fd) < 0) + return -errno; + + if (chroot(".") < 0) + return -errno; + } + + return maybe_setgroups(/* size = */ 0, NULL); +} + int fd_is_namespace(int fd, NamespaceType type) { int r; diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 32e64fa9e68..994125818f9 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -47,6 +47,7 @@ int namespace_open( int *ret_root_fd); int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd); +int namespace_enter_delegated(int userns_fd, int pidns_fd, int mntns_fd, int netns_fd, int root_fd); int fd_is_namespace(int fd, NamespaceType type); int is_our_namespace(int fd, NamespaceType type); -- 2.47.3