/* Reviewer summary of the patch below (unified diff against TODO, src/basic/namespace-util.c and
 * src/basic/namespace-util.h).
 *
 * NOTE(review): the line structure of this patch text has been collapsed (hunk lines are joined by
 * single spaces), so it is kept verbatim here and cannot be fed to a patch tool in this form.
 *
 * TODO:
 *   - Drops the item asking to port pidref_namespace_open() to PIDFD_GET_MNT_NAMESPACE and the related
 *     ioctls.
 *
 * src/basic/namespace-util.h:
 *   - struct namespace_info: 'clone_flag' changes from unsigned int to unsigned long, and a new
 *     'unsigned long pidfd_get_ns_ioctl_cmd' member is inserted between 'clone_flag' and 'root_inode'.
 *   - Declares pidref_namespace_open_by_type(); the namespace_open_by_type() declaration moves up next
 *     to it.
 *
 * src/basic/namespace-util.c:
 *   - Includes "pidfd-util.h".
 *   - Every namespace_info[] entry gains a PIDFD_GET_..._NAMESPACE value in the new fourth slot; the
 *     terminating empty entry is now spelled '{}' with its explanatory comment behind it.
 *   - New static helper pidref_namespace_open_by_type_internal(pidref, type, need_verify):
 *       + asserts that pidref is set and that type is within [0, _NAMESPACE_TYPE_MAX);
 *       + returns -EREMOTE when pidref_is_remote() is true;
 *       + if pidref->fd >= 0, calls pidfd_get_namespace() with the per-type ioctl command and returns
 *         its result unless ERRNO_IS_NEG_NOT_SUPPORTED() matches it (presumably a namespace fd or a
 *         negative errno; pidfd_get_namespace() is defined elsewhere, confirm there);
 *       + otherwise falls back to procfs: if 'need_verify' is non-NULL it is set to true *before* the
 *         open() is attempted (so it is also set when the open fails), then the per-type procfs path is
 *         opened with O_RDONLY|O_NOCTTY|O_CLOEXEC; a failure with ENOENT while proc_mounted() == 0 is
 *         reported as -ENOSYS, any other failure as -errno;
 *       + if 'need_verify' is NULL the helper calls pidref_verify() itself before handing out the fd.
 *   - pidref_namespace_open_by_type() is the helper with need_verify == NULL.
 *   - namespace_open_by_type() is re-implemented on top of pidref_set_self() plus
 *     pidref_namespace_open_by_type(); the old version, which opened pid_namespace_path(0, type)
 *     directly, is removed further down in the file.
 *   - pidref_namespace_open():
 *       + pid, mount, net and user namespace fds now come from the helper, and the helper's return
 *         value is propagated instead of -errno;
 *       + for the user namespace, -ENOENT and -ENOPKG are both tolerated (previously only errno ==
 *         ENOENT was). NOTE(review): in that case userns_fd holds the negative errno value; how it is
 *         handed to the caller lies outside the visible hunk, verify against the full function;
 *       + the root directory fd is opened through RET_NERRNO() with O_CLOEXEC|O_DIRECTORY (the
 *         O_RDONLY|O_NOCTTY flags are dropped), -ENOENT with proc not mounted becomes -ENOSYS, and a
 *         successful open sets need_verify = true;
 *       + pidref_verify() is now only called when need_verify ended up true.
 */
From: Mike Yuan Date: Tue, 19 Nov 2024 23:14:32 +0000 (+0100) Subject: namespace-util: hook pidref_namespace_open() up with pidfd_get_namespace() X-Git-Tag: v258-rc1~1704^2~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4cad58788be911ddb6c739881b5738a783b7ba37;p=thirdparty%2Fsystemd.git namespace-util: hook pidref_namespace_open() up with pidfd_get_namespace() Supersedes #35308 --- diff --git a/TODO b/TODO index 5cefbc48aab..e693afcfc8f 100644 --- a/TODO +++ b/TODO @@ -144,9 +144,6 @@ Features: that terminal emulators can maybe connect a password manager or so, and highlight things specially. -* Port pidref_namespace_open() to use PIDFD_GET_MNT_NAMESPACE and related - ioctls to get nsfds directly from pidfds. - * start using STATX_SUBVOL in btrfs_is_subvol(). Also, make use of it generically, so that image discovery recognizes bcachefs subvols too. diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 3fca1fb27b1..10bbf36ca2f 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -18,24 +18,25 @@ #include "mountpoint-util.h" #include "namespace-util.h" #include "parse-util.h" +#include "pidfd-util.h" #include "process-util.h" #include "stat-util.h" #include "stdio-util.h" #include "user-util.h" const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = { - [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO }, - [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PROC_IPC_INIT_INO }, - [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, 0 }, + [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO }, + [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, PROC_IPC_INIT_INO }, + [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, 0 }, /* So, the mount namespace flag is called CLONE_NEWNS for historical * reasons. Let's expose it here under a more explanatory name: "mnt". 
* This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ - [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, 0 }, - [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PROC_PID_INIT_INO }, - [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PROC_USER_INIT_INO }, - [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PROC_UTS_INIT_INO }, - [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PROC_TIME_INIT_INO }, - { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, + [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, 0 }, + [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, PROC_PID_INIT_INO }, + [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, PROC_USER_INIT_INO }, + [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, PROC_UTS_INIT_INO }, + [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, PROC_TIME_INIT_INO }, + {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. 
*/ }; #define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path) @@ -48,6 +49,62 @@ NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) { return _NAMESPACE_TYPE_INVALID; } +static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) { + int r; + + assert(pidref_is_set(pidref)); + assert(type >= 0 && type < _NAMESPACE_TYPE_MAX); + + if (pidref_is_remote(pidref)) + return -EREMOTE; + + if (pidref->fd >= 0) { + r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd); + if (!ERRNO_IS_NEG_NOT_SUPPORTED(r)) + return r; + } + + if (need_verify) /* The caller shall call pidref_verify() later */ + *need_verify = true; + + _cleanup_close_ int nsfd = -EBADF; + const char *p; + + p = pid_namespace_path(pidref->pid, type); + nsfd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC); + if (nsfd < 0) { + if (errno == ENOENT && proc_mounted() == 0) + return -ENOSYS; + + return -errno; + } + + if (!need_verify) { /* Otherwise we verify on our own */ + r = pidref_verify(pidref); + if (r < 0) + return r; + } + + return TAKE_FD(nsfd); +} + +int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) { + return pidref_namespace_open_by_type_internal(pidref, type, NULL); +} + +int namespace_open_by_type(NamespaceType type) { + _cleanup_(pidref_done) PidRef self = PIDREF_NULL; + int r; + + assert(type >= 0 && type < _NAMESPACE_TYPE_MAX); + + r = pidref_set_self(&self); + if (r < 0) + return r; + + return pidref_namespace_open_by_type(&self, type); +} + int pidref_namespace_open( const PidRef *pidref, int *ret_pidns_fd, @@ -58,6 +115,7 @@ int pidref_namespace_open( _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF, userns_fd = -EBADF, root_fd = -EBADF; + bool need_verify = false; int r; assert(pidref_is_set(pidref)); @@ -66,53 +124,47 @@ int pidref_namespace_open( return -EREMOTE; if (ret_pidns_fd) { - const char *pidns; - - pidns = 
pid_namespace_path(pidref->pid, NAMESPACE_PID); - pidns_fd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify); if (pidns_fd < 0) - return -errno; + return pidns_fd; } if (ret_mntns_fd) { - const char *mntns; - - mntns = pid_namespace_path(pidref->pid, NAMESPACE_MOUNT); - mntns_fd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify); if (mntns_fd < 0) - return -errno; + return mntns_fd; } if (ret_netns_fd) { - const char *netns; - - netns = pid_namespace_path(pidref->pid, NAMESPACE_NET); - netns_fd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); + netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify); if (netns_fd < 0) - return -errno; + return netns_fd; } if (ret_userns_fd) { - const char *userns; - - userns = pid_namespace_path(pidref->pid, NAMESPACE_USER); - userns_fd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); - if (userns_fd < 0 && errno != ENOENT) - return -errno; + userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify); + if (userns_fd < 0 && !IN_SET(userns_fd, -ENOENT, -ENOPKG)) + return userns_fd; } if (ret_root_fd) { const char *root; root = procfs_file_alloca(pidref->pid, "root"); - root_fd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY)); + if (root_fd == -ENOENT && proc_mounted() == 0) + return -ENOSYS; if (root_fd < 0) - return -errno; + return root_fd; + + need_verify = true; } - r = pidref_verify(pidref); - if (r < 0) - return r; + if (need_verify) { + r = pidref_verify(pidref); + if (r < 0) + return r; + } if (ret_pidns_fd) *ret_pidns_fd = TAKE_FD(pidns_fd); @@ -488,22 +540,6 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r return 0; } -int namespace_open_by_type(NamespaceType type) { - const char *p; - int fd; - - assert(type 
>= 0); - assert(type < _NAMESPACE_TYPE_MAX); - - p = pid_namespace_path(0, type); - - fd = RET_NERRNO(open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC)); - if (fd == -ENOENT && proc_mounted() == 0) - return -ENOSYS; - - return fd; -} - int is_idmapping_supported(const char *path) { _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF; _cleanup_free_ char *uid_map = NULL, *gid_map = NULL; diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h index 7b6f5f92d54..ccc42bc40c0 100644 --- a/src/basic/namespace-util.h +++ b/src/basic/namespace-util.h @@ -23,12 +23,16 @@ enum NamespaceType { extern const struct namespace_info { const char *proc_name; const char *proc_path; - unsigned int clone_flag; + unsigned long clone_flag; + unsigned long pidfd_get_ns_ioctl_cmd; ino_t root_inode; } namespace_info[_NAMESPACE_TYPE_MAX + 1]; NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag); +int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type); +int namespace_open_by_type(NamespaceType type); + int pidref_namespace_open( const PidRef *pidref, int *ret_pidns_fd, @@ -79,6 +83,4 @@ int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type); int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range); -int namespace_open_by_type(NamespaceType type); - int is_idmapping_supported(const char *path);