git.ipfire.org Git - thirdparty/systemd.git/commitdiff
namespace-util: hook pidref_namespace_open() up with pidfd_get_namespace()
author Mike Yuan <me@yhndnzj.com>
Tue, 19 Nov 2024 23:14:32 +0000 (00:14 +0100)
committer Mike Yuan <me@yhndnzj.com>
Sat, 4 Jan 2025 16:08:00 +0000 (17:08 +0100)
Supersedes #35308

TODO
src/basic/namespace-util.c
src/basic/namespace-util.h

diff --git a/TODO b/TODO
index 5cefbc48aaba56159e07d6235a24fe61ab4e61f4..e693afcfc8f4240d82fd63436b3d450addcb6bd2 100644 (file)
--- a/TODO
+++ b/TODO
@@ -144,9 +144,6 @@ Features:
   that terminal emulators can maybe connect a password manager or so, and
   highlight things specially.
 
-* Port pidref_namespace_open() to use PIDFD_GET_MNT_NAMESPACE and related
-  ioctls to get nsfds directly from pidfds.
-
 * start using STATX_SUBVOL in btrfs_is_subvol(). Also, make use of it
   generically, so that image discovery recognizes bcachefs subvols too.
 
index 3fca1fb27b10fef3d9266c67297a1b82877cb1ec..10bbf36ca2f0d2ec235b33ffda0bcd5131b2a52f 100644 (file)
 #include "mountpoint-util.h"
 #include "namespace-util.h"
 #include "parse-util.h"
+#include "pidfd-util.h"
 #include "process-util.h"
 #include "stat-util.h"
 #include "stdio-util.h"
 #include "user-util.h"
 
+/* Per-namespace-type metadata table, indexed by NamespaceType. The initializers are positional and follow
+ * the member order of struct namespace_info: proc_name, proc_path, clone_flag, pidfd_get_ns_ioctl_cmd,
+ * root_inode. */
 const struct namespace_info namespace_info[_NAMESPACE_TYPE_MAX + 1] = {
-        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PROC_CGROUP_INIT_INO     },
-        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PROC_IPC_INIT_INO        },
-        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    0                        },
+        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, PROC_CGROUP_INIT_INO },
+        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,    PIDFD_GET_IPC_NAMESPACE,    PROC_IPC_INIT_INO    },
+        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,    PIDFD_GET_NET_NAMESPACE,    0                    },
         /* So, the mount namespace flag is called CLONE_NEWNS for historical
          * reasons. Let's expose it here under a more explanatory name: "mnt".
          * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */
-        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     0                        },
-        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PROC_PID_INIT_INO        },
-        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PROC_USER_INIT_INO       },
-        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PROC_UTS_INIT_INO        },
-        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PROC_TIME_INIT_INO       },
-        { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */       },
+        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,     PIDFD_GET_MNT_NAMESPACE,    0                    },
+        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,    PIDFD_GET_PID_NAMESPACE,    PROC_PID_INIT_INO    },
+        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,   PIDFD_GET_USER_NAMESPACE,   PROC_USER_INIT_INO   },
+        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,    PIDFD_GET_UTS_NAMESPACE,    PROC_UTS_INIT_INO    },
+        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,   PIDFD_GET_TIME_NAMESPACE,   PROC_TIME_INIT_INO   },
+        {}, /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */
 };
 
 #define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
@@ -48,6 +49,62 @@ NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag) {
         return _NAMESPACE_TYPE_INVALID;
 }
 
+/* Opens a namespace fd of the given type for the process referenced by 'pidref'.
+ *
+ * If the PidRef carries a pidfd, pidfd_get_namespace() is tried first, with the ioctl command taken from
+ * namespace_info[]. Its result is returned as-is unless ERRNO_IS_NEG_NOT_SUPPORTED() matches it, in which
+ * case (and when there is no pidfd at all) the matching file below /proc/ is opened instead.
+ *
+ * The /proc/ path requires a pidref_verify() call afterwards. If 'need_verify' is NULL this is done right
+ * here; otherwise *need_verify is set to true and the caller is expected to verify later. The flag is never
+ * reset to false here, so one flag may be shared across several calls.
+ *
+ * Returns the namespace fd on success, -EREMOTE for remote PidRefs, -ENOSYS if the /proc/ file is missing
+ * and proc_mounted() returns 0, or another negative errno-style error. */
+static int pidref_namespace_open_by_type_internal(const PidRef *pidref, NamespaceType type, bool *need_verify) {
+        int r;
+
+        assert(pidref_is_set(pidref));
+        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
+
+        if (pidref_is_remote(pidref))
+                return -EREMOTE;
+
+        if (pidref->fd >= 0) {
+                r = pidfd_get_namespace(pidref->fd, namespace_info[type].pidfd_get_ns_ioctl_cmd);
+                if (!ERRNO_IS_NEG_NOT_SUPPORTED(r))
+                        return r;
+        }
+
+        if (need_verify) /* The caller shall call pidref_verify() later */
+                *need_verify = true;
+
+        _cleanup_close_ int nsfd = -EBADF;
+        const char *p;
+
+        p = pid_namespace_path(pidref->pid, type);
+
+        /* Convert the failure into a negative errno right away: proc_mounted() is not visible from here and
+         * may well modify errno, so reading errno only after calling it could report the wrong error. */
+        nsfd = RET_NERRNO(open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC));
+        if (nsfd == -ENOENT && proc_mounted() == 0)
+                return -ENOSYS;
+        if (nsfd < 0)
+                return nsfd;
+
+        if (!need_verify) { /* Nobody verifies for us later, hence do it right here */
+                r = pidref_verify(pidref);
+                if (r < 0)
+                        return r;
+        }
+
+        return TAKE_FD(nsfd);
+}
+
+/* Public by-type open for an arbitrary PidRef. No 'need_verify' flag is passed, so the internal helper
+ * calls pidref_verify() itself whenever it has to fall back to /proc/. */
+int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type) {
+        return pidref_namespace_open_by_type_internal(
+                        pidref,
+                        type,
+                        /* need_verify = */ NULL);
+}
+
+/* Opens a namespace fd of the given type via a PidRef obtained from pidref_set_self() (presumably
+ * referring to the calling process), which is then handed to pidref_namespace_open_by_type(). The
+ * temporary PidRef is released through its _cleanup_ handler on return.
+ *
+ * Returns the fd on success, or a negative errno-style error from either call. */
+int namespace_open_by_type(NamespaceType type) {
+        _cleanup_(pidref_done) PidRef own = PIDREF_NULL;
+        int k;
+
+        assert(type >= 0 && type < _NAMESPACE_TYPE_MAX);
+
+        k = pidref_set_self(&own);
+        if (k >= 0)
+                k = pidref_namespace_open_by_type(&own, type);
+
+        return k;
+}
+
 int pidref_namespace_open(
                 const PidRef *pidref,
                 int *ret_pidns_fd,
@@ -58,6 +115,7 @@ int pidref_namespace_open(
 
         _cleanup_close_ int pidns_fd = -EBADF, mntns_fd = -EBADF, netns_fd = -EBADF,
                 userns_fd = -EBADF, root_fd = -EBADF;
+        bool need_verify = false;
         int r;
 
         assert(pidref_is_set(pidref));
@@ -66,53 +124,47 @@ int pidref_namespace_open(
                 return -EREMOTE;
 
         if (ret_pidns_fd) {
-                const char *pidns;
-
-                pidns = pid_namespace_path(pidref->pid, NAMESPACE_PID);
-                pidns_fd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                pidns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_PID, &need_verify);
                 if (pidns_fd < 0)
-                        return -errno;
+                        return pidns_fd;
         }
 
         if (ret_mntns_fd) {
-                const char *mntns;
-
-                mntns = pid_namespace_path(pidref->pid, NAMESPACE_MOUNT);
-                mntns_fd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                mntns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_MOUNT, &need_verify);
                 if (mntns_fd < 0)
-                        return -errno;
+                        return mntns_fd;
         }
 
         if (ret_netns_fd) {
-                const char *netns;
-
-                netns = pid_namespace_path(pidref->pid, NAMESPACE_NET);
-                netns_fd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+                netns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_NET, &need_verify);
                 if (netns_fd < 0)
-                        return -errno;
+                        return netns_fd;
         }
 
         if (ret_userns_fd) {
-                const char *userns;
-
-                userns = pid_namespace_path(pidref->pid, NAMESPACE_USER);
-                userns_fd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-                if (userns_fd < 0 && errno != ENOENT)
-                        return -errno;
+                userns_fd = pidref_namespace_open_by_type_internal(pidref, NAMESPACE_USER, &need_verify);
+                if (userns_fd < 0 && !IN_SET(userns_fd, -ENOENT, -ENOPKG))
+                        return userns_fd;
         }
 
         if (ret_root_fd) {
                 const char *root;
 
                 root = procfs_file_alloca(pidref->pid, "root");
-                root_fd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+                root_fd = RET_NERRNO(open(root, O_CLOEXEC|O_DIRECTORY));
+                if (root_fd == -ENOENT && proc_mounted() == 0)
+                        return -ENOSYS;
                 if (root_fd < 0)
-                        return -errno;
+                        return root_fd;
+
+                need_verify = true;
         }
 
-        r = pidref_verify(pidref);
-        if (r < 0)
-                return r;
+        if (need_verify) {
+                r = pidref_verify(pidref);
+                if (r < 0)
+                        return r;
+        }
 
         if (ret_pidns_fd)
                 *ret_pidns_fd = TAKE_FD(pidns_fd);
@@ -488,22 +540,6 @@ int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_r
         return 0;
 }
 
-int namespace_open_by_type(NamespaceType type) {
-        const char *p;
-        int fd;
-
-        assert(type >= 0);
-        assert(type < _NAMESPACE_TYPE_MAX);
-
-        p = pid_namespace_path(0, type);
-
-        fd = RET_NERRNO(open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC));
-        if (fd == -ENOENT && proc_mounted() == 0)
-                return -ENOSYS;
-
-        return fd;
-}
-
 int is_idmapping_supported(const char *path) {
         _cleanup_close_ int mount_fd = -EBADF, userns_fd = -EBADF, dir_fd = -EBADF;
         _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
index 7b6f5f92d5425ddde1ca37e9dc152cc71922cc4f..ccc42bc40c0a8fc2c0cc9b79519274827759d7d9 100644 (file)
@@ -23,12 +23,16 @@ enum NamespaceType {
 extern const struct namespace_info {
         const char *proc_name;
         const char *proc_path;
-        unsigned int clone_flag;
+        unsigned long clone_flag;             /* CLONE_NEW* flag of this namespace type */
+        unsigned long pidfd_get_ns_ioctl_cmd; /* PIDFD_GET_*_NAMESPACE command handed to pidfd_get_namespace() */
         ino_t root_inode;
 } namespace_info[_NAMESPACE_TYPE_MAX + 1];
 
 NamespaceType clone_flag_to_namespace_type(unsigned long clone_flag);
 
+int pidref_namespace_open_by_type(const PidRef *pidref, NamespaceType type);
+int namespace_open_by_type(NamespaceType type);
+
 int pidref_namespace_open(
                 const PidRef *pidref,
                 int *ret_pidns_fd,
@@ -79,6 +83,4 @@ int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type);
 
 int parse_userns_uid_range(const char *s, uid_t *ret_uid_shift, uid_t *ret_uid_range);
 
-int namespace_open_by_type(NamespaceType type);
-
 int is_idmapping_supported(const char *path);