src/basic/namespace-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <fcntl.h>
   4 #include <sys/ioctl.h>
   5 #include <sys/mount.h>
   6
   7 #include "errno-util.h"
   8 #include "fd-util.h"
   9 #include "fileio.h"
  10 #include "missing_fs.h"
  11 #include "missing_magic.h"
  12 #include "missing_sched.h"
  13 #include "namespace-util.h"
  14 #include "process-util.h"
  15 #include "stat-util.h"
  16 #include "stdio-util.h"
  17 #include "user-util.h"
  18
/* Table of the namespace types known to this file, indexed by the NAMESPACE_* constants. Each entry
 * lists, in this order: a short name for the namespace, the path of the matching namespace file
 * relative to a process' procfs directory (this is the proc_path field that pid_namespace_path()
 * consumes), and the CLONE_NEW* flag that belongs to the namespace type.
 *
 * The initializers are positional, hence the order of the values inside each entry must match the
 * field order of struct namespace_info. */
const struct namespace_info namespace_info[] = {
        [NAMESPACE_CGROUP] =  { "cgroup", "ns/cgroup", CLONE_NEWCGROUP,                          },
        [NAMESPACE_IPC]    =  { "ipc",    "ns/ipc",    CLONE_NEWIPC,                             },
        [NAMESPACE_NET]    =  { "net",    "ns/net",    CLONE_NEWNET,                             },
        /* The mount namespace flag is called CLONE_NEWNS for historical reasons (it was the first
         * namespace type). Expose it here under a more explanatory name: "mnt". This is in line with
         * how the kernel exposes namespaces in /proc/$PID/ns. */
        [NAMESPACE_MOUNT]  =  { "mnt",    "ns/mnt",    CLONE_NEWNS,                              },
        [NAMESPACE_PID]    =  { "pid",    "ns/pid",    CLONE_NEWPID,                             },
        [NAMESPACE_USER]   =  { "user",   "ns/user",   CLONE_NEWUSER,                            },
        [NAMESPACE_UTS]    =  { "uts",    "ns/uts",    CLONE_NEWUTS,                             },
        [NAMESPACE_TIME]   =  { "time",   "ns/time",   CLONE_NEWTIME,                            },
        /* Trailing all-zero entry that terminates the table. */
        { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */       },
};
  33
/* Yields the procfs path of the namespace file of the specified type for the specified process, by
 * passing the proc_path entry of the namespace_info[] table on to procfs_file_alloca().
 *
 * NOTE(review): judging by the name of procfs_file_alloca() the returned string presumably lives on
 * the stack of the calling function, i.e. it must not be returned to callers and the macro should
 * not be invoked in a loop — confirm against the definition of procfs_file_alloca(). */
#define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
  35
  36 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
  37         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
  38         int rfd = -1;
  39
  40         assert(pid >= 0);
  41
  42         if (mntns_fd) {
  43                 const char *mntns;
  44
  45                 mntns = pid_namespace_path(pid, NAMESPACE_MOUNT);
  46                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
  47                 if (mntnsfd < 0)
  48                         return -errno;
  49         }
  50
  51         if (pidns_fd) {
  52                 const char *pidns;
  53
  54                 pidns = pid_namespace_path(pid, NAMESPACE_PID);
  55                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
  56                 if (pidnsfd < 0)
  57                         return -errno;
  58         }
  59
  60         if (netns_fd) {
  61                 const char *netns;
  62
  63                 netns = pid_namespace_path(pid, NAMESPACE_NET);
  64                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
  65                 if (netnsfd < 0)
  66                         return -errno;
  67         }
  68
  69         if (userns_fd) {
  70                 const char *userns;
  71
  72                 userns = pid_namespace_path(pid, NAMESPACE_USER);
  73                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
  74                 if (usernsfd < 0 && errno != ENOENT)
  75                         return -errno;
  76         }
  77
  78         if (root_fd) {
  79                 const char *root;
  80
  81                 root = procfs_file_alloca(pid, "root");
  82                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
  83                 if (rfd < 0)
  84                         return -errno;
  85         }
  86
  87         if (pidns_fd)
  88                 *pidns_fd = TAKE_FD(pidnsfd);
  89
  90         if (mntns_fd)
  91                 *mntns_fd = TAKE_FD(mntnsfd);
  92
  93         if (netns_fd)
  94                 *netns_fd = TAKE_FD(netnsfd);
  95
  96         if (userns_fd)
  97                 *userns_fd = TAKE_FD(usernsfd);
  98
  99         if (root_fd)
 100                 *root_fd = TAKE_FD(rfd);
 101
 102         return 0;
 103 }
 104
 105 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
 106         int r;
 107
 108         if (userns_fd >= 0) {
 109                 /* Can't setns to your own userns, since then you could escalate from non-root to root in
 110                  * your own namespace, so check if namespaces are equal before attempting to enter. */
 111
 112                 r = files_same(FORMAT_PROC_FD_PATH(userns_fd), "/proc/self/ns/user", 0);
 113                 if (r < 0)
 114                         return r;
 115                 if (r)
 116                         userns_fd = -1;
 117         }
 118
 119         if (pidns_fd >= 0)
 120                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
 121                         return -errno;
 122
 123         if (mntns_fd >= 0)
 124                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
 125                         return -errno;
 126
 127         if (netns_fd >= 0)
 128                 if (setns(netns_fd, CLONE_NEWNET) < 0)
 129                         return -errno;
 130
 131         if (userns_fd >= 0)
 132                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
 133                         return -errno;
 134
 135         if (root_fd >= 0) {
 136                 if (fchdir(root_fd) < 0)
 137                         return -errno;
 138
 139                 if (chroot(".") < 0)
 140                         return -errno;
 141         }
 142
 143         return reset_uid_gid();
 144 }
 145
 146 int fd_is_ns(int fd, unsigned long nsflag) {
 147         struct statfs s;
 148         int r;
 149
 150         /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone().
 151          * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN),
 152          * so that callers can handle this somewhat nicely.
 153          *
 154          * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
 155          * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
 156
 157         if (fstatfs(fd, &s) < 0)
 158                 return -errno;
 159
 160         if (!is_fs_type(&s, NSFS_MAGIC)) {
 161                 /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
 162                  * instead. Handle that in a somewhat smart way. */
 163
 164                 if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
 165                         struct statfs t;
 166
 167                         /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
 168                          * passed fd might refer to a network namespace, but we can't know for sure. In that case,
 169                          * return a recognizable error. */
 170
 171                         if (statfs("/proc/self/ns/net", &t) < 0)
 172                                 return -errno;
 173
 174                         if (s.f_type == t.f_type)
 175                                 return -EUCLEAN; /* It's possible, we simply don't know */
 176                 }
 177
 178                 return 0; /* No! */
 179         }
 180
 181         r = ioctl(fd, NS_GET_NSTYPE);
 182         if (r < 0) {
 183                 if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
 184                         return -EUCLEAN;
 185
 186                 return -errno;
 187         }
 188
 189         return (unsigned long) r == nsflag;
 190 }
 191
 192 int detach_mount_namespace(void) {
 193
 194         /* Detaches the mount namespace, disabling propagation from our namespace to the host */
 195
 196         if (unshare(CLONE_NEWNS) < 0)
 197                 return -errno;
 198
 199         return RET_NERRNO(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL));
 200 }
 201
 202 int userns_acquire(const char *uid_map, const char *gid_map) {
 203         char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
 204         _cleanup_(sigkill_waitp) pid_t pid = 0;
 205         _cleanup_close_ int userns_fd = -1;
 206         int r;
 207
 208         assert(uid_map);
 209         assert(gid_map);
 210
 211         /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
 212          * and then kills the process again. This way we have a userns fd that is not bound to any
 213          * process. We can use that for file system mounts and similar. */
 214
 215         r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid);
 216         if (r < 0)
 217                 return r;
 218         if (r == 0)
 219                 /* Child. We do nothing here, just freeze until somebody kills us. */
 220                 freeze();
 221
 222         xsprintf(path, "/proc/" PID_FMT "/uid_map", pid);
 223         r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
 224         if (r < 0)
 225                 return log_error_errno(r, "Failed to write UID map: %m");
 226
 227         xsprintf(path, "/proc/" PID_FMT "/gid_map", pid);
 228         r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
 229         if (r < 0)
 230                 return log_error_errno(r, "Failed to write GID map: %m");
 231
 232         r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL);
 233         if (r < 0)
 234                 return log_error_errno(r, "Failed to open userns fd: %m");
 235
 236         return TAKE_FD(userns_fd);
 237
 238 }
 239
 240 int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) {
 241         const char *ns_path;
 242         struct stat ns_st1, ns_st2;
 243
 244         if (pid1 == 0)
 245                 pid1 = getpid_cached();
 246
 247         if (pid2 == 0)
 248                 pid2 = getpid_cached();
 249
 250         if (pid1 == pid2)
 251                 return 1;
 252
 253         ns_path = pid_namespace_path(pid1, type);
 254         if (stat(ns_path, &ns_st1) < 0)
 255                 return -errno;
 256
 257         ns_path = pid_namespace_path(pid2, type);
 258         if (stat(ns_path, &ns_st2) < 0)
 259                 return -errno;
 260
 261         return stat_inode_same(&ns_st1, &ns_st2);
 262 }