1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #include "errno-util.h"
10 #include "missing_fs.h"
11 #include "missing_magic.h"
12 #include "missing_sched.h"
13 #include "namespace-util.h"
14 #include "process-util.h"
15 #include "stat-util.h"
16 #include "stdio-util.h"
17 #include "user-util.h"
/* Table describing the namespace types handled by this file. Entries are placed with designated
 * initializers keyed by the NAMESPACE_* enumerators and each carries three values: a short name, a
 * "ns/<name>" path and the matching CLONE_NEW* flag. pid_namespace_path() below reads the member named
 * proc_path from this table; presumably that is the "ns/<name>" value — confirm against the definition
 * of struct namespace_info. The table ends with an empty entry that serves as a terminator. */
19 const struct namespace_info namespace_info
[] = {
20 [NAMESPACE_CGROUP
] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP
, },
21 [NAMESPACE_IPC
] = { "ipc", "ns/ipc", CLONE_NEWIPC
, },
22 [NAMESPACE_NET
] = { "net", "ns/net", CLONE_NEWNET
, },
23 /* So, the mount namespace flag is called CLONE_NEWNS for historical
24 * reasons. Let's expose it here under a more explanatory name: "mnt".
25 * This is in line with how the kernel exposes namespaces in /proc/$PID/ns. */
26 [NAMESPACE_MOUNT
] = { "mnt", "ns/mnt", CLONE_NEWNS
, },
27 [NAMESPACE_PID
] = { "pid", "ns/pid", CLONE_NEWPID
, },
28 [NAMESPACE_USER
] = { "user", "ns/user", CLONE_NEWUSER
, },
29 [NAMESPACE_UTS
] = { "uts", "ns/uts", CLONE_NEWUTS
, },
30 [NAMESPACE_TIME
] = { "time", "ns/time", CLONE_NEWTIME
, },
31 { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ },
/* Expands to procfs_file_alloca(pid, <proc_path of 'type'>), i.e. the procfs file of process 'pid' for
 * the given namespace type. 'type' is used directly as an index into namespace_info[] without a bounds
 * check. NOTE(review): the "_alloca" suffix suggests the resulting string lives on the caller's stack
 * frame and must not outlive it — confirm against the definition of procfs_file_alloca(). */
34 #define pid_namespace_path(pid, type) procfs_file_alloca(pid, namespace_info[type].proc_path)
/* Opens file descriptors referring to the namespaces and the root directory of process 'pid'.
 *
 * What is visible here: the mount, PID, network and user namespace files are located with
 * pid_namespace_path() and the root directory with procfs_file_alloca(pid, "root"). Each is opened
 * with O_RDONLY|O_NOCTTY|O_CLOEXEC, the root directory additionally with O_DIRECTORY. The four
 * namespace descriptors are kept in variables declared _cleanup_close_ and are handed over to the
 * caller's output parameters with TAKE_FD() at the end.
 *
 * NOTE(review): userns_acquire() in this file passes NULL for every output except userns_fd, so each
 * output pointer is presumably optional — the guarding conditions are not visible in this view,
 * confirm. The return convention is not visible either; presumably 0 on success and a negative
 * errno-style value on failure. */
36 int namespace_open(pid_t pid
, int *pidns_fd
, int *mntns_fd
, int *netns_fd
, int *userns_fd
, int *root_fd
) {
37 _cleanup_close_
int pidnsfd
= -EBADF
, mntnsfd
= -EBADF
, netnsfd
= -EBADF
, usernsfd
= -EBADF
;
/* Mount namespace. */
45 mntns
= pid_namespace_path(pid
, NAMESPACE_MOUNT
);
46 mntnsfd
= open(mntns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
/* PID namespace. */
54 pidns
= pid_namespace_path(pid
, NAMESPACE_PID
);
55 pidnsfd
= open(pidns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
/* Network namespace. */
63 netns
= pid_namespace_path(pid
, NAMESPACE_NET
);
64 netnsfd
= open(netns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
/* User namespace. Unlike the other namespaces, the failure check below excludes ENOENT, so a missing
 * ns/user file is not handled like other open() errors. */
72 userns
= pid_namespace_path(pid
, NAMESPACE_USER
);
73 usernsfd
= open(userns
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
);
74 if (usernsfd
< 0 && errno
!= ENOENT
)
/* Root directory of the process; O_DIRECTORY makes open() fail if the path is not a directory. */
81 root
= procfs_file_alloca(pid
, "root");
82 rfd
= open(root
, O_RDONLY
|O_NOCTTY
|O_CLOEXEC
|O_DIRECTORY
);
/* Hand the descriptors over to the caller's output parameters. */
88 *pidns_fd
= TAKE_FD(pidnsfd
);
91 *mntns_fd
= TAKE_FD(mntnsfd
);
94 *netns_fd
= TAKE_FD(netnsfd
);
97 *userns_fd
= TAKE_FD(usernsfd
);
100 *root_fd
= TAKE_FD(rfd
);
/* Joins the namespaces referred to by the given descriptors and changes into the directory referred to
 * by root_fd.
 *
 * Visible order of operations: if userns_fd is >= 0 it is first compared against "/proc/self/ns/user"
 * with files_same(); then setns() is called for the PID, mount, network and user namespace, in that
 * order; then fchdir(root_fd); finally the result of reset_uid_gid() is returned.
 *
 * NOTE(review): the conditions guarding the individual setns()/fchdir() calls, what happens when the
 * user namespaces turn out to be equal, and the error returns are not visible in this view —
 * presumably a negative fd skips the corresponding step; confirm. */
105 int namespace_enter(int pidns_fd
, int mntns_fd
, int netns_fd
, int userns_fd
, int root_fd
) {
108 if (userns_fd
>= 0) {
109 /* Can't setns to your own userns, since then you could escalate from non-root to root in
110 * your own namespace, so check if namespaces are equal before attempting to enter. */
112 r
= files_same(FORMAT_PROC_FD_PATH(userns_fd
), "/proc/self/ns/user", 0);
120 if (setns(pidns_fd
, CLONE_NEWPID
) < 0)
124 if (setns(mntns_fd
, CLONE_NEWNS
) < 0)
128 if (setns(netns_fd
, CLONE_NEWNET
) < 0)
132 if (setns(userns_fd
, CLONE_NEWUSER
) < 0)
136 if (fchdir(root_fd
) < 0)
143 return reset_uid_gid();
146 int fd_is_ns(int fd
, unsigned long nsflag
) {
150 /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone().
151 * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN),
152 * so that callers can handle this somewhat nicely.
154 * This function returns > 0 if the fd definitely refers to a namespace of the type selected by nsflag, 0 if
155 * it definitely does not refer to such a namespace, -EUCLEAN if we can't determine, and other negative error
    * codes on error. */
157 if (fstatfs(fd
, &s
) < 0)
160 if (!is_fs_type(&s
, NSFS_MAGIC
)) {
161 /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
162 * instead. Handle that in a somewhat smart way. */
164 if (is_fs_type(&s
, PROC_SUPER_MAGIC
)) {
167 /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
168 * passed fd might refer to a network namespace, but we can't know for sure. In that case,
169 * return a recognizable error. */
171 if (statfs("/proc/self/ns/net", &t
) < 0)
174 if (s
.f_type
== t
.f_type
)
175 return -EUCLEAN
; /* It's possible, we simply don't know */
/* Query the namespace type of the fd with the NS_GET_NSTYPE ioctl. */
181 r
= ioctl(fd
, NS_GET_NSTYPE
);
183 if (errno
== ENOTTY
) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
/* The value obtained from the ioctl is compared with the requested flag; the comparison yields 1 or 0. */
189 return (unsigned long) r
== nsflag
;
/* Moves the calling process into a new mount namespace and resets mount propagation on "/" recursively.
 * If unshare() or either mount() call fails, the result of log_debug_errno(errno, ...) is returned, so
 * the failure is only logged at debug level. NOTE(review): the return value on success is not visible
 * in this view — presumably 0; confirm. */
192 int detach_mount_namespace(void) {
193 /* Detaches the mount namespace, disabling propagation from our namespace to the host. Sets
194 * propagation first to MS_SLAVE for all mounts (disabling propagation), and then back to MS_SHARED
195 * (so that we create a new peer group). */
197 if (unshare(CLONE_NEWNS
) < 0)
198 return log_debug_errno(errno
, "Failed to acquire mount namespace: %m");
200 if (mount(NULL
, "/", NULL
, MS_SLAVE
| MS_REC
, NULL
) < 0)
201 return log_debug_errno(errno
, "Failed to set mount propagation to MS_SLAVE for all mounts: %m");
203 if (mount(NULL
, "/", NULL
, MS_SHARED
| MS_REC
, NULL
) < 0)
204 return log_debug_errno(errno
, "Failed to set mount propagation back to MS_SHARED for all mounts: %m");
/* Creates a new user namespace configured with the given maps and returns a file descriptor for it.
 *
 * uid_map: string written with write_string_file() to /proc/<pid>/uid_map of the helper process.
 * gid_map: string written with write_string_file() to /proc/<pid>/gid_map of the helper process.
 *
 * Returns the user namespace fd, handed to the caller via TAKE_FD(). On the failure paths visible here
 * (writing either map, opening the namespace fd) the result of log_error_errno() is returned instead.
 * NOTE(review): the checks that select those failure paths, and the handling of a safe_fork() failure,
 * are not visible in this view. */
209 int userns_acquire(const char *uid_map
, const char *gid_map
) {
/* One buffer serves both map files: "uid_map" and "gid_map" have the same length, so the size computed
 * from "/proc//uid_map" plus the decimal PID and the trailing NUL fits either path. */
210 char path
[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t
) + 1];
/* NOTE(review): sigkill_waitp presumably kills and reaps the helper when 'pid' goes out of scope, which
 * would match "and then kills the process again" below — confirm against its definition. */
211 _cleanup_(sigkill_waitp
) pid_t pid
= 0;
212 _cleanup_close_
int userns_fd
= -EBADF
;
218 /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
219 * and then kills the process again. This way we have a userns fd that is not bound to any
220 * process. We can use that for file system mounts and similar. */
222 r
= safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS
|FORK_DEATHSIG
|FORK_NEW_USERNS
, &pid
);
226 /* Child. We do nothing here, just freeze until somebody kills us. */
229 xsprintf(path
, "/proc/" PID_FMT
"/uid_map", pid
);
230 r
= write_string_file(path
, uid_map
, WRITE_STRING_FILE_DISABLE_BUFFER
);
232 return log_error_errno(r
, "Failed to write UID map: %m");
234 xsprintf(path
, "/proc/" PID_FMT
"/gid_map", pid
);
235 r
= write_string_file(path
, gid_map
, WRITE_STRING_FILE_DISABLE_BUFFER
);
237 return log_error_errno(r
, "Failed to write GID map: %m");
/* Only the user namespace of the helper is wanted, hence NULL for all other outputs. */
239 r
= namespace_open(pid
, NULL
, NULL
, NULL
, &userns_fd
, NULL
);
241 return log_error_errno(r
, "Failed to open userns fd: %m");
243 return TAKE_FD(userns_fd
);
/* Checks whether processes pid1 and pid2 are in the same namespace of the given type. Each process'
 * namespace file (see pid_namespace_path()) is stat()ed and the two results are compared with
 * stat_inode_same(), whose result is returned.
 *
 * pid1 and pid2 may be replaced by getpid_cached(), i.e. the calling process. NOTE(review): the
 * condition for that substitution is not visible in this view — presumably a PID of 0; confirm. The
 * values returned when either stat() call fails are not visible either. */
247 int in_same_namespace(pid_t pid1
, pid_t pid2
, NamespaceType type
) {
249 struct stat ns_st1
, ns_st2
;
252 pid1
= getpid_cached();
255 pid2
= getpid_cached();
260 ns_path
= pid_namespace_path(pid1
, type
);
261 if (stat(ns_path
, &ns_st1
) < 0)
264 ns_path
= pid_namespace_path(pid2
, type
);
265 if (stat(ns_path
, &ns_st2
) < 0)
268 return stat_inode_same(&ns_st1
, &ns_st2
);