]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/namespace-util.c
basic/namespace-util: avoid one allocation
[thirdparty/systemd.git] / src / basic / namespace-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <fcntl.h>
4 #include <sys/ioctl.h>
5 #include <sys/mount.h>
6
7 #include "fd-util.h"
8 #include "missing_fs.h"
9 #include "missing_magic.h"
10 #include "namespace-util.h"
11 #include "process-util.h"
12 #include "stat-util.h"
13 #include "stdio-util.h"
14 #include "user-util.h"
15
16 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
17 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
18 int rfd = -1;
19
20 assert(pid >= 0);
21
22 if (mntns_fd) {
23 const char *mntns;
24
25 mntns = procfs_file_alloca(pid, "ns/mnt");
26 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
27 if (mntnsfd < 0)
28 return -errno;
29 }
30
31 if (pidns_fd) {
32 const char *pidns;
33
34 pidns = procfs_file_alloca(pid, "ns/pid");
35 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
36 if (pidnsfd < 0)
37 return -errno;
38 }
39
40 if (netns_fd) {
41 const char *netns;
42
43 netns = procfs_file_alloca(pid, "ns/net");
44 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
45 if (netnsfd < 0)
46 return -errno;
47 }
48
49 if (userns_fd) {
50 const char *userns;
51
52 userns = procfs_file_alloca(pid, "ns/user");
53 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
54 if (usernsfd < 0 && errno != ENOENT)
55 return -errno;
56 }
57
58 if (root_fd) {
59 const char *root;
60
61 root = procfs_file_alloca(pid, "root");
62 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
63 if (rfd < 0)
64 return -errno;
65 }
66
67 if (pidns_fd)
68 *pidns_fd = TAKE_FD(pidnsfd);
69
70 if (mntns_fd)
71 *mntns_fd = TAKE_FD(mntnsfd);
72
73 if (netns_fd)
74 *netns_fd = TAKE_FD(netnsfd);
75
76 if (userns_fd)
77 *userns_fd = TAKE_FD(usernsfd);
78
79 if (root_fd)
80 *root_fd = TAKE_FD(rfd);
81
82 return 0;
83 }
84
85 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
86 int r;
87
88 if (userns_fd >= 0) {
89 /* Can't setns to your own userns, since then you could escalate from non-root to root in
90 * your own namespace, so check if namespaces are equal before attempting to enter. */
91
92 char userns_fd_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
93 xsprintf(userns_fd_path, "/proc/self/fd/%d", userns_fd);
94 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
95 if (r < 0)
96 return r;
97 if (r)
98 userns_fd = -1;
99 }
100
101 if (pidns_fd >= 0)
102 if (setns(pidns_fd, CLONE_NEWPID) < 0)
103 return -errno;
104
105 if (mntns_fd >= 0)
106 if (setns(mntns_fd, CLONE_NEWNS) < 0)
107 return -errno;
108
109 if (netns_fd >= 0)
110 if (setns(netns_fd, CLONE_NEWNET) < 0)
111 return -errno;
112
113 if (userns_fd >= 0)
114 if (setns(userns_fd, CLONE_NEWUSER) < 0)
115 return -errno;
116
117 if (root_fd >= 0) {
118 if (fchdir(root_fd) < 0)
119 return -errno;
120
121 if (chroot(".") < 0)
122 return -errno;
123 }
124
125 return reset_uid_gid();
126 }
127
128 int fd_is_ns(int fd, unsigned long nsflag) {
129 struct statfs s;
130 int r;
131
132 /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone().
133 * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN),
134 * so that callers can handle this somewhat nicely.
135 *
136 * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
137 * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
138
139 if (fstatfs(fd, &s) < 0)
140 return -errno;
141
142 if (!is_fs_type(&s, NSFS_MAGIC)) {
143 /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
144 * instead. Handle that in a somewhat smart way. */
145
146 if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
147 struct statfs t;
148
149 /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
150 * passed fd might refer to a network namespace, but we can't know for sure. In that case,
151 * return a recognizable error. */
152
153 if (statfs("/proc/self/ns/net", &t) < 0)
154 return -errno;
155
156 if (s.f_type == t.f_type)
157 return -EUCLEAN; /* It's possible, we simply don't know */
158 }
159
160 return 0; /* No! */
161 }
162
163 r = ioctl(fd, NS_GET_NSTYPE);
164 if (r < 0) {
165 if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
166 return -EUCLEAN;
167
168 return -errno;
169 }
170
171 return (unsigned long) r == nsflag;
172 }
173
174 int detach_mount_namespace(void) {
175
176 /* Detaches the mount namespace, disabling propagation from our namespace to the host */
177
178 if (unshare(CLONE_NEWNS) < 0)
179 return -errno;
180
181 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0)
182 return -errno;
183
184 return 0;
185 }