]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
0cb8e3d1 LP |
2 | |
3 | #include <fcntl.h> | |
f5947a5e | 4 | #include <sys/ioctl.h> |
e2ec9c4d | 5 | #include <sys/mount.h> |
0cb8e3d1 | 6 | |
7c248223 | 7 | #include "errno-util.h" |
0cb8e3d1 | 8 | #include "fd-util.h" |
979b0ff2 | 9 | #include "fileio.h" |
f5947a5e YW |
10 | #include "missing_fs.h" |
11 | #include "missing_magic.h" | |
c3b9c418 | 12 | #include "missing_sched.h" |
0cb8e3d1 LP |
13 | #include "namespace-util.h" |
14 | #include "process-util.h" | |
15 | #include "stat-util.h" | |
6bc352af | 16 | #include "stdio-util.h" |
0cb8e3d1 LP |
17 | #include "user-util.h" |
18 | ||
c3b9c418 CB |
19 | const struct namespace_info namespace_info[] = { |
20 | [NAMESPACE_CGROUP] = { "cgroup", "ns/cgroup", CLONE_NEWCGROUP, }, | |
21 | [NAMESPACE_IPC] = { "ipc", "ns/ipc", CLONE_NEWIPC, }, | |
22 | [NAMESPACE_NET] = { "net", "ns/net", CLONE_NEWNET, }, | |
23 | /* So, the mount namespace flag is called CLONE_NEWNS for historical | |
24 | * reasons. Let's expose it here under a more explanatory name: "mnt". | |
25 | * This is in-line with how the kernel exposes namespaces in /proc/$PID/ns. */ | |
26 | [NAMESPACE_MOUNT] = { "mnt", "ns/mnt", CLONE_NEWNS, }, | |
27 | [NAMESPACE_PID] = { "pid", "ns/pid", CLONE_NEWPID, }, | |
28 | [NAMESPACE_USER] = { "user", "ns/user", CLONE_NEWUSER, }, | |
29 | [NAMESPACE_UTS] = { "uts", "ns/uts", CLONE_NEWUTS, }, | |
30 | [NAMESPACE_TIME] = { "time", "ns/time", CLONE_NEWTIME, }, | |
31 | { /* Allow callers to iterate over the array without using _NAMESPACE_TYPE_MAX. */ }, | |
32 | }; | |
33 | ||
/* Expands to procfs_file_alloca() applied to the PID and the proc_path field of the namespace_info[] entry
 * selected by 'type'. NOTE(review): judging by the helper's name the result is presumably alloca()-backed, so
 * assign it to a local first rather than using it inside a function call's argument list — confirm. */
#define pid_namespace_path(pid, type)                                   \
        procfs_file_alloca(pid, namespace_info[type].proc_path)
35 | ||
0cb8e3d1 | 36 | int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) { |
254d1313 ZJS |
37 | _cleanup_close_ int pidnsfd = -EBADF, mntnsfd = -EBADF, netnsfd = -EBADF, usernsfd = -EBADF; |
38 | int rfd = -EBADF; | |
0cb8e3d1 LP |
39 | |
40 | assert(pid >= 0); | |
41 | ||
42 | if (mntns_fd) { | |
43 | const char *mntns; | |
44 | ||
c3b9c418 | 45 | mntns = pid_namespace_path(pid, NAMESPACE_MOUNT); |
0cb8e3d1 LP |
46 | mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
47 | if (mntnsfd < 0) | |
48 | return -errno; | |
49 | } | |
50 | ||
51 | if (pidns_fd) { | |
52 | const char *pidns; | |
53 | ||
c3b9c418 | 54 | pidns = pid_namespace_path(pid, NAMESPACE_PID); |
0cb8e3d1 LP |
55 | pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
56 | if (pidnsfd < 0) | |
57 | return -errno; | |
58 | } | |
59 | ||
60 | if (netns_fd) { | |
61 | const char *netns; | |
62 | ||
c3b9c418 | 63 | netns = pid_namespace_path(pid, NAMESPACE_NET); |
0cb8e3d1 LP |
64 | netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
65 | if (netnsfd < 0) | |
66 | return -errno; | |
67 | } | |
68 | ||
69 | if (userns_fd) { | |
70 | const char *userns; | |
71 | ||
c3b9c418 | 72 | userns = pid_namespace_path(pid, NAMESPACE_USER); |
0cb8e3d1 LP |
73 | usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC); |
74 | if (usernsfd < 0 && errno != ENOENT) | |
75 | return -errno; | |
76 | } | |
77 | ||
78 | if (root_fd) { | |
79 | const char *root; | |
80 | ||
81 | root = procfs_file_alloca(pid, "root"); | |
82 | rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); | |
83 | if (rfd < 0) | |
84 | return -errno; | |
85 | } | |
86 | ||
87 | if (pidns_fd) | |
04c84cd1 | 88 | *pidns_fd = TAKE_FD(pidnsfd); |
0cb8e3d1 LP |
89 | |
90 | if (mntns_fd) | |
04c84cd1 | 91 | *mntns_fd = TAKE_FD(mntnsfd); |
0cb8e3d1 LP |
92 | |
93 | if (netns_fd) | |
04c84cd1 | 94 | *netns_fd = TAKE_FD(netnsfd); |
0cb8e3d1 LP |
95 | |
96 | if (userns_fd) | |
04c84cd1 | 97 | *userns_fd = TAKE_FD(usernsfd); |
0cb8e3d1 LP |
98 | |
99 | if (root_fd) | |
04c84cd1 | 100 | *root_fd = TAKE_FD(rfd); |
0cb8e3d1 LP |
101 | |
102 | return 0; | |
103 | } | |
104 | ||
105 | int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) { | |
6bc352af ZJS |
106 | int r; |
107 | ||
0cb8e3d1 | 108 | if (userns_fd >= 0) { |
6bc352af ZJS |
109 | /* Can't setns to your own userns, since then you could escalate from non-root to root in |
110 | * your own namespace, so check if namespaces are equal before attempting to enter. */ | |
0cb8e3d1 | 111 | |
ddb6eeaf | 112 | r = files_same(FORMAT_PROC_FD_PATH(userns_fd), "/proc/self/ns/user", 0); |
0cb8e3d1 LP |
113 | if (r < 0) |
114 | return r; | |
115 | if (r) | |
254d1313 | 116 | userns_fd = -EBADF; |
0cb8e3d1 LP |
117 | } |
118 | ||
119 | if (pidns_fd >= 0) | |
120 | if (setns(pidns_fd, CLONE_NEWPID) < 0) | |
121 | return -errno; | |
122 | ||
123 | if (mntns_fd >= 0) | |
124 | if (setns(mntns_fd, CLONE_NEWNS) < 0) | |
125 | return -errno; | |
126 | ||
127 | if (netns_fd >= 0) | |
128 | if (setns(netns_fd, CLONE_NEWNET) < 0) | |
129 | return -errno; | |
130 | ||
131 | if (userns_fd >= 0) | |
132 | if (setns(userns_fd, CLONE_NEWUSER) < 0) | |
133 | return -errno; | |
134 | ||
135 | if (root_fd >= 0) { | |
136 | if (fchdir(root_fd) < 0) | |
137 | return -errno; | |
138 | ||
139 | if (chroot(".") < 0) | |
140 | return -errno; | |
141 | } | |
142 | ||
143 | return reset_uid_gid(); | |
144 | } | |
145 | ||
54c2459d | 146 | int fd_is_ns(int fd, unsigned long nsflag) { |
0cb8e3d1 LP |
147 | struct statfs s; |
148 | int r; | |
149 | ||
54c2459d XR |
150 | /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone(). |
151 | * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN), | |
152 | * so that callers can handle this somewhat nicely. | |
0cb8e3d1 LP |
153 | * |
154 | * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not | |
155 | * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */ | |
156 | ||
157 | if (fstatfs(fd, &s) < 0) | |
158 | return -errno; | |
159 | ||
160 | if (!is_fs_type(&s, NSFS_MAGIC)) { | |
161 | /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs | |
162 | * instead. Handle that in a somewhat smart way. */ | |
163 | ||
164 | if (is_fs_type(&s, PROC_SUPER_MAGIC)) { | |
165 | struct statfs t; | |
166 | ||
167 | /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the | |
168 | * passed fd might refer to a network namespace, but we can't know for sure. In that case, | |
169 | * return a recognizable error. */ | |
170 | ||
171 | if (statfs("/proc/self/ns/net", &t) < 0) | |
172 | return -errno; | |
173 | ||
174 | if (s.f_type == t.f_type) | |
175 | return -EUCLEAN; /* It's possible, we simply don't know */ | |
176 | } | |
177 | ||
178 | return 0; /* No! */ | |
179 | } | |
180 | ||
181 | r = ioctl(fd, NS_GET_NSTYPE); | |
182 | if (r < 0) { | |
183 | if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */ | |
184 | return -EUCLEAN; | |
185 | ||
186 | return -errno; | |
187 | } | |
188 | ||
54c2459d | 189 | return (unsigned long) r == nsflag; |
0cb8e3d1 | 190 | } |
e2ec9c4d LP |
191 | |
192 | int detach_mount_namespace(void) { | |
193 | ||
194 | /* Detaches the mount namespace, disabling propagation from our namespace to the host */ | |
195 | ||
196 | if (unshare(CLONE_NEWNS) < 0) | |
197 | return -errno; | |
198 | ||
7c248223 | 199 | return RET_NERRNO(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)); |
e2ec9c4d | 200 | } |
979b0ff2 LP |
201 | |
202 | int userns_acquire(const char *uid_map, const char *gid_map) { | |
203 | char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1]; | |
204 | _cleanup_(sigkill_waitp) pid_t pid = 0; | |
254d1313 | 205 | _cleanup_close_ int userns_fd = -EBADF; |
979b0ff2 LP |
206 | int r; |
207 | ||
208 | assert(uid_map); | |
209 | assert(gid_map); | |
210 | ||
211 | /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it, | |
212 | * and then kills the process again. This way we have a userns fd that is not bound to any | |
213 | * process. We can use that for file system mounts and similar. */ | |
214 | ||
215 | r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid); | |
216 | if (r < 0) | |
217 | return r; | |
218 | if (r == 0) | |
219 | /* Child. We do nothing here, just freeze until somebody kills us. */ | |
220 | freeze(); | |
221 | ||
222 | xsprintf(path, "/proc/" PID_FMT "/uid_map", pid); | |
223 | r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); | |
224 | if (r < 0) | |
225 | return log_error_errno(r, "Failed to write UID map: %m"); | |
226 | ||
227 | xsprintf(path, "/proc/" PID_FMT "/gid_map", pid); | |
228 | r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER); | |
229 | if (r < 0) | |
230 | return log_error_errno(r, "Failed to write GID map: %m"); | |
231 | ||
232 | r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); | |
233 | if (r < 0) | |
688efe77 | 234 | return log_error_errno(r, "Failed to open userns fd: %m"); |
979b0ff2 LP |
235 | |
236 | return TAKE_FD(userns_fd); | |
237 | ||
238 | } | |
2fe299a3 CB |
239 | |
240 | int in_same_namespace(pid_t pid1, pid_t pid2, NamespaceType type) { | |
241 | const char *ns_path; | |
242 | struct stat ns_st1, ns_st2; | |
243 | ||
244 | if (pid1 == 0) | |
245 | pid1 = getpid_cached(); | |
246 | ||
247 | if (pid2 == 0) | |
248 | pid2 = getpid_cached(); | |
249 | ||
250 | if (pid1 == pid2) | |
251 | return 1; | |
252 | ||
253 | ns_path = pid_namespace_path(pid1, type); | |
254 | if (stat(ns_path, &ns_st1) < 0) | |
255 | return -errno; | |
256 | ||
257 | ns_path = pid_namespace_path(pid2, type); | |
258 | if (stat(ns_path, &ns_st2) < 0) | |
259 | return -errno; | |
260 | ||
261 | return stat_inode_same(&ns_st1, &ns_st2); | |
262 | } |