]>
Commit | Line | Data |
---|---|---|
4205f1fd MG |
1 | /* |
2 | * unshare(1) - command-line interface for unshare(2) | |
3 | * | |
4 | * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify it | |
7 | * under the terms of the GNU General Public License as published by the | |
8 | * Free Software Foundation; either version 2, or (at your option) any | |
9 | * later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License along | |
17 | * with this program; if not, write to the Free Software Foundation, Inc., | |
7cebf0bb | 18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
4205f1fd MG |
19 | */ |
20 | ||
4205f1fd MG |
21 | #include <errno.h> |
22 | #include <getopt.h> | |
23 | #include <sched.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <unistd.h> | |
ff5dc96e | 27 | #include <sys/eventfd.h> |
5088ec33 | 28 | #include <sys/wait.h> |
6728ca10 | 29 | #include <sys/mount.h> |
c84f2590 KZ |
30 | #include <sys/types.h> |
31 | #include <sys/stat.h> | |
8e8f0fa5 | 32 | #include <sys/prctl.h> |
f0af42b5 | 33 | #include <grp.h> |
c84f2590 | 34 | |
d754315c RM |
35 | /* we only need some defines missing in sys/mount.h, no libmount linkage */ |
36 | #include <libmount.h> | |
37 | ||
4205f1fd | 38 | #include "nls.h" |
eb76ca98 | 39 | #include "c.h" |
cef4decf | 40 | #include "caputils.h" |
efb8854f | 41 | #include "closestream.h" |
c91280a4 | 42 | #include "namespace.h" |
57580694 | 43 | #include "exec_shell.h" |
4da21e37 LR |
44 | #include "xalloc.h" |
45 | #include "pathnames.h" | |
46 | #include "all-io.h" | |
8b39a17c | 47 | #include "signames.h" |
f0af42b5 | 48 | #include "strutils.h" |
987550cb | 49 | #include "pwdutils.h" |
4da21e37 | 50 | |
99fcafdf YK |
51 | /* synchronize parent and child by pipe */ |
52 | #define PIPE_SYNC_BYTE 0x06 | |
53 | ||
f0f22e9c KZ |
54 | /* 'private' is kernel default */ |
55 | #define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE) | |
56 | ||
0490a6ca KZ |
/*
 * /proc namespace files and mountpoints for binds.
 *
 * One entry per supported CLONE_NEW* flag; the table is terminated by an
 * entry whose .name is NULL. The .target member starts out NULL and is filled
 * in by set_ns_target() when the user asks for a persistent namespace.
 */
static struct namespace_file {
	int		type;		/* CLONE_NEW* */
	const char	*name;		/* ns/<type> */
	const char	*target;	/* user specified target for bind mount */
} namespace_files[] = {
	{ .type = CLONE_NEWUSER,  .name = "ns/user" },
	{ .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
	{ .type = CLONE_NEWIPC,   .name = "ns/ipc"  },
	{ .type = CLONE_NEWUTS,   .name = "ns/uts"  },
	{ .type = CLONE_NEWNET,   .name = "ns/net"  },
	{ .type = CLONE_NEWPID,   .name = "ns/pid_for_children" },
	{ .type = CLONE_NEWNS,    .name = "ns/mnt"  },
	{ .type = CLONE_NEWTIME,  .name = "ns/time_for_children" },
	{ .name = NULL }
};

static int npersists;	/* number of persistent namespaces */
75 | ||
fbceefde KZ |
/*
 * Actions for /proc's setgroups control file. The non-negative values double
 * as indexes into setgroups_strings[]; SETGROUPS_NONE means "leave it alone".
 */
enum {
	SETGROUPS_NONE = -1,
	SETGROUPS_DENY = 0,
	SETGROUPS_ALLOW = 1,
};

/* Strings accepted by --setgroups and written by setgroups_control() */
static const char *setgroups_strings[] =
{
	[SETGROUPS_DENY] = "deny",
	[SETGROUPS_ALLOW] = "allow"
};
87 | ||
88 | static int setgroups_str2id(const char *str) | |
89 | { | |
90 | size_t i; | |
91 | ||
92 | for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++) | |
93 | if (strcmp(str, setgroups_strings[i]) == 0) | |
94 | return i; | |
95 | ||
96 | errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str); | |
97 | } | |
98 | ||
99 | static void setgroups_control(int action) | |
0bf15941 EB |
100 | { |
101 | const char *file = _PATH_PROC_SETGROUPS; | |
fbceefde | 102 | const char *cmd; |
0bf15941 EB |
103 | int fd; |
104 | ||
fbceefde KZ |
105 | if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings)) |
106 | return; | |
107 | cmd = setgroups_strings[action]; | |
108 | ||
0bf15941 EB |
109 | fd = open(file, O_WRONLY); |
110 | if (fd < 0) { | |
111 | if (errno == ENOENT) | |
112 | return; | |
7ff635bf | 113 | err(EXIT_FAILURE, _("cannot open %s"), file); |
0bf15941 EB |
114 | } |
115 | ||
fbceefde | 116 | if (write_all(fd, cmd, strlen(cmd))) |
0bf15941 EB |
117 | err(EXIT_FAILURE, _("write failed %s"), file); |
118 | close(fd); | |
119 | } | |
120 | ||
4da21e37 LR |
/*
 * Write the single-ID mapping "<from> <to> 1" to @file.
 * Terminates the program if the file cannot be opened or written.
 */
static void map_id(const char *file, uint32_t from, uint32_t to)
{
	char *mapping;
	int fd = open(file, O_WRONLY);

	if (fd < 0)
		err(EXIT_FAILURE, _("cannot open %s"), file);

	xasprintf(&mapping, "%u %u 1", from, to);
	if (write_all(fd, mapping, strlen(mapping)) != 0)
		err(EXIT_FAILURE, _("write failed %s"), file);

	close(fd);
	free(mapping);
}
4205f1fd | 136 | |
f0f22e9c KZ |
/*
 * Convert a --propagation argument into mount(2) flags.
 *
 * Returns MS_REC combined with the selected propagation type, or 0 for
 * "unchanged". An unknown mode terminates the program.
 */
static unsigned long parse_propagation(const char *str)
{
	static const struct prop_opts {
		const char *name;
		unsigned long flag;
	} modes[] = {
		{ "slave",	MS_REC | MS_SLAVE },
		{ "private",	MS_REC | MS_PRIVATE },
		{ "shared",	MS_REC | MS_SHARED },
		{ "unchanged",	0 }
	};
	const struct prop_opts *mode = modes;
	const struct prop_opts *const end = modes + ARRAY_SIZE(modes);

	for (; mode < end; mode++)
		if (!strcmp(mode->name, str))
			return mode->flag;

	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
}
157 | ||
/*
 * Apply the propagation @flags to the root filesystem ("/").
 * A value of 0 leaves the propagation untouched; a failing mount(2)
 * terminates the program.
 */
static void set_propagation(unsigned long flags)
{
	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
}
166 | ||
0490a6ca KZ |
167 | |
168 | static int set_ns_target(int type, const char *path) | |
169 | { | |
170 | struct namespace_file *ns; | |
171 | ||
172 | for (ns = namespace_files; ns->name; ns++) { | |
173 | if (ns->type != type) | |
174 | continue; | |
175 | ns->target = path; | |
176 | npersists++; | |
177 | return 0; | |
178 | } | |
179 | ||
180 | return -EINVAL; | |
181 | } | |
182 | ||
183 | static int bind_ns_files(pid_t pid) | |
184 | { | |
185 | struct namespace_file *ns; | |
186 | char src[PATH_MAX]; | |
187 | ||
188 | for (ns = namespace_files; ns->name; ns++) { | |
189 | if (!ns->target) | |
190 | continue; | |
191 | ||
192 | snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name); | |
193 | ||
194 | if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0) | |
195 | err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target); | |
196 | } | |
197 | ||
198 | return 0; | |
199 | } | |
200 | ||
c84f2590 KZ |
201 | static ino_t get_mnt_ino(pid_t pid) |
202 | { | |
203 | struct stat st; | |
204 | char path[PATH_MAX]; | |
205 | ||
206 | snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid); | |
207 | ||
208 | if (stat(path, &st) != 0) | |
1293b0f6 | 209 | err(EXIT_FAILURE, _("stat of %s failed"), path); |
c84f2590 KZ |
210 | return st.st_ino; |
211 | } | |
212 | ||
be7df01a AR |
213 | static void settime(time_t offset, clockid_t clk_id) |
214 | { | |
215 | char buf[sizeof(stringify_value(ULONG_MAX)) * 3]; | |
216 | int fd, len; | |
217 | ||
218 | len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); | |
219 | ||
220 | fd = open("/proc/self/timens_offsets", O_WRONLY); | |
221 | if (fd < 0) | |
222 | err(EXIT_FAILURE, _("failed to open /proc/self/timens_offsets")); | |
223 | ||
224 | if (write(fd, buf, len) != len) | |
225 | err(EXIT_FAILURE, _("failed to write to /proc/self/timens_offsets")); | |
226 | ||
227 | close(fd); | |
228 | } | |
229 | ||
82ea6298 SA |
/**
 * waitchild() - Wait for a process and propagate its failure
 * @pid: PID of the process to wait for
 *
 * Blocks in waitpid() until @pid has been reaped, retrying when the call is
 * interrupted by a signal (EINTR). If the process exited normally with a
 * non-zero status, exit() with that same status. Any other outcome simply
 * returns to the caller.
 */
static void waitchild(int pid)
{
	int status;

	for (;;) {
		if (waitpid(pid, &status, 0) >= 0)
			break;
		if (errno != EINTR)
			err(EXIT_FAILURE, _("waitpid failed"));
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) != EXIT_SUCCESS)
		exit(WEXITSTATUS(status));
}
253 | ||
783bb52a SA |
254 | /** |
255 | * sync_with_child() - Tell our child we're ready and wait for it to exit | |
256 | * @pid: The pid of our child | |
257 | * @fd: A file descriptor created with eventfd() | |
258 | * | |
259 | * This tells a child created with fork_and_wait() that we are ready for it to | |
260 | * continue. Once we have done that, wait for our child to exit. | |
261 | */ | |
262 | static void sync_with_child(pid_t pid, int fd) | |
c84f2590 | 263 | { |
783bb52a SA |
264 | uint64_t ch = PIPE_SYNC_BYTE; |
265 | ||
266 | write_all(fd, &ch, sizeof(ch)); | |
267 | close(fd); | |
c84f2590 | 268 | |
783bb52a SA |
269 | waitchild(pid); |
270 | } | |
99fcafdf | 271 | |
783bb52a SA |
272 | /** |
273 | * fork_and_wait() - Fork and wait to be sync'd with | |
274 | * @fd - A file descriptor created with eventfd() which should be passed to | |
275 | * sync_with_child() | |
276 | * | |
277 | * This creates an eventfd and forks. The parent process returns immediately, | |
278 | * but the child waits for a %PIPE_SYNC_BYTE on the eventfd before returning. | |
279 | * This allows the parent to perform some tasks before the child starts its | |
280 | * work. The parent should call sync_with_child() once it is ready for the | |
281 | * child to continue. | |
282 | * | |
283 | * Return: The pid from fork() | |
284 | */ | |
285 | static pid_t fork_and_wait(int *fd) | |
286 | { | |
287 | pid_t pid; | |
288 | uint64_t ch; | |
c84f2590 | 289 | |
783bb52a SA |
290 | *fd = eventfd(0, 0); |
291 | if (*fd < 0) | |
292 | err(EXIT_FAILURE, _("eventfd failed")); | |
293 | ||
294 | pid = fork(); | |
295 | if (pid < 0) | |
c84f2590 | 296 | err(EXIT_FAILURE, _("fork failed")); |
99fcafdf | 297 | |
783bb52a SA |
298 | if (!pid) { |
299 | /* wait for the our parent to tell us to continue */ | |
300 | if (read_all(*fd, (char *)&ch, sizeof(ch)) != sizeof(ch) || | |
301 | ch != PIPE_SYNC_BYTE) | |
302 | err(EXIT_FAILURE, _("failed to read eventfd")); | |
303 | close(*fd); | |
c84f2590 | 304 | } |
783bb52a SA |
305 | |
306 | return pid; | |
307 | } | |
308 | ||
/*
 * Fork a helper that bind-mounts the caller's namespace files.
 *
 * The caller's mount-namespace inode is recorded before forking. In the
 * parent the helper's pid is returned immediately. The helper waits in
 * fork_and_wait() until released, then exits with EXIT_FAILURE if the
 * caller's mount-namespace inode is still the same; otherwise it runs
 * bind_ns_files() for the caller's pid and exits with EXIT_SUCCESS.
 */
static pid_t bind_ns_files_from_child(int *fd)
{
	const pid_t parent = getpid();
	const ino_t before = get_mnt_ino(parent);
	pid_t child = fork_and_wait(fd);

	if (child != 0)
		return child;

	/* helper process from here on */
	if (get_mnt_ino(parent) != before) {
		bind_ns_files(parent);
		exit(EXIT_SUCCESS);
	}
	exit(EXIT_FAILURE);
}
323 | ||
987550cb MHB |
/*
 * Resolve @s to a UID: try it as a user name first and fall back to parsing
 * it as a number. @err is the message handed to strtoul_or_err() for the
 * numeric fallback.
 */
static uid_t get_user(const char *s, const char *err)
{
	char *buf = NULL;
	struct passwd *pw = xgetpwnam(s, &buf);
	uid_t ret;

	if (!pw)
		return strtoul_or_err(s, err);

	ret = pw->pw_uid;
	free(pw);
	free(buf);
	return ret;
}
341 | ||
/*
 * Resolve @s to a GID: try it as a group name first and fall back to parsing
 * it as a number. @err is the message handed to strtoul_or_err() for the
 * numeric fallback.
 */
static gid_t get_group(const char *s, const char *err)
{
	char *buf = NULL;
	struct group *gr = xgetgrnam(s, &buf);
	gid_t ret;

	if (!gr)
		return strtoul_or_err(s, err);

	ret = gr->gr_gid;
	free(gr);
	free(buf);
	return ret;
}
359 | ||
ff5dc96e SA |
/**
 * struct map_range - A range of IDs to map
 * @outer: First ID of the range outside the namespace
 * @inner: First ID of the range inside the namespace
 * @count: Length of the inside and outside ranges
 *
 * A range of uids/gids to map using new[gu]idmap.
 */
struct map_range {
	unsigned int outer;
	unsigned int inner;
	unsigned int count;
};
373 | ||
374 | #define UID_BUFSIZ sizeof(stringify_value(ULONG_MAX)) | |
375 | ||
376 | /** | |
377 | * uint_to_id() - Convert a string into a user/group ID | |
378 | * @name: The string representation of the ID | |
379 | * @sz: The length of @name, without an (optional) nul-terminator | |
380 | * | |
381 | * This converts a (possibly not nul-terminated_ string into user or group ID. | |
382 | * No name lookup is performed. | |
383 | * | |
384 | * Return: @name as a numeric ID | |
385 | */ | |
386 | static int uint_to_id(const char *name, size_t sz) | |
387 | { | |
388 | char buf[UID_BUFSIZ]; | |
389 | ||
390 | mem2strcpy(buf, name, sz, sizeof(buf)); | |
391 | return strtoul_or_err(name, _("could not parse ID")); | |
392 | } | |
393 | ||
394 | /** | |
395 | * get_map_range() - Parse a mapping range from a string | |
396 | * @s: A string of the format upper,lower,count | |
397 | * | |
398 | * Parse a string of the form upper,lower,count into a new mapping range. | |
399 | * | |
400 | * Return: A new &struct map_range | |
401 | */ | |
402 | static struct map_range *get_map_range(const char *s) | |
403 | { | |
404 | int n, map[3]; | |
405 | struct map_range *ret; | |
406 | ||
407 | n = string_to_idarray(s, map, ARRAY_SIZE(map), uint_to_id); | |
408 | if (n < 0) | |
409 | errx(EXIT_FAILURE, _("too many elements for mapping '%s'"), s); | |
410 | if (n != ARRAY_SIZE(map)) | |
411 | errx(EXIT_FAILURE, _("mapping '%s' contains only %d elements"), | |
412 | s, n); | |
413 | ||
414 | ret = xmalloc(sizeof(*ret)); | |
415 | ret->outer = map[0]; | |
416 | ret->inner = map[1]; | |
417 | ret->count = map[2]; | |
418 | return ret; | |
419 | } | |
420 | ||
e67b0ba3 SA |
421 | /** |
422 | * read_subid_range() - Look up a user's sub[gu]id range | |
423 | * @filename: The file to look up the range from. This should be either | |
424 | * ``/etc/subuid`` or ``/etc/subgid``. | |
425 | * @uid: The uid of the user whose range we should look up. | |
426 | * | |
427 | * This finds the first subid range matching @uid in @filename. | |
428 | */ | |
429 | static struct map_range *read_subid_range(char *filename, uid_t uid) | |
430 | { | |
431 | char *line = NULL, *pwbuf; | |
432 | FILE *idmap; | |
433 | size_t n; | |
434 | struct passwd *pw; | |
435 | struct map_range *map; | |
436 | ||
437 | map = xmalloc(sizeof(*map)); | |
438 | map->inner = 0; | |
439 | ||
440 | pw = xgetpwuid(uid, &pwbuf); | |
441 | if (!pw) | |
442 | errx(EXIT_FAILURE, _("you (user %d) don't exist."), uid); | |
443 | ||
444 | idmap = fopen(filename, "r"); | |
445 | if (!idmap) | |
446 | err(EXIT_FAILURE, _("could not open '%s'"), filename); | |
447 | ||
448 | /* | |
449 | * Each line in sub[ug]idmap looks like | |
450 | * username:subuid:count | |
451 | * OR | |
452 | * uid:subuid:count | |
453 | */ | |
454 | while (getline(&line, &n, idmap) != -1) { | |
455 | char *rest, *s; | |
456 | ||
457 | rest = strchr(line, ':'); | |
458 | if (!rest) | |
459 | continue; | |
460 | *rest = '\0'; | |
461 | ||
462 | if (strcmp(line, pw->pw_name) && | |
463 | strtoul(line, NULL, 10) != pw->pw_uid) | |
464 | continue; | |
465 | ||
466 | s = rest + 1; | |
467 | rest = strchr(s, ':'); | |
468 | if (!rest) | |
469 | continue; | |
470 | *rest = '\0'; | |
471 | map->outer = strtoul_or_err(s, _("failed to parse subid map")); | |
472 | ||
473 | s = rest + 1; | |
474 | rest = strchr(s, '\n'); | |
475 | if (rest) | |
476 | *rest = '\0'; | |
477 | map->count = strtoul_or_err(s, _("failed to parse subid map")); | |
478 | ||
479 | fclose(idmap); | |
d504b862 KZ |
480 | free(pw); |
481 | free(pwbuf); | |
482 | ||
e67b0ba3 SA |
483 | return map; |
484 | } | |
485 | ||
486 | err(EXIT_FAILURE, _("no line matching user \"%s\" in %s"), | |
487 | pw->pw_name, filename); | |
488 | } | |
489 | ||
ff5dc96e SA |
/**
 * map_ids() - Create a new uid/gid map
 * @idmapper: Either newuidmap or newgidmap
 * @ppid: Pid to set the map for
 * @outer: ID outside the namespace for a single map.
 * @inner: ID inside the namespace for a single map. May be -1 to only use @map.
 * @map: A range of IDs to map
 *
 * This creates a new uid/gid map for @ppid using @idmapper. The ID @outer in
 * the parent (our) namespace is mapped to the ID @inner in the child (@ppid's)
 * namespace. In addition, the range of IDs beginning at @map->outer is mapped
 * to the range of IDs beginning at @map->inner. The tricky bit is that we
 * cannot let these mappings overlap. We accomplish this by removing a "hole"
 * from @map, if @outer or @inner overlap it. This may result in one less than
 * @map->count IDs being mapped from @map. The unmapped IDs are always the
 * topmost IDs of the mapping (either in the parent or the child namespace).
 *
 * Most of the time, this function will be called with @map->outer as some
 * large ID, @map->inner as 0, and @map->count as a large number (at least
 * 1000, but less than @map->outer). Typically, there will be no conflict with
 * @outer. However, @inner may split the mapping for e.g. --map-current-user.
 *
 * The arguments handed to @idmapper are the pid followed by
 * "inner outer count" triples. Note that @map->count is decremented in place
 * when an overlap is detected.
 *
 * This function always exec()s or errors out and does not return.
 */
static void __attribute__((__noreturn__))
map_ids(const char *idmapper, int ppid, unsigned int outer, unsigned int inner,
	struct map_range *map)
{
	/* idmapper + pid + 4 * map + NULL */
	char *argv[15];
	/* argv - idmapper - "1" - NULL */
	char args[12][UID_BUFSIZ];
	/* i indexes the next free argv[] slot, j the next free args[] buffer */
	int i = 0, j = 0;
	struct map_range lo, mid, hi;
	unsigned int inner_offset, outer_offset;

	/* Some helper macros to reduce bookkeeping */
	/* push_str: append an existing string to argv[] */
#define push_str(s) do { \
	argv[i++] = s; \
} while (0)
	/* push_ul: format a number into the next args[] buffer and append it */
#define push_ul(x) do { \
	snprintf(args[j], sizeof(args[j]), "%u", x); \
	push_str(args[j++]); \
} while (0)

	push_str(xstrdup(idmapper));
	push_ul(ppid);
	if ((int)inner == -1) {
		/*
		 * If we don't have a "single" mapping, then we can just use
		 * map directly
		 */
		push_ul(map->inner);
		push_ul(map->outer);
		push_ul(map->count);
		push_str(NULL);

		execvp(idmapper, argv);
		errexec(idmapper);
	}

	/*
	 * If the mappings overlap, remove an ID from map
	 *
	 * NOTE(review): the upper bounds are inclusive (<=), so an ID exactly
	 * one past the end of the range is also treated as overlapping --
	 * confirm that this is intended.
	 */
	if ((outer >= map->outer && outer <= map->outer + map->count) ||
	    (inner >= map->inner && inner <= map->inner + map->count))
		map->count--;

	/*
	 * Determine where the splits between lo, mid, and hi will be.
	 * Each offset is the distance of the single ID from the start of the
	 * range, clamped to [0, map->count].
	 */
	outer_offset = min(outer > map->outer ? outer - map->outer : 0,
			   map->count);
	inner_offset = min(inner > map->inner ? inner - map->inner : 0,
			   map->count);

	/*
	 * In the worst case, we need three mappings:
	 * From the bottom of map to either inner or outer
	 */
	lo.outer = map->outer;
	lo.inner = map->inner;
	lo.count = min(inner_offset, outer_offset);

	/*
	 * From the lower of inner or outer to the higher.
	 * The "+= (x == y)" lines step over the single ID if the sub-range
	 * would otherwise start exactly on it.
	 */
	mid.outer = lo.outer + lo.count;
	mid.outer += mid.outer == outer;
	mid.inner = lo.inner + lo.count;
	mid.inner += mid.inner == inner;
	mid.count = abs_diff(outer_offset, inner_offset);

	/* And from the higher of inner or outer to the end of the map */
	hi.outer = mid.outer + mid.count;
	hi.outer += hi.outer == outer;
	hi.inner = mid.inner + mid.count;
	hi.inner += hi.inner == inner;
	hi.count = map->count - lo.count - mid.count;

	/* The single inner <-> outer mapping always comes first */
	push_ul(inner);
	push_ul(outer);
	push_str("1");
	/* new[gu]idmap doesn't like zero-length mappings, so skip them */
	if (lo.count) {
		push_ul(lo.inner);
		push_ul(lo.outer);
		push_ul(lo.count);
	}
	if (mid.count) {
		push_ul(mid.inner);
		push_ul(mid.outer);
		push_ul(mid.count);
	}
	if (hi.count) {
		push_ul(hi.inner);
		push_ul(hi.outer);
		push_ul(hi.count);
	}
	push_str(NULL);
	execvp(idmapper, argv);
	errexec(idmapper);
}
607 | ||
/**
 * map_ids_from_child() - Set up a new uid/gid map
 * @fd: Where to store the eventfd used to release the child
 * @mapuser: The user to map the current user to (or -1)
 * @usermap: The range of UIDs to map (or %NULL)
 * @mapgroup: The group to map the current group to (or -1)
 * @groupmap: The range of GIDs to map (or %NULL)
 *
 * fork_and_wait() for our parent to call sync_with_child() on @fd. Upon
 * receiving the go-ahead, use newuidmap and newgidmap to set the uid/gid map
 * for our parent's PID. When both maps are requested, a second process is
 * forked to run newuidmap and is waited for before newgidmap is run.
 *
 * Return: The pid of the child.
 */
static pid_t map_ids_from_child(int *fd, uid_t mapuser,
				struct map_range *usermap, gid_t mapgroup,
				struct map_range *groupmap)
{
	const pid_t target = getpid();
	pid_t helper, uidworker = 0;

	helper = fork_and_wait(fd);
	if (helper != 0)
		return helper;

	/* Only fork again when there are two mappers to run */
	if (usermap != NULL && groupmap != NULL) {
		uidworker = fork();
		if (uidworker < 0)
			err(EXIT_FAILURE, _("fork failed"));
		if (uidworker > 0)
			waitchild(uidworker);
	}

	/* map_ids() never returns, so at most one of these runs per process */
	if (usermap != NULL && uidworker == 0)
		map_ids("newuidmap", target, geteuid(), mapuser, usermap);
	if (groupmap != NULL)
		map_ids("newgidmap", target, getegid(), mapgroup, groupmap);
	exit(EXIT_SUCCESS);
}
648 | ||
/*
 * Print the help text (synopsis, description and all options) to stdout and
 * exit with EXIT_SUCCESS. Never returns.
 */
static void __attribute__((__noreturn__)) usage(void)
{
	FILE *out = stdout;

	fputs(USAGE_HEADER, out);
	fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
		program_invocation_short_name);

	fputs(USAGE_SEPARATOR, out);
	fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);

	/* namespaces to unshare */
	fputs(USAGE_OPTIONS, out);
	fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out);
	fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out);
	fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out);
	fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out);
	fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out);
	fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out);
	fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out);
	fputs(_(" -T, --time[=<file>] unshare time namespace\n"), out);
	fputs(USAGE_SEPARATOR, out);
	/* forking and uid/gid mapping */
	fputs(_(" -f, --fork fork before launching <program>\n"), out);
	fputs(_(" --map-user=<uid>|<name> map current user to uid (implies --user)\n"), out);
	fputs(_(" --map-group=<gid>|<name> map current group to gid (implies --user)\n"), out);
	fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
	fputs(_(" -c, --map-current-user map current user to itself (implies --user)\n"), out);
	fputs(_(" --map-auto map users and groups automatically (implies --user)\n"), out);
	fputs(_(" --map-users=<outeruid>,<inneruid>,<count>\n"
		" map count users from outeruid to inneruid (implies --user)\n"), out);
	fputs(_(" --map-groups=<outergid>,<innergid>,<count>\n"
		" map count groups from outergid to innergid (implies --user)\n"), out);
	fputs(USAGE_SEPARATOR, out);
	/* child handling, mounts and capabilities */
	fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n"
		" defaults to SIGKILL\n"), out);
	fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
	fputs(_(" --propagation slave|shared|private|unchanged\n"
		" modify mount propagation in mount namespace\n"), out);
	fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
	fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out);
	fputs(USAGE_SEPARATOR, out);
	/* environment of the launched program */
	fputs(_(" -R, --root=<dir> run the command with root directory set to <dir>\n"), out);
	fputs(_(" -w, --wd=<dir> change working directory to <dir>\n"), out);
	fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
	fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
	fputs(_(" --monotonic <offset> set clock monotonic offset (seconds) in time namespaces\n"), out);
	fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out);

	fputs(USAGE_SEPARATOR, out);
	printf(USAGE_HELP_OPTIONS(27));
	printf(USAGE_MAN_TAIL("unshare(1)"));

	exit(EXIT_SUCCESS);
}
702 | ||
703 | int main(int argc, char *argv[]) | |
704 | { | |
6728ca10 | 705 | enum { |
fbceefde | 706 | OPT_MOUNTPROC = CHAR_MAX + 1, |
f0f22e9c | 707 | OPT_PROPAGATION, |
8e8f0fa5 | 708 | OPT_SETGROUPS, |
bf8834d4 | 709 | OPT_KILLCHILD, |
cef4decf | 710 | OPT_KEEPCAPS, |
be7df01a AR |
711 | OPT_MONOTONIC, |
712 | OPT_BOOTTIME, | |
6e837b5a | 713 | OPT_MAPUSER, |
ff5dc96e | 714 | OPT_MAPUSERS, |
6e837b5a | 715 | OPT_MAPGROUP, |
ff5dc96e | 716 | OPT_MAPGROUPS, |
e67b0ba3 | 717 | OPT_MAPAUTO, |
6728ca10 | 718 | }; |
6c7d5ae9 | 719 | static const struct option longopts[] = { |
87918040 SK |
720 | { "help", no_argument, NULL, 'h' }, |
721 | { "version", no_argument, NULL, 'V' }, | |
722 | ||
723 | { "mount", optional_argument, NULL, 'm' }, | |
724 | { "uts", optional_argument, NULL, 'u' }, | |
725 | { "ipc", optional_argument, NULL, 'i' }, | |
726 | { "net", optional_argument, NULL, 'n' }, | |
727 | { "pid", optional_argument, NULL, 'p' }, | |
728 | { "user", optional_argument, NULL, 'U' }, | |
729 | { "cgroup", optional_argument, NULL, 'C' }, | |
f218fd97 | 730 | { "time", optional_argument, NULL, 'T' }, |
87918040 SK |
731 | |
732 | { "fork", no_argument, NULL, 'f' }, | |
8b39a17c | 733 | { "kill-child", optional_argument, NULL, OPT_KILLCHILD }, |
87918040 | 734 | { "mount-proc", optional_argument, NULL, OPT_MOUNTPROC }, |
6e837b5a | 735 | { "map-user", required_argument, NULL, OPT_MAPUSER }, |
ff5dc96e | 736 | { "map-users", required_argument, NULL, OPT_MAPUSERS }, |
6e837b5a | 737 | { "map-group", required_argument, NULL, OPT_MAPGROUP }, |
ff5dc96e | 738 | { "map-groups", required_argument, NULL, OPT_MAPGROUPS }, |
87918040 | 739 | { "map-root-user", no_argument, NULL, 'r' }, |
4175f29e | 740 | { "map-current-user", no_argument, NULL, 'c' }, |
e67b0ba3 | 741 | { "map-auto", no_argument, NULL, OPT_MAPAUTO }, |
87918040 SK |
742 | { "propagation", required_argument, NULL, OPT_PROPAGATION }, |
743 | { "setgroups", required_argument, NULL, OPT_SETGROUPS }, | |
cef4decf | 744 | { "keep-caps", no_argument, NULL, OPT_KEEPCAPS }, |
f0af42b5 LV |
745 | { "setuid", required_argument, NULL, 'S' }, |
746 | { "setgid", required_argument, NULL, 'G' }, | |
bf8834d4 LV |
747 | { "root", required_argument, NULL, 'R' }, |
748 | { "wd", required_argument, NULL, 'w' }, | |
be7df01a AR |
749 | { "monotonic", required_argument, NULL, OPT_MONOTONIC }, |
750 | { "boottime", required_argument, NULL, OPT_BOOTTIME }, | |
87918040 | 751 | { NULL, 0, NULL, 0 } |
4205f1fd MG |
752 | }; |
753 | ||
fbceefde | 754 | int setgrpcmd = SETGROUPS_NONE; |
4205f1fd | 755 | int unshare_flags = 0; |
6e837b5a MHB |
756 | int c, forkit = 0; |
757 | uid_t mapuser = -1; | |
758 | gid_t mapgroup = -1; | |
ff5dc96e SA |
759 | struct map_range *usermap = NULL; |
760 | struct map_range *groupmap = NULL; | |
8b39a17c | 761 | int kill_child_signo = 0; /* 0 means --kill-child was not used */ |
6728ca10 | 762 | const char *procmnt = NULL; |
bf8834d4 LV |
763 | const char *newroot = NULL; |
764 | const char *newdir = NULL; | |
ff5dc96e | 765 | pid_t pid_bind = 0, pid_idmap = 0; |
c84f2590 | 766 | pid_t pid = 0; |
ff5dc96e | 767 | int fd_idmap, fd_bind = -1; |
f2f98017 | 768 | sigset_t sigset, oldsigset; |
c84f2590 | 769 | int status; |
f0f22e9c | 770 | unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT; |
f0af42b5 LV |
771 | int force_uid = 0, force_gid = 0; |
772 | uid_t uid = 0, real_euid = geteuid(); | |
773 | gid_t gid = 0, real_egid = getegid(); | |
cef4decf | 774 | int keepcaps = 0; |
be7df01a AR |
775 | time_t monotonic = 0; |
776 | time_t boottime = 0; | |
777 | int force_monotonic = 0; | |
778 | int force_boottime = 0; | |
4205f1fd | 779 | |
999ac5e2 | 780 | setlocale(LC_ALL, ""); |
4205f1fd MG |
781 | bindtextdomain(PACKAGE, LOCALEDIR); |
782 | textdomain(PACKAGE); | |
2c308875 | 783 | close_stdout_atexit(); |
4205f1fd | 784 | |
f218fd97 | 785 | while ((c = getopt_long(argc, argv, "+fhVmuinpCTUrR:w:S:G:c", longopts, NULL)) != -1) { |
2eefe517 | 786 | switch (c) { |
5088ec33 MF |
787 | case 'f': |
788 | forkit = 1; | |
789 | break; | |
4205f1fd | 790 | case 'm': |
ef6acdb8 | 791 | unshare_flags |= CLONE_NEWNS; |
0490a6ca KZ |
792 | if (optarg) |
793 | set_ns_target(CLONE_NEWNS, optarg); | |
4205f1fd MG |
794 | break; |
795 | case 'u': | |
ef6acdb8 | 796 | unshare_flags |= CLONE_NEWUTS; |
0490a6ca KZ |
797 | if (optarg) |
798 | set_ns_target(CLONE_NEWUTS, optarg); | |
4205f1fd MG |
799 | break; |
800 | case 'i': | |
ef6acdb8 | 801 | unshare_flags |= CLONE_NEWIPC; |
0490a6ca KZ |
802 | if (optarg) |
803 | set_ns_target(CLONE_NEWIPC, optarg); | |
4205f1fd MG |
804 | break; |
805 | case 'n': | |
ef6acdb8 | 806 | unshare_flags |= CLONE_NEWNET; |
0490a6ca KZ |
807 | if (optarg) |
808 | set_ns_target(CLONE_NEWNET, optarg); | |
4205f1fd | 809 | break; |
bc7f9b95 EB |
810 | case 'p': |
811 | unshare_flags |= CLONE_NEWPID; | |
0490a6ca KZ |
812 | if (optarg) |
813 | set_ns_target(CLONE_NEWPID, optarg); | |
bc7f9b95 EB |
814 | break; |
815 | case 'U': | |
816 | unshare_flags |= CLONE_NEWUSER; | |
0490a6ca KZ |
817 | if (optarg) |
818 | set_ns_target(CLONE_NEWUSER, optarg); | |
bc7f9b95 | 819 | break; |
f9e7b66d SH |
820 | case 'C': |
821 | unshare_flags |= CLONE_NEWCGROUP; | |
822 | if (optarg) | |
823 | set_ns_target(CLONE_NEWCGROUP, optarg); | |
824 | break; | |
f218fd97 | 825 | case 'T': |
be7df01a AR |
826 | unshare_flags |= CLONE_NEWTIME; |
827 | if (optarg) | |
828 | set_ns_target(CLONE_NEWTIME, optarg); | |
829 | break; | |
6728ca10 KZ |
830 | case OPT_MOUNTPROC: |
831 | unshare_flags |= CLONE_NEWNS; | |
832 | procmnt = optarg ? optarg : "/proc"; | |
833 | break; | |
6e837b5a MHB |
834 | case OPT_MAPUSER: |
835 | unshare_flags |= CLONE_NEWUSER; | |
987550cb | 836 | mapuser = get_user(optarg, _("failed to parse uid")); |
6e837b5a MHB |
837 | break; |
838 | case OPT_MAPGROUP: | |
839 | unshare_flags |= CLONE_NEWUSER; | |
987550cb | 840 | mapgroup = get_group(optarg, _("failed to parse gid")); |
6e837b5a | 841 | break; |
4da21e37 | 842 | case 'r': |
4175f29e | 843 | unshare_flags |= CLONE_NEWUSER; |
6e837b5a MHB |
844 | mapuser = 0; |
845 | mapgroup = 0; | |
4175f29e JP |
846 | break; |
847 | case 'c': | |
4da21e37 | 848 | unshare_flags |= CLONE_NEWUSER; |
6e837b5a MHB |
849 | mapuser = real_euid; |
850 | mapgroup = real_egid; | |
4da21e37 | 851 | break; |
ff5dc96e SA |
852 | case OPT_MAPUSERS: |
853 | unshare_flags |= CLONE_NEWUSER; | |
e67b0ba3 SA |
854 | if (!strcmp(optarg, "auto")) |
855 | usermap = read_subid_range(_PATH_SUBUID, real_euid); | |
856 | else | |
857 | usermap = get_map_range(optarg); | |
ff5dc96e SA |
858 | break; |
859 | case OPT_MAPGROUPS: | |
860 | unshare_flags |= CLONE_NEWUSER; | |
e67b0ba3 SA |
861 | if (!strcmp(optarg, "auto")) |
862 | groupmap = read_subid_range(_PATH_SUBGID, real_egid); | |
863 | else | |
864 | groupmap = get_map_range(optarg); | |
865 | break; | |
866 | case OPT_MAPAUTO: | |
867 | unshare_flags |= CLONE_NEWUSER; | |
868 | usermap = read_subid_range(_PATH_SUBUID, real_euid); | |
869 | groupmap = read_subid_range(_PATH_SUBGID, real_egid); | |
ff5dc96e | 870 | break; |
fbceefde KZ |
871 | case OPT_SETGROUPS: |
872 | setgrpcmd = setgroups_str2id(optarg); | |
873 | break; | |
f0f22e9c KZ |
874 | case OPT_PROPAGATION: |
875 | propagation = parse_propagation(optarg); | |
876 | break; | |
8e8f0fa5 | 877 | case OPT_KILLCHILD: |
8e8f0fa5 | 878 | forkit = 1; |
8b39a17c NH |
879 | if (optarg) { |
880 | if ((kill_child_signo = signame_to_signum(optarg)) < 0) | |
881 | errx(EXIT_FAILURE, _("unknown signal: %s"), | |
882 | optarg); | |
883 | } else { | |
884 | kill_child_signo = SIGKILL; | |
885 | } | |
8e8f0fa5 | 886 | break; |
cef4decf JP |
887 | case OPT_KEEPCAPS: |
888 | keepcaps = 1; | |
889 | cap_last_cap(); /* Force last cap to be cached before we fork. */ | |
890 | break; | |
f0af42b5 LV |
891 | case 'S': |
892 | uid = strtoul_or_err(optarg, _("failed to parse uid")); | |
893 | force_uid = 1; | |
894 | break; | |
895 | case 'G': | |
896 | gid = strtoul_or_err(optarg, _("failed to parse gid")); | |
897 | force_gid = 1; | |
898 | break; | |
bf8834d4 LV |
899 | case 'R': |
900 | newroot = optarg; | |
901 | break; | |
902 | case 'w': | |
903 | newdir = optarg; | |
904 | break; | |
be7df01a AR |
905 | case OPT_MONOTONIC: |
906 | monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset")); | |
907 | force_monotonic = 1; | |
908 | break; | |
909 | case OPT_BOOTTIME: | |
910 | boottime = strtoul_or_err(optarg, _("failed to parse boottime offset")); | |
911 | force_boottime = 1; | |
912 | break; | |
2c308875 KZ |
913 | |
914 | case 'h': | |
915 | usage(); | |
916 | case 'V': | |
917 | print_version(EXIT_SUCCESS); | |
4205f1fd | 918 | default: |
677ec86c | 919 | errtryhelp(EXIT_FAILURE); |
4205f1fd MG |
920 | } |
921 | } | |
922 | ||
be7df01a AR |
923 | if ((force_monotonic || force_boottime) && !(unshare_flags & CLONE_NEWTIME)) |
924 | errx(EXIT_FAILURE, _("options --monotonic and --boottime require " | |
925 | "unsharing of a time namespace (-t)")); | |
926 | ||
ec711d72 KZ |
927 | /* clear any inherited settings */ |
928 | signal(SIGCHLD, SIG_DFL); | |
929 | ||
c84f2590 | 930 | if (npersists && (unshare_flags & CLONE_NEWNS)) |
783bb52a | 931 | pid_bind = bind_ns_files_from_child(&fd_bind); |
c84f2590 | 932 | |
ff5dc96e SA |
933 | if (usermap || groupmap) |
934 | pid_idmap = map_ids_from_child(&fd_idmap, mapuser, usermap, | |
935 | mapgroup, groupmap); | |
936 | ||
2eefe517 | 937 | if (-1 == unshare(unshare_flags)) |
4205f1fd MG |
938 | err(EXIT_FAILURE, _("unshare failed")); |
939 | ||
ff5dc96e SA |
940 | /* Tell child we've called unshare() */ |
941 | if (usermap || groupmap) | |
942 | sync_with_child(pid_idmap, fd_idmap); | |
943 | ||
0d5260b6 | 944 | if (force_boottime) |
945 | settime(boottime, CLOCK_BOOTTIME); | |
946 | ||
947 | if (force_monotonic) | |
948 | settime(monotonic, CLOCK_MONOTONIC); | |
949 | ||
950 | if (forkit) { | |
f2f98017 EC |
951 | if (sigemptyset(&sigset) != 0 || |
952 | sigaddset(&sigset, SIGINT) != 0 || | |
953 | sigaddset(&sigset, SIGTERM) != 0 || | |
954 | sigprocmask(SIG_BLOCK, &sigset, &oldsigset) != 0) | |
955 | err(EXIT_FAILURE, _("sigprocmask block failed")); | |
3ba6736f | 956 | |
0d5260b6 | 957 | /* force child forking before mountspace binding |
958 | * so pid_for_children is populated */ | |
959 | pid = fork(); | |
960 | ||
961 | switch(pid) { | |
962 | case -1: | |
963 | err(EXIT_FAILURE, _("fork failed")); | |
964 | case 0: /* child */ | |
f2f98017 EC |
965 | if (sigprocmask(SIG_SETMASK, &oldsigset, NULL)) |
966 | err(EXIT_FAILURE, | |
967 | _("sigprocmask restore failed")); | |
783bb52a SA |
968 | if (npersists && (unshare_flags & CLONE_NEWNS)) |
969 | close(fd_bind); | |
0d5260b6 | 970 | break; |
971 | default: /* parent */ | |
972 | break; | |
973 | } | |
974 | } | |
975 | ||
976 | if (npersists && (pid || !forkit)) { | |
977 | /* run in parent */ | |
783bb52a SA |
978 | if (pid_bind && (unshare_flags & CLONE_NEWNS)) |
979 | sync_with_child(pid_bind, fd_bind); | |
980 | else | |
c84f2590 KZ |
981 | /* simple way, just bind */ |
982 | bind_ns_files(getpid()); | |
983 | } | |
984 | ||
0d5260b6 | 985 | if (pid) { |
986 | if (waitpid(pid, &status, 0) == -1) | |
987 | err(EXIT_FAILURE, _("waitpid failed")); | |
3ba6736f | 988 | |
0d5260b6 | 989 | if (WIFEXITED(status)) |
990 | return WEXITSTATUS(status); | |
f2f98017 EC |
991 | if (WIFSIGNALED(status)) { |
992 | ||
993 | /* Ensure the signal that terminated the child will | |
994 | * also terminate the parent. */ | |
995 | ||
996 | int termsig = WTERMSIG(status); | |
997 | ||
998 | if (signal(termsig, SIG_DFL) == SIG_ERR || | |
999 | sigemptyset(&sigset) != 0 || | |
1000 | sigaddset(&sigset, termsig) != 0 || | |
1001 | sigprocmask(SIG_UNBLOCK, &sigset, NULL) != 0) | |
1002 | err(EXIT_FAILURE, | |
1003 | _("sigprocmask unblock failed")); | |
1004 | ||
1005 | kill(getpid(), termsig); | |
1006 | } | |
0d5260b6 | 1007 | err(EXIT_FAILURE, _("child exit failed")); |
5088ec33 MF |
1008 | } |
1009 | ||
525a0ab2 KZ |
1010 | if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0) |
1011 | err(EXIT_FAILURE, "prctl failed"); | |
0490a6ca | 1012 | |
ff5dc96e | 1013 | if (mapuser != (uid_t) -1 && !usermap) |
6e837b5a MHB |
1014 | map_id(_PATH_PROC_UIDMAP, mapuser, real_euid); |
1015 | ||
4175f29e JP |
1016 | /* Since Linux 3.19 unprivileged writing of /proc/self/gid_map |
1017 | * has been disabled unless /proc/self/setgroups is written | |
1018 | * first to permanently disable the ability to call setgroups | |
1019 | * in that user namespace. */ | |
ff5dc96e | 1020 | if (mapgroup != (gid_t) -1 && !groupmap) { |
fbceefde KZ |
1021 | if (setgrpcmd == SETGROUPS_ALLOW) |
1022 | errx(EXIT_FAILURE, _("options --setgroups=allow and " | |
6e837b5a | 1023 | "--map-group are mutually exclusive")); |
fbceefde | 1024 | setgroups_control(SETGROUPS_DENY); |
6e837b5a MHB |
1025 | map_id(_PATH_PROC_GIDMAP, mapgroup, real_egid); |
1026 | } | |
fbceefde | 1027 | |
6e837b5a MHB |
1028 | if (setgrpcmd != SETGROUPS_NONE) |
1029 | setgroups_control(setgrpcmd); | |
4da21e37 | 1030 | |
f0f22e9c KZ |
1031 | if ((unshare_flags & CLONE_NEWNS) && propagation) |
1032 | set_propagation(propagation); | |
1033 | ||
bf8834d4 LV |
1034 | if (newroot) { |
1035 | if (chroot(newroot) != 0) | |
1036 | err(EXIT_FAILURE, | |
1037 | _("cannot change root directory to '%s'"), newroot); | |
1038 | newdir = newdir ?: "/"; | |
1039 | } | |
1040 | if (newdir && chdir(newdir)) | |
1041 | err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir); | |
1042 | ||
1043 | if (procmnt) { | |
ef7eccad | 1044 | /* When not changing root and using the default propagation flags |
1045 | then the recursive propagation change of root will | |
1046 | automatically change that of an existing proc mount. */ | |
1047 | if (!newroot && propagation != (MS_PRIVATE|MS_REC)) { | |
1048 | int rc = mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL); | |
1049 | ||
1050 | /* Custom procmnt means that proc is very likely not mounted, causing EINVAL. | |
1051 | Ignoring the error in this specific instance is considered safe. */ | |
1052 | if(rc != 0 && errno != EINVAL) | |
1053 | err(EXIT_FAILURE, _("cannot change %s filesystem propagation"), procmnt); | |
1054 | } | |
1055 | ||
bf8834d4 | 1056 | if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0) |
6728ca10 | 1057 | err(EXIT_FAILURE, _("mount %s failed"), procmnt); |
bf8834d4 | 1058 | } |
6728ca10 | 1059 | |
f0af42b5 LV |
1060 | if (force_gid) { |
1061 | if (setgroups(0, NULL) != 0) /* drop supplementary groups */ | |
1062 | err(EXIT_FAILURE, _("setgroups failed")); | |
1063 | if (setgid(gid) < 0) /* change GID */ | |
1064 | err(EXIT_FAILURE, _("setgid failed")); | |
1065 | } | |
1066 | if (force_uid && setuid(uid) < 0) /* change UID */ | |
1067 | err(EXIT_FAILURE, _("setuid failed")); | |
1068 | ||
cef4decf JP |
1069 | /* We use capabilities system calls to propagate the permitted |
1070 | * capabilities into the ambient set because we have already | |
1071 | * forked so are in async-signal-safe context. */ | |
1072 | if (keepcaps && (unshare_flags & CLONE_NEWUSER)) { | |
1073 | struct __user_cap_header_struct header = { | |
1074 | .version = _LINUX_CAPABILITY_VERSION_3, | |
1075 | .pid = 0, | |
1076 | }; | |
1077 | ||
9eba8476 | 1078 | struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = {{ 0 }}; |
232fcae8 | 1079 | uint64_t effective, cap; |
cef4decf | 1080 | |
ac0391cc | 1081 | if (capget(&header, payload) < 0) |
cef4decf | 1082 | err(EXIT_FAILURE, _("capget failed")); |
cef4decf JP |
1083 | |
1084 | /* In order to make capabilities ambient, we first need to ensure | |
1085 | * that they are all inheritable. */ | |
1086 | payload[0].inheritable = payload[0].permitted; | |
1087 | payload[1].inheritable = payload[1].permitted; | |
1088 | ||
ac0391cc | 1089 | if (capset(&header, payload) < 0) |
cef4decf | 1090 | err(EXIT_FAILURE, _("capset failed")); |
cef4decf | 1091 | |
ac0391cc | 1092 | effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective; |
cef4decf | 1093 | |
232fcae8 | 1094 | for (cap = 0; cap < (sizeof(effective) * 8); cap++) { |
cef4decf JP |
1095 | /* This is the same check as cap_valid(), but using |
1096 | * the runtime value for the last valid cap. */ | |
232fcae8 | 1097 | if (cap > (uint64_t) cap_last_cap()) |
cef4decf | 1098 | continue; |
cef4decf | 1099 | |
ac0391cc KZ |
1100 | if ((effective & (1 << cap)) |
1101 | && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0) | |
cef4decf | 1102 | err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed")); |
cef4decf JP |
1103 | } |
1104 | } | |
1105 | ||
57580694 ZJS |
1106 | if (optind < argc) { |
1107 | execvp(argv[optind], argv + optind); | |
fd777151 | 1108 | errexec(argv[optind]); |
57580694 ZJS |
1109 | } |
1110 | exec_shell(); | |
4205f1fd | 1111 | } |