]> git.ipfire.org Git - thirdparty/util-linux.git/blob - sys-utils/unshare.c
misc: consolidate stat() error message
[thirdparty/util-linux.git] / sys-utils / unshare.c
1 /*
2 * unshare(1) - command-line interface for unshare(2)
3 *
4 * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #include <errno.h>
22 #include <getopt.h>
23 #include <sched.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/wait.h>
28 #include <sys/mount.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <sys/prctl.h>
32 #include <grp.h>
33
34 /* we only need some defines missing in sys/mount.h, no libmount linkage */
35 #include <libmount.h>
36
37 #include "nls.h"
38 #include "c.h"
39 #include "caputils.h"
40 #include "closestream.h"
41 #include "namespace.h"
42 #include "exec_shell.h"
43 #include "xalloc.h"
44 #include "pathnames.h"
45 #include "all-io.h"
46 #include "signames.h"
47 #include "strutils.h"
48 #include "pwdutils.h"
49
/*
 * Byte the parent writes into the sync pipe to tell the bind helper
 * process that it may go ahead.
 */
#define PIPE_SYNC_BYTE	0x06

/*
 * Propagation applied to the new mount namespace unless --propagation
 * overrides it; 'private' is the kernel default.
 */
#define UNSHARE_PROPAGATION_DEFAULT	(MS_PRIVATE | MS_REC)
55
56 /* /proc namespace files and mountpoints for binds */
57 static struct namespace_file {
58 int type; /* CLONE_NEW* */
59 const char *name; /* ns/<type> */
60 const char *target; /* user specified target for bind mount */
61 } namespace_files[] = {
62 { .type = CLONE_NEWUSER, .name = "ns/user" },
63 { .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
64 { .type = CLONE_NEWIPC, .name = "ns/ipc" },
65 { .type = CLONE_NEWUTS, .name = "ns/uts" },
66 { .type = CLONE_NEWNET, .name = "ns/net" },
67 { .type = CLONE_NEWPID, .name = "ns/pid_for_children" },
68 { .type = CLONE_NEWNS, .name = "ns/mnt" },
69 { .type = CLONE_NEWTIME, .name = "ns/time_for_children" },
70 { .name = NULL }
71 };
72
73 static int npersists; /* number of persistent namespaces */
74
/* Values accepted for the setgroups control; NONE means "not requested". */
enum {
	SETGROUPS_NONE  = -1,
	SETGROUPS_DENY  = 0,
	SETGROUPS_ALLOW = 1
};

/* Keyword for each SETGROUPS_* action, indexed by the action value. */
static const char *setgroups_strings[] = {
	[SETGROUPS_ALLOW] = "allow",
	[SETGROUPS_DENY]  = "deny"
};
86
87 static int setgroups_str2id(const char *str)
88 {
89 size_t i;
90
91 for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
92 if (strcmp(str, setgroups_strings[i]) == 0)
93 return i;
94
95 errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
96 }
97
98 static void setgroups_control(int action)
99 {
100 const char *file = _PATH_PROC_SETGROUPS;
101 const char *cmd;
102 int fd;
103
104 if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
105 return;
106 cmd = setgroups_strings[action];
107
108 fd = open(file, O_WRONLY);
109 if (fd < 0) {
110 if (errno == ENOENT)
111 return;
112 err(EXIT_FAILURE, _("cannot open %s"), file);
113 }
114
115 if (write_all(fd, cmd, strlen(cmd)))
116 err(EXIT_FAILURE, _("write failed %s"), file);
117 close(fd);
118 }
119
/*
 * Write the single-entry ID mapping "<from> <to> 1" to @file.
 *
 * @file: map file to write (callers pass the uid or gid map path)
 * @from: first number of the mapping line
 * @to:   second number of the mapping line
 *
 * Terminates the program if the file cannot be opened or written.
 */
static void map_id(const char *file, uint32_t from, uint32_t to)
{
	char *line = NULL;
	int mapfd = open(file, O_WRONLY);

	if (mapfd < 0)
		err(EXIT_FAILURE, _("cannot open %s"), file);

	xasprintf(&line, "%u %u 1", from, to);
	if (write_all(mapfd, line, strlen(line)) != 0)
		err(EXIT_FAILURE, _("write failed %s"), file);

	close(mapfd);
	free(line);
}
135
/*
 * Convert a --propagation keyword into mount(2) flags.
 *
 * Returns MS_REC combined with the matching MS_* propagation flag, or 0
 * for "unchanged".  An unknown keyword terminates the program.
 */
static unsigned long parse_propagation(const char *str)
{
	static const struct prop_opts {
		const char *name;
		unsigned long flag;
	} opts[] = {
		{ "slave",	MS_REC | MS_SLAVE },
		{ "private",	MS_REC | MS_PRIVATE },
		{ "shared",	MS_REC | MS_SHARED },
		{ "unchanged",	0 }
	};
	const struct prop_opts *opt;

	for (opt = opts; opt < opts + ARRAY_SIZE(opts); opt++)
		if (!strcmp(opt->name, str))
			return opt->flag;

	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
}
156
/*
 * Apply the propagation @flags to the root mount ("/").
 *
 * A @flags value of 0 means "leave unchanged" and does nothing.  A
 * failing mount(2) call terminates the program.
 */
static void set_propagation(unsigned long flags)
{
	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
}
165
166
167 static int set_ns_target(int type, const char *path)
168 {
169 struct namespace_file *ns;
170
171 for (ns = namespace_files; ns->name; ns++) {
172 if (ns->type != type)
173 continue;
174 ns->target = path;
175 npersists++;
176 return 0;
177 }
178
179 return -EINVAL;
180 }
181
182 static int bind_ns_files(pid_t pid)
183 {
184 struct namespace_file *ns;
185 char src[PATH_MAX];
186
187 for (ns = namespace_files; ns->name; ns++) {
188 if (!ns->target)
189 continue;
190
191 snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name);
192
193 if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0)
194 err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target);
195 }
196
197 return 0;
198 }
199
200 static ino_t get_mnt_ino(pid_t pid)
201 {
202 struct stat st;
203 char path[PATH_MAX];
204
205 snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid);
206
207 if (stat(path, &st) != 0)
208 err(EXIT_FAILURE, _("stat of %s failed"), path);
209 return st.st_ino;
210 }
211
212 static void settime(time_t offset, clockid_t clk_id)
213 {
214 char buf[sizeof(stringify_value(ULONG_MAX)) * 3];
215 int fd, len;
216
217 len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset);
218
219 fd = open("/proc/self/timens_offsets", O_WRONLY);
220 if (fd < 0)
221 err(EXIT_FAILURE, _("failed to open /proc/self/timens_offsets"));
222
223 if (write(fd, buf, len) != len)
224 err(EXIT_FAILURE, _("failed to write to /proc/self/timens_offsets"));
225
226 close(fd);
227 }
228
229 static void bind_ns_files_from_child(pid_t *child, int fds[2])
230 {
231 char ch;
232 pid_t ppid = getpid();
233 ino_t ino = get_mnt_ino(ppid);
234
235 if (pipe(fds) < 0)
236 err(EXIT_FAILURE, _("pipe failed"));
237
238 *child = fork();
239
240 switch (*child) {
241 case -1:
242 err(EXIT_FAILURE, _("fork failed"));
243
244 case 0: /* child */
245 close(fds[1]);
246 fds[1] = -1;
247
248 /* wait for parent */
249 if (read_all(fds[0], &ch, 1) != 1 && ch != PIPE_SYNC_BYTE)
250 err(EXIT_FAILURE, _("failed to read pipe"));
251 if (get_mnt_ino(ppid) == ino)
252 exit(EXIT_FAILURE);
253 bind_ns_files(ppid);
254 exit(EXIT_SUCCESS);
255 break;
256
257 default: /* parent */
258 close(fds[0]);
259 fds[0] = -1;
260 break;
261 }
262 }
263
/*
 * Resolve @s to a UID.
 *
 * @s is first looked up as a user name with xgetpwnam(); if that yields
 * no entry it is parsed as a number with strtoul_or_err(), which receives
 * @err as its error message.
 */
static uid_t get_user(const char *s, const char *err)
{
	char *pwbuf = NULL;
	struct passwd *entry = xgetpwnam(s, &pwbuf);
	uid_t id;

	if (!entry)
		return strtoul_or_err(s, err);

	id = entry->pw_uid;
	free(entry);
	free(pwbuf);
	return id;
}
281
/*
 * Resolve @s to a GID.
 *
 * @s is first looked up as a group name with xgetgrnam(); if that yields
 * no entry it is parsed as a number with strtoul_or_err(), which receives
 * @err as its error message.
 */
static gid_t get_group(const char *s, const char *err)
{
	char *grbuf = NULL;
	struct group *entry = xgetgrnam(s, &grbuf);
	gid_t id;

	if (!entry)
		return strtoul_or_err(s, err);

	id = entry->gr_gid;
	free(entry);
	free(grbuf);
	return id;
}
299
300 static void __attribute__((__noreturn__)) usage(void)
301 {
302 FILE *out = stdout;
303
304 fputs(USAGE_HEADER, out);
305 fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
306 program_invocation_short_name);
307
308 fputs(USAGE_SEPARATOR, out);
309 fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
310
311 fputs(USAGE_OPTIONS, out);
312 fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out);
313 fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out);
314 fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out);
315 fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out);
316 fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out);
317 fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out);
318 fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out);
319 fputs(_(" -T, --time[=<file>] unshare time namespace\n"), out);
320 fputs(USAGE_SEPARATOR, out);
321 fputs(_(" -f, --fork fork before launching <program>\n"), out);
322 fputs(_(" --map-user=<uid>|<name> map current user to uid (implies --user)\n"), out);
323 fputs(_(" --map-group=<gid>|<name> map current group to gid (implies --user)\n"), out);
324 fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
325 fputs(_(" -c, --map-current-user map current user to itself (implies --user)\n"), out);
326 fputs(USAGE_SEPARATOR, out);
327 fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n"
328 " defaults to SIGKILL\n"), out);
329 fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
330 fputs(_(" --propagation slave|shared|private|unchanged\n"
331 " modify mount propagation in mount namespace\n"), out);
332 fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
333 fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out);
334 fputs(USAGE_SEPARATOR, out);
335 fputs(_(" -R, --root=<dir> run the command with root directory set to <dir>\n"), out);
336 fputs(_(" -w, --wd=<dir> change working directory to <dir>\n"), out);
337 fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
338 fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
339 fputs(_(" --monotonic <offset> set clock monotonic offset (seconds) in time namespaces\n"), out);
340 fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out);
341
342 fputs(USAGE_SEPARATOR, out);
343 printf(USAGE_HELP_OPTIONS(27));
344 printf(USAGE_MAN_TAIL("unshare(1)"));
345
346 exit(EXIT_SUCCESS);
347 }
348
349 int main(int argc, char *argv[])
350 {
351 enum {
352 OPT_MOUNTPROC = CHAR_MAX + 1,
353 OPT_PROPAGATION,
354 OPT_SETGROUPS,
355 OPT_KILLCHILD,
356 OPT_KEEPCAPS,
357 OPT_MONOTONIC,
358 OPT_BOOTTIME,
359 OPT_MAPUSER,
360 OPT_MAPGROUP,
361 };
362 static const struct option longopts[] = {
363 { "help", no_argument, NULL, 'h' },
364 { "version", no_argument, NULL, 'V' },
365
366 { "mount", optional_argument, NULL, 'm' },
367 { "uts", optional_argument, NULL, 'u' },
368 { "ipc", optional_argument, NULL, 'i' },
369 { "net", optional_argument, NULL, 'n' },
370 { "pid", optional_argument, NULL, 'p' },
371 { "user", optional_argument, NULL, 'U' },
372 { "cgroup", optional_argument, NULL, 'C' },
373 { "time", optional_argument, NULL, 'T' },
374
375 { "fork", no_argument, NULL, 'f' },
376 { "kill-child", optional_argument, NULL, OPT_KILLCHILD },
377 { "mount-proc", optional_argument, NULL, OPT_MOUNTPROC },
378 { "map-user", required_argument, NULL, OPT_MAPUSER },
379 { "map-group", required_argument, NULL, OPT_MAPGROUP },
380 { "map-root-user", no_argument, NULL, 'r' },
381 { "map-current-user", no_argument, NULL, 'c' },
382 { "propagation", required_argument, NULL, OPT_PROPAGATION },
383 { "setgroups", required_argument, NULL, OPT_SETGROUPS },
384 { "keep-caps", no_argument, NULL, OPT_KEEPCAPS },
385 { "setuid", required_argument, NULL, 'S' },
386 { "setgid", required_argument, NULL, 'G' },
387 { "root", required_argument, NULL, 'R' },
388 { "wd", required_argument, NULL, 'w' },
389 { "monotonic", required_argument, NULL, OPT_MONOTONIC },
390 { "boottime", required_argument, NULL, OPT_BOOTTIME },
391 { NULL, 0, NULL, 0 }
392 };
393
394 int setgrpcmd = SETGROUPS_NONE;
395 int unshare_flags = 0;
396 int c, forkit = 0;
397 uid_t mapuser = -1;
398 gid_t mapgroup = -1;
399 int kill_child_signo = 0; /* 0 means --kill-child was not used */
400 const char *procmnt = NULL;
401 const char *newroot = NULL;
402 const char *newdir = NULL;
403 pid_t pid_bind = 0;
404 pid_t pid = 0;
405 int fds[2];
406 int status;
407 unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
408 int force_uid = 0, force_gid = 0;
409 uid_t uid = 0, real_euid = geteuid();
410 gid_t gid = 0, real_egid = getegid();
411 int keepcaps = 0;
412 time_t monotonic = 0;
413 time_t boottime = 0;
414 int force_monotonic = 0;
415 int force_boottime = 0;
416
417 setlocale(LC_ALL, "");
418 bindtextdomain(PACKAGE, LOCALEDIR);
419 textdomain(PACKAGE);
420 close_stdout_atexit();
421
422 while ((c = getopt_long(argc, argv, "+fhVmuinpCTUrR:w:S:G:c", longopts, NULL)) != -1) {
423 switch (c) {
424 case 'f':
425 forkit = 1;
426 break;
427 case 'm':
428 unshare_flags |= CLONE_NEWNS;
429 if (optarg)
430 set_ns_target(CLONE_NEWNS, optarg);
431 break;
432 case 'u':
433 unshare_flags |= CLONE_NEWUTS;
434 if (optarg)
435 set_ns_target(CLONE_NEWUTS, optarg);
436 break;
437 case 'i':
438 unshare_flags |= CLONE_NEWIPC;
439 if (optarg)
440 set_ns_target(CLONE_NEWIPC, optarg);
441 break;
442 case 'n':
443 unshare_flags |= CLONE_NEWNET;
444 if (optarg)
445 set_ns_target(CLONE_NEWNET, optarg);
446 break;
447 case 'p':
448 unshare_flags |= CLONE_NEWPID;
449 if (optarg)
450 set_ns_target(CLONE_NEWPID, optarg);
451 break;
452 case 'U':
453 unshare_flags |= CLONE_NEWUSER;
454 if (optarg)
455 set_ns_target(CLONE_NEWUSER, optarg);
456 break;
457 case 'C':
458 unshare_flags |= CLONE_NEWCGROUP;
459 if (optarg)
460 set_ns_target(CLONE_NEWCGROUP, optarg);
461 break;
462 case 'T':
463 unshare_flags |= CLONE_NEWTIME;
464 if (optarg)
465 set_ns_target(CLONE_NEWTIME, optarg);
466 break;
467 case OPT_MOUNTPROC:
468 unshare_flags |= CLONE_NEWNS;
469 procmnt = optarg ? optarg : "/proc";
470 break;
471 case OPT_MAPUSER:
472 unshare_flags |= CLONE_NEWUSER;
473 mapuser = get_user(optarg, _("failed to parse uid"));
474 break;
475 case OPT_MAPGROUP:
476 unshare_flags |= CLONE_NEWUSER;
477 mapgroup = get_group(optarg, _("failed to parse gid"));
478 break;
479 case 'r':
480 unshare_flags |= CLONE_NEWUSER;
481 mapuser = 0;
482 mapgroup = 0;
483 break;
484 case 'c':
485 unshare_flags |= CLONE_NEWUSER;
486 mapuser = real_euid;
487 mapgroup = real_egid;
488 break;
489 case OPT_SETGROUPS:
490 setgrpcmd = setgroups_str2id(optarg);
491 break;
492 case OPT_PROPAGATION:
493 propagation = parse_propagation(optarg);
494 break;
495 case OPT_KILLCHILD:
496 forkit = 1;
497 if (optarg) {
498 if ((kill_child_signo = signame_to_signum(optarg)) < 0)
499 errx(EXIT_FAILURE, _("unknown signal: %s"),
500 optarg);
501 } else {
502 kill_child_signo = SIGKILL;
503 }
504 break;
505 case OPT_KEEPCAPS:
506 keepcaps = 1;
507 cap_last_cap(); /* Force last cap to be cached before we fork. */
508 break;
509 case 'S':
510 uid = strtoul_or_err(optarg, _("failed to parse uid"));
511 force_uid = 1;
512 break;
513 case 'G':
514 gid = strtoul_or_err(optarg, _("failed to parse gid"));
515 force_gid = 1;
516 break;
517 case 'R':
518 newroot = optarg;
519 break;
520 case 'w':
521 newdir = optarg;
522 break;
523 case OPT_MONOTONIC:
524 monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset"));
525 force_monotonic = 1;
526 break;
527 case OPT_BOOTTIME:
528 boottime = strtoul_or_err(optarg, _("failed to parse boottime offset"));
529 force_boottime = 1;
530 break;
531
532 case 'h':
533 usage();
534 case 'V':
535 print_version(EXIT_SUCCESS);
536 default:
537 errtryhelp(EXIT_FAILURE);
538 }
539 }
540
541 if ((force_monotonic || force_boottime) && !(unshare_flags & CLONE_NEWTIME))
542 errx(EXIT_FAILURE, _("options --monotonic and --boottime require "
543 "unsharing of a time namespace (-t)"));
544
545 if (npersists && (unshare_flags & CLONE_NEWNS))
546 bind_ns_files_from_child(&pid_bind, fds);
547
548 if (-1 == unshare(unshare_flags))
549 err(EXIT_FAILURE, _("unshare failed"));
550
551 if (force_boottime)
552 settime(boottime, CLOCK_BOOTTIME);
553
554 if (force_monotonic)
555 settime(monotonic, CLOCK_MONOTONIC);
556
557 if (forkit) {
558 signal(SIGINT, SIG_IGN);
559 signal(SIGTERM, SIG_IGN);
560
561 /* force child forking before mountspace binding
562 * so pid_for_children is populated */
563 pid = fork();
564
565 switch(pid) {
566 case -1:
567 err(EXIT_FAILURE, _("fork failed"));
568 case 0: /* child */
569 if (pid_bind && (unshare_flags & CLONE_NEWNS))
570 close(fds[1]);
571 break;
572 default: /* parent */
573 break;
574 }
575 }
576
577 if (npersists && (pid || !forkit)) {
578 /* run in parent */
579 if (pid_bind && (unshare_flags & CLONE_NEWNS)) {
580 int rc;
581 char ch = PIPE_SYNC_BYTE;
582
583 /* signal child we are ready */
584 write_all(fds[1], &ch, 1);
585 close(fds[1]);
586 fds[1] = -1;
587
588 /* wait for bind_ns_files_from_child() */
589 do {
590 rc = waitpid(pid_bind, &status, 0);
591 if (rc < 0) {
592 if (errno == EINTR)
593 continue;
594 err(EXIT_FAILURE, _("waitpid failed"));
595 }
596 if (WIFEXITED(status) &&
597 WEXITSTATUS(status) != EXIT_SUCCESS)
598 return WEXITSTATUS(status);
599 } while (rc < 0);
600 } else
601 /* simple way, just bind */
602 bind_ns_files(getpid());
603 }
604
605 if (pid) {
606 if (waitpid(pid, &status, 0) == -1)
607 err(EXIT_FAILURE, _("waitpid failed"));
608
609 signal(SIGINT, SIG_DFL);
610 signal(SIGTERM, SIG_DFL);
611
612 if (WIFEXITED(status))
613 return WEXITSTATUS(status);
614 if (WIFSIGNALED(status))
615 kill(getpid(), WTERMSIG(status));
616 err(EXIT_FAILURE, _("child exit failed"));
617 }
618
619 if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
620 err(EXIT_FAILURE, "prctl failed");
621
622 if (mapuser != (uid_t) -1)
623 map_id(_PATH_PROC_UIDMAP, mapuser, real_euid);
624
625 /* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
626 * has been disabled unless /proc/self/setgroups is written
627 * first to permanently disable the ability to call setgroups
628 * in that user namespace. */
629 if (mapgroup != (gid_t) -1) {
630 if (setgrpcmd == SETGROUPS_ALLOW)
631 errx(EXIT_FAILURE, _("options --setgroups=allow and "
632 "--map-group are mutually exclusive"));
633 setgroups_control(SETGROUPS_DENY);
634 map_id(_PATH_PROC_GIDMAP, mapgroup, real_egid);
635 }
636
637 if (setgrpcmd != SETGROUPS_NONE)
638 setgroups_control(setgrpcmd);
639
640 if ((unshare_flags & CLONE_NEWNS) && propagation)
641 set_propagation(propagation);
642
643 if (newroot) {
644 if (chroot(newroot) != 0)
645 err(EXIT_FAILURE,
646 _("cannot change root directory to '%s'"), newroot);
647 newdir = newdir ?: "/";
648 }
649 if (newdir && chdir(newdir))
650 err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir);
651
652 if (procmnt) {
653 if (!newroot && mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0)
654 err(EXIT_FAILURE, _("cannot change %s filesystem propagation"), procmnt);
655 if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0)
656 err(EXIT_FAILURE, _("mount %s failed"), procmnt);
657 }
658
659 if (force_gid) {
660 if (setgroups(0, NULL) != 0) /* drop supplementary groups */
661 err(EXIT_FAILURE, _("setgroups failed"));
662 if (setgid(gid) < 0) /* change GID */
663 err(EXIT_FAILURE, _("setgid failed"));
664 }
665 if (force_uid && setuid(uid) < 0) /* change UID */
666 err(EXIT_FAILURE, _("setuid failed"));
667
668 /* We use capabilities system calls to propagate the permitted
669 * capabilities into the ambient set because we have already
670 * forked so are in async-signal-safe context. */
671 if (keepcaps && (unshare_flags & CLONE_NEWUSER)) {
672 struct __user_cap_header_struct header = {
673 .version = _LINUX_CAPABILITY_VERSION_3,
674 .pid = 0,
675 };
676
677 struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = {{ 0 }};
678 uint64_t effective, cap;
679
680 if (capget(&header, payload) < 0)
681 err(EXIT_FAILURE, _("capget failed"));
682
683 /* In order the make capabilities ambient, we first need to ensure
684 * that they are all inheritable. */
685 payload[0].inheritable = payload[0].permitted;
686 payload[1].inheritable = payload[1].permitted;
687
688 if (capset(&header, payload) < 0)
689 err(EXIT_FAILURE, _("capset failed"));
690
691 effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective;
692
693 for (cap = 0; cap < (sizeof(effective) * 8); cap++) {
694 /* This is the same check as cap_valid(), but using
695 * the runtime value for the last valid cap. */
696 if (cap > (uint64_t) cap_last_cap())
697 continue;
698
699 if ((effective & (1 << cap))
700 && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0)
701 err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed"));
702 }
703 }
704
705 if (optind < argc) {
706 execvp(argv[optind], argv + optind);
707 errexec(argv[optind]);
708 }
709 exec_shell();
710 }