]> git.ipfire.org Git - thirdparty/util-linux.git/blame - sys-utils/unshare.c
unshare: support the time namespace
[thirdparty/util-linux.git] / sys-utils / unshare.c
CommitLineData
4205f1fd
MG
/*
 * unshare(1) - command-line interface for unshare(2)
 *
 * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
20
4205f1fd
MG
21#include <errno.h>
22#include <getopt.h>
23#include <sched.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
5088ec33 27#include <sys/wait.h>
6728ca10 28#include <sys/mount.h>
c84f2590
KZ
29#include <sys/types.h>
30#include <sys/stat.h>
8e8f0fa5 31#include <sys/prctl.h>
f0af42b5 32#include <grp.h>
c84f2590 33
d754315c
RM
34/* we only need some defines missing in sys/mount.h, no libmount linkage */
35#include <libmount.h>
36
4205f1fd 37#include "nls.h"
eb76ca98 38#include "c.h"
cef4decf 39#include "caputils.h"
efb8854f 40#include "closestream.h"
c91280a4 41#include "namespace.h"
57580694 42#include "exec_shell.h"
4da21e37
LR
43#include "xalloc.h"
44#include "pathnames.h"
45#include "all-io.h"
8b39a17c 46#include "signames.h"
f0af42b5 47#include "strutils.h"
4da21e37 48
99fcafdf
YK
49/* synchronize parent and child by pipe */
50#define PIPE_SYNC_BYTE 0x06
51
f0f22e9c
KZ
52/* 'private' is kernel default */
53#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE)
54
0490a6ca
KZ
55/* /proc namespace files and mountpoints for binds */
56static struct namespace_file {
57 int type; /* CLONE_NEW* */
58 const char *name; /* ns/<type> */
59 const char *target; /* user specified target for bind mount */
60} namespace_files[] = {
f9e7b66d
SH
61 { .type = CLONE_NEWUSER, .name = "ns/user" },
62 { .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
63 { .type = CLONE_NEWIPC, .name = "ns/ipc" },
64 { .type = CLONE_NEWUTS, .name = "ns/uts" },
65 { .type = CLONE_NEWNET, .name = "ns/net" },
66 { .type = CLONE_NEWPID, .name = "ns/pid" },
67 { .type = CLONE_NEWNS, .name = "ns/mnt" },
be7df01a 68 { .type = CLONE_NEWTIME, .name = "ns/time" },
0490a6ca
KZ
69 { .name = NULL }
70};
71
72static int npersists; /* number of persistent namespaces */
73
fbceefde
KZ
/*
 * Identifiers for the --setgroups argument. The non-negative values
 * are used as indexes into setgroups_strings[].
 */
enum {
	SETGROUPS_NONE  = -1,	/* no --setgroups action selected */
	SETGROUPS_DENY  =  0,
	SETGROUPS_ALLOW =  1
};
79
4175f29e
JP
/* User mapping modes selected by --map-root-user / --map-current-user. */
enum {
	MAP_USER_NONE    = 0,	/* no mapping requested */
	MAP_USER_ROOT    = 1,	/* --map-root-user */
	MAP_USER_CURRENT = 2	/* --map-current-user */
};
85
fbceefde
KZ
86static const char *setgroups_strings[] =
87{
88 [SETGROUPS_DENY] = "deny",
89 [SETGROUPS_ALLOW] = "allow"
90};
91
92static int setgroups_str2id(const char *str)
93{
94 size_t i;
95
96 for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
97 if (strcmp(str, setgroups_strings[i]) == 0)
98 return i;
99
100 errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
101}
102
103static void setgroups_control(int action)
0bf15941
EB
104{
105 const char *file = _PATH_PROC_SETGROUPS;
fbceefde 106 const char *cmd;
0bf15941
EB
107 int fd;
108
fbceefde
KZ
109 if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
110 return;
111 cmd = setgroups_strings[action];
112
0bf15941
EB
113 fd = open(file, O_WRONLY);
114 if (fd < 0) {
115 if (errno == ENOENT)
116 return;
7ff635bf 117 err(EXIT_FAILURE, _("cannot open %s"), file);
0bf15941
EB
118 }
119
fbceefde 120 if (write_all(fd, cmd, strlen(cmd)))
0bf15941
EB
121 err(EXIT_FAILURE, _("write failed %s"), file);
122 close(fd);
123}
124
4da21e37
LR
/*
 * Write a single-entry ID mapping, formatted as "<from> <to> 1", to @file.
 * Terminates the program if the file cannot be opened or written.
 */
static void map_id(const char *file, uint32_t from, uint32_t to)
{
	char *mapping;
	int fd = open(file, O_WRONLY);

	if (fd < 0)
		err(EXIT_FAILURE, _("cannot open %s"), file);

	xasprintf(&mapping, "%u %u 1", from, to);

	if (write_all(fd, mapping, strlen(mapping)) != 0)
		err(EXIT_FAILURE, _("write failed %s"), file);

	free(mapping);
	close(fd);
}
4205f1fd 140
f0f22e9c
KZ
/*
 * Convert a --propagation argument into mount(2) flags.
 * "unchanged" maps to 0. An unknown mode terminates the program.
 */
static unsigned long parse_propagation(const char *str)
{
	static const struct prop_opts {
		const char *name;
		unsigned long flag;
	} modes[] = {
		{ "slave",	MS_REC | MS_SLAVE },
		{ "private",	MS_REC | MS_PRIVATE },
		{ "shared",	MS_REC | MS_SHARED },
		{ "unchanged",	0 }
	};
	const struct prop_opts *mode;

	for (mode = modes; mode < modes + ARRAY_SIZE(modes); mode++)
		if (!strcmp(mode->name, str))
			return mode->flag;

	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
}
161
/*
 * Apply the propagation @flags to "/" via mount(2).
 * A value of 0 means nothing is changed.
 */
static void set_propagation(unsigned long flags)
{
	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
}
170
0490a6ca
KZ
171
172static int set_ns_target(int type, const char *path)
173{
174 struct namespace_file *ns;
175
176 for (ns = namespace_files; ns->name; ns++) {
177 if (ns->type != type)
178 continue;
179 ns->target = path;
180 npersists++;
181 return 0;
182 }
183
184 return -EINVAL;
185}
186
187static int bind_ns_files(pid_t pid)
188{
189 struct namespace_file *ns;
190 char src[PATH_MAX];
191
192 for (ns = namespace_files; ns->name; ns++) {
193 if (!ns->target)
194 continue;
195
196 snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name);
197
198 if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0)
199 err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target);
200 }
201
202 return 0;
203}
204
c84f2590
KZ
205static ino_t get_mnt_ino(pid_t pid)
206{
207 struct stat st;
208 char path[PATH_MAX];
209
210 snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid);
211
212 if (stat(path, &st) != 0)
213 err(EXIT_FAILURE, _("cannot stat %s"), path);
214 return st.st_ino;
215}
216
be7df01a
AR
217static void settime(time_t offset, clockid_t clk_id)
218{
219 char buf[sizeof(stringify_value(ULONG_MAX)) * 3];
220 int fd, len;
221
222 len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset);
223
224 fd = open("/proc/self/timens_offsets", O_WRONLY);
225 if (fd < 0)
226 err(EXIT_FAILURE, _("failed to open /proc/self/timens_offsets"));
227
228 if (write(fd, buf, len) != len)
229 err(EXIT_FAILURE, _("failed to write to /proc/self/timens_offsets"));
230
231 close(fd);
232}
233
99fcafdf 234static void bind_ns_files_from_child(pid_t *child, int fds[2])
c84f2590 235{
99fcafdf 236 char ch;
c84f2590
KZ
237 pid_t ppid = getpid();
238 ino_t ino = get_mnt_ino(ppid);
239
99fcafdf
YK
240 if (pipe(fds) < 0)
241 err(EXIT_FAILURE, _("pipe failed"));
242
c84f2590
KZ
243 *child = fork();
244
99fcafdf 245 switch (*child) {
c84f2590
KZ
246 case -1:
247 err(EXIT_FAILURE, _("fork failed"));
99fcafdf 248
c84f2590 249 case 0: /* child */
99fcafdf
YK
250 close(fds[1]);
251 fds[1] = -1;
252
253 /* wait for parent */
254 if (read_all(fds[0], &ch, 1) != 1 && ch != PIPE_SYNC_BYTE)
255 err(EXIT_FAILURE, _("failed to read pipe"));
256 if (get_mnt_ino(ppid) == ino)
257 exit(EXIT_FAILURE);
c84f2590
KZ
258 bind_ns_files(ppid);
259 exit(EXIT_SUCCESS);
260 break;
99fcafdf 261
c84f2590 262 default: /* parent */
99fcafdf
YK
263 close(fds[0]);
264 fds[0] = -1;
c84f2590
KZ
265 break;
266 }
267}
268
fa2cd89a 269static void __attribute__((__noreturn__)) usage(void)
4205f1fd 270{
fa2cd89a 271 FILE *out = stdout;
4205f1fd 272
6a87798a 273 fputs(USAGE_HEADER, out);
b5672517 274 fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
298dc4ff 275 program_invocation_short_name);
4205f1fd 276
451dbcfa
BS
277 fputs(USAGE_SEPARATOR, out);
278 fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
279
6a87798a 280 fputs(USAGE_OPTIONS, out);
0490a6ca
KZ
281 fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out);
282 fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out);
283 fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out);
284 fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out);
285 fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out);
286 fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out);
f9e7b66d 287 fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out);
be7df01a 288 fputs(_(" -t, --time[=<file>] unshare time namespace\n"), out);
da639217 289 fputs(USAGE_SEPARATOR, out);
6728ca10 290 fputs(_(" -f, --fork fork before launching <program>\n"), out);
4da21e37 291 fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
4175f29e 292 fputs(_(" -c, --map-current-user map current user to itself (implies --user)\n"), out);
da639217
KZ
293 fputs(USAGE_SEPARATOR, out);
294 fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n"
295 " defaults to SIGKILL\n"), out);
296 fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
297 fputs(_(" --propagation slave|shared|private|unchanged\n"
f0f22e9c 298 " modify mount propagation in mount namespace\n"), out);
da639217 299 fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
cef4decf 300 fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out);
bf8834d4 301 fputs(USAGE_SEPARATOR, out);
6671501c
AR
302 fputs(_(" -R, --root=<dir> run the command with root directory set to <dir>\n"), out);
303 fputs(_(" -w, --wd=<dir> change working directory to <dir>\n"), out);
304 fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
305 fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
be7df01a
AR
306 fputs(_(" --monotonic <offset> set clock monotonic offset (seconds) in time namespaces\n"), out);
307 fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out);
4205f1fd 308
6a87798a 309 fputs(USAGE_SEPARATOR, out);
f45f3ec3
RM
310 printf(USAGE_HELP_OPTIONS(27));
311 printf(USAGE_MAN_TAIL("unshare(1)"));
6a87798a 312
fa2cd89a 313 exit(EXIT_SUCCESS);
4205f1fd
MG
314}
315
316int main(int argc, char *argv[])
317{
6728ca10 318 enum {
fbceefde 319 OPT_MOUNTPROC = CHAR_MAX + 1,
f0f22e9c 320 OPT_PROPAGATION,
8e8f0fa5 321 OPT_SETGROUPS,
bf8834d4 322 OPT_KILLCHILD,
cef4decf 323 OPT_KEEPCAPS,
be7df01a
AR
324 OPT_MONOTONIC,
325 OPT_BOOTTIME,
6728ca10 326 };
6c7d5ae9 327 static const struct option longopts[] = {
87918040
SK
328 { "help", no_argument, NULL, 'h' },
329 { "version", no_argument, NULL, 'V' },
330
331 { "mount", optional_argument, NULL, 'm' },
332 { "uts", optional_argument, NULL, 'u' },
333 { "ipc", optional_argument, NULL, 'i' },
334 { "net", optional_argument, NULL, 'n' },
335 { "pid", optional_argument, NULL, 'p' },
336 { "user", optional_argument, NULL, 'U' },
337 { "cgroup", optional_argument, NULL, 'C' },
be7df01a 338 { "time", optional_argument, NULL, 't' },
87918040
SK
339
340 { "fork", no_argument, NULL, 'f' },
8b39a17c 341 { "kill-child", optional_argument, NULL, OPT_KILLCHILD },
87918040
SK
342 { "mount-proc", optional_argument, NULL, OPT_MOUNTPROC },
343 { "map-root-user", no_argument, NULL, 'r' },
4175f29e 344 { "map-current-user", no_argument, NULL, 'c' },
87918040
SK
345 { "propagation", required_argument, NULL, OPT_PROPAGATION },
346 { "setgroups", required_argument, NULL, OPT_SETGROUPS },
cef4decf 347 { "keep-caps", no_argument, NULL, OPT_KEEPCAPS },
f0af42b5
LV
348 { "setuid", required_argument, NULL, 'S' },
349 { "setgid", required_argument, NULL, 'G' },
bf8834d4
LV
350 { "root", required_argument, NULL, 'R' },
351 { "wd", required_argument, NULL, 'w' },
be7df01a
AR
352 { "monotonic", required_argument, NULL, OPT_MONOTONIC },
353 { "boottime", required_argument, NULL, OPT_BOOTTIME },
87918040 354 { NULL, 0, NULL, 0 }
4205f1fd
MG
355 };
356
fbceefde 357 int setgrpcmd = SETGROUPS_NONE;
4205f1fd 358 int unshare_flags = 0;
4175f29e 359 int c, forkit = 0, mapuser = MAP_USER_NONE;
8b39a17c 360 int kill_child_signo = 0; /* 0 means --kill-child was not used */
6728ca10 361 const char *procmnt = NULL;
bf8834d4
LV
362 const char *newroot = NULL;
363 const char *newdir = NULL;
c84f2590 364 pid_t pid = 0;
99fcafdf 365 int fds[2];
c84f2590 366 int status;
f0f22e9c 367 unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
f0af42b5
LV
368 int force_uid = 0, force_gid = 0;
369 uid_t uid = 0, real_euid = geteuid();
370 gid_t gid = 0, real_egid = getegid();
cef4decf 371 int keepcaps = 0;
be7df01a
AR
372 time_t monotonic = 0;
373 time_t boottime = 0;
374 int force_monotonic = 0;
375 int force_boottime = 0;
4205f1fd 376
999ac5e2 377 setlocale(LC_ALL, "");
4205f1fd
MG
378 bindtextdomain(PACKAGE, LOCALEDIR);
379 textdomain(PACKAGE);
2c308875 380 close_stdout_atexit();
4205f1fd 381
be7df01a 382 while ((c = getopt_long(argc, argv, "+fhVmuinpCtUrR:w:S:G:c", longopts, NULL)) != -1) {
2eefe517 383 switch (c) {
5088ec33
MF
384 case 'f':
385 forkit = 1;
386 break;
4205f1fd 387 case 'm':
ef6acdb8 388 unshare_flags |= CLONE_NEWNS;
0490a6ca
KZ
389 if (optarg)
390 set_ns_target(CLONE_NEWNS, optarg);
4205f1fd
MG
391 break;
392 case 'u':
ef6acdb8 393 unshare_flags |= CLONE_NEWUTS;
0490a6ca
KZ
394 if (optarg)
395 set_ns_target(CLONE_NEWUTS, optarg);
4205f1fd
MG
396 break;
397 case 'i':
ef6acdb8 398 unshare_flags |= CLONE_NEWIPC;
0490a6ca
KZ
399 if (optarg)
400 set_ns_target(CLONE_NEWIPC, optarg);
4205f1fd
MG
401 break;
402 case 'n':
ef6acdb8 403 unshare_flags |= CLONE_NEWNET;
0490a6ca
KZ
404 if (optarg)
405 set_ns_target(CLONE_NEWNET, optarg);
4205f1fd 406 break;
bc7f9b95
EB
407 case 'p':
408 unshare_flags |= CLONE_NEWPID;
0490a6ca
KZ
409 if (optarg)
410 set_ns_target(CLONE_NEWPID, optarg);
bc7f9b95
EB
411 break;
412 case 'U':
413 unshare_flags |= CLONE_NEWUSER;
0490a6ca
KZ
414 if (optarg)
415 set_ns_target(CLONE_NEWUSER, optarg);
bc7f9b95 416 break;
f9e7b66d
SH
417 case 'C':
418 unshare_flags |= CLONE_NEWCGROUP;
419 if (optarg)
420 set_ns_target(CLONE_NEWCGROUP, optarg);
421 break;
be7df01a
AR
422 case 't':
423 unshare_flags |= CLONE_NEWTIME;
424 if (optarg)
425 set_ns_target(CLONE_NEWTIME, optarg);
426 break;
6728ca10
KZ
427 case OPT_MOUNTPROC:
428 unshare_flags |= CLONE_NEWNS;
429 procmnt = optarg ? optarg : "/proc";
430 break;
4da21e37 431 case 'r':
4175f29e
JP
432 if (mapuser == MAP_USER_CURRENT)
433 errx(EXIT_FAILURE, _("options --map-root-user and "
434 "--map-current-user are mutually exclusive"));
435
436 unshare_flags |= CLONE_NEWUSER;
437 mapuser = MAP_USER_ROOT;
438 break;
439 case 'c':
440 if (mapuser == MAP_USER_ROOT)
441 errx(EXIT_FAILURE, _("options --map-root-user and "
442 "--map-current-user are mutually exclusive"));
443
4da21e37 444 unshare_flags |= CLONE_NEWUSER;
4175f29e 445 mapuser = MAP_USER_CURRENT;
4da21e37 446 break;
fbceefde
KZ
447 case OPT_SETGROUPS:
448 setgrpcmd = setgroups_str2id(optarg);
449 break;
f0f22e9c
KZ
450 case OPT_PROPAGATION:
451 propagation = parse_propagation(optarg);
452 break;
8e8f0fa5 453 case OPT_KILLCHILD:
8e8f0fa5 454 forkit = 1;
8b39a17c
NH
455 if (optarg) {
456 if ((kill_child_signo = signame_to_signum(optarg)) < 0)
457 errx(EXIT_FAILURE, _("unknown signal: %s"),
458 optarg);
459 } else {
460 kill_child_signo = SIGKILL;
461 }
8e8f0fa5 462 break;
cef4decf
JP
463 case OPT_KEEPCAPS:
464 keepcaps = 1;
465 cap_last_cap(); /* Force last cap to be cached before we fork. */
466 break;
f0af42b5
LV
467 case 'S':
468 uid = strtoul_or_err(optarg, _("failed to parse uid"));
469 force_uid = 1;
470 break;
471 case 'G':
472 gid = strtoul_or_err(optarg, _("failed to parse gid"));
473 force_gid = 1;
474 break;
bf8834d4
LV
475 case 'R':
476 newroot = optarg;
477 break;
478 case 'w':
479 newdir = optarg;
480 break;
be7df01a
AR
481 case OPT_MONOTONIC:
482 monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset"));
483 force_monotonic = 1;
484 break;
485 case OPT_BOOTTIME:
486 boottime = strtoul_or_err(optarg, _("failed to parse boottime offset"));
487 force_boottime = 1;
488 break;
2c308875
KZ
489
490 case 'h':
491 usage();
492 case 'V':
493 print_version(EXIT_SUCCESS);
4205f1fd 494 default:
677ec86c 495 errtryhelp(EXIT_FAILURE);
4205f1fd
MG
496 }
497 }
498
be7df01a
AR
499 if ((force_monotonic || force_boottime) && !(unshare_flags & CLONE_NEWTIME))
500 errx(EXIT_FAILURE, _("options --monotonic and --boottime require "
501 "unsharing of a time namespace (-t)"));
502
c84f2590 503 if (npersists && (unshare_flags & CLONE_NEWNS))
99fcafdf 504 bind_ns_files_from_child(&pid, fds);
c84f2590 505
2eefe517 506 if (-1 == unshare(unshare_flags))
4205f1fd
MG
507 err(EXIT_FAILURE, _("unshare failed"));
508
c84f2590
KZ
509 if (npersists) {
510 if (pid && (unshare_flags & CLONE_NEWNS)) {
c84f2590 511 int rc;
99fcafdf
YK
512 char ch = PIPE_SYNC_BYTE;
513
514 /* signal child we are ready */
515 write_all(fds[1], &ch, 1);
516 close(fds[1]);
517 fds[1] = -1;
c84f2590 518
99fcafdf 519 /* wait for bind_ns_files_from_child() */
c84f2590
KZ
520 do {
521 rc = waitpid(pid, &status, 0);
522 if (rc < 0) {
523 if (errno == EINTR)
524 continue;
525 err(EXIT_FAILURE, _("waitpid failed"));
526 }
527 if (WIFEXITED(status) &&
528 WEXITSTATUS(status) != EXIT_SUCCESS)
529 return WEXITSTATUS(status);
530 } while (rc < 0);
531 } else
532 /* simple way, just bind */
533 bind_ns_files(getpid());
534 }
535
be7df01a
AR
536 if (force_boottime)
537 settime(boottime, CLOCK_BOOTTIME);
538
539 if (force_monotonic)
540 settime(monotonic, CLOCK_MONOTONIC);
541
5088ec33 542 if (forkit) {
c84f2590 543 pid = fork();
5088ec33
MF
544
545 switch(pid) {
546 case -1:
547 err(EXIT_FAILURE, _("fork failed"));
548 case 0: /* child */
549 break;
550 default: /* parent */
551 if (waitpid(pid, &status, 0) == -1)
552 err(EXIT_FAILURE, _("waitpid failed"));
553 if (WIFEXITED(status))
554 return WEXITSTATUS(status);
555 else if (WIFSIGNALED(status))
556 kill(getpid(), WTERMSIG(status));
557 err(EXIT_FAILURE, _("child exit failed"));
558 }
559 }
560
525a0ab2
KZ
561 if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
562 err(EXIT_FAILURE, "prctl failed");
0490a6ca 563
4175f29e
JP
564 /* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
565 * has been disabled unless /proc/self/setgroups is written
566 * first to permanently disable the ability to call setgroups
567 * in that user namespace. */
568 switch (mapuser) {
569 case MAP_USER_ROOT:
fbceefde
KZ
570 if (setgrpcmd == SETGROUPS_ALLOW)
571 errx(EXIT_FAILURE, _("options --setgroups=allow and "
54fefa07 572 "--map-root-user are mutually exclusive"));
fbceefde 573
fbceefde 574 setgroups_control(SETGROUPS_DENY);
4da21e37
LR
575 map_id(_PATH_PROC_UIDMAP, 0, real_euid);
576 map_id(_PATH_PROC_GIDMAP, 0, real_egid);
4175f29e
JP
577 break;
578 case MAP_USER_CURRENT:
579 if (setgrpcmd == SETGROUPS_ALLOW)
580 errx(EXIT_FAILURE, _("options --setgroups=allow and "
581 "--map-current-user are mutually exclusive"));
fbceefde 582
4175f29e
JP
583 setgroups_control(SETGROUPS_DENY);
584 map_id(_PATH_PROC_UIDMAP, real_euid, real_euid);
585 map_id(_PATH_PROC_GIDMAP, real_egid, real_egid);
586 break;
587 case MAP_USER_NONE:
588 if (setgrpcmd != SETGROUPS_NONE)
589 setgroups_control(setgrpcmd);
590 }
4da21e37 591
f0f22e9c
KZ
592 if ((unshare_flags & CLONE_NEWNS) && propagation)
593 set_propagation(propagation);
594
bf8834d4
LV
595 if (newroot) {
596 if (chroot(newroot) != 0)
597 err(EXIT_FAILURE,
598 _("cannot change root directory to '%s'"), newroot);
599 newdir = newdir ?: "/";
600 }
601 if (newdir && chdir(newdir))
602 err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir);
603
604 if (procmnt) {
605 if (!newroot && mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0)
606 err(EXIT_FAILURE, _("umount %s failed"), procmnt);
607 if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0)
6728ca10 608 err(EXIT_FAILURE, _("mount %s failed"), procmnt);
bf8834d4 609 }
6728ca10 610
f0af42b5
LV
611 if (force_gid) {
612 if (setgroups(0, NULL) != 0) /* drop supplementary groups */
613 err(EXIT_FAILURE, _("setgroups failed"));
614 if (setgid(gid) < 0) /* change GID */
615 err(EXIT_FAILURE, _("setgid failed"));
616 }
617 if (force_uid && setuid(uid) < 0) /* change UID */
618 err(EXIT_FAILURE, _("setuid failed"));
619
cef4decf
JP
620 /* We use capabilities system calls to propagate the permitted
621 * capabilities into the ambient set because we have already
622 * forked so are in async-signal-safe context. */
623 if (keepcaps && (unshare_flags & CLONE_NEWUSER)) {
624 struct __user_cap_header_struct header = {
625 .version = _LINUX_CAPABILITY_VERSION_3,
626 .pid = 0,
627 };
628
629 struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = { 0 };
ac0391cc
KZ
630 int cap;
631 uint64_t effective;
cef4decf 632
ac0391cc 633 if (capget(&header, payload) < 0)
cef4decf 634 err(EXIT_FAILURE, _("capget failed"));
cef4decf
JP
635
636 /* In order the make capabilities ambient, we first need to ensure
637 * that they are all inheritable. */
638 payload[0].inheritable = payload[0].permitted;
639 payload[1].inheritable = payload[1].permitted;
640
ac0391cc 641 if (capset(&header, payload) < 0)
cef4decf 642 err(EXIT_FAILURE, _("capset failed"));
cef4decf 643
ac0391cc 644 effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective;
cef4decf 645
ac0391cc 646 for (cap = 0; cap < 64; cap++) {
cef4decf
JP
647 /* This is the same check as cap_valid(), but using
648 * the runtime value for the last valid cap. */
ac0391cc 649 if (cap > cap_last_cap())
cef4decf 650 continue;
cef4decf 651
ac0391cc
KZ
652 if ((effective & (1 << cap))
653 && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0)
cef4decf 654 err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed"));
cef4decf
JP
655 }
656 }
657
57580694
ZJS
658 if (optind < argc) {
659 execvp(argv[optind], argv + optind);
fd777151 660 errexec(argv[optind]);
57580694
ZJS
661 }
662 exec_shell();
4205f1fd 663}