]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn.c
trivial: fix typo
[thirdparty/systemd.git] / src / nspawn / nspawn.c
CommitLineData
88213476
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
88213476
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
88213476 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
88213476
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <signal.h>
23#include <sched.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/syscall.h>
27#include <sys/mount.h>
28#include <sys/wait.h>
29#include <stdlib.h>
30#include <string.h>
31#include <stdio.h>
32#include <errno.h>
33#include <sys/prctl.h>
34#include <sys/capability.h>
35#include <getopt.h>
a258bf26
LP
36#include <sys/epoll.h>
37#include <termios.h>
38#include <sys/signalfd.h>
687d0825 39#include <grp.h>
5ed27dbd 40#include <linux/fs.h>
9537eab0
LP
41#include <sys/un.h>
42#include <sys/socket.h>
88213476 43
81527be1
LP
44#include <systemd/sd-daemon.h>
45
88213476
LP
46#include "log.h"
47#include "util.h"
49e942b2 48#include "mkdir.h"
6b2d0e85 49#include "macro.h"
d7832d2c 50#include "audit.h"
94d82985 51#include "missing.h"
04d391da 52#include "cgroup-util.h"
a258bf26 53#include "strv.h"
9eb977db 54#include "path-util.h"
a41fe3a2 55#include "loopback-setup.h"
57fb9fb5 56#include "sd-id128.h"
4fc9982c 57#include "dev-setup.h"
57fb9fb5
LP
58
/* Selects how the container's journal directory is tied to the host's
 * (see setup_journal()). */
typedef enum LinkJournal {
        LINK_NO,        /* Do not touch the journal directories at all */
        LINK_AUTO,      /* Bind mount the host directory into the guest, but only if it already exists on the host */
        LINK_HOST,      /* Create the directory on the host and bind mount it into the guest */
        LINK_GUEST      /* Make the host path a symlink pointing at the guest's directory */
} LinkJournal;
88213476
LP
65
66static char *arg_directory = NULL;
687d0825 67static char *arg_user = NULL;
40c32a4a 68static char **arg_controllers = NULL;
144f0fc0 69static char *arg_uuid = NULL;
ff01d048 70static bool arg_private_network = false;
bc2f673e 71static bool arg_read_only = false;
0f0dbc46 72static bool arg_boot = false;
57fb9fb5 73static LinkJournal arg_link_journal = LINK_AUTO;
5076f0cc
LP
74static uint64_t arg_retain =
75 (1ULL << CAP_CHOWN) |
76 (1ULL << CAP_DAC_OVERRIDE) |
77 (1ULL << CAP_DAC_READ_SEARCH) |
78 (1ULL << CAP_FOWNER) |
79 (1ULL << CAP_FSETID) |
80 (1ULL << CAP_IPC_OWNER) |
81 (1ULL << CAP_KILL) |
82 (1ULL << CAP_LEASE) |
83 (1ULL << CAP_LINUX_IMMUTABLE) |
84 (1ULL << CAP_NET_BIND_SERVICE) |
85 (1ULL << CAP_NET_BROADCAST) |
86 (1ULL << CAP_NET_RAW) |
87 (1ULL << CAP_SETGID) |
88 (1ULL << CAP_SETFCAP) |
89 (1ULL << CAP_SETPCAP) |
90 (1ULL << CAP_SETUID) |
91 (1ULL << CAP_SYS_ADMIN) |
92 (1ULL << CAP_SYS_CHROOT) |
93 (1ULL << CAP_SYS_NICE) |
94 (1ULL << CAP_SYS_PTRACE) |
95 (1ULL << CAP_SYS_TTY_CONFIG) |
d87be9b0
LP
96 (1ULL << CAP_SYS_RESOURCE) |
97 (1ULL << CAP_SYS_BOOT);
88213476
LP
98
99static int help(void) {
100
101 printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
102 "Spawn a minimal namespace container for debugging, testing and building.\n\n"
57fb9fb5
LP
103 " -h --help Show this help\n"
104 " -D --directory=NAME Root directory for the container\n"
105 " -b --boot Boot up full system (i.e. invoke init)\n"
106 " -u --user=USER Run the command under specified user or uid\n"
107 " -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n"
108 " --uuid=UUID Set a specific machine UUID for the container\n"
109 " --private-network Disable network in container\n"
110 " --read-only Mount the root directory read-only\n"
111 " --capability=CAP In addition to the default, retain specified capability\n"
112 " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
113 " -j Equivalent to --link-journal=host\n",
88213476
LP
114 program_invocation_short_name);
115
116 return 0;
117}
118
119static int parse_argv(int argc, char *argv[]) {
120
a41fe3a2 121 enum {
144f0fc0 122 ARG_PRIVATE_NETWORK = 0x100,
bc2f673e 123 ARG_UUID,
5076f0cc 124 ARG_READ_ONLY,
57fb9fb5
LP
125 ARG_CAPABILITY,
126 ARG_LINK_JOURNAL
a41fe3a2
LP
127 };
128
88213476 129 static const struct option options[] = {
ff01d048
LP
130 { "help", no_argument, NULL, 'h' },
131 { "directory", required_argument, NULL, 'D' },
132 { "user", required_argument, NULL, 'u' },
40c32a4a 133 { "controllers", required_argument, NULL, 'C' },
ff01d048 134 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
0f0dbc46 135 { "boot", no_argument, NULL, 'b' },
144f0fc0 136 { "uuid", required_argument, NULL, ARG_UUID },
bc2f673e 137 { "read-only", no_argument, NULL, ARG_READ_ONLY },
5076f0cc 138 { "capability", required_argument, NULL, ARG_CAPABILITY },
57fb9fb5 139 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
ff01d048 140 { NULL, 0, NULL, 0 }
88213476
LP
141 };
142
143 int c;
144
145 assert(argc >= 0);
146 assert(argv);
147
57fb9fb5 148 while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) {
88213476
LP
149
150 switch (c) {
151
152 case 'h':
153 help();
154 return 0;
155
156 case 'D':
157 free(arg_directory);
3a74cea5
LP
158 arg_directory = canonicalize_file_name(optarg);
159 if (!arg_directory) {
160 log_error("Failed to canonicalize root directory.");
88213476
LP
161 return -ENOMEM;
162 }
163
164 break;
165
687d0825
MV
166 case 'u':
167 free(arg_user);
168 if (!(arg_user = strdup(optarg))) {
169 log_error("Failed to duplicate user name.");
170 return -ENOMEM;
171 }
172
173 break;
174
40c32a4a
LGL
175 case 'C':
176 strv_free(arg_controllers);
177 arg_controllers = strv_split(optarg, ",");
178 if (!arg_controllers) {
179 log_error("Failed to split controllers list.");
180 return -ENOMEM;
181 }
182 strv_uniq(arg_controllers);
183
184 break;
185
ff01d048
LP
186 case ARG_PRIVATE_NETWORK:
187 arg_private_network = true;
a41fe3a2
LP
188 break;
189
0f0dbc46
LP
190 case 'b':
191 arg_boot = true;
192 break;
193
144f0fc0
LP
194 case ARG_UUID:
195 arg_uuid = optarg;
196 break;
197
bc2f673e
LP
198 case ARG_READ_ONLY:
199 arg_read_only = true;
200 break;
201
5076f0cc
LP
202 case ARG_CAPABILITY: {
203 char *state, *word;
204 size_t length;
205
206 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
207 cap_value_t cap;
208 char *t;
209
210 t = strndup(word, length);
0d0f0c50
SL
211 if (!t)
212 return log_oom();
5076f0cc
LP
213
214 if (cap_from_name(t, &cap) < 0) {
215 log_error("Failed to parse capability %s.", t);
216 free(t);
217 return -EINVAL;
218 }
219
220 free(t);
221 arg_retain |= 1ULL << (uint64_t) cap;
222 }
223
224 break;
225 }
226
57fb9fb5
LP
227 case 'j':
228 arg_link_journal = LINK_GUEST;
229 break;
230
231 case ARG_LINK_JOURNAL:
232 if (streq(optarg, "auto"))
233 arg_link_journal = LINK_AUTO;
234 else if (streq(optarg, "no"))
235 arg_link_journal = LINK_NO;
236 else if (streq(optarg, "guest"))
237 arg_link_journal = LINK_GUEST;
238 else if (streq(optarg, "host"))
239 arg_link_journal = LINK_HOST;
240 else {
241 log_error("Failed to parse link journal mode %s", optarg);
242 return -EINVAL;
243 }
244
245 break;
246
88213476
LP
247 case '?':
248 return -EINVAL;
249
250 default:
251 log_error("Unknown option code %c", c);
252 return -EINVAL;
253 }
254 }
255
256 return 1;
257}
258
259static int mount_all(const char *dest) {
260
261 typedef struct MountPoint {
262 const char *what;
263 const char *where;
264 const char *type;
265 const char *options;
266 unsigned long flags;
3bd66c05 267 bool fatal;
88213476
LP
268 } MountPoint;
269
270 static const MountPoint mount_table[] = {
4b7a6af4 271 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
b4c59701
LP
272 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
273 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
e65aec12 274 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
635f7d8c 275 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
b4c59701 276 { "/dev/pts", "/dev/pts", NULL, NULL, MS_BIND, true },
ede89845 277 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
635f7d8c 278 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
9b634ea5 279#ifdef HAVE_SELINUX
b4c59701
LP
280 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
281 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
9b634ea5 282#endif
88213476
LP
283 };
284
285 unsigned k;
286 int r = 0;
287
288 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
89154bd4 289 char _cleanup_free_ *where = NULL;
88213476
LP
290 int t;
291
292 if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
0d0f0c50 293 log_oom();
88213476
LP
294
295 if (r == 0)
296 r = -ENOMEM;
297
298 break;
299 }
300
e65aec12 301 t = path_is_mount_point(where, true);
68fb0892 302 if (t < 0) {
88213476 303 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
88213476
LP
304
305 if (r == 0)
306 r = t;
307
308 continue;
309 }
310
9c1c7f71
LP
311 /* Skip this entry if it is not a remount. */
312 if (mount_table[k].what && t > 0)
014a9c77
LP
313 continue;
314
d2e54fae 315 mkdir_p_label(where, 0755);
88213476
LP
316
317 if (mount(mount_table[k].what,
318 where,
319 mount_table[k].type,
320 mount_table[k].flags,
3bd66c05
LP
321 mount_table[k].options) < 0 &&
322 mount_table[k].fatal) {
88213476
LP
323
324 log_error("mount(%s) failed: %m", where);
325
326 if (r == 0)
327 r = -errno;
328 }
88213476
LP
329 }
330
e58a1277
LP
331 return r;
332}
f8440af5 333
e58a1277 334static int setup_timezone(const char *dest) {
d4036145
LP
335 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
336 char *z, *y;
337 int r;
f8440af5 338
e58a1277
LP
339 assert(dest);
340
341 /* Fix the timezone, if possible */
d4036145
LP
342 r = readlink_malloc("/etc/localtime", &p);
343 if (r < 0) {
344 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
345 return 0;
346 }
347
348 z = path_startswith(p, "../usr/share/zoneinfo/");
349 if (!z)
350 z = path_startswith(p, "/usr/share/zoneinfo/");
351 if (!z) {
352 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
353 return 0;
354 }
355
04bc4a3f
LP
356 where = strappend(dest, "/etc/localtime");
357 if (!where)
0d0f0c50 358 return log_oom();
715ac17a 359
d4036145
LP
360 r = readlink_malloc(where, &q);
361 if (r >= 0) {
362 y = path_startswith(q, "../usr/share/zoneinfo/");
363 if (!y)
364 y = path_startswith(q, "/usr/share/zoneinfo/");
4d1c38b8 365
4d1c38b8 366
d4036145
LP
367 /* Already pointing to the right place? Then do nothing .. */
368 if (y && streq(y, z))
369 return 0;
370 }
371
372 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
373 if (!check)
0d0f0c50 374 return log_oom();
4d1c38b8 375
d4036145
LP
376 if (access(check, F_OK) < 0) {
377 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
378 return 0;
379 }
68fb0892 380
d4036145
LP
381 what = strappend("../usr/share/zoneinfo/", z);
382 if (!what)
383 return log_oom();
384
385 unlink(where);
386 if (symlink(what, where) < 0) {
387 log_error("Failed to correct timezone of container: %m");
388 return 0;
389 }
e58a1277
LP
390
391 return 0;
88213476
LP
392}
393
2547bb41
LP
394static int setup_resolv_conf(const char *dest) {
395 char *where;
396
397 assert(dest);
398
399 if (arg_private_network)
400 return 0;
401
402 /* Fix resolv.conf, if possible */
04bc4a3f
LP
403 where = strappend(dest, "/etc/resolv.conf");
404 if (!where)
0d0f0c50 405 return log_oom();
2547bb41 406
77e63faf
LP
407 /* We don't really care for the results of this really. If it
408 * fails, it fails, but meh... */
2547bb41
LP
409 if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0)
410 mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
411
412 free(where);
413
414 return 0;
415}
416
04bc4a3f 417static int setup_boot_id(const char *dest) {
ed8b7a3e 418 char _cleanup_free_ *from = NULL, *to = NULL;
04bc4a3f
LP
419 sd_id128_t rnd;
420 char as_uuid[37];
421 int r;
422
423 assert(dest);
424
425 /* Generate a new randomized boot ID, so that each boot-up of
426 * the container gets a new one */
427
428 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
04bc4a3f 429 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
ed8b7a3e
ZJS
430 if (!from || !to)
431 return log_oom();
04bc4a3f
LP
432
433 r = sd_id128_randomize(&rnd);
434 if (r < 0) {
435 log_error("Failed to generate random boot id: %s", strerror(-r));
ed8b7a3e 436 return r;
04bc4a3f
LP
437 }
438
439 snprintf(as_uuid, sizeof(as_uuid),
440 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
441 SD_ID128_FORMAT_VAL(rnd));
442 char_array_0(as_uuid);
443
444 r = write_one_line_file(from, as_uuid);
445 if (r < 0) {
446 log_error("Failed to write boot id: %s", strerror(-r));
ed8b7a3e 447 return r;
04bc4a3f
LP
448 }
449
450 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
451 log_error("Failed to bind mount boot id: %m");
452 r = -errno;
453 } else
454 mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
455
456 unlink(from);
04bc4a3f
LP
457 return r;
458}
459
e58a1277 460static int copy_devnodes(const char *dest) {
88213476
LP
461
462 static const char devnodes[] =
463 "null\0"
464 "zero\0"
465 "full\0"
466 "random\0"
467 "urandom\0"
468 "tty\0"
3eabccc4 469 "ptmx\0";
88213476
LP
470
471 const char *d;
e58a1277 472 int r = 0;
25ea79fe 473 mode_t _cleanup_umask_ u;
a258bf26
LP
474
475 assert(dest);
124640f1
LP
476
477 u = umask(0000);
88213476
LP
478
479 NULSTR_FOREACH(d, devnodes) {
e58a1277 480 struct stat st;
ed8b7a3e 481 char _cleanup_free_ *from = NULL, *to = NULL;
88213476
LP
482
483 asprintf(&from, "/dev/%s", d);
484 asprintf(&to, "%s/dev/%s", dest, d);
485
486 if (!from || !to) {
ed8b7a3e 487 log_oom();
a258bf26 488
88213476
LP
489 if (r == 0)
490 r = -ENOMEM;
491
492 break;
493 }
494
495 if (stat(from, &st) < 0) {
496
497 if (errno != ENOENT) {
498 log_error("Failed to stat %s: %m", from);
88213476
LP
499 if (r == 0)
500 r = -errno;
501 }
502
a258bf26 503 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
88213476 504
ed8b7a3e 505 log_error("%s is not a char or block device, cannot copy", from);
a258bf26
LP
506 if (r == 0)
507 r = -EIO;
508
509 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
510
511 log_error("mknod(%s) failed: %m", dest);
512 if (r == 0)
513 r = -errno;
88213476 514 }
88213476
LP
515 }
516
e58a1277
LP
517 return r;
518}
88213476 519
e58a1277
LP
520static int setup_dev_console(const char *dest, const char *console) {
521 struct stat st;
ed8b7a3e 522 char _cleanup_free_ *to = NULL;
e58a1277 523 int r;
25ea79fe 524 mode_t _cleanup_umask_ u;
e58a1277
LP
525
526 assert(dest);
527 assert(console);
528
529 u = umask(0000);
530
531 if (stat(console, &st) < 0) {
532 log_error("Failed to stat %s: %m", console);
25ea79fe 533 return -errno;
88213476 534
a258bf26 535 } else if (!S_ISCHR(st.st_mode)) {
25ea79fe
ZJS
536 log_error("/dev/console is not a char device");
537 return -EIO;
e58a1277 538 }
88213476 539
e58a1277
LP
540 r = chmod_and_chown(console, 0600, 0, 0);
541 if (r < 0) {
542 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
25ea79fe 543 return r;
a258bf26 544 }
88213476 545
25ea79fe
ZJS
546 if (asprintf(&to, "%s/dev/console", dest) < 0)
547 return log_oom();
88213476 548
a258bf26
LP
549 /* We need to bind mount the right tty to /dev/console since
550 * ptys can only exist on pts file systems. To have something
551 * to bind mount things on we create a device node first, that
552 * has the right major/minor (note that the major minor
553 * doesn't actually matter here, since we mount it over
554 * anyway). */
555
e58a1277
LP
556 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
557 log_error("mknod() for /dev/console failed: %m");
25ea79fe 558 return -errno;
e58a1277 559 }
a258bf26
LP
560
561 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
e58a1277 562 log_error("Bind mount for /dev/console failed: %m");
25ea79fe 563 return -errno;
a258bf26
LP
564 }
565
25ea79fe 566 return 0;
e58a1277
LP
567}
568
569static int setup_kmsg(const char *dest, int kmsg_socket) {
ed8b7a3e 570 char _cleanup_free_ *from = NULL, *to = NULL;
e58a1277 571 int r, fd, k;
25ea79fe 572 mode_t _cleanup_umask_ u;
e58a1277
LP
573 union {
574 struct cmsghdr cmsghdr;
575 uint8_t buf[CMSG_SPACE(sizeof(int))];
576 } control;
577 struct msghdr mh;
578 struct cmsghdr *cmsg;
579
580 assert(dest);
581 assert(kmsg_socket >= 0);
a258bf26 582
e58a1277 583 u = umask(0000);
a258bf26 584
f1e5dfe2
LP
585 /* We create the kmsg FIFO as /dev/kmsg, but immediately
586 * delete it after bind mounting it to /proc/kmsg. While FIFOs
587 * on the reading side behave very similar to /proc/kmsg,
588 * their writing side behaves differently from /dev/kmsg in
589 * that writing blocks when nothing is reading. In order to
590 * avoid any problems with containers deadlocking due to this
591 * we simply make /dev/kmsg unavailable to the container. */
25ea79fe
ZJS
592 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
593 asprintf(&to, "%s/proc/kmsg", dest) < 0)
594 return log_oom();
e58a1277
LP
595
596 if (mkfifo(from, 0600) < 0) {
597 log_error("mkfifo() for /dev/kmsg failed: %m");
25ea79fe 598 return -errno;
e58a1277
LP
599 }
600
601 r = chmod_and_chown(from, 0600, 0, 0);
602 if (r < 0) {
603 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
25ea79fe 604 return r;
e58a1277
LP
605 }
606
607 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
608 log_error("Bind mount for /proc/kmsg failed: %m");
25ea79fe 609 return -errno;
e58a1277
LP
610 }
611
612 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
613 if (fd < 0) {
614 log_error("Failed to open fifo: %m");
25ea79fe 615 return -errno;
e58a1277
LP
616 }
617
618 zero(mh);
619 zero(control);
620
621 mh.msg_control = &control;
622 mh.msg_controllen = sizeof(control);
623
624 cmsg = CMSG_FIRSTHDR(&mh);
625 cmsg->cmsg_level = SOL_SOCKET;
626 cmsg->cmsg_type = SCM_RIGHTS;
627 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
628 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
629
630 mh.msg_controllen = cmsg->cmsg_len;
631
632 /* Store away the fd in the socket, so that it stays open as
633 * long as we run the child */
634 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
635 close_nointr_nofail(fd);
636
637 if (k < 0) {
638 log_error("Failed to send FIFO fd: %m");
25ea79fe 639 return -errno;
a258bf26
LP
640 }
641
f1e5dfe2
LP
642 /* And now make the FIFO unavailable as /dev/kmsg... */
643 unlink(from);
25ea79fe 644 return 0;
88213476
LP
645}
646
3a74cea5
LP
647static int setup_hostname(void) {
648 char *hn;
649 int r = 0;
650
9eb977db 651 hn = path_get_file_name(arg_directory);
3a74cea5
LP
652 if (hn) {
653 hn = strdup(hn);
654 if (!hn)
655 return -ENOMEM;
656
657 hostname_cleanup(hn);
658
659 if (!isempty(hn))
660 if (sethostname(hn, strlen(hn)) < 0)
661 r = -errno;
662
663 free(hn);
664 }
665
666 return r;
667}
668
57fb9fb5
LP
669static int setup_journal(const char *directory) {
670 sd_id128_t machine_id;
671 char *p = NULL, *b = NULL, *l, *q = NULL, *d = NULL;
672 int r;
673
674 if (arg_link_journal == LINK_NO)
675 return 0;
676
677 p = strappend(directory, "/etc/machine-id");
678 if (!p) {
0d0f0c50 679 r = log_oom();
57fb9fb5
LP
680 goto finish;
681 }
682
683 r = read_one_line_file(p, &b);
684 if (r == -ENOENT && arg_link_journal == LINK_AUTO) {
685 r = 0;
686 goto finish;
687 } else if (r < 0) {
688 log_error("Failed to read machine ID: %s", strerror(-r));
689 return r;
690 }
691
692 l = strstrip(b);
693 if (isempty(l) && arg_link_journal == LINK_AUTO) {
694 r = 0;
695 goto finish;
696 }
697
698 /* Verify validaty */
699 r = sd_id128_from_string(l, &machine_id);
700 if (r < 0) {
701 log_error("Failed to parse machine ID: %s", strerror(-r));
702 goto finish;
703 }
704
705 free(p);
706 p = strappend("/var/log/journal/", l);
707 q = strjoin(directory, "/var/log/journal/", l, NULL);
708 if (!p || !q) {
0d0f0c50 709 r = log_oom();
57fb9fb5
LP
710 goto finish;
711 }
712
713 if (path_is_mount_point(p, false) > 0 ||
714 path_is_mount_point(q, false) > 0) {
715 if (arg_link_journal != LINK_AUTO) {
716 log_error("Journal already a mount point, refusing.");
717 r = -EEXIST;
718 goto finish;
719 }
720
721 r = 0;
722 goto finish;
723 }
724
725 r = readlink_and_make_absolute(p, &d);
726 if (r >= 0) {
727 if ((arg_link_journal == LINK_GUEST ||
728 arg_link_journal == LINK_AUTO) &&
729 path_equal(d, q)) {
730
731 mkdir_p(q, 0755);
732
733 r = 0;
734 goto finish;
735 }
736
737 if (unlink(p) < 0) {
738 log_error("Failed to remove symlink %s: %m", p);
739 r = -errno;
740 goto finish;
741 }
742 } else if (r == -EINVAL) {
743
744 if (arg_link_journal == LINK_GUEST &&
745 rmdir(p) < 0) {
746
747 if (errno == ENOTDIR)
748 log_error("%s already exists and is neither symlink nor directory.", p);
749 else {
750 log_error("Failed to remove %s: %m", p);
751 r = -errno;
752 }
753
754 goto finish;
755 }
756 } else if (r != -ENOENT) {
757 log_error("readlink(%s) failed: %m", p);
758 goto finish;
759 }
760
761 if (arg_link_journal == LINK_GUEST) {
762
763 if (symlink(q, p) < 0) {
764 log_error("Failed to symlink %s to %s: %m", q, p);
765 r = -errno;
766 goto finish;
767 }
768
769 mkdir_p(q, 0755);
770
771 r = 0;
772 goto finish;
773 }
774
775 if (arg_link_journal == LINK_HOST) {
776 r = mkdir_p(p, 0755);
777 if (r < 0) {
778 log_error("Failed to create %s: %m", p);
779 goto finish;
780 }
781
782 } else if (access(p, F_OK) < 0) {
783 r = 0;
784 goto finish;
785 }
786
787 if (dir_is_empty(q) == 0) {
788 log_error("%s not empty.", q);
789 r = -ENOTEMPTY;
790 goto finish;
791 }
792
793 r = mkdir_p(q, 0755);
794 if (r < 0) {
795 log_error("Failed to create %s: %m", q);
796 goto finish;
797 }
798
799 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
800 log_error("Failed to bind mount journal from host into guest: %m");
801 r = -errno;
802 goto finish;
803 }
804
805 r = 0;
806
807finish:
808 free(p);
809 free(q);
810 free(d);
811 free(b);
812 return r;
813
814}
815
88213476 816static int drop_capabilities(void) {
5076f0cc 817 return capability_bounding_set_drop(~arg_retain, false);
88213476
LP
818}
819
/* Checks whether path looks like the root of an OS tree.
 *
 * Returns 1 if path/bin/sh exists, 0 if not, -ENOMEM if the file name could
 * not be allocated. */
static int is_os_tree(const char *path) {
        char *shell = NULL;
        int found;

        /* We use /bin/sh as flag file if something is an OS */

        if (asprintf(&shell, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(shell, F_OK) >= 0;
        free(shell);

        return found ? 1 : 0;
}
833
a258bf26 834static int process_pty(int master, sigset_t *mask) {
0c749d50 835
b72491a2 836 char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
a258bf26
LP
837 size_t in_buffer_full = 0, out_buffer_full = 0;
838 struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
839 bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
a258bf26
LP
840 int ep = -1, signal_fd = -1, r;
841
842 fd_nonblock(STDIN_FILENO, 1);
843 fd_nonblock(STDOUT_FILENO, 1);
844 fd_nonblock(master, 1);
845
db7feb7e
LP
846 signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
847 if (signal_fd < 0) {
a258bf26
LP
848 log_error("signalfd(): %m");
849 r = -errno;
850 goto finish;
851 }
852
db7feb7e
LP
853 ep = epoll_create1(EPOLL_CLOEXEC);
854 if (ep < 0) {
a258bf26
LP
855 log_error("Failed to create epoll: %m");
856 r = -errno;
857 goto finish;
858 }
859
860 zero(stdin_ev);
861 stdin_ev.events = EPOLLIN|EPOLLET;
862 stdin_ev.data.fd = STDIN_FILENO;
863
864 zero(stdout_ev);
865 stdout_ev.events = EPOLLOUT|EPOLLET;
866 stdout_ev.data.fd = STDOUT_FILENO;
867
868 zero(master_ev);
869 master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
870 master_ev.data.fd = master;
871
872 zero(signal_ev);
873 signal_ev.events = EPOLLIN;
874 signal_ev.data.fd = signal_fd;
875
876 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
877 epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
878 epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
879 epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
880 log_error("Failed to regiser fds in epoll: %m");
881 r = -errno;
882 goto finish;
883 }
884
fd14078a 885 for (;;) {
a258bf26
LP
886 struct epoll_event ev[16];
887 ssize_t k;
888 int i, nfds;
889
db7feb7e
LP
890 nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
891 if (nfds < 0) {
a258bf26
LP
892
893 if (errno == EINTR || errno == EAGAIN)
894 continue;
895
896 log_error("epoll_wait(): %m");
897 r = -errno;
898 goto finish;
899 }
900
901 assert(nfds >= 1);
902
903 for (i = 0; i < nfds; i++) {
904 if (ev[i].data.fd == STDIN_FILENO) {
905
fd14078a 906 if (ev[i].events & (EPOLLIN|EPOLLHUP))
a258bf26
LP
907 stdin_readable = true;
908
909 } else if (ev[i].data.fd == STDOUT_FILENO) {
910
fd14078a 911 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
a258bf26
LP
912 stdout_writable = true;
913
914 } else if (ev[i].data.fd == master) {
915
fd14078a 916 if (ev[i].events & (EPOLLIN|EPOLLHUP))
a258bf26
LP
917 master_readable = true;
918
fd14078a 919 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
a258bf26
LP
920 master_writable = true;
921
922 } else if (ev[i].data.fd == signal_fd) {
923 struct signalfd_siginfo sfsi;
924 ssize_t n;
925
db7feb7e
LP
926 n = read(signal_fd, &sfsi, sizeof(sfsi));
927 if (n != sizeof(sfsi)) {
a258bf26
LP
928
929 if (n >= 0) {
0c749d50 930 log_error("Failed to read from signalfd: invalid block size");
a258bf26
LP
931 r = -EIO;
932 goto finish;
933 }
934
935 if (errno != EINTR && errno != EAGAIN) {
0c749d50 936 log_error("Failed to read from signalfd: %m");
a258bf26
LP
937 r = -errno;
938 goto finish;
939 }
940 } else {
941
942 if (sfsi.ssi_signo == SIGWINCH) {
943 struct winsize ws;
944
945 /* The window size changed, let's forward that. */
a258bf26
LP
946 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
947 ioctl(master, TIOCSWINSZ, &ws);
948 } else {
0c749d50 949 r = 0;
a258bf26
LP
950 goto finish;
951 }
952 }
953 }
954 }
955
956 while ((stdin_readable && in_buffer_full <= 0) ||
957 (master_writable && in_buffer_full > 0) ||
958 (master_readable && out_buffer_full <= 0) ||
959 (stdout_writable && out_buffer_full > 0)) {
960
b72491a2 961 if (stdin_readable && in_buffer_full < LINE_MAX) {
a258bf26 962
db7feb7e
LP
963 k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
964 if (k < 0) {
a258bf26 965
fd14078a 966 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 967 stdin_readable = false;
a258bf26
LP
968 else {
969 log_error("read(): %m");
0c749d50 970 r = -errno;
a258bf26
LP
971 goto finish;
972 }
973 } else
974 in_buffer_full += (size_t) k;
a258bf26
LP
975 }
976
977 if (master_writable && in_buffer_full > 0) {
978
db7feb7e
LP
979 k = write(master, in_buffer, in_buffer_full);
980 if (k < 0) {
a258bf26 981
fd14078a 982 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 983 master_writable = false;
fd14078a 984 else {
a258bf26 985 log_error("write(): %m");
0c749d50 986 r = -errno;
a258bf26
LP
987 goto finish;
988 }
989
990 } else {
991 assert(in_buffer_full >= (size_t) k);
992 memmove(in_buffer, in_buffer + k, in_buffer_full - k);
993 in_buffer_full -= k;
994 }
995 }
996
b72491a2 997 if (master_readable && out_buffer_full < LINE_MAX) {
a258bf26 998
db7feb7e
LP
999 k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
1000 if (k < 0) {
a258bf26 1001
fd14078a 1002 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 1003 master_readable = false;
a258bf26
LP
1004 else {
1005 log_error("read(): %m");
0c749d50 1006 r = -errno;
a258bf26
LP
1007 goto finish;
1008 }
1009 } else
1010 out_buffer_full += (size_t) k;
a258bf26
LP
1011 }
1012
1013 if (stdout_writable && out_buffer_full > 0) {
1014
db7feb7e
LP
1015 k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
1016 if (k < 0) {
a258bf26 1017
fd14078a 1018 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 1019 stdout_writable = false;
fd14078a 1020 else {
a258bf26 1021 log_error("write(): %m");
0c749d50 1022 r = -errno;
a258bf26
LP
1023 goto finish;
1024 }
1025
1026 } else {
1027 assert(out_buffer_full >= (size_t) k);
1028 memmove(out_buffer, out_buffer + k, out_buffer_full - k);
1029 out_buffer_full -= k;
1030 }
1031 }
1032 }
fd14078a 1033 }
a258bf26
LP
1034
1035finish:
1036 if (ep >= 0)
1037 close_nointr_nofail(ep);
1038
1039 if (signal_fd >= 0)
1040 close_nointr_nofail(signal_fd);
1041
1042 return r;
1043}
88213476
LP
1044
1045int main(int argc, char *argv[]) {
1046 pid_t pid = 0;
04d391da
LP
1047 int r = EXIT_FAILURE, k;
1048 char *oldcg = NULL, *newcg = NULL;
40c32a4a 1049 char **controller = NULL;
a258bf26
LP
1050 int master = -1;
1051 const char *console = NULL;
1052 struct termios saved_attr, raw_attr;
1053 sigset_t mask;
1054 bool saved_attr_valid = false;
1055 struct winsize ws;
e58a1277 1056 int kmsg_socket_pair[2] = { -1, -1 };
88213476
LP
1057
1058 log_parse_environment();
1059 log_open();
1060
db7feb7e
LP
1061 r = parse_argv(argc, argv);
1062 if (r <= 0)
88213476
LP
1063 goto finish;
1064
1065 if (arg_directory) {
1066 char *p;
1067
1068 p = path_make_absolute_cwd(arg_directory);
1069 free(arg_directory);
1070 arg_directory = p;
1071 } else
1072 arg_directory = get_current_dir_name();
1073
1074 if (!arg_directory) {
1075 log_error("Failed to determine path");
1076 goto finish;
1077 }
1078
1079 path_kill_slashes(arg_directory);
1080
1081 if (geteuid() != 0) {
1082 log_error("Need to be root.");
1083 goto finish;
1084 }
1085
04d391da
LP
1086 if (sd_booted() <= 0) {
1087 log_error("Not running on a systemd system.");
1088 goto finish;
1089 }
1090
88213476 1091 if (path_equal(arg_directory, "/")) {
6df6b939 1092 log_error("Spawning container on root directory not supported.");
88213476
LP
1093 goto finish;
1094 }
1095
1096 if (is_os_tree(arg_directory) <= 0) {
1097 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
1098 goto finish;
1099 }
1100
db7feb7e
LP
1101 k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg);
1102 if (k < 0) {
04d391da
LP
1103 log_error("Failed to determine current cgroup: %s", strerror(-k));
1104 goto finish;
1105 }
1106
1107 if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
1108 log_error("Failed to allocate cgroup path.");
1109 goto finish;
1110 }
1111
40c32a4a
LGL
1112 k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0);
1113 if (k < 0) {
04d391da
LP
1114 log_error("Failed to create cgroup: %s", strerror(-k));
1115 goto finish;
1116 }
1117
db7feb7e 1118 STRV_FOREACH(controller, arg_controllers) {
40c32a4a
LGL
1119 k = cg_create_and_attach(*controller, newcg, 0);
1120 if (k < 0)
1121 log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
1122 }
1123
db7feb7e
LP
1124 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1125 if (master < 0) {
a258bf26
LP
1126 log_error("Failed to acquire pseudo tty: %m");
1127 goto finish;
1128 }
1129
db7feb7e
LP
1130 console = ptsname(master);
1131 if (!console) {
a258bf26
LP
1132 log_error("Failed to determine tty name: %m");
1133 goto finish;
1134 }
1135
1136 log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
1137
1138 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
1139 ioctl(master, TIOCSWINSZ, &ws);
1140
1141 if (unlockpt(master) < 0) {
1142 log_error("Failed to unlock tty: %m");
1143 goto finish;
1144 }
1145
1146 if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) {
1147 log_error("Failed to get terminal attributes: %m");
1148 goto finish;
1149 }
1150
1151 saved_attr_valid = true;
1152
1153 raw_attr = saved_attr;
1154 cfmakeraw(&raw_attr);
1155 raw_attr.c_lflag &= ~ECHO;
1156
e58a1277
LP
1157 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1158 log_error("Failed to create kmsg socket pair");
1159 goto finish;
1160 }
1161
a258bf26
LP
1162 assert_se(sigemptyset(&mask) == 0);
1163 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1164 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1165
d87be9b0
LP
1166 for (;;) {
1167 siginfo_t status;
52af2106 1168
d87be9b0
LP
1169 if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
1170 log_error("Failed to set terminal attributes: %m");
1171 goto finish;
1172 }
88213476 1173
d87be9b0
LP
1174 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1175 if (pid < 0) {
1176 if (errno == EINVAL)
1177 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1178 else
1179 log_error("clone() failed: %m");
a258bf26 1180
d87be9b0
LP
1181 goto finish;
1182 }
a258bf26 1183
d87be9b0
LP
1184 if (pid == 0) {
1185 /* child */
a258bf26 1186
d87be9b0
LP
1187 const char *home = NULL;
1188 uid_t uid = (uid_t) -1;
1189 gid_t gid = (gid_t) -1;
1190 const char *envp[] = {
1191 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1192 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1193 NULL, /* TERM */
1194 NULL, /* HOME */
1195 NULL, /* USER */
1196 NULL, /* LOGNAME */
1197 NULL, /* container_uuid */
1198 NULL
1199 };
a258bf26 1200
d87be9b0 1201 envp[2] = strv_find_prefix(environ, "TERM=");
a258bf26 1202
d87be9b0 1203 close_nointr_nofail(master);
a258bf26 1204
d87be9b0
LP
1205 close_nointr(STDIN_FILENO);
1206 close_nointr(STDOUT_FILENO);
1207 close_nointr(STDERR_FILENO);
db7feb7e 1208
d87be9b0 1209 close_all_fds(&kmsg_socket_pair[1], 1);
a258bf26 1210
d87be9b0 1211 reset_all_signal_handlers();
88213476 1212
d87be9b0
LP
1213 assert_se(sigemptyset(&mask) == 0);
1214 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
f5c1b9ee 1215
d87be9b0
LP
1216 if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
1217 dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1218 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
1219 goto child_fail;
bc2f673e 1220
d87be9b0
LP
1221 if (setsid() < 0) {
1222 log_error("setsid() failed: %m");
bc2f673e
LP
1223 goto child_fail;
1224 }
1225
d87be9b0
LP
1226 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1227 log_error("PR_SET_PDEATHSIG failed: %m");
1228 goto child_fail;
1229 }
e58a1277 1230
d87be9b0
LP
1231 /* Mark everything as slave, so that we still
1232 * receive mounts from the real root, but don't
1233 * propagate mounts to the real root. */
1234 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1235 log_error("MS_SLAVE|MS_REC failed: %m");
1236 goto child_fail;
1237 }
04bc4a3f 1238
d87be9b0
LP
1239 /* Turn directory into bind mount */
1240 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1241 log_error("Failed to make bind mount.");
1242 goto child_fail;
1243 }
88213476 1244
d87be9b0
LP
1245 if (arg_read_only)
1246 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1247 log_error("Failed to make read-only.");
1248 goto child_fail;
1249 }
2547bb41 1250
d87be9b0
LP
1251 if (mount_all(arg_directory) < 0)
1252 goto child_fail;
57fb9fb5 1253
d87be9b0
LP
1254 if (copy_devnodes(arg_directory) < 0)
1255 goto child_fail;
a258bf26 1256
d87be9b0 1257 dev_setup(arg_directory);
88213476 1258
d87be9b0
LP
1259 if (setup_dev_console(arg_directory, console) < 0)
1260 goto child_fail;
88213476 1261
d87be9b0
LP
1262 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1263 goto child_fail;
88213476 1264
d87be9b0 1265 close_nointr_nofail(kmsg_socket_pair[1]);
a258bf26 1266
d87be9b0
LP
1267 if (setup_boot_id(arg_directory) < 0)
1268 goto child_fail;
a41fe3a2 1269
d87be9b0
LP
1270 if (setup_timezone(arg_directory) < 0)
1271 goto child_fail;
88213476 1272
d87be9b0
LP
1273 if (setup_resolv_conf(arg_directory) < 0)
1274 goto child_fail;
687d0825 1275
d87be9b0 1276 if (setup_journal(arg_directory) < 0)
687d0825 1277 goto child_fail;
687d0825 1278
d87be9b0
LP
1279 if (chdir(arg_directory) < 0) {
1280 log_error("chdir(%s) failed: %m", arg_directory);
687d0825
MV
1281 goto child_fail;
1282 }
1283
d87be9b0
LP
1284 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1285 log_error("mount(MS_MOVE) failed: %m");
687d0825
MV
1286 goto child_fail;
1287 }
1288
d87be9b0
LP
1289 if (chroot(".") < 0) {
1290 log_error("chroot() failed: %m");
687d0825
MV
1291 goto child_fail;
1292 }
1293
d87be9b0
LP
1294 if (chdir("/") < 0) {
1295 log_error("chdir() failed: %m");
687d0825
MV
1296 goto child_fail;
1297 }
1298
d87be9b0
LP
1299 umask(0022);
1300
1301 loopback_setup();
1302
1303 if (drop_capabilities() < 0) {
1304 log_error("drop_capabilities() failed: %m");
687d0825
MV
1305 goto child_fail;
1306 }
687d0825 1307
d87be9b0
LP
1308 if (arg_user) {
1309
963ddb91
LP
1310 /* Note that this resolves user names
1311 * inside the container, and hence
1312 * accesses the NSS modules from the
1313 * container and not the host. This is
1314 * a bit weird... */
1315
d87be9b0
LP
1316 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1317 log_error("get_user_creds() failed: %m");
1318 goto child_fail;
1319 }
1320
1321 if (mkdir_parents_label(home, 0775) < 0) {
1322 log_error("mkdir_parents_label() failed: %m");
1323 goto child_fail;
1324 }
1325
1326 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1327 log_error("mkdir_safe_label() failed: %m");
1328 goto child_fail;
1329 }
1330
1331 if (initgroups((const char*)arg_user, gid) < 0) {
1332 log_error("initgroups() failed: %m");
1333 goto child_fail;
1334 }
144f0fc0 1335
d87be9b0
LP
1336 if (setresgid(gid, gid, gid) < 0) {
1337 log_error("setregid() failed: %m");
1338 goto child_fail;
1339 }
1340
1341 if (setresuid(uid, uid, uid) < 0) {
1342 log_error("setreuid() failed: %m");
1343 goto child_fail;
1344 }
1345 }
1346
1347 if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
1348 (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1349 (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
0d0f0c50 1350 log_oom();
144f0fc0
LP
1351 goto child_fail;
1352 }
687d0825 1353
d87be9b0
LP
1354 if (arg_uuid) {
1355 if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
1356 log_oom();
1357 goto child_fail;
1358 }
1359 }
1360
1361 setup_hostname();
1362
1363 if (arg_boot) {
1364 char **a;
1365 size_t l;
88213476 1366
d87be9b0 1367 /* Automatically search for the init system */
0f0dbc46 1368
d87be9b0
LP
1369 l = 1 + argc - optind;
1370 a = newa(char*, l + 1);
1371 memcpy(a + 1, argv + optind, l * sizeof(char*));
0f0dbc46 1372
d87be9b0
LP
1373 a[0] = (char*) "/usr/lib/systemd/systemd";
1374 execve(a[0], a, (char**) envp);
0f0dbc46 1375
d87be9b0
LP
1376 a[0] = (char*) "/lib/systemd/systemd";
1377 execve(a[0], a, (char**) envp);
0f0dbc46 1378
d87be9b0
LP
1379 a[0] = (char*) "/sbin/init";
1380 execve(a[0], a, (char**) envp);
1381 } else if (argc > optind)
1382 execvpe(argv[optind], argv + optind, (char**) envp);
1383 else {
1384 chdir(home ? home : "/root");
1385 execle("/bin/bash", "-bash", NULL, (char**) envp);
1386 }
1387
1388 log_error("execv() failed: %m");
0f0dbc46 1389
d87be9b0
LP
1390 child_fail:
1391 _exit(EXIT_FAILURE);
da5b3bad 1392 }
88213476 1393
d87be9b0
LP
1394 if (process_pty(master, &mask) < 0)
1395 goto finish;
88213476 1396
88213476 1397
d87be9b0
LP
1398 if (saved_attr_valid)
1399 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
a258bf26 1400
d87be9b0
LP
1401 r = wait_for_terminate(pid, &status);
1402 if (r < 0) {
1403 r = EXIT_FAILURE;
1404 break;
1405 }
a258bf26 1406
d87be9b0
LP
1407 if (status.si_code == CLD_EXITED) {
1408 if (status.si_status != 0) {
1409 log_error("Container failed with error code %i.", status.si_status);
1410 r = status.si_status;
1411 break;
1412 }
1413
1414 log_debug("Container exited successfully.");
1415 break;
1416 } else if (status.si_code == CLD_KILLED &&
1417 status.si_status == SIGINT) {
1418 log_info("Container has been shut down.");
1419 r = 0;
1420 break;
1421 } else if (status.si_code == CLD_KILLED &&
1422 status.si_status == SIGHUP) {
1423 log_info("Container is being rebooted.");
1424 continue;
1425 } else if (status.si_code == CLD_KILLED ||
1426 status.si_code == CLD_DUMPED) {
88213476 1427
d87be9b0
LP
1428 log_error("Container terminated by signal %s.", signal_to_string(status.si_status));
1429 r = EXIT_FAILURE;
1430 break;
1431 } else {
1432 log_error("Container failed due to unknown reason.");
1433 r = EXIT_FAILURE;
1434 break;
1435 }
1436 }
88213476
LP
1437
1438finish:
a258bf26
LP
1439 if (saved_attr_valid)
1440 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1441
1442 if (master >= 0)
1443 close_nointr_nofail(master);
1444
e58a1277
LP
1445 close_pipe(kmsg_socket_pair);
1446
04d391da
LP
1447 if (oldcg)
1448 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
1449
1450 if (newcg)
1451 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
88213476 1452
04d391da 1453 free(arg_directory);
40c32a4a 1454 strv_free(arg_controllers);
04d391da
LP
1455 free(oldcg);
1456 free(newcg);
88213476
LP
1457
1458 return r;
1459}