]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn.c
nspawn: add --version
[thirdparty/systemd.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <sys/epoll.h>
37 #include <termios.h>
38 #include <sys/signalfd.h>
39 #include <grp.h>
40 #include <linux/fs.h>
41 #include <sys/un.h>
42 #include <sys/socket.h>
43
44 #include <systemd/sd-daemon.h>
45
46 #include "log.h"
47 #include "util.h"
48 #include "mkdir.h"
49 #include "macro.h"
50 #include "audit.h"
51 #include "missing.h"
52 #include "cgroup-util.h"
53 #include "strv.h"
54 #include "path-util.h"
55 #include "loopback-setup.h"
56 #include "sd-id128.h"
57 #include "dev-setup.h"
58 #include "fdset.h"
59 #include "build.h"
60
/* Selects how the container's journal directory is tied to the host's
 * (chosen with --link-journal= or -j). */
typedef enum LinkJournal {
        LINK_NO    = 0, /* --link-journal=no */
        LINK_AUTO  = 1, /* --link-journal=auto */
        LINK_HOST  = 2, /* --link-journal=host */
        LINK_GUEST = 3  /* --link-journal=guest */
} LinkJournal;
67
68 static char *arg_directory = NULL;
69 static char *arg_user = NULL;
70 static char **arg_controllers = NULL;
71 static char *arg_uuid = NULL;
72 static bool arg_private_network = false;
73 static bool arg_read_only = false;
74 static bool arg_boot = false;
75 static LinkJournal arg_link_journal = LINK_AUTO;
76 static uint64_t arg_retain =
77 (1ULL << CAP_CHOWN) |
78 (1ULL << CAP_DAC_OVERRIDE) |
79 (1ULL << CAP_DAC_READ_SEARCH) |
80 (1ULL << CAP_FOWNER) |
81 (1ULL << CAP_FSETID) |
82 (1ULL << CAP_IPC_OWNER) |
83 (1ULL << CAP_KILL) |
84 (1ULL << CAP_LEASE) |
85 (1ULL << CAP_LINUX_IMMUTABLE) |
86 (1ULL << CAP_NET_BIND_SERVICE) |
87 (1ULL << CAP_NET_BROADCAST) |
88 (1ULL << CAP_NET_RAW) |
89 (1ULL << CAP_SETGID) |
90 (1ULL << CAP_SETFCAP) |
91 (1ULL << CAP_SETPCAP) |
92 (1ULL << CAP_SETUID) |
93 (1ULL << CAP_SYS_ADMIN) |
94 (1ULL << CAP_SYS_CHROOT) |
95 (1ULL << CAP_SYS_NICE) |
96 (1ULL << CAP_SYS_PTRACE) |
97 (1ULL << CAP_SYS_TTY_CONFIG) |
98 (1ULL << CAP_SYS_RESOURCE) |
99 (1ULL << CAP_SYS_BOOT);
100
101 static int help(void) {
102
103 printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
104 "Spawn a minimal namespace container for debugging, testing and building.\n\n"
105 " -h --help Show this help\n"
106 " --version Print version string\n"
107 " -D --directory=NAME Root directory for the container\n"
108 " -b --boot Boot up full system (i.e. invoke init)\n"
109 " -u --user=USER Run the command under specified user or uid\n"
110 " -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n"
111 " --uuid=UUID Set a specific machine UUID for the container\n"
112 " --private-network Disable network in container\n"
113 " --read-only Mount the root directory read-only\n"
114 " --capability=CAP In addition to the default, retain specified capability\n"
115 " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
116 " -j Equivalent to --link-journal=host\n",
117 program_invocation_short_name);
118
119 return 0;
120 }
121
122 static int parse_argv(int argc, char *argv[]) {
123
124 enum {
125 ARG_VERSION = 0x100,
126 ARG_PRIVATE_NETWORK,
127 ARG_UUID,
128 ARG_READ_ONLY,
129 ARG_CAPABILITY,
130 ARG_LINK_JOURNAL
131 };
132
133 static const struct option options[] = {
134 { "help", no_argument, NULL, 'h' },
135 { "version", no_argument, NULL, ARG_VERSION },
136 { "directory", required_argument, NULL, 'D' },
137 { "user", required_argument, NULL, 'u' },
138 { "controllers", required_argument, NULL, 'C' },
139 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
140 { "boot", no_argument, NULL, 'b' },
141 { "uuid", required_argument, NULL, ARG_UUID },
142 { "read-only", no_argument, NULL, ARG_READ_ONLY },
143 { "capability", required_argument, NULL, ARG_CAPABILITY },
144 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
145 { NULL, 0, NULL, 0 }
146 };
147
148 int c;
149
150 assert(argc >= 0);
151 assert(argv);
152
153 while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) {
154
155 switch (c) {
156
157 case 'h':
158 help();
159 return 0;
160
161 case ARG_VERSION:
162 puts(PACKAGE_STRING);
163 puts(SYSTEMD_FEATURES);
164 return 0;
165
166 case 'D':
167 free(arg_directory);
168 arg_directory = canonicalize_file_name(optarg);
169 if (!arg_directory) {
170 log_error("Failed to canonicalize root directory.");
171 return -ENOMEM;
172 }
173
174 break;
175
176 case 'u':
177 free(arg_user);
178 if (!(arg_user = strdup(optarg))) {
179 log_error("Failed to duplicate user name.");
180 return -ENOMEM;
181 }
182
183 break;
184
185 case 'C':
186 strv_free(arg_controllers);
187 arg_controllers = strv_split(optarg, ",");
188 if (!arg_controllers) {
189 log_error("Failed to split controllers list.");
190 return -ENOMEM;
191 }
192 strv_uniq(arg_controllers);
193
194 break;
195
196 case ARG_PRIVATE_NETWORK:
197 arg_private_network = true;
198 break;
199
200 case 'b':
201 arg_boot = true;
202 break;
203
204 case ARG_UUID:
205 arg_uuid = optarg;
206 break;
207
208 case ARG_READ_ONLY:
209 arg_read_only = true;
210 break;
211
212 case ARG_CAPABILITY: {
213 char *state, *word;
214 size_t length;
215
216 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
217 cap_value_t cap;
218 char *t;
219
220 t = strndup(word, length);
221 if (!t)
222 return log_oom();
223
224 if (cap_from_name(t, &cap) < 0) {
225 log_error("Failed to parse capability %s.", t);
226 free(t);
227 return -EINVAL;
228 }
229
230 free(t);
231 arg_retain |= 1ULL << (uint64_t) cap;
232 }
233
234 break;
235 }
236
237 case 'j':
238 arg_link_journal = LINK_GUEST;
239 break;
240
241 case ARG_LINK_JOURNAL:
242 if (streq(optarg, "auto"))
243 arg_link_journal = LINK_AUTO;
244 else if (streq(optarg, "no"))
245 arg_link_journal = LINK_NO;
246 else if (streq(optarg, "guest"))
247 arg_link_journal = LINK_GUEST;
248 else if (streq(optarg, "host"))
249 arg_link_journal = LINK_HOST;
250 else {
251 log_error("Failed to parse link journal mode %s", optarg);
252 return -EINVAL;
253 }
254
255 break;
256
257 case '?':
258 return -EINVAL;
259
260 default:
261 log_error("Unknown option code %c", c);
262 return -EINVAL;
263 }
264 }
265
266 return 1;
267 }
268
269 static int mount_all(const char *dest) {
270
271 typedef struct MountPoint {
272 const char *what;
273 const char *where;
274 const char *type;
275 const char *options;
276 unsigned long flags;
277 bool fatal;
278 } MountPoint;
279
280 static const MountPoint mount_table[] = {
281 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
282 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
283 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
284 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
285 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
286 { "/dev/pts", "/dev/pts", NULL, NULL, MS_BIND, true },
287 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
288 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
289 #ifdef HAVE_SELINUX
290 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
291 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
292 #endif
293 };
294
295 unsigned k;
296 int r = 0;
297
298 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
299 char _cleanup_free_ *where = NULL;
300 int t;
301
302 if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
303 log_oom();
304
305 if (r == 0)
306 r = -ENOMEM;
307
308 break;
309 }
310
311 t = path_is_mount_point(where, true);
312 if (t < 0) {
313 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
314
315 if (r == 0)
316 r = t;
317
318 continue;
319 }
320
321 /* Skip this entry if it is not a remount. */
322 if (mount_table[k].what && t > 0)
323 continue;
324
325 mkdir_p_label(where, 0755);
326
327 if (mount(mount_table[k].what,
328 where,
329 mount_table[k].type,
330 mount_table[k].flags,
331 mount_table[k].options) < 0 &&
332 mount_table[k].fatal) {
333
334 log_error("mount(%s) failed: %m", where);
335
336 if (r == 0)
337 r = -errno;
338 }
339 }
340
341 return r;
342 }
343
344 static int setup_timezone(const char *dest) {
345 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
346 char *z, *y;
347 int r;
348
349 assert(dest);
350
351 /* Fix the timezone, if possible */
352 r = readlink_malloc("/etc/localtime", &p);
353 if (r < 0) {
354 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
355 return 0;
356 }
357
358 z = path_startswith(p, "../usr/share/zoneinfo/");
359 if (!z)
360 z = path_startswith(p, "/usr/share/zoneinfo/");
361 if (!z) {
362 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
363 return 0;
364 }
365
366 where = strappend(dest, "/etc/localtime");
367 if (!where)
368 return log_oom();
369
370 r = readlink_malloc(where, &q);
371 if (r >= 0) {
372 y = path_startswith(q, "../usr/share/zoneinfo/");
373 if (!y)
374 y = path_startswith(q, "/usr/share/zoneinfo/");
375
376
377 /* Already pointing to the right place? Then do nothing .. */
378 if (y && streq(y, z))
379 return 0;
380 }
381
382 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
383 if (!check)
384 return log_oom();
385
386 if (access(check, F_OK) < 0) {
387 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
388 return 0;
389 }
390
391 what = strappend("../usr/share/zoneinfo/", z);
392 if (!what)
393 return log_oom();
394
395 unlink(where);
396 if (symlink(what, where) < 0) {
397 log_error("Failed to correct timezone of container: %m");
398 return 0;
399 }
400
401 return 0;
402 }
403
404 static int setup_resolv_conf(const char *dest) {
405 char *where;
406
407 assert(dest);
408
409 if (arg_private_network)
410 return 0;
411
412 /* Fix resolv.conf, if possible */
413 where = strappend(dest, "/etc/resolv.conf");
414 if (!where)
415 return log_oom();
416
417 /* We don't really care for the results of this really. If it
418 * fails, it fails, but meh... */
419 if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0)
420 mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
421
422 free(where);
423
424 return 0;
425 }
426
427 static int setup_boot_id(const char *dest) {
428 char _cleanup_free_ *from = NULL, *to = NULL;
429 sd_id128_t rnd;
430 char as_uuid[37];
431 int r;
432
433 assert(dest);
434
435 /* Generate a new randomized boot ID, so that each boot-up of
436 * the container gets a new one */
437
438 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
439 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
440 if (!from || !to)
441 return log_oom();
442
443 r = sd_id128_randomize(&rnd);
444 if (r < 0) {
445 log_error("Failed to generate random boot id: %s", strerror(-r));
446 return r;
447 }
448
449 snprintf(as_uuid, sizeof(as_uuid),
450 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
451 SD_ID128_FORMAT_VAL(rnd));
452 char_array_0(as_uuid);
453
454 r = write_one_line_file(from, as_uuid);
455 if (r < 0) {
456 log_error("Failed to write boot id: %s", strerror(-r));
457 return r;
458 }
459
460 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
461 log_error("Failed to bind mount boot id: %m");
462 r = -errno;
463 } else
464 mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
465
466 unlink(from);
467 return r;
468 }
469
470 static int copy_devnodes(const char *dest) {
471
472 static const char devnodes[] =
473 "null\0"
474 "zero\0"
475 "full\0"
476 "random\0"
477 "urandom\0"
478 "tty\0"
479 "ptmx\0";
480
481 const char *d;
482 int r = 0;
483 mode_t _cleanup_umask_ u;
484
485 assert(dest);
486
487 u = umask(0000);
488
489 NULSTR_FOREACH(d, devnodes) {
490 struct stat st;
491 char _cleanup_free_ *from = NULL, *to = NULL;
492
493 asprintf(&from, "/dev/%s", d);
494 asprintf(&to, "%s/dev/%s", dest, d);
495
496 if (!from || !to) {
497 log_oom();
498
499 if (r == 0)
500 r = -ENOMEM;
501
502 break;
503 }
504
505 if (stat(from, &st) < 0) {
506
507 if (errno != ENOENT) {
508 log_error("Failed to stat %s: %m", from);
509 if (r == 0)
510 r = -errno;
511 }
512
513 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
514
515 log_error("%s is not a char or block device, cannot copy", from);
516 if (r == 0)
517 r = -EIO;
518
519 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
520
521 log_error("mknod(%s) failed: %m", dest);
522 if (r == 0)
523 r = -errno;
524 }
525 }
526
527 return r;
528 }
529
530 static int setup_dev_console(const char *dest, const char *console) {
531 struct stat st;
532 char _cleanup_free_ *to = NULL;
533 int r;
534 mode_t _cleanup_umask_ u;
535
536 assert(dest);
537 assert(console);
538
539 u = umask(0000);
540
541 if (stat(console, &st) < 0) {
542 log_error("Failed to stat %s: %m", console);
543 return -errno;
544
545 } else if (!S_ISCHR(st.st_mode)) {
546 log_error("/dev/console is not a char device");
547 return -EIO;
548 }
549
550 r = chmod_and_chown(console, 0600, 0, 0);
551 if (r < 0) {
552 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
553 return r;
554 }
555
556 if (asprintf(&to, "%s/dev/console", dest) < 0)
557 return log_oom();
558
559 /* We need to bind mount the right tty to /dev/console since
560 * ptys can only exist on pts file systems. To have something
561 * to bind mount things on we create a device node first, that
562 * has the right major/minor (note that the major minor
563 * doesn't actually matter here, since we mount it over
564 * anyway). */
565
566 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
567 log_error("mknod() for /dev/console failed: %m");
568 return -errno;
569 }
570
571 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
572 log_error("Bind mount for /dev/console failed: %m");
573 return -errno;
574 }
575
576 return 0;
577 }
578
579 static int setup_kmsg(const char *dest, int kmsg_socket) {
580 char _cleanup_free_ *from = NULL, *to = NULL;
581 int r, fd, k;
582 mode_t _cleanup_umask_ u;
583 union {
584 struct cmsghdr cmsghdr;
585 uint8_t buf[CMSG_SPACE(sizeof(int))];
586 } control;
587 struct msghdr mh;
588 struct cmsghdr *cmsg;
589
590 assert(dest);
591 assert(kmsg_socket >= 0);
592
593 u = umask(0000);
594
595 /* We create the kmsg FIFO as /dev/kmsg, but immediately
596 * delete it after bind mounting it to /proc/kmsg. While FIFOs
597 * on the reading side behave very similar to /proc/kmsg,
598 * their writing side behaves differently from /dev/kmsg in
599 * that writing blocks when nothing is reading. In order to
600 * avoid any problems with containers deadlocking due to this
601 * we simply make /dev/kmsg unavailable to the container. */
602 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
603 asprintf(&to, "%s/proc/kmsg", dest) < 0)
604 return log_oom();
605
606 if (mkfifo(from, 0600) < 0) {
607 log_error("mkfifo() for /dev/kmsg failed: %m");
608 return -errno;
609 }
610
611 r = chmod_and_chown(from, 0600, 0, 0);
612 if (r < 0) {
613 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
614 return r;
615 }
616
617 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
618 log_error("Bind mount for /proc/kmsg failed: %m");
619 return -errno;
620 }
621
622 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
623 if (fd < 0) {
624 log_error("Failed to open fifo: %m");
625 return -errno;
626 }
627
628 zero(mh);
629 zero(control);
630
631 mh.msg_control = &control;
632 mh.msg_controllen = sizeof(control);
633
634 cmsg = CMSG_FIRSTHDR(&mh);
635 cmsg->cmsg_level = SOL_SOCKET;
636 cmsg->cmsg_type = SCM_RIGHTS;
637 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
638 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
639
640 mh.msg_controllen = cmsg->cmsg_len;
641
642 /* Store away the fd in the socket, so that it stays open as
643 * long as we run the child */
644 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
645 close_nointr_nofail(fd);
646
647 if (k < 0) {
648 log_error("Failed to send FIFO fd: %m");
649 return -errno;
650 }
651
652 /* And now make the FIFO unavailable as /dev/kmsg... */
653 unlink(from);
654 return 0;
655 }
656
657 static int setup_hostname(void) {
658 char *hn;
659 int r = 0;
660
661 hn = path_get_file_name(arg_directory);
662 if (hn) {
663 hn = strdup(hn);
664 if (!hn)
665 return -ENOMEM;
666
667 hostname_cleanup(hn);
668
669 if (!isempty(hn))
670 if (sethostname(hn, strlen(hn)) < 0)
671 r = -errno;
672
673 free(hn);
674 }
675
676 return r;
677 }
678
679 static int setup_journal(const char *directory) {
680 sd_id128_t machine_id;
681 char _cleanup_free_ *p = NULL, *b = NULL, *q = NULL, *d = NULL;
682 char *id;
683 int r;
684
685 if (arg_link_journal == LINK_NO)
686 return 0;
687
688 p = strappend(directory, "/etc/machine-id");
689 if (!p)
690 return log_oom();
691
692 r = read_one_line_file(p, &b);
693 if (r == -ENOENT && arg_link_journal == LINK_AUTO)
694 return 0;
695 else if (r < 0) {
696 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
697 return r;
698 }
699
700 id = strstrip(b);
701 if (isempty(id) && arg_link_journal == LINK_AUTO)
702 return 0;
703
704 /* Verify validity */
705 r = sd_id128_from_string(id, &machine_id);
706 if (r < 0) {
707 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
708 return r;
709 }
710
711 free(p);
712 p = strappend("/var/log/journal/", id);
713 q = strjoin(directory, "/var/log/journal/", id, NULL);
714 if (!p || !q)
715 return log_oom();
716
717 if (path_is_mount_point(p, false) > 0) {
718 if (arg_link_journal != LINK_AUTO) {
719 log_error("%s: already a mount point, refusing to use for journal", p);
720 return -EEXIST;
721 }
722
723 return 0;
724 }
725
726 if (path_is_mount_point(q, false) > 0) {
727 if (arg_link_journal != LINK_AUTO) {
728 log_error("%s: already a mount point, refusing to use for journal", q);
729 return -EEXIST;
730 }
731
732 return 0;
733 }
734
735 r = readlink_and_make_absolute(p, &d);
736 if (r >= 0) {
737 if ((arg_link_journal == LINK_GUEST ||
738 arg_link_journal == LINK_AUTO) &&
739 path_equal(d, q)) {
740
741 r = mkdir_p(q, 0755);
742 if (r < 0)
743 log_warning("failed to create directory %s: %m", q);
744 return 0;
745 }
746
747 if (unlink(p) < 0) {
748 log_error("Failed to remove symlink %s: %m", p);
749 return -errno;
750 }
751 } else if (r == -EINVAL) {
752
753 if (arg_link_journal == LINK_GUEST &&
754 rmdir(p) < 0) {
755
756 if (errno == ENOTDIR) {
757 log_error("%s already exists and is neither a symlink nor a directory", p);
758 return r;
759 } else {
760 log_error("Failed to remove %s: %m", p);
761 return -errno;
762 }
763 }
764 } else if (r != -ENOENT) {
765 log_error("readlink(%s) failed: %m", p);
766 return r;
767 }
768
769 if (arg_link_journal == LINK_GUEST) {
770
771 if (symlink(q, p) < 0) {
772 log_error("Failed to symlink %s to %s: %m", q, p);
773 return -errno;
774 }
775
776 r = mkdir_p(q, 0755);
777 if (r < 0)
778 log_warning("failed to create directory %s: %m", q);
779 return 0;
780 }
781
782 if (arg_link_journal == LINK_HOST) {
783 r = mkdir_p(p, 0755);
784 if (r < 0) {
785 log_error("Failed to create %s: %m", p);
786 return r;
787 }
788
789 } else if (access(p, F_OK) < 0)
790 return 0;
791
792 if (dir_is_empty(q) == 0) {
793 log_error("%s not empty.", q);
794 return -ENOTEMPTY;
795 }
796
797 r = mkdir_p(q, 0755);
798 if (r < 0) {
799 log_error("Failed to create %s: %m", q);
800 return r;
801 }
802
803 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
804 log_error("Failed to bind mount journal from host into guest: %m");
805 return -errno;
806 }
807
808 return 0;
809 }
810
811 static int drop_capabilities(void) {
812 return capability_bounding_set_drop(~arg_retain, false);
813 }
814
/*
 * Checks whether "path" looks like the root of an OS tree; the existence
 * of "<path>/bin/sh" serves as the flag for that.
 *
 * Returns 1 if it exists, 0 if it does not (or cannot be accessed), and
 * -ENOMEM if the path to probe could not be allocated.
 */
static int is_os_tree(const char *path) {
        char *probe;
        int found;

        if (asprintf(&probe, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(probe, F_OK) >= 0;
        free(probe);

        return found ? 1 : 0;
}
828
829 static int process_pty(int master, pid_t pid, sigset_t *mask) {
830
831 char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
832 size_t in_buffer_full = 0, out_buffer_full = 0;
833 struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
834 bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
835 int ep = -1, signal_fd = -1, r;
836 bool tried_orderly_shutdown = false;
837
838 assert(master >= 0);
839 assert(pid > 0);
840 assert(mask);
841
842 fd_nonblock(STDIN_FILENO, 1);
843 fd_nonblock(STDOUT_FILENO, 1);
844 fd_nonblock(master, 1);
845
846 signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
847 if (signal_fd < 0) {
848 log_error("signalfd(): %m");
849 r = -errno;
850 goto finish;
851 }
852
853 ep = epoll_create1(EPOLL_CLOEXEC);
854 if (ep < 0) {
855 log_error("Failed to create epoll: %m");
856 r = -errno;
857 goto finish;
858 }
859
860 /* We read from STDIN only if this is actually a TTY,
861 * otherwise we assume non-interactivity. */
862 if (isatty(STDIN_FILENO)) {
863 zero(stdin_ev);
864 stdin_ev.events = EPOLLIN|EPOLLET;
865 stdin_ev.data.fd = STDIN_FILENO;
866
867 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0) {
868 log_error("Failed to register STDIN in epoll: %m");
869 r = -errno;
870 goto finish;
871 }
872 }
873
874 zero(stdout_ev);
875 stdout_ev.events = EPOLLOUT|EPOLLET;
876 stdout_ev.data.fd = STDOUT_FILENO;
877
878 zero(master_ev);
879 master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
880 master_ev.data.fd = master;
881
882 zero(signal_ev);
883 signal_ev.events = EPOLLIN;
884 signal_ev.data.fd = signal_fd;
885
886 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
887 epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
888 epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
889 log_error("Failed to register fds in epoll: %m");
890 r = -errno;
891 goto finish;
892 }
893
894 for (;;) {
895 struct epoll_event ev[16];
896 ssize_t k;
897 int i, nfds;
898
899 nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
900 if (nfds < 0) {
901
902 if (errno == EINTR || errno == EAGAIN)
903 continue;
904
905 log_error("epoll_wait(): %m");
906 r = -errno;
907 goto finish;
908 }
909
910 assert(nfds >= 1);
911
912 for (i = 0; i < nfds; i++) {
913 if (ev[i].data.fd == STDIN_FILENO) {
914
915 if (ev[i].events & (EPOLLIN|EPOLLHUP))
916 stdin_readable = true;
917
918 } else if (ev[i].data.fd == STDOUT_FILENO) {
919
920 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
921 stdout_writable = true;
922
923 } else if (ev[i].data.fd == master) {
924
925 if (ev[i].events & (EPOLLIN|EPOLLHUP))
926 master_readable = true;
927
928 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
929 master_writable = true;
930
931 } else if (ev[i].data.fd == signal_fd) {
932 struct signalfd_siginfo sfsi;
933 ssize_t n;
934
935 n = read(signal_fd, &sfsi, sizeof(sfsi));
936 if (n != sizeof(sfsi)) {
937
938 if (n >= 0) {
939 log_error("Failed to read from signalfd: invalid block size");
940 r = -EIO;
941 goto finish;
942 }
943
944 if (errno != EINTR && errno != EAGAIN) {
945 log_error("Failed to read from signalfd: %m");
946 r = -errno;
947 goto finish;
948 }
949 } else {
950
951 if (sfsi.ssi_signo == SIGWINCH) {
952 struct winsize ws;
953
954 /* The window size changed, let's forward that. */
955 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
956 ioctl(master, TIOCSWINSZ, &ws);
957 } else if (sfsi.ssi_signo == SIGTERM && arg_boot && !tried_orderly_shutdown) {
958
959 log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
960
961 /* This only works for systemd... */
962 tried_orderly_shutdown = true;
963 kill(pid, SIGRTMIN+3);
964
965 } else {
966 r = 0;
967 goto finish;
968 }
969 }
970 }
971 }
972
973 while ((stdin_readable && in_buffer_full <= 0) ||
974 (master_writable && in_buffer_full > 0) ||
975 (master_readable && out_buffer_full <= 0) ||
976 (stdout_writable && out_buffer_full > 0)) {
977
978 if (stdin_readable && in_buffer_full < LINE_MAX) {
979
980 k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
981 if (k < 0) {
982
983 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
984 stdin_readable = false;
985 else {
986 log_error("read(): %m");
987 r = -errno;
988 goto finish;
989 }
990 } else
991 in_buffer_full += (size_t) k;
992 }
993
994 if (master_writable && in_buffer_full > 0) {
995
996 k = write(master, in_buffer, in_buffer_full);
997 if (k < 0) {
998
999 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1000 master_writable = false;
1001 else {
1002 log_error("write(): %m");
1003 r = -errno;
1004 goto finish;
1005 }
1006
1007 } else {
1008 assert(in_buffer_full >= (size_t) k);
1009 memmove(in_buffer, in_buffer + k, in_buffer_full - k);
1010 in_buffer_full -= k;
1011 }
1012 }
1013
1014 if (master_readable && out_buffer_full < LINE_MAX) {
1015
1016 k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
1017 if (k < 0) {
1018
1019 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1020 master_readable = false;
1021 else {
1022 log_error("read(): %m");
1023 r = -errno;
1024 goto finish;
1025 }
1026 } else
1027 out_buffer_full += (size_t) k;
1028 }
1029
1030 if (stdout_writable && out_buffer_full > 0) {
1031
1032 k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
1033 if (k < 0) {
1034
1035 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1036 stdout_writable = false;
1037 else {
1038 log_error("write(): %m");
1039 r = -errno;
1040 goto finish;
1041 }
1042
1043 } else {
1044 assert(out_buffer_full >= (size_t) k);
1045 memmove(out_buffer, out_buffer + k, out_buffer_full - k);
1046 out_buffer_full -= k;
1047 }
1048 }
1049 }
1050 }
1051
1052 finish:
1053 if (ep >= 0)
1054 close_nointr_nofail(ep);
1055
1056 if (signal_fd >= 0)
1057 close_nointr_nofail(signal_fd);
1058
1059 return r;
1060 }
1061
1062 int main(int argc, char *argv[]) {
1063 pid_t pid = 0;
1064 int r = EXIT_FAILURE, k;
1065 char *oldcg = NULL, *newcg = NULL;
1066 char **controller = NULL;
1067 int master = -1, n_fd_passed;
1068 const char *console = NULL;
1069 struct termios saved_attr, raw_attr;
1070 sigset_t mask;
1071 bool saved_attr_valid = false;
1072 struct winsize ws;
1073 int kmsg_socket_pair[2] = { -1, -1 };
1074 FDSet *fds = NULL;
1075
1076 log_parse_environment();
1077 log_open();
1078
1079 r = parse_argv(argc, argv);
1080 if (r <= 0)
1081 goto finish;
1082
1083 if (arg_directory) {
1084 char *p;
1085
1086 p = path_make_absolute_cwd(arg_directory);
1087 free(arg_directory);
1088 arg_directory = p;
1089 } else
1090 arg_directory = get_current_dir_name();
1091
1092 if (!arg_directory) {
1093 log_error("Failed to determine path");
1094 goto finish;
1095 }
1096
1097 path_kill_slashes(arg_directory);
1098
1099 if (geteuid() != 0) {
1100 log_error("Need to be root.");
1101 goto finish;
1102 }
1103
1104 if (sd_booted() <= 0) {
1105 log_error("Not running on a systemd system.");
1106 goto finish;
1107 }
1108
1109 if (path_equal(arg_directory, "/")) {
1110 log_error("Spawning container on root directory not supported.");
1111 goto finish;
1112 }
1113
1114 if (is_os_tree(arg_directory) <= 0) {
1115 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
1116 goto finish;
1117 }
1118
1119 log_close();
1120 n_fd_passed = sd_listen_fds(false);
1121 if (n_fd_passed > 0) {
1122 k = fdset_new_listen_fds(&fds, false);
1123 if (k < 0) {
1124 log_error("Failed to collect file descriptors: %s", strerror(-k));
1125 goto finish;
1126 }
1127 }
1128 fdset_close_others(fds);
1129 log_open();
1130
1131 k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg);
1132 if (k < 0) {
1133 log_error("Failed to determine current cgroup: %s", strerror(-k));
1134 goto finish;
1135 }
1136
1137 if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
1138 log_error("Failed to allocate cgroup path.");
1139 goto finish;
1140 }
1141
1142 k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0);
1143 if (k < 0) {
1144 log_error("Failed to create cgroup: %s", strerror(-k));
1145 goto finish;
1146 }
1147
1148 STRV_FOREACH(controller, arg_controllers) {
1149 k = cg_create_and_attach(*controller, newcg, 0);
1150 if (k < 0)
1151 log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
1152 }
1153
1154 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1155 if (master < 0) {
1156 log_error("Failed to acquire pseudo tty: %m");
1157 goto finish;
1158 }
1159
1160 console = ptsname(master);
1161 if (!console) {
1162 log_error("Failed to determine tty name: %m");
1163 goto finish;
1164 }
1165
1166 log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
1167
1168 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
1169 ioctl(master, TIOCSWINSZ, &ws);
1170
1171 if (unlockpt(master) < 0) {
1172 log_error("Failed to unlock tty: %m");
1173 goto finish;
1174 }
1175
1176 if (tcgetattr(STDIN_FILENO, &saved_attr) >= 0) {
1177 saved_attr_valid = true;
1178
1179 raw_attr = saved_attr;
1180 cfmakeraw(&raw_attr);
1181 raw_attr.c_lflag &= ~ECHO;
1182 }
1183
1184 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1185 log_error("Failed to create kmsg socket pair");
1186 goto finish;
1187 }
1188
1189 assert_se(sigemptyset(&mask) == 0);
1190 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1191 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1192
1193 for (;;) {
1194 siginfo_t status;
1195
1196 if (saved_attr_valid) {
1197 if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
1198 log_error("Failed to set terminal attributes: %m");
1199 goto finish;
1200 }
1201 }
1202
1203 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1204 if (pid < 0) {
1205 if (errno == EINVAL)
1206 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1207 else
1208 log_error("clone() failed: %m");
1209
1210 goto finish;
1211 }
1212
1213 if (pid == 0) {
1214 /* child */
1215
1216 const char *home = NULL;
1217 uid_t uid = (uid_t) -1;
1218 gid_t gid = (gid_t) -1;
1219 unsigned n_env = 0;
1220 const char *envp[] = {
1221 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1222 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1223 NULL, /* TERM */
1224 NULL, /* HOME */
1225 NULL, /* USER */
1226 NULL, /* LOGNAME */
1227 NULL, /* container_uuid */
1228 NULL, /* LISTEN_FDS */
1229 NULL, /* LISTEN_PID */
1230 NULL
1231 };
1232
1233 envp[2] = strv_find_prefix(environ, "TERM=");
1234 n_env = 3;
1235
1236 close_nointr_nofail(master);
1237 master = -1;
1238
1239 close_nointr(STDIN_FILENO);
1240 close_nointr(STDOUT_FILENO);
1241 close_nointr(STDERR_FILENO);
1242
1243 close_nointr_nofail(kmsg_socket_pair[0]);
1244 kmsg_socket_pair[0] = -1;
1245
1246 reset_all_signal_handlers();
1247
1248 assert_se(sigemptyset(&mask) == 0);
1249 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1250
1251 k = open_terminal(console, O_RDWR);
1252 if (k != STDIN_FILENO) {
1253 if (k >= 0) {
1254 close_nointr_nofail(k);
1255 k = -EINVAL;
1256 }
1257
1258 log_error("Failed to open console: %s", strerror(-k));
1259 goto child_fail;
1260 }
1261
1262 if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1263 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1264 log_error("Failed to duplicate console: %m");
1265 goto child_fail;
1266 }
1267
1268 if (setsid() < 0) {
1269 log_error("setsid() failed: %m");
1270 goto child_fail;
1271 }
1272
1273 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1274 log_error("PR_SET_PDEATHSIG failed: %m");
1275 goto child_fail;
1276 }
1277
1278 /* Mark everything as slave, so that we still
1279 * receive mounts from the real root, but don't
1280 * propagate mounts to the real root. */
1281 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1282 log_error("MS_SLAVE|MS_REC failed: %m");
1283 goto child_fail;
1284 }
1285
1286 /* Turn directory into bind mount */
1287 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1288 log_error("Failed to make bind mount.");
1289 goto child_fail;
1290 }
1291
1292 if (arg_read_only)
1293 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1294 log_error("Failed to make read-only.");
1295 goto child_fail;
1296 }
1297
1298 if (mount_all(arg_directory) < 0)
1299 goto child_fail;
1300
1301 if (copy_devnodes(arg_directory) < 0)
1302 goto child_fail;
1303
1304 dev_setup(arg_directory);
1305
1306 if (setup_dev_console(arg_directory, console) < 0)
1307 goto child_fail;
1308
1309 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1310 goto child_fail;
1311
1312 close_nointr_nofail(kmsg_socket_pair[1]);
1313 kmsg_socket_pair[1] = -1;
1314
1315 if (setup_boot_id(arg_directory) < 0)
1316 goto child_fail;
1317
1318 if (setup_timezone(arg_directory) < 0)
1319 goto child_fail;
1320
1321 if (setup_resolv_conf(arg_directory) < 0)
1322 goto child_fail;
1323
1324 if (setup_journal(arg_directory) < 0)
1325 goto child_fail;
1326
1327 if (chdir(arg_directory) < 0) {
1328 log_error("chdir(%s) failed: %m", arg_directory);
1329 goto child_fail;
1330 }
1331
1332 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1333 log_error("mount(MS_MOVE) failed: %m");
1334 goto child_fail;
1335 }
1336
1337 if (chroot(".") < 0) {
1338 log_error("chroot() failed: %m");
1339 goto child_fail;
1340 }
1341
1342 if (chdir("/") < 0) {
1343 log_error("chdir() failed: %m");
1344 goto child_fail;
1345 }
1346
1347 umask(0022);
1348
1349 loopback_setup();
1350
1351 if (drop_capabilities() < 0) {
1352 log_error("drop_capabilities() failed: %m");
1353 goto child_fail;
1354 }
1355
1356 if (arg_user) {
1357
1358 /* Note that this resolves user names
1359 * inside the container, and hence
1360 * accesses the NSS modules from the
1361 * container and not the host. This is
1362 * a bit weird... */
1363
1364 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1365 log_error("get_user_creds() failed: %m");
1366 goto child_fail;
1367 }
1368
1369 if (mkdir_parents_label(home, 0775) < 0) {
1370 log_error("mkdir_parents_label() failed: %m");
1371 goto child_fail;
1372 }
1373
1374 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1375 log_error("mkdir_safe_label() failed: %m");
1376 goto child_fail;
1377 }
1378
1379 if (initgroups((const char*)arg_user, gid) < 0) {
1380 log_error("initgroups() failed: %m");
1381 goto child_fail;
1382 }
1383
1384 if (setresgid(gid, gid, gid) < 0) {
1385 log_error("setregid() failed: %m");
1386 goto child_fail;
1387 }
1388
1389 if (setresuid(uid, uid, uid) < 0) {
1390 log_error("setreuid() failed: %m");
1391 goto child_fail;
1392 }
1393 } else {
1394 /* Reset everything fully to 0, just in case */
1395
1396 if (setgroups(0, NULL) < 0) {
1397 log_error("setgroups() failed: %m");
1398 goto child_fail;
1399 }
1400
1401 if (setresgid(0, 0, 0) < 0) {
1402 log_error("setregid() failed: %m");
1403 goto child_fail;
1404 }
1405
1406 if (setresuid(0, 0, 0) < 0) {
1407 log_error("setreuid() failed: %m");
1408 goto child_fail;
1409 }
1410 }
1411
1412 if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1413 (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1414 (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1415 log_oom();
1416 goto child_fail;
1417 }
1418
1419 if (arg_uuid) {
1420 if (asprintf((char**)(envp + n_env++), "container_uuid=%s", arg_uuid) < 0) {
1421 log_oom();
1422 goto child_fail;
1423 }
1424 }
1425
1426 if (fdset_size(fds) > 0) {
1427 k = fdset_cloexec(fds, false);
1428 if (k < 0) {
1429 log_error("Failed to unset O_CLOEXEC for file descriptors.");
1430 goto child_fail;
1431 }
1432
1433 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1434 (asprintf((char **)(envp + n_env++), "LISTEN_PID=%lu", (unsigned long) getpid()) < 0)) {
1435 log_oom();
1436 goto child_fail;
1437 }
1438 }
1439
1440 setup_hostname();
1441
1442 if (arg_boot) {
1443 char **a;
1444 size_t l;
1445
1446 /* Automatically search for the init system */
1447
1448 l = 1 + argc - optind;
1449 a = newa(char*, l + 1);
1450 memcpy(a + 1, argv + optind, l * sizeof(char*));
1451
1452 a[0] = (char*) "/usr/lib/systemd/systemd";
1453 execve(a[0], a, (char**) envp);
1454
1455 a[0] = (char*) "/lib/systemd/systemd";
1456 execve(a[0], a, (char**) envp);
1457
1458 a[0] = (char*) "/sbin/init";
1459 execve(a[0], a, (char**) envp);
1460 } else if (argc > optind)
1461 execvpe(argv[optind], argv + optind, (char**) envp);
1462 else {
1463 chdir(home ? home : "/root");
1464 execle("/bin/bash", "-bash", NULL, (char**) envp);
1465 }
1466
1467 log_error("execv() failed: %m");
1468
1469 child_fail:
1470 _exit(EXIT_FAILURE);
1471 }
1472
1473 fdset_free(fds);
1474 fds = NULL;
1475
1476 if (process_pty(master, pid, &mask) < 0)
1477 goto finish;
1478
1479 if (saved_attr_valid)
1480 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1481
1482 r = wait_for_terminate(pid, &status);
1483 if (r < 0) {
1484 r = EXIT_FAILURE;
1485 break;
1486 }
1487
1488 if (status.si_code == CLD_EXITED) {
1489 if (status.si_status != 0) {
1490 log_error("Container failed with error code %i.", status.si_status);
1491 r = status.si_status;
1492 break;
1493 }
1494
1495 log_debug("Container exited successfully.");
1496 break;
1497 } else if (status.si_code == CLD_KILLED &&
1498 status.si_status == SIGINT) {
1499 log_info("Container has been shut down.");
1500 r = 0;
1501 break;
1502 } else if (status.si_code == CLD_KILLED &&
1503 status.si_status == SIGHUP) {
1504 log_info("Container is being rebooted.");
1505 continue;
1506 } else if (status.si_code == CLD_KILLED ||
1507 status.si_code == CLD_DUMPED) {
1508
1509 log_error("Container terminated by signal %s.", signal_to_string(status.si_status));
1510 r = EXIT_FAILURE;
1511 break;
1512 } else {
1513 log_error("Container failed due to unknown reason.");
1514 r = EXIT_FAILURE;
1515 break;
1516 }
1517 }
1518
1519 finish:
1520 if (saved_attr_valid)
1521 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1522
1523 if (master >= 0)
1524 close_nointr_nofail(master);
1525
1526 close_pipe(kmsg_socket_pair);
1527
1528 if (oldcg)
1529 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
1530
1531 if (newcg)
1532 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
1533
1534 free(arg_directory);
1535 strv_free(arg_controllers);
1536 free(oldcg);
1537 free(newcg);
1538
1539 fdset_free(fds);
1540
1541 return r;
1542 }