]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn.c
logging: reduce send timeout to something more sensible
[thirdparty/systemd.git] / src / nspawn / nspawn.c
CommitLineData
88213476
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
88213476
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
88213476 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
88213476
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <signal.h>
23#include <sched.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/syscall.h>
27#include <sys/mount.h>
28#include <sys/wait.h>
29#include <stdlib.h>
30#include <string.h>
31#include <stdio.h>
32#include <errno.h>
33#include <sys/prctl.h>
34#include <sys/capability.h>
35#include <getopt.h>
a258bf26
LP
36#include <termios.h>
37#include <sys/signalfd.h>
687d0825 38#include <grp.h>
5ed27dbd 39#include <linux/fs.h>
9537eab0
LP
40#include <sys/un.h>
41#include <sys/socket.h>
aea38d80 42#include <linux/netlink.h>
88213476 43
1f0cd86b
LP
44#include "sd-daemon.h"
45#include "sd-bus.h"
46#include "sd-id128.h"
88213476
LP
47#include "log.h"
48#include "util.h"
49e942b2 49#include "mkdir.h"
6b2d0e85 50#include "macro.h"
d7832d2c 51#include "audit.h"
94d82985 52#include "missing.h"
04d391da 53#include "cgroup-util.h"
a258bf26 54#include "strv.h"
9eb977db 55#include "path-util.h"
a41fe3a2 56#include "loopback-setup.h"
4fc9982c 57#include "dev-setup.h"
842f3b0f 58#include "fdset.h"
acbeb427 59#include "build.h"
a5c32cff 60#include "fileio.h"
40ca29a1 61#include "bus-util.h"
1f0cd86b 62#include "bus-error.h"
4ba93280 63#include "ptyfwd.h"
9bd37b40 64#include "bus-kernel.h"
f4889f65 65#include "env-util.h"
57fb9fb5 66
f2d88580
LP
67#ifndef TTY_GID
68#define TTY_GID 5
69#endif
70
57fb9fb5
LP
/* Journal linking mode, chosen with --link-journal= (or -j); consumed by
 * setup_journal(). */
typedef enum LinkJournal {
        LINK_NO    = 0,   /* "no" */
        LINK_AUTO  = 1,   /* "auto", the default */
        LINK_HOST  = 2,   /* "host" */
        LINK_GUEST = 3    /* "guest" */
} LinkJournal;
88213476
LP
77
78static char *arg_directory = NULL;
687d0825 79static char *arg_user = NULL;
9444b1f2 80static sd_id128_t arg_uuid = {};
7027ff61 81static char *arg_machine = NULL;
9444b1f2 82static const char *arg_slice = NULL;
ff01d048 83static bool arg_private_network = false;
bc2f673e 84static bool arg_read_only = false;
0f0dbc46 85static bool arg_boot = false;
57fb9fb5 86static LinkJournal arg_link_journal = LINK_AUTO;
5076f0cc
LP
87static uint64_t arg_retain =
88 (1ULL << CAP_CHOWN) |
89 (1ULL << CAP_DAC_OVERRIDE) |
90 (1ULL << CAP_DAC_READ_SEARCH) |
91 (1ULL << CAP_FOWNER) |
92 (1ULL << CAP_FSETID) |
93 (1ULL << CAP_IPC_OWNER) |
94 (1ULL << CAP_KILL) |
95 (1ULL << CAP_LEASE) |
96 (1ULL << CAP_LINUX_IMMUTABLE) |
97 (1ULL << CAP_NET_BIND_SERVICE) |
98 (1ULL << CAP_NET_BROADCAST) |
99 (1ULL << CAP_NET_RAW) |
100 (1ULL << CAP_SETGID) |
101 (1ULL << CAP_SETFCAP) |
102 (1ULL << CAP_SETPCAP) |
103 (1ULL << CAP_SETUID) |
104 (1ULL << CAP_SYS_ADMIN) |
105 (1ULL << CAP_SYS_CHROOT) |
106 (1ULL << CAP_SYS_NICE) |
107 (1ULL << CAP_SYS_PTRACE) |
108 (1ULL << CAP_SYS_TTY_CONFIG) |
d87be9b0 109 (1ULL << CAP_SYS_RESOURCE) |
88d04e31
LP
110 (1ULL << CAP_SYS_BOOT) |
111 (1ULL << CAP_AUDIT_WRITE) |
112 (1ULL << CAP_AUDIT_CONTROL);
17fe0523
LP
113static char **arg_bind = NULL;
114static char **arg_bind_ro = NULL;
f4889f65 115static char **arg_setenv = NULL;
88213476
LP
116
/* Print the usage summary to stdout.
 *
 * Always returns 0; parse_argv() hands that value on to its caller, which
 * treats 0 as "exit successfully without starting a container".
 *
 * Note: the -j line documents --link-journal=guest, matching what
 * parse_argv() actually sets for -j (LINK_GUEST). */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               " -h --help Show this help\n"
               " --version Print version string\n"
               " -D --directory=NAME Root directory for the container\n"
               " -b --boot Boot up full system (i.e. invoke init)\n"
               " -u --user=USER Run the command under specified user or uid\n"
               " --uuid=UUID Set a specific machine UUID for the container\n"
               " -M --machine=NAME Set the machine name for the container\n"
               " -S --slice=SLICE Place the container in the specified slice\n"
               " --private-network Disable network in container\n"
               " --read-only Mount the root directory read-only\n"
               " --capability=CAP In addition to the default, retain specified\n"
               " capability\n"
               " --drop-capability=CAP Drop the specified capability from the default set\n"
               " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
               " -j Equivalent to --link-journal=guest\n"
               " --bind=PATH[:PATH] Bind mount a file or directory from the host into\n"
               " the container\n"
               " --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
               " --setenv=NAME=VALUE Pass an environment variable to PID 1\n",
               program_invocation_short_name);

        return 0;
}
144
145static int parse_argv(int argc, char *argv[]) {
146
a41fe3a2 147 enum {
acbeb427
ZJS
148 ARG_VERSION = 0x100,
149 ARG_PRIVATE_NETWORK,
bc2f673e 150 ARG_UUID,
5076f0cc 151 ARG_READ_ONLY,
57fb9fb5 152 ARG_CAPABILITY,
420c7379 153 ARG_DROP_CAPABILITY,
17fe0523
LP
154 ARG_LINK_JOURNAL,
155 ARG_BIND,
f4889f65
LP
156 ARG_BIND_RO,
157 ARG_SETENV,
a41fe3a2
LP
158 };
159
88213476 160 static const struct option options[] = {
ff01d048 161 { "help", no_argument, NULL, 'h' },
acbeb427 162 { "version", no_argument, NULL, ARG_VERSION },
ff01d048
LP
163 { "directory", required_argument, NULL, 'D' },
164 { "user", required_argument, NULL, 'u' },
165 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
0f0dbc46 166 { "boot", no_argument, NULL, 'b' },
144f0fc0 167 { "uuid", required_argument, NULL, ARG_UUID },
bc2f673e 168 { "read-only", no_argument, NULL, ARG_READ_ONLY },
5076f0cc 169 { "capability", required_argument, NULL, ARG_CAPABILITY },
420c7379 170 { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
57fb9fb5 171 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
17fe0523
LP
172 { "bind", required_argument, NULL, ARG_BIND },
173 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
7027ff61 174 { "machine", required_argument, NULL, 'M' },
9444b1f2 175 { "slice", required_argument, NULL, 'S' },
f4889f65 176 { "setenv", required_argument, NULL, ARG_SETENV },
eb9da376 177 {}
88213476
LP
178 };
179
9444b1f2 180 int c, r;
88213476
LP
181
182 assert(argc >= 0);
183 assert(argv);
184
bd5a5458 185 while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
88213476
LP
186
187 switch (c) {
188
189 case 'h':
eb9da376 190 return help();
88213476 191
acbeb427
ZJS
192 case ARG_VERSION:
193 puts(PACKAGE_STRING);
194 puts(SYSTEMD_FEATURES);
195 return 0;
196
88213476
LP
197 case 'D':
198 free(arg_directory);
3a74cea5
LP
199 arg_directory = canonicalize_file_name(optarg);
200 if (!arg_directory) {
898d5c91 201 log_error("Invalid root directory: %m");
88213476
LP
202 return -ENOMEM;
203 }
204
205 break;
206
687d0825
MV
207 case 'u':
208 free(arg_user);
7027ff61
LP
209 arg_user = strdup(optarg);
210 if (!arg_user)
211 return log_oom();
687d0825
MV
212
213 break;
214
ff01d048
LP
215 case ARG_PRIVATE_NETWORK:
216 arg_private_network = true;
a41fe3a2
LP
217 break;
218
0f0dbc46
LP
219 case 'b':
220 arg_boot = true;
221 break;
222
144f0fc0 223 case ARG_UUID:
9444b1f2
LP
224 r = sd_id128_from_string(optarg, &arg_uuid);
225 if (r < 0) {
aa96c6cb 226 log_error("Invalid UUID: %s", optarg);
9444b1f2 227 return r;
aa96c6cb 228 }
9444b1f2 229 break;
aa96c6cb 230
9444b1f2
LP
231 case 'S':
232 arg_slice = strdup(optarg);
b3451bed
DH
233 if (!arg_slice)
234 return log_oom();
235
144f0fc0
LP
236 break;
237
7027ff61
LP
238 case 'M':
239 if (!hostname_is_valid(optarg)) {
240 log_error("Invalid machine name: %s", optarg);
241 return -EINVAL;
242 }
243
244 free(arg_machine);
245 arg_machine = strdup(optarg);
246 if (!arg_machine)
247 return log_oom();
248
249 break;
250
bc2f673e
LP
251 case ARG_READ_ONLY:
252 arg_read_only = true;
253 break;
254
420c7379
LP
255 case ARG_CAPABILITY:
256 case ARG_DROP_CAPABILITY: {
5076f0cc
LP
257 char *state, *word;
258 size_t length;
259
260 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
261 cap_value_t cap;
262 char *t;
263
264 t = strndup(word, length);
0d0f0c50
SL
265 if (!t)
266 return log_oom();
5076f0cc
LP
267
268 if (cap_from_name(t, &cap) < 0) {
269 log_error("Failed to parse capability %s.", t);
270 free(t);
271 return -EINVAL;
272 }
273
274 free(t);
420c7379
LP
275
276 if (c == ARG_CAPABILITY)
277 arg_retain |= 1ULL << (uint64_t) cap;
278 else
279 arg_retain &= ~(1ULL << (uint64_t) cap);
5076f0cc
LP
280 }
281
282 break;
283 }
284
57fb9fb5
LP
285 case 'j':
286 arg_link_journal = LINK_GUEST;
287 break;
288
289 case ARG_LINK_JOURNAL:
290 if (streq(optarg, "auto"))
291 arg_link_journal = LINK_AUTO;
292 else if (streq(optarg, "no"))
293 arg_link_journal = LINK_NO;
294 else if (streq(optarg, "guest"))
295 arg_link_journal = LINK_GUEST;
296 else if (streq(optarg, "host"))
297 arg_link_journal = LINK_HOST;
298 else {
299 log_error("Failed to parse link journal mode %s", optarg);
300 return -EINVAL;
301 }
302
303 break;
304
17fe0523
LP
305 case ARG_BIND:
306 case ARG_BIND_RO: {
307 _cleanup_free_ char *a = NULL, *b = NULL;
308 char *e;
309 char ***x;
17fe0523
LP
310
311 x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
312
313 e = strchr(optarg, ':');
314 if (e) {
315 a = strndup(optarg, e - optarg);
316 b = strdup(e + 1);
317 } else {
318 a = strdup(optarg);
319 b = strdup(optarg);
320 }
321
322 if (!a || !b)
323 return log_oom();
324
325 if (!path_is_absolute(a) || !path_is_absolute(b)) {
326 log_error("Invalid bind mount specification: %s", optarg);
327 return -EINVAL;
328 }
329
330 r = strv_extend(x, a);
331 if (r < 0)
b3451bed 332 return log_oom();
17fe0523
LP
333
334 r = strv_extend(x, b);
335 if (r < 0)
b3451bed 336 return log_oom();
17fe0523
LP
337
338 break;
339 }
340
f4889f65
LP
341 case ARG_SETENV: {
342 char **n;
343
344 if (!env_assignment_is_valid(optarg)) {
345 log_error("Environment variable assignment '%s' is not valid.", optarg);
346 return -EINVAL;
347 }
348
349 n = strv_env_set(arg_setenv, optarg);
350 if (!n)
351 return log_oom();
352
353 strv_free(arg_setenv);
354 arg_setenv = n;
355 break;
356 }
357
88213476
LP
358 case '?':
359 return -EINVAL;
360
361 default:
eb9da376 362 assert_not_reached("Unhandled option");
88213476
LP
363 }
364 }
365
366 return 1;
367}
368
369static int mount_all(const char *dest) {
370
371 typedef struct MountPoint {
372 const char *what;
373 const char *where;
374 const char *type;
375 const char *options;
376 unsigned long flags;
3bd66c05 377 bool fatal;
88213476
LP
378 } MountPoint;
379
380 static const MountPoint mount_table[] = {
4b7a6af4 381 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
b4c59701
LP
382 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
383 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
e65aec12 384 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
635f7d8c 385 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
f2d88580 386 { "devpts", "/dev/pts", "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
ede89845 387 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
635f7d8c 388 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
9b634ea5 389#ifdef HAVE_SELINUX
b4c59701
LP
390 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
391 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
9b634ea5 392#endif
88213476
LP
393 };
394
395 unsigned k;
396 int r = 0;
397
398 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
7fd1b19b 399 _cleanup_free_ char *where = NULL;
88213476
LP
400 int t;
401
17fe0523
LP
402 where = strjoin(dest, "/", mount_table[k].where, NULL);
403 if (!where)
404 return log_oom();
88213476 405
e65aec12 406 t = path_is_mount_point(where, true);
68fb0892 407 if (t < 0) {
88213476 408 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
88213476
LP
409
410 if (r == 0)
411 r = t;
412
413 continue;
414 }
415
9c1c7f71
LP
416 /* Skip this entry if it is not a remount. */
417 if (mount_table[k].what && t > 0)
014a9c77
LP
418 continue;
419
17fe0523 420 mkdir_p(where, 0755);
88213476
LP
421
422 if (mount(mount_table[k].what,
423 where,
424 mount_table[k].type,
425 mount_table[k].flags,
3bd66c05
LP
426 mount_table[k].options) < 0 &&
427 mount_table[k].fatal) {
88213476
LP
428
429 log_error("mount(%s) failed: %m", where);
430
431 if (r == 0)
432 r = -errno;
433 }
88213476
LP
434 }
435
e58a1277
LP
436 return r;
437}
f8440af5 438
17fe0523
LP
439static int mount_binds(const char *dest, char **l, unsigned long flags) {
440 char **x, **y;
441
442 STRV_FOREACH_PAIR(x, y, l) {
2ed4e5e0 443 char *where;
d2421337 444 struct stat source_st, dest_st;
2ed4e5e0 445 int r;
d2421337
DR
446
447 if (stat(*x, &source_st) < 0) {
448 log_error("failed to stat %s: %m", *x);
449 return -errno;
450 }
17fe0523 451
2ed4e5e0
SL
452 where = strappenda(dest, *y);
453 r = stat(where, &dest_st);
454 if (r == 0) {
d2421337 455 if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
cecf24e7 456 log_error("The file types of %s and %s do not match. Refusing bind mount",
d2421337
DR
457 *x, where);
458 return -EINVAL;
459 }
2ed4e5e0
SL
460 } else if (errno == ENOENT) {
461 r = mkdir_parents_label(where, 0755);
462 if (r < 0) {
463 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
464 return r;
d2421337 465 }
2ed4e5e0
SL
466 } else {
467 log_error("Failed to bind mount %s: %s", *x, strerror(errno));
468 return -errno;
469 }
470 /* Create the mount point, but be conservative -- refuse to create block
471 * and char devices. */
472 if (S_ISDIR(source_st.st_mode))
473 mkdir_label(where, 0755);
474 else if (S_ISFIFO(source_st.st_mode))
475 mkfifo(where, 0644);
476 else if (S_ISSOCK(source_st.st_mode))
477 mknod(where, 0644 | S_IFSOCK, 0);
478 else if (S_ISREG(source_st.st_mode))
479 touch(where);
480 else {
481 log_error("Refusing to create mountpoint for file: %s", *x);
482 return -ENOTSUP;
d2421337 483 }
17fe0523
LP
484
485 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
486 log_error("mount(%s) failed: %m", where);
487 return -errno;
488 }
489
490 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
491 log_error("mount(%s) failed: %m", where);
492 return -errno;
493 }
494 }
495
496 return 0;
497}
498
e58a1277 499static int setup_timezone(const char *dest) {
d4036145
LP
500 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
501 char *z, *y;
502 int r;
f8440af5 503
e58a1277
LP
504 assert(dest);
505
506 /* Fix the timezone, if possible */
d4036145
LP
507 r = readlink_malloc("/etc/localtime", &p);
508 if (r < 0) {
509 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
510 return 0;
511 }
512
513 z = path_startswith(p, "../usr/share/zoneinfo/");
514 if (!z)
515 z = path_startswith(p, "/usr/share/zoneinfo/");
516 if (!z) {
517 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
518 return 0;
519 }
520
04bc4a3f
LP
521 where = strappend(dest, "/etc/localtime");
522 if (!where)
0d0f0c50 523 return log_oom();
715ac17a 524
d4036145
LP
525 r = readlink_malloc(where, &q);
526 if (r >= 0) {
527 y = path_startswith(q, "../usr/share/zoneinfo/");
528 if (!y)
529 y = path_startswith(q, "/usr/share/zoneinfo/");
4d1c38b8 530
4d1c38b8 531
d4036145
LP
532 /* Already pointing to the right place? Then do nothing .. */
533 if (y && streq(y, z))
534 return 0;
535 }
536
537 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
538 if (!check)
0d0f0c50 539 return log_oom();
4d1c38b8 540
d4036145
LP
541 if (access(check, F_OK) < 0) {
542 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
543 return 0;
544 }
68fb0892 545
d4036145
LP
546 what = strappend("../usr/share/zoneinfo/", z);
547 if (!what)
548 return log_oom();
549
550 unlink(where);
551 if (symlink(what, where) < 0) {
552 log_error("Failed to correct timezone of container: %m");
553 return 0;
554 }
e58a1277
LP
555
556 return 0;
88213476
LP
557}
558
2547bb41 559static int setup_resolv_conf(const char *dest) {
f333fbb1 560 char _cleanup_free_ *where = NULL;
2547bb41
LP
561
562 assert(dest);
563
564 if (arg_private_network)
565 return 0;
566
567 /* Fix resolv.conf, if possible */
04bc4a3f
LP
568 where = strappend(dest, "/etc/resolv.conf");
569 if (!where)
0d0f0c50 570 return log_oom();
2547bb41 571
77e63faf
LP
572 /* We don't really care for the results of this really. If it
573 * fails, it fails, but meh... */
51045322 574 copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
2547bb41
LP
575
576 return 0;
577}
578
04bc4a3f 579static int setup_boot_id(const char *dest) {
7fd1b19b 580 _cleanup_free_ char *from = NULL, *to = NULL;
04bc4a3f
LP
581 sd_id128_t rnd;
582 char as_uuid[37];
583 int r;
584
585 assert(dest);
586
587 /* Generate a new randomized boot ID, so that each boot-up of
588 * the container gets a new one */
589
590 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
04bc4a3f 591 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
ed8b7a3e
ZJS
592 if (!from || !to)
593 return log_oom();
04bc4a3f
LP
594
595 r = sd_id128_randomize(&rnd);
596 if (r < 0) {
597 log_error("Failed to generate random boot id: %s", strerror(-r));
ed8b7a3e 598 return r;
04bc4a3f
LP
599 }
600
601 snprintf(as_uuid, sizeof(as_uuid),
602 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
603 SD_ID128_FORMAT_VAL(rnd));
604 char_array_0(as_uuid);
605
574d5f2d 606 r = write_string_file(from, as_uuid);
04bc4a3f
LP
607 if (r < 0) {
608 log_error("Failed to write boot id: %s", strerror(-r));
ed8b7a3e 609 return r;
04bc4a3f
LP
610 }
611
612 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
613 log_error("Failed to bind mount boot id: %m");
614 r = -errno;
10d18763
ZJS
615 } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
616 log_warning("Failed to make boot id read-only: %m");
04bc4a3f
LP
617
618 unlink(from);
04bc4a3f
LP
619 return r;
620}
621
e58a1277 622static int copy_devnodes(const char *dest) {
88213476
LP
623
624 static const char devnodes[] =
625 "null\0"
626 "zero\0"
627 "full\0"
628 "random\0"
629 "urandom\0"
f2d88580 630 "tty\0";
88213476
LP
631
632 const char *d;
e58a1277 633 int r = 0;
7fd1b19b 634 _cleanup_umask_ mode_t u;
a258bf26
LP
635
636 assert(dest);
124640f1
LP
637
638 u = umask(0000);
88213476
LP
639
640 NULSTR_FOREACH(d, devnodes) {
e58a1277 641 struct stat st;
7fd1b19b 642 _cleanup_free_ char *from = NULL, *to = NULL;
88213476
LP
643
644 asprintf(&from, "/dev/%s", d);
645 asprintf(&to, "%s/dev/%s", dest, d);
646
647 if (!from || !to) {
ed8b7a3e 648 log_oom();
a258bf26 649
88213476
LP
650 if (r == 0)
651 r = -ENOMEM;
652
653 break;
654 }
655
656 if (stat(from, &st) < 0) {
657
658 if (errno != ENOENT) {
659 log_error("Failed to stat %s: %m", from);
88213476
LP
660 if (r == 0)
661 r = -errno;
662 }
663
a258bf26 664 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
88213476 665
ed8b7a3e 666 log_error("%s is not a char or block device, cannot copy", from);
a258bf26
LP
667 if (r == 0)
668 r = -EIO;
669
670 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
671
672 log_error("mknod(%s) failed: %m", dest);
673 if (r == 0)
674 r = -errno;
88213476 675 }
88213476
LP
676 }
677
e58a1277
LP
678 return r;
679}
88213476 680
f2d88580
LP
681static int setup_ptmx(const char *dest) {
682 _cleanup_free_ char *p = NULL;
683
684 p = strappend(dest, "/dev/ptmx");
685 if (!p)
686 return log_oom();
687
688 if (symlink("pts/ptmx", p) < 0) {
689 log_error("Failed to create /dev/ptmx symlink: %m");
690 return -errno;
691 }
692
693 return 0;
694}
695
e58a1277
LP
696static int setup_dev_console(const char *dest, const char *console) {
697 struct stat st;
7fd1b19b 698 _cleanup_free_ char *to = NULL;
e58a1277 699 int r;
7fd1b19b 700 _cleanup_umask_ mode_t u;
e58a1277
LP
701
702 assert(dest);
703 assert(console);
704
705 u = umask(0000);
706
707 if (stat(console, &st) < 0) {
708 log_error("Failed to stat %s: %m", console);
25ea79fe 709 return -errno;
88213476 710
a258bf26 711 } else if (!S_ISCHR(st.st_mode)) {
25ea79fe
ZJS
712 log_error("/dev/console is not a char device");
713 return -EIO;
e58a1277 714 }
88213476 715
e58a1277
LP
716 r = chmod_and_chown(console, 0600, 0, 0);
717 if (r < 0) {
718 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
25ea79fe 719 return r;
a258bf26 720 }
88213476 721
25ea79fe
ZJS
722 if (asprintf(&to, "%s/dev/console", dest) < 0)
723 return log_oom();
88213476 724
a258bf26
LP
725 /* We need to bind mount the right tty to /dev/console since
726 * ptys can only exist on pts file systems. To have something
727 * to bind mount things on we create a device node first, that
728 * has the right major/minor (note that the major minor
729 * doesn't actually matter here, since we mount it over
730 * anyway). */
731
e58a1277
LP
732 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
733 log_error("mknod() for /dev/console failed: %m");
25ea79fe 734 return -errno;
e58a1277 735 }
a258bf26
LP
736
737 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
e58a1277 738 log_error("Bind mount for /dev/console failed: %m");
25ea79fe 739 return -errno;
a258bf26
LP
740 }
741
25ea79fe 742 return 0;
e58a1277
LP
743}
744
745static int setup_kmsg(const char *dest, int kmsg_socket) {
7fd1b19b 746 _cleanup_free_ char *from = NULL, *to = NULL;
e58a1277 747 int r, fd, k;
7fd1b19b 748 _cleanup_umask_ mode_t u;
e58a1277
LP
749 union {
750 struct cmsghdr cmsghdr;
751 uint8_t buf[CMSG_SPACE(sizeof(int))];
b92bea5d
ZJS
752 } control = {};
753 struct msghdr mh = {
754 .msg_control = &control,
755 .msg_controllen = sizeof(control),
756 };
e58a1277
LP
757 struct cmsghdr *cmsg;
758
759 assert(dest);
760 assert(kmsg_socket >= 0);
a258bf26 761
e58a1277 762 u = umask(0000);
a258bf26 763
f1e5dfe2
LP
764 /* We create the kmsg FIFO as /dev/kmsg, but immediately
765 * delete it after bind mounting it to /proc/kmsg. While FIFOs
766 * on the reading side behave very similar to /proc/kmsg,
767 * their writing side behaves differently from /dev/kmsg in
768 * that writing blocks when nothing is reading. In order to
769 * avoid any problems with containers deadlocking due to this
770 * we simply make /dev/kmsg unavailable to the container. */
25ea79fe
ZJS
771 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
772 asprintf(&to, "%s/proc/kmsg", dest) < 0)
773 return log_oom();
e58a1277
LP
774
775 if (mkfifo(from, 0600) < 0) {
776 log_error("mkfifo() for /dev/kmsg failed: %m");
25ea79fe 777 return -errno;
e58a1277
LP
778 }
779
780 r = chmod_and_chown(from, 0600, 0, 0);
781 if (r < 0) {
782 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
25ea79fe 783 return r;
e58a1277
LP
784 }
785
786 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
787 log_error("Bind mount for /proc/kmsg failed: %m");
25ea79fe 788 return -errno;
e58a1277
LP
789 }
790
791 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
792 if (fd < 0) {
793 log_error("Failed to open fifo: %m");
25ea79fe 794 return -errno;
e58a1277
LP
795 }
796
e58a1277
LP
797 cmsg = CMSG_FIRSTHDR(&mh);
798 cmsg->cmsg_level = SOL_SOCKET;
799 cmsg->cmsg_type = SCM_RIGHTS;
800 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
801 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
802
803 mh.msg_controllen = cmsg->cmsg_len;
804
805 /* Store away the fd in the socket, so that it stays open as
806 * long as we run the child */
807 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
808 close_nointr_nofail(fd);
809
810 if (k < 0) {
811 log_error("Failed to send FIFO fd: %m");
25ea79fe 812 return -errno;
a258bf26
LP
813 }
814
f1e5dfe2
LP
815 /* And now make the FIFO unavailable as /dev/kmsg... */
816 unlink(from);
25ea79fe 817 return 0;
88213476
LP
818}
819
3a74cea5 820static int setup_hostname(void) {
3a74cea5 821
7027ff61
LP
822 if (sethostname(arg_machine, strlen(arg_machine)) < 0)
823 return -errno;
3a74cea5 824
7027ff61 825 return 0;
3a74cea5
LP
826}
827
57fb9fb5 828static int setup_journal(const char *directory) {
4d680aee 829 sd_id128_t machine_id, this_id;
7fd1b19b 830 _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
27407a01 831 char *id;
57fb9fb5
LP
832 int r;
833
57fb9fb5 834 p = strappend(directory, "/etc/machine-id");
27407a01
ZJS
835 if (!p)
836 return log_oom();
57fb9fb5
LP
837
838 r = read_one_line_file(p, &b);
27407a01
ZJS
839 if (r == -ENOENT && arg_link_journal == LINK_AUTO)
840 return 0;
841 else if (r < 0) {
842 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
57fb9fb5
LP
843 return r;
844 }
845
27407a01
ZJS
846 id = strstrip(b);
847 if (isempty(id) && arg_link_journal == LINK_AUTO)
848 return 0;
57fb9fb5 849
27407a01
ZJS
850 /* Verify validity */
851 r = sd_id128_from_string(id, &machine_id);
57fb9fb5 852 if (r < 0) {
27407a01
ZJS
853 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
854 return r;
57fb9fb5
LP
855 }
856
4d680aee
ZJS
857 r = sd_id128_get_machine(&this_id);
858 if (r < 0) {
859 log_error("Failed to retrieve machine ID: %s", strerror(-r));
860 return r;
861 }
862
863 if (sd_id128_equal(machine_id, this_id)) {
864 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
865 "Host and machine ids are equal (%s): refusing to link journals", id);
866 if (arg_link_journal == LINK_AUTO)
867 return 0;
868 return
869 -EEXIST;
870 }
871
872 if (arg_link_journal == LINK_NO)
873 return 0;
874
57fb9fb5 875 free(p);
27407a01
ZJS
876 p = strappend("/var/log/journal/", id);
877 q = strjoin(directory, "/var/log/journal/", id, NULL);
878 if (!p || !q)
879 return log_oom();
880
881 if (path_is_mount_point(p, false) > 0) {
882 if (arg_link_journal != LINK_AUTO) {
883 log_error("%s: already a mount point, refusing to use for journal", p);
884 return -EEXIST;
885 }
886
887 return 0;
57fb9fb5
LP
888 }
889
27407a01 890 if (path_is_mount_point(q, false) > 0) {
57fb9fb5 891 if (arg_link_journal != LINK_AUTO) {
27407a01
ZJS
892 log_error("%s: already a mount point, refusing to use for journal", q);
893 return -EEXIST;
57fb9fb5
LP
894 }
895
27407a01 896 return 0;
57fb9fb5
LP
897 }
898
899 r = readlink_and_make_absolute(p, &d);
900 if (r >= 0) {
901 if ((arg_link_journal == LINK_GUEST ||
902 arg_link_journal == LINK_AUTO) &&
903 path_equal(d, q)) {
904
27407a01
ZJS
905 r = mkdir_p(q, 0755);
906 if (r < 0)
907 log_warning("failed to create directory %s: %m", q);
908 return 0;
57fb9fb5
LP
909 }
910
911 if (unlink(p) < 0) {
912 log_error("Failed to remove symlink %s: %m", p);
27407a01 913 return -errno;
57fb9fb5
LP
914 }
915 } else if (r == -EINVAL) {
916
917 if (arg_link_journal == LINK_GUEST &&
918 rmdir(p) < 0) {
919
27407a01
ZJS
920 if (errno == ENOTDIR) {
921 log_error("%s already exists and is neither a symlink nor a directory", p);
922 return r;
923 } else {
57fb9fb5 924 log_error("Failed to remove %s: %m", p);
27407a01 925 return -errno;
57fb9fb5 926 }
57fb9fb5
LP
927 }
928 } else if (r != -ENOENT) {
929 log_error("readlink(%s) failed: %m", p);
27407a01 930 return r;
57fb9fb5
LP
931 }
932
933 if (arg_link_journal == LINK_GUEST) {
934
935 if (symlink(q, p) < 0) {
936 log_error("Failed to symlink %s to %s: %m", q, p);
27407a01 937 return -errno;
57fb9fb5
LP
938 }
939
27407a01
ZJS
940 r = mkdir_p(q, 0755);
941 if (r < 0)
942 log_warning("failed to create directory %s: %m", q);
943 return 0;
57fb9fb5
LP
944 }
945
946 if (arg_link_journal == LINK_HOST) {
947 r = mkdir_p(p, 0755);
948 if (r < 0) {
949 log_error("Failed to create %s: %m", p);
27407a01 950 return r;
57fb9fb5
LP
951 }
952
27407a01
ZJS
953 } else if (access(p, F_OK) < 0)
954 return 0;
57fb9fb5
LP
955
956 if (dir_is_empty(q) == 0) {
957 log_error("%s not empty.", q);
27407a01 958 return -ENOTEMPTY;
57fb9fb5
LP
959 }
960
961 r = mkdir_p(q, 0755);
962 if (r < 0) {
963 log_error("Failed to create %s: %m", q);
27407a01 964 return r;
57fb9fb5
LP
965 }
966
967 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
968 log_error("Failed to bind mount journal from host into guest: %m");
27407a01 969 return -errno;
57fb9fb5
LP
970 }
971
27407a01 972 return 0;
57fb9fb5
LP
973}
974
9bd37b40
LP
975static int setup_kdbus(const char *dest, const char *path) {
976 const char *p;
977
978 if (!path)
979 return 0;
980
981 p = strappenda(dest, "/dev/kdbus");
982 if (mkdir(p, 0755) < 0) {
983 log_error("Failed to create kdbus path: %m");
984 return -errno;
985 }
986
987 if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
988 log_error("Failed to mount kdbus namespace path: %m");
989 return -errno;
990 }
991
992 return 0;
993}
994
88213476 995static int drop_capabilities(void) {
5076f0cc 996 return capability_bounding_set_drop(~arg_retain, false);
88213476
LP
997}
998
9444b1f2
LP
999static int register_machine(void) {
1000 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1001 _cleanup_bus_unref_ sd_bus *bus = NULL;
1002 int r;
1003
1004 r = sd_bus_open_system(&bus);
1005 if (r < 0) {
1006 log_error("Failed to open system bus: %s", strerror(-r));
1007 return r;
1008 }
1009
1010 r = sd_bus_call_method(
1011 bus,
1ee306e1
LP
1012 "org.freedesktop.machine1",
1013 "/org/freedesktop/machine1",
1014 "org.freedesktop.machine1.Manager",
9444b1f2
LP
1015 "CreateMachine",
1016 &error,
1017 NULL,
6a4e0b13 1018 "sayssusa(sv)",
9444b1f2 1019 arg_machine,
40ca29a1 1020 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
9444b1f2
LP
1021 "nspawn",
1022 "container",
1023 (uint32_t) 0,
6a4e0b13 1024 strempty(arg_directory),
88212f7b 1025 !isempty(arg_slice), "Slice", "s", arg_slice);
9444b1f2 1026 if (r < 0) {
1f0cd86b
LP
1027 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1028 return r;
1029 }
1030
1031 return 0;
1032}
1033
1034static int terminate_machine(pid_t pid) {
1035 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1036 _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1037 _cleanup_bus_unref_ sd_bus *bus = NULL;
1038 const char *path;
1039 int r;
1040
76b54375 1041 r = sd_bus_default_system(&bus);
1f0cd86b
LP
1042 if (r < 0) {
1043 log_error("Failed to open system bus: %s", strerror(-r));
1044 return r;
1045 }
1046
1047 r = sd_bus_call_method(
1048 bus,
1049 "org.freedesktop.machine1",
1050 "/org/freedesktop/machine1",
1051 "org.freedesktop.machine1.Manager",
1052 "GetMachineByPID",
1053 &error,
1054 &reply,
1055 "u",
1056 (uint32_t) pid);
1057 if (r < 0) {
1058 /* Note that the machine might already have been
1059 * cleaned up automatically, hence don't consider it a
1060 * failure if we cannot get the machine object. */
1061 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1062 return 0;
1063 }
1064
1065 r = sd_bus_message_read(reply, "o", &path);
5b30bef8
LP
1066 if (r < 0)
1067 return bus_log_parse_error(r);
9444b1f2 1068
1f0cd86b
LP
1069 r = sd_bus_call_method(
1070 bus,
1071 "org.freedesktop.machine1",
1072 path,
1073 "org.freedesktop.machine1.Machine",
1074 "Terminate",
1075 &error,
1076 NULL,
1077 NULL);
1078 if (r < 0) {
1079 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1080 return 0;
1081 }
1082
9444b1f2
LP
1083 return 0;
1084}
1085
77b6e194
LP
/* Report whether a NETLINK_AUDIT socket can be opened; the probe socket is
 * closed again right away. */
static bool audit_enabled(void) {
        int probe;

        probe = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
        if (probe < 0)
                return false;

        close_nointr_nofail(probe);
        return true;
}
1096
88213476
LP
1097int main(int argc, char *argv[]) {
1098 pid_t pid = 0;
04d391da 1099 int r = EXIT_FAILURE, k;
9bd37b40 1100 _cleanup_close_ int master = -1, kdbus_fd = -1;
7027ff61 1101 int n_fd_passed;
a258bf26 1102 const char *console = NULL;
a258bf26 1103 sigset_t mask;
04d39279 1104 _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
51d122af 1105 _cleanup_fdset_free_ FDSet *fds = NULL;
9bd37b40 1106 _cleanup_free_ char *kdbus_namespace = NULL;
9e554864 1107 const char *ns;
88213476
LP
1108
1109 log_parse_environment();
1110 log_open();
1111
05947bef
LP
1112 k = parse_argv(argc, argv);
1113 if (k < 0)
88213476 1114 goto finish;
05947bef
LP
1115 else if (k == 0) {
1116 r = EXIT_SUCCESS;
1117 goto finish;
1118 }
88213476
LP
1119
1120 if (arg_directory) {
1121 char *p;
1122
1123 p = path_make_absolute_cwd(arg_directory);
1124 free(arg_directory);
1125 arg_directory = p;
1126 } else
1127 arg_directory = get_current_dir_name();
1128
1129 if (!arg_directory) {
a383724e 1130 log_error("Failed to determine path, please use -D.");
88213476
LP
1131 goto finish;
1132 }
1133
1134 path_kill_slashes(arg_directory);
1135
7027ff61 1136 if (!arg_machine) {
2b6bf07d 1137 arg_machine = strdup(basename(arg_directory));
7027ff61
LP
1138 if (!arg_machine) {
1139 log_oom();
1140 goto finish;
1141 }
1142
e724b063 1143 hostname_cleanup(arg_machine, false);
7027ff61
LP
1144 if (isempty(arg_machine)) {
1145 log_error("Failed to determine machine name automatically, please use -M.");
1146 goto finish;
1147 }
1148 }
1149
88213476
LP
1150 if (geteuid() != 0) {
1151 log_error("Need to be root.");
1152 goto finish;
1153 }
1154
04d391da
LP
1155 if (sd_booted() <= 0) {
1156 log_error("Not running on a systemd system.");
1157 goto finish;
1158 }
1159
c2384970 1160 if (arg_boot && audit_enabled()) {
77b6e194
LP
1161 log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
1162 "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
1163 "line before using systemd-nspawn. Sleeping for 5s...\n");
1164 sleep(5);
1165 }
1166
88213476 1167 if (path_equal(arg_directory, "/")) {
6df6b939 1168 log_error("Spawning container on root directory not supported.");
88213476
LP
1169 goto finish;
1170 }
1171
66060897 1172 if (path_is_os_tree(arg_directory) <= 0) {
f8964235 1173 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
88213476
LP
1174 goto finish;
1175 }
1176
842f3b0f
LP
1177 log_close();
1178 n_fd_passed = sd_listen_fds(false);
1179 if (n_fd_passed > 0) {
1180 k = fdset_new_listen_fds(&fds, false);
1181 if (k < 0) {
1182 log_error("Failed to collect file descriptors: %s", strerror(-k));
1183 goto finish;
1184 }
1185 }
1186 fdset_close_others(fds);
1187 log_open();
1188
db7feb7e
LP
1189 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1190 if (master < 0) {
a258bf26
LP
1191 log_error("Failed to acquire pseudo tty: %m");
1192 goto finish;
1193 }
1194
db7feb7e
LP
1195 console = ptsname(master);
1196 if (!console) {
a258bf26
LP
1197 log_error("Failed to determine tty name: %m");
1198 goto finish;
1199 }
1200
04d39279 1201 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
a258bf26
LP
1202
1203 if (unlockpt(master) < 0) {
1204 log_error("Failed to unlock tty: %m");
1205 goto finish;
1206 }
1207
9e554864
LP
1208 ns = strappenda("machine-", arg_machine);
1209 kdbus_fd = bus_kernel_create_namespace(ns, &kdbus_namespace);
9bd37b40
LP
1210 if (r < 0)
1211 log_debug("Failed to create kdbus namespace: %s", strerror(-r));
1212 else
1213 log_debug("Successfully created kdbus namespace as %s", kdbus_namespace);
1214
e58a1277 1215 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
7027ff61 1216 log_error("Failed to create kmsg socket pair.");
e58a1277
LP
1217 goto finish;
1218 }
1219
05947bef
LP
1220 sd_notify(0, "READY=1");
1221
a258bf26
LP
1222 assert_se(sigemptyset(&mask) == 0);
1223 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1224 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1225
d87be9b0
LP
1226 for (;;) {
1227 siginfo_t status;
a383724e 1228
d87be9b0
LP
1229 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1230 if (pid < 0) {
1231 if (errno == EINVAL)
1232 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1233 else
1234 log_error("clone() failed: %m");
a258bf26 1235
d87be9b0
LP
1236 goto finish;
1237 }
a258bf26 1238
d87be9b0
LP
1239 if (pid == 0) {
1240 /* child */
d87be9b0
LP
1241 const char *home = NULL;
1242 uid_t uid = (uid_t) -1;
1243 gid_t gid = (gid_t) -1;
5674767e 1244 unsigned n_env = 2;
d87be9b0
LP
1245 const char *envp[] = {
1246 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1247 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1248 NULL, /* TERM */
1249 NULL, /* HOME */
1250 NULL, /* USER */
1251 NULL, /* LOGNAME */
1252 NULL, /* container_uuid */
842f3b0f
LP
1253 NULL, /* LISTEN_FDS */
1254 NULL, /* LISTEN_PID */
d87be9b0
LP
1255 NULL
1256 };
f4889f65 1257 char **env_use;
a258bf26 1258
5674767e
ZJS
1259 envp[n_env] = strv_find_prefix(environ, "TERM=");
1260 if (envp[n_env])
1261 n_env ++;
a258bf26 1262
d87be9b0 1263 close_nointr_nofail(master);
842f3b0f 1264 master = -1;
a258bf26 1265
d87be9b0
LP
1266 close_nointr(STDIN_FILENO);
1267 close_nointr(STDOUT_FILENO);
1268 close_nointr(STDERR_FILENO);
db7feb7e 1269
842f3b0f
LP
1270 close_nointr_nofail(kmsg_socket_pair[0]);
1271 kmsg_socket_pair[0] = -1;
a258bf26 1272
d87be9b0 1273 reset_all_signal_handlers();
88213476 1274
d87be9b0
LP
1275 assert_se(sigemptyset(&mask) == 0);
1276 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
f5c1b9ee 1277
842f3b0f
LP
1278 k = open_terminal(console, O_RDWR);
1279 if (k != STDIN_FILENO) {
1280 if (k >= 0) {
1281 close_nointr_nofail(k);
1282 k = -EINVAL;
1283 }
1284
1285 log_error("Failed to open console: %s", strerror(-k));
1286 goto child_fail;
1287 }
1288
1289 if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1290 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1291 log_error("Failed to duplicate console: %m");
d87be9b0 1292 goto child_fail;
842f3b0f 1293 }
bc2f673e 1294
d87be9b0
LP
1295 if (setsid() < 0) {
1296 log_error("setsid() failed: %m");
bc2f673e
LP
1297 goto child_fail;
1298 }
1299
d87be9b0
LP
1300 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1301 log_error("PR_SET_PDEATHSIG failed: %m");
1302 goto child_fail;
1303 }
e58a1277 1304
9444b1f2
LP
1305 r = register_machine();
1306 if (r < 0)
1307 goto finish;
1308
d87be9b0
LP
1309 /* Mark everything as slave, so that we still
1310 * receive mounts from the real root, but don't
1311 * propagate mounts to the real root. */
1312 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1313 log_error("MS_SLAVE|MS_REC failed: %m");
1314 goto child_fail;
1315 }
04bc4a3f 1316
d87be9b0
LP
1317 /* Turn directory into bind mount */
1318 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1319 log_error("Failed to make bind mount.");
1320 goto child_fail;
1321 }
88213476 1322
d87be9b0
LP
1323 if (arg_read_only)
1324 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1325 log_error("Failed to make read-only.");
1326 goto child_fail;
1327 }
2547bb41 1328
d87be9b0
LP
1329 if (mount_all(arg_directory) < 0)
1330 goto child_fail;
57fb9fb5 1331
d87be9b0
LP
1332 if (copy_devnodes(arg_directory) < 0)
1333 goto child_fail;
a258bf26 1334
f2d88580
LP
1335 if (setup_ptmx(arg_directory) < 0)
1336 goto child_fail;
1337
d87be9b0 1338 dev_setup(arg_directory);
88213476 1339
d87be9b0
LP
1340 if (setup_dev_console(arg_directory, console) < 0)
1341 goto child_fail;
88213476 1342
d87be9b0
LP
1343 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1344 goto child_fail;
88213476 1345
d87be9b0 1346 close_nointr_nofail(kmsg_socket_pair[1]);
842f3b0f 1347 kmsg_socket_pair[1] = -1;
a258bf26 1348
d87be9b0
LP
1349 if (setup_boot_id(arg_directory) < 0)
1350 goto child_fail;
a41fe3a2 1351
d87be9b0
LP
1352 if (setup_timezone(arg_directory) < 0)
1353 goto child_fail;
88213476 1354
d87be9b0
LP
1355 if (setup_resolv_conf(arg_directory) < 0)
1356 goto child_fail;
687d0825 1357
d87be9b0 1358 if (setup_journal(arg_directory) < 0)
687d0825 1359 goto child_fail;
687d0825 1360
17fe0523
LP
1361 if (mount_binds(arg_directory, arg_bind, 0) < 0)
1362 goto child_fail;
1363
1364 if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1365 goto child_fail;
1366
9bd37b40
LP
1367 if (setup_kdbus(arg_directory, kdbus_namespace) < 0)
1368 goto child_fail;
1369
d87be9b0
LP
1370 if (chdir(arg_directory) < 0) {
1371 log_error("chdir(%s) failed: %m", arg_directory);
687d0825
MV
1372 goto child_fail;
1373 }
1374
d87be9b0
LP
1375 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1376 log_error("mount(MS_MOVE) failed: %m");
687d0825
MV
1377 goto child_fail;
1378 }
1379
d87be9b0
LP
1380 if (chroot(".") < 0) {
1381 log_error("chroot() failed: %m");
687d0825
MV
1382 goto child_fail;
1383 }
1384
d87be9b0
LP
1385 if (chdir("/") < 0) {
1386 log_error("chdir() failed: %m");
687d0825
MV
1387 goto child_fail;
1388 }
1389
d87be9b0
LP
1390 umask(0022);
1391
1392 loopback_setup();
1393
1394 if (drop_capabilities() < 0) {
1395 log_error("drop_capabilities() failed: %m");
687d0825
MV
1396 goto child_fail;
1397 }
687d0825 1398
d87be9b0
LP
1399 if (arg_user) {
1400
963ddb91
LP
1401 /* Note that this resolves user names
1402 * inside the container, and hence
1403 * accesses the NSS modules from the
1404 * container and not the host. This is
1405 * a bit weird... */
1406
d87be9b0
LP
1407 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1408 log_error("get_user_creds() failed: %m");
1409 goto child_fail;
1410 }
1411
1412 if (mkdir_parents_label(home, 0775) < 0) {
1413 log_error("mkdir_parents_label() failed: %m");
1414 goto child_fail;
1415 }
1416
1417 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1418 log_error("mkdir_safe_label() failed: %m");
1419 goto child_fail;
1420 }
1421
1422 if (initgroups((const char*)arg_user, gid) < 0) {
1423 log_error("initgroups() failed: %m");
1424 goto child_fail;
1425 }
144f0fc0 1426
d87be9b0
LP
1427 if (setresgid(gid, gid, gid) < 0) {
1428 log_error("setregid() failed: %m");
1429 goto child_fail;
1430 }
1431
1432 if (setresuid(uid, uid, uid) < 0) {
1433 log_error("setreuid() failed: %m");
1434 goto child_fail;
1435 }
3c957acf
LP
1436 } else {
1437 /* Reset everything fully to 0, just in case */
1438
1439 if (setgroups(0, NULL) < 0) {
1440 log_error("setgroups() failed: %m");
1441 goto child_fail;
1442 }
1443
1444 if (setresgid(0, 0, 0) < 0) {
1445 log_error("setregid() failed: %m");
1446 goto child_fail;
1447 }
1448
1449 if (setresuid(0, 0, 0) < 0) {
1450 log_error("setreuid() failed: %m");
1451 goto child_fail;
1452 }
d87be9b0
LP
1453 }
1454
842f3b0f
LP
1455 if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1456 (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1457 (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
0d0f0c50 1458 log_oom();
144f0fc0
LP
1459 goto child_fail;
1460 }
687d0825 1461
9444b1f2
LP
1462 if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1463 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
842f3b0f
LP
1464 log_oom();
1465 goto child_fail;
1466 }
1467 }
1468
1469 if (fdset_size(fds) > 0) {
1470 k = fdset_cloexec(fds, false);
1471 if (k < 0) {
1472 log_error("Failed to unset O_CLOEXEC for file descriptors.");
1473 goto child_fail;
1474 }
1475
1476 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
d1826146 1477 (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
d87be9b0
LP
1478 log_oom();
1479 goto child_fail;
1480 }
1481 }
1482
1483 setup_hostname();
1484
f4889f65
LP
1485 if (!strv_isempty(arg_setenv)) {
1486 char **n;
1487
1488 n = strv_env_merge(2, envp, arg_setenv);
1489 if (!n) {
1490 log_oom();
1491 goto child_fail;
1492 }
1493
1494 env_use = n;
1495 } else
1496 env_use = (char**) envp;
1497
d87be9b0
LP
1498 if (arg_boot) {
1499 char **a;
1500 size_t l;
88213476 1501
d87be9b0 1502 /* Automatically search for the init system */
0f0dbc46 1503
d87be9b0
LP
1504 l = 1 + argc - optind;
1505 a = newa(char*, l + 1);
1506 memcpy(a + 1, argv + optind, l * sizeof(char*));
0f0dbc46 1507
d87be9b0 1508 a[0] = (char*) "/usr/lib/systemd/systemd";
f4889f65 1509 execve(a[0], a, env_use);
0f0dbc46 1510
d87be9b0 1511 a[0] = (char*) "/lib/systemd/systemd";
f4889f65 1512 execve(a[0], a, env_use);
0f0dbc46 1513
d87be9b0 1514 a[0] = (char*) "/sbin/init";
f4889f65 1515 execve(a[0], a, env_use);
d87be9b0 1516 } else if (argc > optind)
f4889f65 1517 execvpe(argv[optind], argv + optind, env_use);
d87be9b0
LP
1518 else {
1519 chdir(home ? home : "/root");
f4889f65 1520 execle("/bin/bash", "-bash", NULL, env_use);
d87be9b0
LP
1521 }
1522
1523 log_error("execv() failed: %m");
0f0dbc46 1524
d87be9b0
LP
1525 child_fail:
1526 _exit(EXIT_FAILURE);
da5b3bad 1527 }
88213476 1528
842f3b0f
LP
1529 fdset_free(fds);
1530 fds = NULL;
1531
04d39279
LP
1532 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1533 if (k < 0) {
1534 r = EXIT_FAILURE;
1535 break;
1536 }
88213476 1537
04d39279
LP
1538 putc('\n', stdout);
1539
1540 /* Kill if it is not dead yet anyway */
1f0cd86b
LP
1541 terminate_machine(pid);
1542
1543 /* Redundant, but better safe than sorry */
04d39279 1544 kill(pid, SIGKILL);
a258bf26 1545
05947bef 1546 k = wait_for_terminate(pid, &status);
04d39279
LP
1547 pid = 0;
1548
05947bef 1549 if (k < 0) {
d87be9b0
LP
1550 r = EXIT_FAILURE;
1551 break;
1552 }
a258bf26 1553
d87be9b0 1554 if (status.si_code == CLD_EXITED) {
a5f5f8a0 1555 r = status.si_status;
d87be9b0 1556 if (status.si_status != 0) {
04d39279 1557 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
d87be9b0
LP
1558 break;
1559 }
1560
04d39279 1561 log_debug("Container %s exited successfully.", arg_machine);
d87be9b0
LP
1562 break;
1563 } else if (status.si_code == CLD_KILLED &&
1564 status.si_status == SIGINT) {
04d39279 1565 log_info("Container %s has been shut down.", arg_machine);
d87be9b0
LP
1566 r = 0;
1567 break;
1568 } else if (status.si_code == CLD_KILLED &&
1569 status.si_status == SIGHUP) {
04d39279 1570 log_info("Container %s is being rebooted.", arg_machine);
d87be9b0
LP
1571 continue;
1572 } else if (status.si_code == CLD_KILLED ||
1573 status.si_code == CLD_DUMPED) {
88213476 1574
04d39279 1575 log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
d87be9b0
LP
1576 r = EXIT_FAILURE;
1577 break;
1578 } else {
04d39279 1579 log_error("Container %s failed due to unknown reason.", arg_machine);
d87be9b0
LP
1580 r = EXIT_FAILURE;
1581 break;
1582 }
1583 }
88213476
LP
1584
1585finish:
9444b1f2
LP
1586 if (pid > 0)
1587 kill(pid, SIGKILL);
88213476 1588
04d391da 1589 free(arg_directory);
7027ff61 1590 free(arg_machine);
f4889f65 1591 free(arg_setenv);
88213476
LP
1592
1593 return r;
1594}