]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn.c
sd-rtnl: link flags - don't allow change = 0
[thirdparty/systemd.git] / src / nspawn / nspawn.c
CommitLineData
88213476
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
5430f7f2
LP
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
88213476
LP
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
5430f7f2 16 Lesser General Public License for more details.
88213476 17
5430f7f2 18 You should have received a copy of the GNU Lesser General Public License
88213476
LP
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <signal.h>
23#include <sched.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/syscall.h>
27#include <sys/mount.h>
28#include <sys/wait.h>
29#include <stdlib.h>
30#include <string.h>
31#include <stdio.h>
32#include <errno.h>
33#include <sys/prctl.h>
34#include <sys/capability.h>
35#include <getopt.h>
a258bf26
LP
36#include <termios.h>
37#include <sys/signalfd.h>
687d0825 38#include <grp.h>
5ed27dbd 39#include <linux/fs.h>
9537eab0
LP
40#include <sys/un.h>
41#include <sys/socket.h>
aea38d80 42#include <linux/netlink.h>
aa28aefe 43#include <linux/rtnetlink.h>
354bfd2b 44#include <sys/eventfd.h>
aa28aefe 45#include <net/if.h>
69c79d3c 46#include <linux/veth.h>
aa28aefe 47
5d63309c 48#ifdef HAVE_SELINUX
a8828ed9
DW
49#include <selinux/selinux.h>
50#endif
88213476 51
24fb1112
LP
52#ifdef HAVE_SECCOMP
53#include <seccomp.h>
54#endif
55
1f0cd86b
LP
56#include "sd-daemon.h"
57#include "sd-bus.h"
58#include "sd-id128.h"
aa28aefe 59#include "sd-rtnl.h"
88213476
LP
60#include "log.h"
61#include "util.h"
49e942b2 62#include "mkdir.h"
6b2d0e85 63#include "macro.h"
d7832d2c 64#include "audit.h"
94d82985 65#include "missing.h"
04d391da 66#include "cgroup-util.h"
a258bf26 67#include "strv.h"
9eb977db 68#include "path-util.h"
a41fe3a2 69#include "loopback-setup.h"
4fc9982c 70#include "dev-setup.h"
842f3b0f 71#include "fdset.h"
acbeb427 72#include "build.h"
a5c32cff 73#include "fileio.h"
40ca29a1 74#include "bus-util.h"
1f0cd86b 75#include "bus-error.h"
4ba93280 76#include "ptyfwd.h"
9bd37b40 77#include "bus-kernel.h"
f4889f65 78#include "env-util.h"
7f112f50 79#include "def.h"
aa28aefe 80#include "rtnl-util.h"
7e227024 81#include "udev-util.h"
f2d88580 82
57fb9fb5
LP
/* Journal linking modes selectable with --link-journal= (and -j). */
typedef enum LinkJournal {
        LINK_NO    = 0,   /* "no" */
        LINK_AUTO  = 1,   /* "auto"; also the initial value of arg_link_journal */
        LINK_HOST  = 2,   /* "host" */
        LINK_GUEST = 3    /* "guest" */
} LinkJournal;
88213476
LP
89
90static char *arg_directory = NULL;
687d0825 91static char *arg_user = NULL;
9444b1f2 92static sd_id128_t arg_uuid = {};
7027ff61 93static char *arg_machine = NULL;
82adf6af
LP
94static char *arg_selinux_context = NULL;
95static char *arg_selinux_apifs_context = NULL;
9444b1f2 96static const char *arg_slice = NULL;
ff01d048 97static bool arg_private_network = false;
bc2f673e 98static bool arg_read_only = false;
0f0dbc46 99static bool arg_boot = false;
57fb9fb5 100static LinkJournal arg_link_journal = LINK_AUTO;
5076f0cc
LP
101static uint64_t arg_retain =
102 (1ULL << CAP_CHOWN) |
103 (1ULL << CAP_DAC_OVERRIDE) |
104 (1ULL << CAP_DAC_READ_SEARCH) |
105 (1ULL << CAP_FOWNER) |
106 (1ULL << CAP_FSETID) |
107 (1ULL << CAP_IPC_OWNER) |
108 (1ULL << CAP_KILL) |
109 (1ULL << CAP_LEASE) |
110 (1ULL << CAP_LINUX_IMMUTABLE) |
111 (1ULL << CAP_NET_BIND_SERVICE) |
112 (1ULL << CAP_NET_BROADCAST) |
113 (1ULL << CAP_NET_RAW) |
114 (1ULL << CAP_SETGID) |
115 (1ULL << CAP_SETFCAP) |
116 (1ULL << CAP_SETPCAP) |
117 (1ULL << CAP_SETUID) |
118 (1ULL << CAP_SYS_ADMIN) |
119 (1ULL << CAP_SYS_CHROOT) |
120 (1ULL << CAP_SYS_NICE) |
121 (1ULL << CAP_SYS_PTRACE) |
122 (1ULL << CAP_SYS_TTY_CONFIG) |
d87be9b0 123 (1ULL << CAP_SYS_RESOURCE) |
88d04e31
LP
124 (1ULL << CAP_SYS_BOOT) |
125 (1ULL << CAP_AUDIT_WRITE) |
7f112f50
LP
126 (1ULL << CAP_AUDIT_CONTROL) |
127 (1ULL << CAP_MKNOD);
17fe0523
LP
128static char **arg_bind = NULL;
129static char **arg_bind_ro = NULL;
f4889f65 130static char **arg_setenv = NULL;
284c0b91 131static bool arg_quiet = false;
8a96d94e 132static bool arg_share_system = false;
eb91eb18 133static bool arg_register = true;
89f7c846 134static bool arg_keep_unit = false;
aa28aefe 135static char **arg_network_interfaces = NULL;
69c79d3c 136static bool arg_network_veth = false;
88213476
LP
137
/* Prints the usage synopsis and the option summary to stdout, prefixed with
 * the program's short invocation name. Always returns 0. */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               " -h --help Show this help\n"
               " --version Print version string\n"
               " -q --quiet Do not show status information\n"
               " -D --directory=NAME Root directory for the container\n"
               " -b --boot Boot up full system (i.e. invoke init)\n"
               " -u --user=USER Run the command under specified user or uid\n"
               " -M --machine=NAME Set the machine name for the container\n"
               " --uuid=UUID Set a specific machine UUID for the container\n"
               " -S --slice=SLICE Place the container in the specified slice\n"
               " --private-network Disable network in container\n"
               " --network-interface=INTERFACE\n"
               " Assign an existing network interface to the\n"
               " container\n"
               " --network-veth Add a virtual ethernet connection between host\n"
               " and container\n"
               " -Z --selinux-context=SECLABEL\n"
               " Set the SELinux security context to be used by\n"
               " processes in the container\n"
               " -L --selinux-apifs-context=SECLABEL\n"
               " Set the SELinux security context to be used by\n"
               " API/tmpfs file systems in the container\n"
               " --capability=CAP In addition to the default, retain specified\n"
               " capability\n"
               " --drop-capability=CAP Drop the specified capability from the default set\n"
               " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
               /* The option parser maps -j to LINK_GUEST, so describe it as such. */
               " -j Equivalent to --link-journal=guest\n"
               " --read-only Mount the root directory read-only\n"
               " --bind=PATH[:PATH] Bind mount a file or directory from the host into\n"
               " the container\n"
               " --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
               " --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
               " --share-system Share system namespaces with host\n"
               " --register=BOOLEAN Register container as machine\n"
               " --keep-unit Do not register a scope for the machine, reuse\n"
               " the service unit nspawn is running in\n",
               program_invocation_short_name);

        return 0;
}
181
182static int parse_argv(int argc, char *argv[]) {
183
a41fe3a2 184 enum {
acbeb427
ZJS
185 ARG_VERSION = 0x100,
186 ARG_PRIVATE_NETWORK,
bc2f673e 187 ARG_UUID,
5076f0cc 188 ARG_READ_ONLY,
57fb9fb5 189 ARG_CAPABILITY,
420c7379 190 ARG_DROP_CAPABILITY,
17fe0523
LP
191 ARG_LINK_JOURNAL,
192 ARG_BIND,
f4889f65
LP
193 ARG_BIND_RO,
194 ARG_SETENV,
eb91eb18 195 ARG_SHARE_SYSTEM,
89f7c846 196 ARG_REGISTER,
aa28aefe 197 ARG_KEEP_UNIT,
69c79d3c
LP
198 ARG_NETWORK_INTERFACE,
199 ARG_NETWORK_VETH,
a41fe3a2
LP
200 };
201
88213476 202 static const struct option options[] = {
aa28aefe
LP
203 { "help", no_argument, NULL, 'h' },
204 { "version", no_argument, NULL, ARG_VERSION },
205 { "directory", required_argument, NULL, 'D' },
206 { "user", required_argument, NULL, 'u' },
207 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
208 { "boot", no_argument, NULL, 'b' },
209 { "uuid", required_argument, NULL, ARG_UUID },
210 { "read-only", no_argument, NULL, ARG_READ_ONLY },
211 { "capability", required_argument, NULL, ARG_CAPABILITY },
212 { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
213 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
214 { "bind", required_argument, NULL, ARG_BIND },
215 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
216 { "machine", required_argument, NULL, 'M' },
217 { "slice", required_argument, NULL, 'S' },
218 { "setenv", required_argument, NULL, ARG_SETENV },
219 { "selinux-context", required_argument, NULL, 'Z' },
220 { "selinux-apifs-context", required_argument, NULL, 'L' },
221 { "quiet", no_argument, NULL, 'q' },
222 { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM },
223 { "register", required_argument, NULL, ARG_REGISTER },
224 { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
225 { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
69c79d3c 226 { "network-veth", no_argument, NULL, ARG_NETWORK_VETH },
eb9da376 227 {}
88213476
LP
228 };
229
9444b1f2 230 int c, r;
a42c8b54 231 uint64_t plus = 0, minus = 0;
88213476
LP
232
233 assert(argc >= 0);
234 assert(argv);
235
284c0b91 236 while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
88213476
LP
237
238 switch (c) {
239
240 case 'h':
eb9da376 241 return help();
88213476 242
acbeb427
ZJS
243 case ARG_VERSION:
244 puts(PACKAGE_STRING);
245 puts(SYSTEMD_FEATURES);
246 return 0;
247
88213476
LP
248 case 'D':
249 free(arg_directory);
3a74cea5
LP
250 arg_directory = canonicalize_file_name(optarg);
251 if (!arg_directory) {
898d5c91 252 log_error("Invalid root directory: %m");
88213476
LP
253 return -ENOMEM;
254 }
255
256 break;
257
687d0825
MV
258 case 'u':
259 free(arg_user);
7027ff61
LP
260 arg_user = strdup(optarg);
261 if (!arg_user)
262 return log_oom();
687d0825
MV
263
264 break;
265
69c79d3c
LP
266 case ARG_NETWORK_VETH:
267 arg_network_veth = true;
268 arg_private_network = true;
269 break;
270
aa28aefe
LP
271 case ARG_NETWORK_INTERFACE:
272 if (strv_push(&arg_network_interfaces, optarg) < 0)
273 return log_oom();
274
275 /* fall through */
276
ff01d048
LP
277 case ARG_PRIVATE_NETWORK:
278 arg_private_network = true;
a41fe3a2
LP
279 break;
280
0f0dbc46
LP
281 case 'b':
282 arg_boot = true;
283 break;
284
144f0fc0 285 case ARG_UUID:
9444b1f2
LP
286 r = sd_id128_from_string(optarg, &arg_uuid);
287 if (r < 0) {
aa96c6cb 288 log_error("Invalid UUID: %s", optarg);
9444b1f2 289 return r;
aa96c6cb 290 }
9444b1f2 291 break;
aa96c6cb 292
9444b1f2
LP
293 case 'S':
294 arg_slice = strdup(optarg);
b3451bed
DH
295 if (!arg_slice)
296 return log_oom();
297
144f0fc0
LP
298 break;
299
7027ff61 300 case 'M':
eb91eb18
LP
301 if (isempty(optarg)) {
302 free(arg_machine);
303 arg_machine = NULL;
304 } else {
7027ff61 305
eb91eb18
LP
306 if (!hostname_is_valid(optarg)) {
307 log_error("Invalid machine name: %s", optarg);
308 return -EINVAL;
309 }
7027ff61 310
eb91eb18
LP
311 free(arg_machine);
312 arg_machine = strdup(optarg);
313 if (!arg_machine)
314 return log_oom();
315
316 break;
317 }
7027ff61 318
82adf6af
LP
319 case 'Z':
320 arg_selinux_context = optarg;
a8828ed9
DW
321 break;
322
82adf6af
LP
323 case 'L':
324 arg_selinux_apifs_context = optarg;
a8828ed9
DW
325 break;
326
bc2f673e
LP
327 case ARG_READ_ONLY:
328 arg_read_only = true;
329 break;
330
420c7379
LP
331 case ARG_CAPABILITY:
332 case ARG_DROP_CAPABILITY: {
5076f0cc
LP
333 char *state, *word;
334 size_t length;
335
336 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
39ed67d1 337 _cleanup_free_ char *t;
5076f0cc 338 cap_value_t cap;
5076f0cc
LP
339
340 t = strndup(word, length);
0d0f0c50
SL
341 if (!t)
342 return log_oom();
5076f0cc 343
39ed67d1
LP
344 if (streq(t, "all")) {
345 if (c == ARG_CAPABILITY)
a42c8b54 346 plus = (uint64_t) -1;
39ed67d1 347 else
a42c8b54 348 minus = (uint64_t) -1;
39ed67d1
LP
349 } else {
350 if (cap_from_name(t, &cap) < 0) {
351 log_error("Failed to parse capability %s.", t);
352 return -EINVAL;
353 }
354
355 if (c == ARG_CAPABILITY)
a42c8b54 356 plus |= 1ULL << (uint64_t) cap;
39ed67d1 357 else
a42c8b54 358 minus |= 1ULL << (uint64_t) cap;
5076f0cc 359 }
5076f0cc
LP
360 }
361
362 break;
363 }
364
57fb9fb5
LP
365 case 'j':
366 arg_link_journal = LINK_GUEST;
367 break;
368
369 case ARG_LINK_JOURNAL:
370 if (streq(optarg, "auto"))
371 arg_link_journal = LINK_AUTO;
372 else if (streq(optarg, "no"))
373 arg_link_journal = LINK_NO;
374 else if (streq(optarg, "guest"))
375 arg_link_journal = LINK_GUEST;
376 else if (streq(optarg, "host"))
377 arg_link_journal = LINK_HOST;
378 else {
379 log_error("Failed to parse link journal mode %s", optarg);
380 return -EINVAL;
381 }
382
383 break;
384
17fe0523
LP
385 case ARG_BIND:
386 case ARG_BIND_RO: {
387 _cleanup_free_ char *a = NULL, *b = NULL;
388 char *e;
389 char ***x;
17fe0523
LP
390
391 x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
392
393 e = strchr(optarg, ':');
394 if (e) {
395 a = strndup(optarg, e - optarg);
396 b = strdup(e + 1);
397 } else {
398 a = strdup(optarg);
399 b = strdup(optarg);
400 }
401
402 if (!a || !b)
403 return log_oom();
404
405 if (!path_is_absolute(a) || !path_is_absolute(b)) {
406 log_error("Invalid bind mount specification: %s", optarg);
407 return -EINVAL;
408 }
409
410 r = strv_extend(x, a);
411 if (r < 0)
b3451bed 412 return log_oom();
17fe0523
LP
413
414 r = strv_extend(x, b);
415 if (r < 0)
b3451bed 416 return log_oom();
17fe0523
LP
417
418 break;
419 }
420
f4889f65
LP
421 case ARG_SETENV: {
422 char **n;
423
424 if (!env_assignment_is_valid(optarg)) {
425 log_error("Environment variable assignment '%s' is not valid.", optarg);
426 return -EINVAL;
427 }
428
429 n = strv_env_set(arg_setenv, optarg);
430 if (!n)
431 return log_oom();
432
433 strv_free(arg_setenv);
434 arg_setenv = n;
435 break;
436 }
437
284c0b91
LP
438 case 'q':
439 arg_quiet = true;
440 break;
441
8a96d94e
LP
442 case ARG_SHARE_SYSTEM:
443 arg_share_system = true;
444 break;
445
eb91eb18
LP
446 case ARG_REGISTER:
447 r = parse_boolean(optarg);
448 if (r < 0) {
449 log_error("Failed to parse --register= argument: %s", optarg);
450 return r;
451 }
452
453 arg_register = r;
454 break;
455
89f7c846
LP
456 case ARG_KEEP_UNIT:
457 arg_keep_unit = true;
458 break;
459
88213476
LP
460 case '?':
461 return -EINVAL;
462
463 default:
eb9da376 464 assert_not_reached("Unhandled option");
88213476
LP
465 }
466 }
467
eb91eb18
LP
468 if (arg_share_system)
469 arg_register = false;
470
471 if (arg_boot && arg_share_system) {
472 log_error("--boot and --share-system may not be combined.");
473 return -EINVAL;
474 }
475
89f7c846
LP
476 if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
477 log_error("--keep-unit may not be used when invoked from a user session.");
478 return -EINVAL;
479 }
480
a42c8b54
LP
481 arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
482
88213476
LP
483 return 1;
484}
485
486static int mount_all(const char *dest) {
487
488 typedef struct MountPoint {
489 const char *what;
490 const char *where;
491 const char *type;
492 const char *options;
493 unsigned long flags;
3bd66c05 494 bool fatal;
88213476
LP
495 } MountPoint;
496
497 static const MountPoint mount_table[] = {
4b7a6af4 498 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
b4c59701
LP
499 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
500 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
e65aec12 501 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
635f7d8c 502 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
f2d88580 503 { "devpts", "/dev/pts", "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
ede89845 504 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
635f7d8c 505 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
9b634ea5 506#ifdef HAVE_SELINUX
b4c59701
LP
507 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
508 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
9b634ea5 509#endif
88213476
LP
510 };
511
512 unsigned k;
513 int r = 0;
514
515 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
7fd1b19b 516 _cleanup_free_ char *where = NULL;
d002827b 517#ifdef HAVE_SELINUX
a8828ed9 518 _cleanup_free_ char *options = NULL;
d002827b
LP
519#endif
520 const char *o;
88213476
LP
521 int t;
522
17fe0523
LP
523 where = strjoin(dest, "/", mount_table[k].where, NULL);
524 if (!where)
525 return log_oom();
88213476 526
e65aec12 527 t = path_is_mount_point(where, true);
68fb0892 528 if (t < 0) {
88213476 529 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
88213476
LP
530
531 if (r == 0)
532 r = t;
533
534 continue;
535 }
536
9c1c7f71
LP
537 /* Skip this entry if it is not a remount. */
538 if (mount_table[k].what && t > 0)
014a9c77
LP
539 continue;
540
17fe0523 541 mkdir_p(where, 0755);
88213476 542
a8828ed9 543#ifdef HAVE_SELINUX
82adf6af
LP
544 if (arg_selinux_apifs_context &&
545 (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
546 options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
d002827b
LP
547 if (!options)
548 return log_oom();
549
550 o = options;
551 } else
a8828ed9 552#endif
d002827b 553 o = mount_table[k].options;
a8828ed9 554
a8828ed9 555
88213476
LP
556 if (mount(mount_table[k].what,
557 where,
558 mount_table[k].type,
559 mount_table[k].flags,
d002827b 560 o) < 0 &&
3bd66c05 561 mount_table[k].fatal) {
88213476
LP
562
563 log_error("mount(%s) failed: %m", where);
564
565 if (r == 0)
566 r = -errno;
567 }
88213476
LP
568 }
569
e58a1277
LP
570 return r;
571}
f8440af5 572
17fe0523
LP
573static int mount_binds(const char *dest, char **l, unsigned long flags) {
574 char **x, **y;
575
576 STRV_FOREACH_PAIR(x, y, l) {
2ed4e5e0 577 char *where;
d2421337 578 struct stat source_st, dest_st;
2ed4e5e0 579 int r;
d2421337
DR
580
581 if (stat(*x, &source_st) < 0) {
582 log_error("failed to stat %s: %m", *x);
583 return -errno;
584 }
17fe0523 585
2ed4e5e0
SL
586 where = strappenda(dest, *y);
587 r = stat(where, &dest_st);
588 if (r == 0) {
d2421337 589 if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
cecf24e7 590 log_error("The file types of %s and %s do not match. Refusing bind mount",
d2421337
DR
591 *x, where);
592 return -EINVAL;
593 }
2ed4e5e0
SL
594 } else if (errno == ENOENT) {
595 r = mkdir_parents_label(where, 0755);
596 if (r < 0) {
597 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
598 return r;
d2421337 599 }
2ed4e5e0
SL
600 } else {
601 log_error("Failed to bind mount %s: %s", *x, strerror(errno));
602 return -errno;
603 }
604 /* Create the mount point, but be conservative -- refuse to create block
605 * and char devices. */
606 if (S_ISDIR(source_st.st_mode))
607 mkdir_label(where, 0755);
608 else if (S_ISFIFO(source_st.st_mode))
609 mkfifo(where, 0644);
610 else if (S_ISSOCK(source_st.st_mode))
611 mknod(where, 0644 | S_IFSOCK, 0);
612 else if (S_ISREG(source_st.st_mode))
613 touch(where);
614 else {
615 log_error("Refusing to create mountpoint for file: %s", *x);
616 return -ENOTSUP;
d2421337 617 }
17fe0523
LP
618
619 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
620 log_error("mount(%s) failed: %m", where);
621 return -errno;
622 }
623
624 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
625 log_error("mount(%s) failed: %m", where);
626 return -errno;
627 }
628 }
629
630 return 0;
631}
632
e58a1277 633static int setup_timezone(const char *dest) {
d4036145
LP
634 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
635 char *z, *y;
636 int r;
f8440af5 637
e58a1277
LP
638 assert(dest);
639
640 /* Fix the timezone, if possible */
d4036145
LP
641 r = readlink_malloc("/etc/localtime", &p);
642 if (r < 0) {
643 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
644 return 0;
645 }
646
647 z = path_startswith(p, "../usr/share/zoneinfo/");
648 if (!z)
649 z = path_startswith(p, "/usr/share/zoneinfo/");
650 if (!z) {
651 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
652 return 0;
653 }
654
04bc4a3f
LP
655 where = strappend(dest, "/etc/localtime");
656 if (!where)
0d0f0c50 657 return log_oom();
715ac17a 658
d4036145
LP
659 r = readlink_malloc(where, &q);
660 if (r >= 0) {
661 y = path_startswith(q, "../usr/share/zoneinfo/");
662 if (!y)
663 y = path_startswith(q, "/usr/share/zoneinfo/");
4d1c38b8 664
4d1c38b8 665
d4036145
LP
666 /* Already pointing to the right place? Then do nothing .. */
667 if (y && streq(y, z))
668 return 0;
669 }
670
671 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
672 if (!check)
0d0f0c50 673 return log_oom();
4d1c38b8 674
d4036145
LP
675 if (access(check, F_OK) < 0) {
676 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
677 return 0;
678 }
68fb0892 679
d4036145
LP
680 what = strappend("../usr/share/zoneinfo/", z);
681 if (!what)
682 return log_oom();
683
684 unlink(where);
685 if (symlink(what, where) < 0) {
686 log_error("Failed to correct timezone of container: %m");
687 return 0;
688 }
e58a1277
LP
689
690 return 0;
88213476
LP
691}
692
2547bb41 693static int setup_resolv_conf(const char *dest) {
f333fbb1 694 char _cleanup_free_ *where = NULL;
2547bb41
LP
695
696 assert(dest);
697
698 if (arg_private_network)
699 return 0;
700
701 /* Fix resolv.conf, if possible */
04bc4a3f
LP
702 where = strappend(dest, "/etc/resolv.conf");
703 if (!where)
0d0f0c50 704 return log_oom();
2547bb41 705
77e63faf
LP
706 /* We don't really care for the results of this really. If it
707 * fails, it fails, but meh... */
51045322 708 copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
2547bb41
LP
709
710 return 0;
711}
712
04bc4a3f 713static int setup_boot_id(const char *dest) {
7fd1b19b 714 _cleanup_free_ char *from = NULL, *to = NULL;
04bc4a3f
LP
715 sd_id128_t rnd;
716 char as_uuid[37];
717 int r;
718
719 assert(dest);
720
eb91eb18
LP
721 if (arg_share_system)
722 return 0;
723
04bc4a3f
LP
724 /* Generate a new randomized boot ID, so that each boot-up of
725 * the container gets a new one */
726
727 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
04bc4a3f 728 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
ed8b7a3e
ZJS
729 if (!from || !to)
730 return log_oom();
04bc4a3f
LP
731
732 r = sd_id128_randomize(&rnd);
733 if (r < 0) {
734 log_error("Failed to generate random boot id: %s", strerror(-r));
ed8b7a3e 735 return r;
04bc4a3f
LP
736 }
737
738 snprintf(as_uuid, sizeof(as_uuid),
739 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
740 SD_ID128_FORMAT_VAL(rnd));
741 char_array_0(as_uuid);
742
574d5f2d 743 r = write_string_file(from, as_uuid);
04bc4a3f
LP
744 if (r < 0) {
745 log_error("Failed to write boot id: %s", strerror(-r));
ed8b7a3e 746 return r;
04bc4a3f
LP
747 }
748
749 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
750 log_error("Failed to bind mount boot id: %m");
751 r = -errno;
10d18763
ZJS
752 } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
753 log_warning("Failed to make boot id read-only: %m");
04bc4a3f
LP
754
755 unlink(from);
04bc4a3f
LP
756 return r;
757}
758
e58a1277 759static int copy_devnodes(const char *dest) {
88213476
LP
760
761 static const char devnodes[] =
762 "null\0"
763 "zero\0"
764 "full\0"
765 "random\0"
766 "urandom\0"
f2d88580 767 "tty\0";
88213476
LP
768
769 const char *d;
e58a1277 770 int r = 0;
7fd1b19b 771 _cleanup_umask_ mode_t u;
a258bf26
LP
772
773 assert(dest);
124640f1
LP
774
775 u = umask(0000);
88213476
LP
776
777 NULSTR_FOREACH(d, devnodes) {
7fd1b19b 778 _cleanup_free_ char *from = NULL, *to = NULL;
7f112f50 779 struct stat st;
88213476 780
7f112f50
LP
781 from = strappend("/dev/", d);
782 to = strjoin(dest, "/dev/", d, NULL);
783 if (!from || !to)
784 return log_oom();
88213476
LP
785
786 if (stat(from, &st) < 0) {
787
788 if (errno != ENOENT) {
789 log_error("Failed to stat %s: %m", from);
7f112f50 790 return -errno;
88213476
LP
791 }
792
a258bf26 793 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
88213476 794
ed8b7a3e 795 log_error("%s is not a char or block device, cannot copy", from);
7f112f50 796 return -EIO;
a258bf26
LP
797
798 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
799
800 log_error("mknod(%s) failed: %m", dest);
7f112f50 801 return -errno;
88213476 802 }
88213476
LP
803 }
804
e58a1277
LP
805 return r;
806}
88213476 807
f2d88580
LP
808static int setup_ptmx(const char *dest) {
809 _cleanup_free_ char *p = NULL;
810
811 p = strappend(dest, "/dev/ptmx");
812 if (!p)
813 return log_oom();
814
815 if (symlink("pts/ptmx", p) < 0) {
816 log_error("Failed to create /dev/ptmx symlink: %m");
817 return -errno;
818 }
819
820 return 0;
821}
822
e58a1277
LP
823static int setup_dev_console(const char *dest, const char *console) {
824 struct stat st;
7fd1b19b 825 _cleanup_free_ char *to = NULL;
e58a1277 826 int r;
7fd1b19b 827 _cleanup_umask_ mode_t u;
e58a1277
LP
828
829 assert(dest);
830 assert(console);
831
832 u = umask(0000);
833
834 if (stat(console, &st) < 0) {
835 log_error("Failed to stat %s: %m", console);
25ea79fe 836 return -errno;
88213476 837
a258bf26 838 } else if (!S_ISCHR(st.st_mode)) {
25ea79fe
ZJS
839 log_error("/dev/console is not a char device");
840 return -EIO;
e58a1277 841 }
88213476 842
e58a1277
LP
843 r = chmod_and_chown(console, 0600, 0, 0);
844 if (r < 0) {
845 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
25ea79fe 846 return r;
a258bf26 847 }
88213476 848
25ea79fe
ZJS
849 if (asprintf(&to, "%s/dev/console", dest) < 0)
850 return log_oom();
88213476 851
a258bf26
LP
852 /* We need to bind mount the right tty to /dev/console since
853 * ptys can only exist on pts file systems. To have something
854 * to bind mount things on we create a device node first, that
855 * has the right major/minor (note that the major minor
856 * doesn't actually matter here, since we mount it over
857 * anyway). */
858
e58a1277
LP
859 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
860 log_error("mknod() for /dev/console failed: %m");
25ea79fe 861 return -errno;
e58a1277 862 }
a258bf26
LP
863
864 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
e58a1277 865 log_error("Bind mount for /dev/console failed: %m");
25ea79fe 866 return -errno;
a258bf26
LP
867 }
868
25ea79fe 869 return 0;
e58a1277
LP
870}
871
872static int setup_kmsg(const char *dest, int kmsg_socket) {
7fd1b19b 873 _cleanup_free_ char *from = NULL, *to = NULL;
e58a1277 874 int r, fd, k;
7fd1b19b 875 _cleanup_umask_ mode_t u;
e58a1277
LP
876 union {
877 struct cmsghdr cmsghdr;
878 uint8_t buf[CMSG_SPACE(sizeof(int))];
b92bea5d
ZJS
879 } control = {};
880 struct msghdr mh = {
881 .msg_control = &control,
882 .msg_controllen = sizeof(control),
883 };
e58a1277
LP
884 struct cmsghdr *cmsg;
885
886 assert(dest);
887 assert(kmsg_socket >= 0);
a258bf26 888
e58a1277 889 u = umask(0000);
a258bf26 890
f1e5dfe2
LP
891 /* We create the kmsg FIFO as /dev/kmsg, but immediately
892 * delete it after bind mounting it to /proc/kmsg. While FIFOs
893 * on the reading side behave very similar to /proc/kmsg,
894 * their writing side behaves differently from /dev/kmsg in
895 * that writing blocks when nothing is reading. In order to
896 * avoid any problems with containers deadlocking due to this
897 * we simply make /dev/kmsg unavailable to the container. */
25ea79fe
ZJS
898 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
899 asprintf(&to, "%s/proc/kmsg", dest) < 0)
900 return log_oom();
e58a1277
LP
901
902 if (mkfifo(from, 0600) < 0) {
903 log_error("mkfifo() for /dev/kmsg failed: %m");
25ea79fe 904 return -errno;
e58a1277
LP
905 }
906
907 r = chmod_and_chown(from, 0600, 0, 0);
908 if (r < 0) {
909 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
25ea79fe 910 return r;
e58a1277
LP
911 }
912
913 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
914 log_error("Bind mount for /proc/kmsg failed: %m");
25ea79fe 915 return -errno;
e58a1277
LP
916 }
917
918 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
919 if (fd < 0) {
920 log_error("Failed to open fifo: %m");
25ea79fe 921 return -errno;
e58a1277
LP
922 }
923
e58a1277
LP
924 cmsg = CMSG_FIRSTHDR(&mh);
925 cmsg->cmsg_level = SOL_SOCKET;
926 cmsg->cmsg_type = SCM_RIGHTS;
927 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
928 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
929
930 mh.msg_controllen = cmsg->cmsg_len;
931
932 /* Store away the fd in the socket, so that it stays open as
933 * long as we run the child */
934 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
935 close_nointr_nofail(fd);
936
937 if (k < 0) {
938 log_error("Failed to send FIFO fd: %m");
25ea79fe 939 return -errno;
a258bf26
LP
940 }
941
f1e5dfe2
LP
942 /* And now make the FIFO unavailable as /dev/kmsg... */
943 unlink(from);
25ea79fe 944 return 0;
88213476
LP
945}
946
3a74cea5 947static int setup_hostname(void) {
3a74cea5 948
eb91eb18
LP
949 if (arg_share_system)
950 return 0;
951
7027ff61
LP
952 if (sethostname(arg_machine, strlen(arg_machine)) < 0)
953 return -errno;
3a74cea5 954
7027ff61 955 return 0;
3a74cea5
LP
956}
957
57fb9fb5 958static int setup_journal(const char *directory) {
4d680aee 959 sd_id128_t machine_id, this_id;
7fd1b19b 960 _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
27407a01 961 char *id;
57fb9fb5
LP
962 int r;
963
57fb9fb5 964 p = strappend(directory, "/etc/machine-id");
27407a01
ZJS
965 if (!p)
966 return log_oom();
57fb9fb5
LP
967
968 r = read_one_line_file(p, &b);
27407a01
ZJS
969 if (r == -ENOENT && arg_link_journal == LINK_AUTO)
970 return 0;
971 else if (r < 0) {
972 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
57fb9fb5
LP
973 return r;
974 }
975
27407a01
ZJS
976 id = strstrip(b);
977 if (isempty(id) && arg_link_journal == LINK_AUTO)
978 return 0;
57fb9fb5 979
27407a01
ZJS
980 /* Verify validity */
981 r = sd_id128_from_string(id, &machine_id);
57fb9fb5 982 if (r < 0) {
27407a01
ZJS
983 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
984 return r;
57fb9fb5
LP
985 }
986
4d680aee
ZJS
987 r = sd_id128_get_machine(&this_id);
988 if (r < 0) {
989 log_error("Failed to retrieve machine ID: %s", strerror(-r));
990 return r;
991 }
992
993 if (sd_id128_equal(machine_id, this_id)) {
994 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
995 "Host and machine ids are equal (%s): refusing to link journals", id);
996 if (arg_link_journal == LINK_AUTO)
997 return 0;
998 return
999 -EEXIST;
1000 }
1001
1002 if (arg_link_journal == LINK_NO)
1003 return 0;
1004
57fb9fb5 1005 free(p);
27407a01
ZJS
1006 p = strappend("/var/log/journal/", id);
1007 q = strjoin(directory, "/var/log/journal/", id, NULL);
1008 if (!p || !q)
1009 return log_oom();
1010
1011 if (path_is_mount_point(p, false) > 0) {
1012 if (arg_link_journal != LINK_AUTO) {
1013 log_error("%s: already a mount point, refusing to use for journal", p);
1014 return -EEXIST;
1015 }
1016
1017 return 0;
57fb9fb5
LP
1018 }
1019
27407a01 1020 if (path_is_mount_point(q, false) > 0) {
57fb9fb5 1021 if (arg_link_journal != LINK_AUTO) {
27407a01
ZJS
1022 log_error("%s: already a mount point, refusing to use for journal", q);
1023 return -EEXIST;
57fb9fb5
LP
1024 }
1025
27407a01 1026 return 0;
57fb9fb5
LP
1027 }
1028
1029 r = readlink_and_make_absolute(p, &d);
1030 if (r >= 0) {
1031 if ((arg_link_journal == LINK_GUEST ||
1032 arg_link_journal == LINK_AUTO) &&
1033 path_equal(d, q)) {
1034
27407a01
ZJS
1035 r = mkdir_p(q, 0755);
1036 if (r < 0)
1037 log_warning("failed to create directory %s: %m", q);
1038 return 0;
57fb9fb5
LP
1039 }
1040
1041 if (unlink(p) < 0) {
1042 log_error("Failed to remove symlink %s: %m", p);
27407a01 1043 return -errno;
57fb9fb5
LP
1044 }
1045 } else if (r == -EINVAL) {
1046
1047 if (arg_link_journal == LINK_GUEST &&
1048 rmdir(p) < 0) {
1049
27407a01
ZJS
1050 if (errno == ENOTDIR) {
1051 log_error("%s already exists and is neither a symlink nor a directory", p);
1052 return r;
1053 } else {
57fb9fb5 1054 log_error("Failed to remove %s: %m", p);
27407a01 1055 return -errno;
57fb9fb5 1056 }
57fb9fb5
LP
1057 }
1058 } else if (r != -ENOENT) {
1059 log_error("readlink(%s) failed: %m", p);
27407a01 1060 return r;
57fb9fb5
LP
1061 }
1062
1063 if (arg_link_journal == LINK_GUEST) {
1064
1065 if (symlink(q, p) < 0) {
1066 log_error("Failed to symlink %s to %s: %m", q, p);
27407a01 1067 return -errno;
57fb9fb5
LP
1068 }
1069
27407a01
ZJS
1070 r = mkdir_p(q, 0755);
1071 if (r < 0)
1072 log_warning("failed to create directory %s: %m", q);
1073 return 0;
57fb9fb5
LP
1074 }
1075
1076 if (arg_link_journal == LINK_HOST) {
1077 r = mkdir_p(p, 0755);
1078 if (r < 0) {
1079 log_error("Failed to create %s: %m", p);
27407a01 1080 return r;
57fb9fb5
LP
1081 }
1082
27407a01
ZJS
1083 } else if (access(p, F_OK) < 0)
1084 return 0;
57fb9fb5
LP
1085
1086 if (dir_is_empty(q) == 0) {
1087 log_error("%s not empty.", q);
27407a01 1088 return -ENOTEMPTY;
57fb9fb5
LP
1089 }
1090
1091 r = mkdir_p(q, 0755);
1092 if (r < 0) {
1093 log_error("Failed to create %s: %m", q);
27407a01 1094 return r;
57fb9fb5
LP
1095 }
1096
1097 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1098 log_error("Failed to bind mount journal from host into guest: %m");
27407a01 1099 return -errno;
57fb9fb5
LP
1100 }
1101
27407a01 1102 return 0;
57fb9fb5
LP
1103}
1104
9bd37b40
LP
/* Bind mounts the kdbus domain directory 'path' onto <dest>/dev/kdbus.
 *
 * A NULL 'path' is treated as "nothing to do" and yields 0. Otherwise
 * returns 0 on success, or a negative errno if creating the mount point
 * or the bind mount itself fails. */
static int setup_kdbus(const char *dest, const char *path) {
        const char *mount_point;
        int k;

        if (path == NULL)
                return 0;

        /* strappenda() allocates on the stack, hence no free() below */
        mount_point = strappenda(dest, "/dev/kdbus");

        k = mkdir(mount_point, 0755);
        if (k < 0) {
                log_error("Failed to create kdbus path: %m");
                return -errno;
        }

        k = mount(path, mount_point, "bind", MS_BIND, NULL);
        if (k < 0) {
                log_error("Failed to mount kdbus domain path: %m");
                return -errno;
        }

        return 0;
}
1124
88213476 1125static int drop_capabilities(void) {
5076f0cc 1126 return capability_bounding_set_drop(~arg_retain, false);
88213476
LP
1127}
1128
354bfd2b 1129static int register_machine(pid_t pid) {
9444b1f2
LP
1130 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1131 _cleanup_bus_unref_ sd_bus *bus = NULL;
1132 int r;
1133
eb91eb18
LP
1134 if (!arg_register)
1135 return 0;
1136
1c03020c 1137 r = sd_bus_default_system(&bus);
9444b1f2
LP
1138 if (r < 0) {
1139 log_error("Failed to open system bus: %s", strerror(-r));
1140 return r;
1141 }
1142
89f7c846
LP
1143 if (arg_keep_unit) {
1144 r = sd_bus_call_method(
1145 bus,
1146 "org.freedesktop.machine1",
1147 "/org/freedesktop/machine1",
1148 "org.freedesktop.machine1.Manager",
1149 "RegisterMachine",
1150 &error,
1151 NULL,
1152 "sayssus",
1153 arg_machine,
1154 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1155 "nspawn",
1156 "container",
1157 (uint32_t) pid,
1158 strempty(arg_directory));
1159 } else {
1160 r = sd_bus_call_method(
1161 bus,
1162 "org.freedesktop.machine1",
1163 "/org/freedesktop/machine1",
1164 "org.freedesktop.machine1.Manager",
1165 "CreateMachine",
1166 &error,
1167 NULL,
1168 "sayssusa(sv)",
1169 arg_machine,
1170 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1171 "nspawn",
1172 "container",
1173 (uint32_t) pid,
1174 strempty(arg_directory),
1175 !isempty(arg_slice), "Slice", "s", arg_slice);
1176 }
1177
9444b1f2 1178 if (r < 0) {
1f0cd86b
LP
1179 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1180 return r;
1181 }
1182
1183 return 0;
1184}
1185
1186static int terminate_machine(pid_t pid) {
1187 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1188 _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1189 _cleanup_bus_unref_ sd_bus *bus = NULL;
1190 const char *path;
1191 int r;
1192
eb91eb18
LP
1193 if (!arg_register)
1194 return 0;
1195
76b54375 1196 r = sd_bus_default_system(&bus);
1f0cd86b
LP
1197 if (r < 0) {
1198 log_error("Failed to open system bus: %s", strerror(-r));
1199 return r;
1200 }
1201
1202 r = sd_bus_call_method(
1203 bus,
1204 "org.freedesktop.machine1",
1205 "/org/freedesktop/machine1",
1206 "org.freedesktop.machine1.Manager",
1207 "GetMachineByPID",
1208 &error,
1209 &reply,
1210 "u",
1211 (uint32_t) pid);
1212 if (r < 0) {
1213 /* Note that the machine might already have been
1214 * cleaned up automatically, hence don't consider it a
1215 * failure if we cannot get the machine object. */
1216 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1217 return 0;
1218 }
1219
1220 r = sd_bus_message_read(reply, "o", &path);
5b30bef8
LP
1221 if (r < 0)
1222 return bus_log_parse_error(r);
9444b1f2 1223
1f0cd86b
LP
1224 r = sd_bus_call_method(
1225 bus,
1226 "org.freedesktop.machine1",
1227 path,
1228 "org.freedesktop.machine1.Machine",
1229 "Terminate",
1230 &error,
1231 NULL,
1232 NULL);
1233 if (r < 0) {
1234 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1235 return 0;
1236 }
1237
9444b1f2
LP
1238 return 0;
1239}
1240
db999e0f
LP
1241static int reset_audit_loginuid(void) {
1242 _cleanup_free_ char *p = NULL;
1243 int r;
1244
1245 if (arg_share_system)
1246 return 0;
1247
1248 r = read_one_line_file("/proc/self/loginuid", &p);
1249 if (r == -EEXIST)
1250 return 0;
1251 if (r < 0) {
1252 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1253 return r;
1254 }
1255
1256 /* Already reset? */
1257 if (streq(p, "4294967295"))
1258 return 0;
1259
1260 r = write_string_file("/proc/self/loginuid", "4294967295");
1261 if (r < 0) {
1262 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1263 "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1264 "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1265 "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1266 "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
77b6e194 1267
db999e0f 1268 sleep(5);
77b6e194 1269 }
db999e0f
LP
1270
1271 return 0;
77b6e194
LP
1272}
1273
69c79d3c
LP
1274static int setup_veth(int netns_fd) {
1275 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
cf6a8911 1276 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
69c79d3c
LP
1277 char iface_name[IFNAMSIZ] = "ve-";
1278 int r;
1279
1280 if (!arg_private_network)
1281 return 0;
1282
1283 if (!arg_network_veth)
1284 return 0;
1285
1286 strncpy(iface_name+3, arg_machine, sizeof(iface_name) - 3);
1287
1288 r = sd_rtnl_open(0, &rtnl);
1289 if (r < 0) {
1290 log_error("Failed to connect to netlink: %s", strerror(-r));
1291 return r;
1292 }
1293
1294 r = sd_rtnl_message_new_link(RTM_NEWLINK, 0, &m);
1295 if (r < 0) {
1296 log_error("Failed to allocate netlink message: %s", strerror(-r));
1297 return r;
1298 }
1299
1300 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1301 if (r < 0) {
1302 log_error("Failed to append netlink kind: %s", strerror(-r));
1303 return r;
1304 }
1305
1306 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO, 0);
1307 if (r < 0) {
1308 log_error("Failed to open netlink container: %s", strerror(-r));
1309 return r;
1310 }
1311
1312 r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1313 if (r < 0) {
1314 log_error("Failed to append netlink kind: %s", strerror(-r));
1315 return r;
1316 }
1317
1318 r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA, 0);
1319 if (r < 0) {
1320 log_error("Failed to open netlink container: %s", strerror(-r));
1321 return r;
1322 }
1323
1324 r = sd_rtnl_message_open_container(m, VETH_INFO_PEER, sizeof(struct ifinfomsg));
1325 if (r < 0) {
1326 log_error("z Failed to open netlink container: %s", strerror(-r));
1327 return r;
1328 }
1329
1330 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1331 if (r < 0) {
1332 log_error("Failed to append netlink kind: %s", strerror(-r));
1333 return r;
1334 }
1335
1336 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_FD, netns_fd);
1337 if (r < 0) {
1338 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1339 return r;
1340 }
1341
1342 r = sd_rtnl_message_close_container(m);
1343 if (r < 0) {
1344 log_error("Failed to close netlink container: %s", strerror(-r));
1345 return r;
1346 }
1347
1348 r = sd_rtnl_message_close_container(m);
1349 if (r < 0) {
1350 log_error("Failed to close netlink container: %s", strerror(-r));
1351 return r;
1352 }
1353
1354 r = sd_rtnl_message_close_container(m);
1355 if (r < 0) {
1356 log_error("Failed to close netlink container: %s", strerror(-r));
1357 return r;
1358 }
1359
1360 r = sd_rtnl_call(rtnl, m, 0, NULL);
1361 if (r < 0) {
1362 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1363 return r;
1364 }
1365
1366 return 0;
1367}
1368
1369static int move_network_interfaces(pid_t pid) {
7e227024 1370 _cleanup_udev_unref_ struct udev *udev = NULL;
69c79d3c 1371 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
aa28aefe
LP
1372 char **i;
1373 int r;
1374
1375 if (!arg_private_network)
1376 return 0;
1377
1378 if (strv_isempty(arg_network_interfaces))
1379 return 0;
1380
b88eb17a 1381 r = sd_rtnl_open(0, &rtnl);
aa28aefe
LP
1382 if (r < 0) {
1383 log_error("Failed to connect to netlink: %s", strerror(-r));
1384 return r;
1385 }
1386
7e227024
LP
1387 udev = udev_new();
1388 if (!udev) {
1389 log_error("Failed to connect to udev.");
1390 return -ENOMEM;
1391 }
1392
aa28aefe 1393 STRV_FOREACH(i, arg_network_interfaces) {
cf6a8911 1394 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
7e227024
LP
1395 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1396 char ifi_str[2 + DECIMAL_STR_MAX(int)];
b88eb17a 1397 int ifi;
aa28aefe 1398
b88eb17a
LP
1399 ifi = (int) if_nametoindex(*i);
1400 if (ifi <= 0) {
aa28aefe
LP
1401 log_error("Failed to resolve interface %s: %m", *i);
1402 return -errno;
1403 }
1404
7e227024
LP
1405 sprintf(ifi_str, "n%i", ifi);
1406 d = udev_device_new_from_device_id(udev, ifi_str);
1407 if (!d) {
1408 log_error("Failed to get udev device for interface %s: %m", *i);
1409 return -errno;
1410 }
1411
1412 if (udev_device_get_is_initialized(d) <= 0) {
1413 log_error("Network interface %s is not initialized yet.", *i);
1414 return -EBUSY;
1415 }
1416
d595c5cc 1417 r = sd_rtnl_message_new_link(RTM_NEWLINK, ifi, &m);
aa28aefe
LP
1418 if (r < 0) {
1419 log_error("Failed to allocate netlink message: %s", strerror(-r));
1420 return r;
1421 }
1422
1423 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1424 if (r < 0) {
1425 log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1426 return r;
1427 }
1428
1429 r = sd_rtnl_call(rtnl, m, 0, NULL);
1430 if (r < 0) {
b88eb17a 1431 log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
aa28aefe
LP
1432 return r;
1433 }
1434 }
1435
1436 return 0;
1437}
1438
24fb1112
LP
/* Installs a seccomp filter that makes socket(AF_NETLINK, *,
 * NETLINK_AUDIT) fail with EAFNOSUPPORT while allowing everything
 * else.
 *
 * Audit is broken in containers, and much of the userspace audit
 * hookup fails when run inside one. Audit userspace interprets
 * EAFNOSUPPORT as "audit is disabled in the kernel", hence we simply
 * turn off creation of audit sockets.
 *
 * Without HAVE_SECCOMP this is a no-op returning 0. Otherwise returns
 * the (non-negative) result of seccomp_load() on success, or a
 * negative error code. */
static int audit_still_doesnt_work_in_containers(void) {

#ifdef HAVE_SECCOMP
        scmp_filter_ctx ctx;
        int r;

        ctx = seccomp_init(SCMP_ACT_ALLOW);
        if (!ctx)
                return log_oom();

        r = seccomp_rule_add_exact(
                        ctx,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0)
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
        else {
                /* Clear the filter's NO_NEW_PRIVS control attribute before loading it */
                r = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
                if (r < 0)
                        log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                else {
                        r = seccomp_load(ctx);
                        if (r < 0)
                                log_error("Failed to install seccomp audit filter: %s", strerror(-r));
                }
        }

        /* The filter context is released on every path once it was allocated */
        seccomp_release(ctx);
        return r;
#else
        return 0;
#endif

}
1489
88213476 1490int main(int argc, char *argv[]) {
69c79d3c
LP
1491
1492 _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, netns_fd = -1;
1493 _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1494 _cleanup_free_ char *kdbus_domain = NULL;
1495 _cleanup_fdset_free_ FDSet *fds = NULL;
1496 const char *console = NULL;
04d391da 1497 int r = EXIT_FAILURE, k;
7027ff61 1498 int n_fd_passed;
69c79d3c 1499 pid_t pid = 0;
a258bf26 1500 sigset_t mask;
88213476
LP
1501
1502 log_parse_environment();
1503 log_open();
1504
05947bef
LP
1505 k = parse_argv(argc, argv);
1506 if (k < 0)
88213476 1507 goto finish;
05947bef
LP
1508 else if (k == 0) {
1509 r = EXIT_SUCCESS;
1510 goto finish;
1511 }
88213476
LP
1512
1513 if (arg_directory) {
1514 char *p;
1515
1516 p = path_make_absolute_cwd(arg_directory);
1517 free(arg_directory);
1518 arg_directory = p;
1519 } else
1520 arg_directory = get_current_dir_name();
1521
1522 if (!arg_directory) {
a383724e 1523 log_error("Failed to determine path, please use -D.");
88213476
LP
1524 goto finish;
1525 }
1526
1527 path_kill_slashes(arg_directory);
1528
7027ff61 1529 if (!arg_machine) {
2b6bf07d 1530 arg_machine = strdup(basename(arg_directory));
7027ff61
LP
1531 if (!arg_machine) {
1532 log_oom();
1533 goto finish;
1534 }
1535
e724b063 1536 hostname_cleanup(arg_machine, false);
7027ff61
LP
1537 if (isempty(arg_machine)) {
1538 log_error("Failed to determine machine name automatically, please use -M.");
1539 goto finish;
1540 }
1541 }
1542
88213476
LP
1543 if (geteuid() != 0) {
1544 log_error("Need to be root.");
1545 goto finish;
1546 }
1547
04d391da
LP
1548 if (sd_booted() <= 0) {
1549 log_error("Not running on a systemd system.");
1550 goto finish;
1551 }
1552
88213476 1553 if (path_equal(arg_directory, "/")) {
6df6b939 1554 log_error("Spawning container on root directory not supported.");
88213476
LP
1555 goto finish;
1556 }
1557
6b9132a9
LP
1558 if (arg_boot) {
1559 if (path_is_os_tree(arg_directory) <= 0) {
1560 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1561 goto finish;
1562 }
1563 } else {
1564 const char *p;
1565
1566 p = strappenda(arg_directory,
1567 argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1568 if (access(p, F_OK) < 0) {
1569 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1570 goto finish;
1571
1572 }
88213476
LP
1573 }
1574
842f3b0f
LP
1575 log_close();
1576 n_fd_passed = sd_listen_fds(false);
1577 if (n_fd_passed > 0) {
1578 k = fdset_new_listen_fds(&fds, false);
1579 if (k < 0) {
1580 log_error("Failed to collect file descriptors: %s", strerror(-k));
1581 goto finish;
1582 }
1583 }
1584 fdset_close_others(fds);
1585 log_open();
1586
db7feb7e
LP
1587 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1588 if (master < 0) {
a258bf26
LP
1589 log_error("Failed to acquire pseudo tty: %m");
1590 goto finish;
1591 }
1592
db7feb7e
LP
1593 console = ptsname(master);
1594 if (!console) {
a258bf26
LP
1595 log_error("Failed to determine tty name: %m");
1596 goto finish;
1597 }
1598
284c0b91
LP
1599 if (!arg_quiet)
1600 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
a258bf26
LP
1601
1602 if (unlockpt(master) < 0) {
1603 log_error("Failed to unlock tty: %m");
1604 goto finish;
1605 }
1606
69c79d3c
LP
1607 if (arg_network_veth) {
1608 netns_fd = open("/proc/self/ns/net", O_RDWR|O_CLOEXEC);
1609 if (netns_fd < 0) {
1610 log_error("Failed to open network namespace fd: %m");
1611 goto finish;
1612 }
1613 }
eb91eb18
LP
1614
1615 if (access("/dev/kdbus/control", F_OK) >= 0) {
1616
1617 if (arg_share_system) {
1618 kdbus_domain = strdup("/dev/kdbus");
1619 if (!kdbus_domain) {
1620 log_oom();
1621 goto finish;
1622 }
1623 } else {
1624 const char *ns;
1625
1626 ns = strappenda("machine-", arg_machine);
1627 kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1628 if (r < 0)
1629 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1630 else
1631 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1632 }
1633 }
9bd37b40 1634
e58a1277 1635 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
354bfd2b
LP
1636 log_error("Failed to create kmsg socket pair: %m");
1637 goto finish;
1638 }
1639
05947bef
LP
1640 sd_notify(0, "READY=1");
1641
a258bf26
LP
1642 assert_se(sigemptyset(&mask) == 0);
1643 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1644 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1645
d87be9b0
LP
1646 for (;;) {
1647 siginfo_t status;
a383724e 1648
40ddbdf8
LP
1649 sync_fd = eventfd(0, EFD_CLOEXEC);
1650 if (sync_fd < 0) {
1651 log_error("Failed to create event fd: %m");
1652 goto finish;
1653 }
1654
8a96d94e
LP
1655 pid = syscall(__NR_clone,
1656 SIGCHLD|CLONE_NEWNS|
1657 (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1658 (arg_private_network ? CLONE_NEWNET : 0), NULL);
d87be9b0
LP
1659 if (pid < 0) {
1660 if (errno == EINVAL)
1661 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1662 else
1663 log_error("clone() failed: %m");
a258bf26 1664
d87be9b0
LP
1665 goto finish;
1666 }
a258bf26 1667
d87be9b0
LP
1668 if (pid == 0) {
1669 /* child */
d87be9b0
LP
1670 const char *home = NULL;
1671 uid_t uid = (uid_t) -1;
1672 gid_t gid = (gid_t) -1;
5674767e 1673 unsigned n_env = 2;
d87be9b0 1674 const char *envp[] = {
e10a55fd 1675 "PATH=" DEFAULT_PATH_SPLIT_USR,
d87be9b0
LP
1676 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1677 NULL, /* TERM */
1678 NULL, /* HOME */
1679 NULL, /* USER */
1680 NULL, /* LOGNAME */
1681 NULL, /* container_uuid */
842f3b0f
LP
1682 NULL, /* LISTEN_FDS */
1683 NULL, /* LISTEN_PID */
d87be9b0
LP
1684 NULL
1685 };
f4889f65 1686 char **env_use;
354bfd2b 1687 eventfd_t x;
a258bf26 1688
5674767e
ZJS
1689 envp[n_env] = strv_find_prefix(environ, "TERM=");
1690 if (envp[n_env])
1691 n_env ++;
a258bf26 1692
d87be9b0 1693 close_nointr_nofail(master);
842f3b0f 1694 master = -1;
a258bf26 1695
d87be9b0
LP
1696 close_nointr(STDIN_FILENO);
1697 close_nointr(STDOUT_FILENO);
1698 close_nointr(STDERR_FILENO);
db7feb7e 1699
842f3b0f
LP
1700 close_nointr_nofail(kmsg_socket_pair[0]);
1701 kmsg_socket_pair[0] = -1;
a258bf26 1702
d87be9b0 1703 reset_all_signal_handlers();
88213476 1704
d87be9b0
LP
1705 assert_se(sigemptyset(&mask) == 0);
1706 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
f5c1b9ee 1707
842f3b0f
LP
1708 k = open_terminal(console, O_RDWR);
1709 if (k != STDIN_FILENO) {
1710 if (k >= 0) {
1711 close_nointr_nofail(k);
1712 k = -EINVAL;
1713 }
1714
1715 log_error("Failed to open console: %s", strerror(-k));
1716 goto child_fail;
1717 }
1718
1719 if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1720 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1721 log_error("Failed to duplicate console: %m");
d87be9b0 1722 goto child_fail;
842f3b0f 1723 }
bc2f673e 1724
d87be9b0
LP
1725 if (setsid() < 0) {
1726 log_error("setsid() failed: %m");
bc2f673e
LP
1727 goto child_fail;
1728 }
1729
db999e0f
LP
1730 if (reset_audit_loginuid() < 0)
1731 goto child_fail;
1732
d87be9b0
LP
1733 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1734 log_error("PR_SET_PDEATHSIG failed: %m");
1735 goto child_fail;
1736 }
e58a1277 1737
d87be9b0
LP
1738 /* Mark everything as slave, so that we still
1739 * receive mounts from the real root, but don't
1740 * propagate mounts to the real root. */
1741 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1742 log_error("MS_SLAVE|MS_REC failed: %m");
1743 goto child_fail;
1744 }
04bc4a3f 1745
d87be9b0
LP
1746 /* Turn directory into bind mount */
1747 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1748 log_error("Failed to make bind mount.");
1749 goto child_fail;
1750 }
88213476 1751
d87be9b0
LP
1752 if (arg_read_only)
1753 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1754 log_error("Failed to make read-only.");
1755 goto child_fail;
1756 }
2547bb41 1757
d87be9b0
LP
1758 if (mount_all(arg_directory) < 0)
1759 goto child_fail;
57fb9fb5 1760
d87be9b0
LP
1761 if (copy_devnodes(arg_directory) < 0)
1762 goto child_fail;
a258bf26 1763
f2d88580
LP
1764 if (setup_ptmx(arg_directory) < 0)
1765 goto child_fail;
1766
d87be9b0 1767 dev_setup(arg_directory);
88213476 1768
69c79d3c
LP
1769 if (setup_veth(netns_fd) < 0)
1770 goto child_fail;
1771
1772 if (netns_fd >= 0) {
1773 close_nointr_nofail(netns_fd);
1774 netns_fd = -1;
1775 }
1776
24fb1112
LP
1777 if (audit_still_doesnt_work_in_containers() < 0)
1778 goto child_fail;
1779
d87be9b0
LP
1780 if (setup_dev_console(arg_directory, console) < 0)
1781 goto child_fail;
88213476 1782
d87be9b0
LP
1783 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1784 goto child_fail;
88213476 1785
d87be9b0 1786 close_nointr_nofail(kmsg_socket_pair[1]);
842f3b0f 1787 kmsg_socket_pair[1] = -1;
a258bf26 1788
d87be9b0
LP
1789 if (setup_boot_id(arg_directory) < 0)
1790 goto child_fail;
a41fe3a2 1791
d87be9b0
LP
1792 if (setup_timezone(arg_directory) < 0)
1793 goto child_fail;
88213476 1794
d87be9b0
LP
1795 if (setup_resolv_conf(arg_directory) < 0)
1796 goto child_fail;
687d0825 1797
d87be9b0 1798 if (setup_journal(arg_directory) < 0)
687d0825 1799 goto child_fail;
687d0825 1800
17fe0523
LP
1801 if (mount_binds(arg_directory, arg_bind, 0) < 0)
1802 goto child_fail;
1803
1804 if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1805 goto child_fail;
1806
486e99a3 1807 if (setup_kdbus(arg_directory, kdbus_domain) < 0)
9bd37b40
LP
1808 goto child_fail;
1809
d87be9b0
LP
1810 if (chdir(arg_directory) < 0) {
1811 log_error("chdir(%s) failed: %m", arg_directory);
687d0825
MV
1812 goto child_fail;
1813 }
1814
d87be9b0
LP
1815 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1816 log_error("mount(MS_MOVE) failed: %m");
687d0825
MV
1817 goto child_fail;
1818 }
1819
d87be9b0
LP
1820 if (chroot(".") < 0) {
1821 log_error("chroot() failed: %m");
687d0825
MV
1822 goto child_fail;
1823 }
1824
d87be9b0
LP
1825 if (chdir("/") < 0) {
1826 log_error("chdir() failed: %m");
687d0825
MV
1827 goto child_fail;
1828 }
1829
d87be9b0
LP
1830 umask(0022);
1831
eb91eb18
LP
1832 if (arg_private_network)
1833 loopback_setup();
d87be9b0
LP
1834
1835 if (drop_capabilities() < 0) {
1836 log_error("drop_capabilities() failed: %m");
687d0825
MV
1837 goto child_fail;
1838 }
687d0825 1839
d87be9b0
LP
1840 if (arg_user) {
1841
963ddb91
LP
1842 /* Note that this resolves user names
1843 * inside the container, and hence
1844 * accesses the NSS modules from the
1845 * container and not the host. This is
1846 * a bit weird... */
1847
d87be9b0
LP
1848 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1849 log_error("get_user_creds() failed: %m");
1850 goto child_fail;
1851 }
1852
1853 if (mkdir_parents_label(home, 0775) < 0) {
1854 log_error("mkdir_parents_label() failed: %m");
1855 goto child_fail;
1856 }
1857
1858 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1859 log_error("mkdir_safe_label() failed: %m");
1860 goto child_fail;
1861 }
1862
1863 if (initgroups((const char*)arg_user, gid) < 0) {
1864 log_error("initgroups() failed: %m");
1865 goto child_fail;
1866 }
144f0fc0 1867
d87be9b0
LP
1868 if (setresgid(gid, gid, gid) < 0) {
1869 log_error("setregid() failed: %m");
1870 goto child_fail;
1871 }
1872
1873 if (setresuid(uid, uid, uid) < 0) {
1874 log_error("setreuid() failed: %m");
1875 goto child_fail;
1876 }
3c957acf
LP
1877 } else {
1878 /* Reset everything fully to 0, just in case */
1879
1880 if (setgroups(0, NULL) < 0) {
1881 log_error("setgroups() failed: %m");
1882 goto child_fail;
1883 }
1884
1885 if (setresgid(0, 0, 0) < 0) {
1886 log_error("setregid() failed: %m");
1887 goto child_fail;
1888 }
1889
1890 if (setresuid(0, 0, 0) < 0) {
1891 log_error("setreuid() failed: %m");
1892 goto child_fail;
1893 }
d87be9b0
LP
1894 }
1895
842f3b0f
LP
1896 if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1897 (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1898 (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
0d0f0c50 1899 log_oom();
144f0fc0
LP
1900 goto child_fail;
1901 }
687d0825 1902
9444b1f2
LP
1903 if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1904 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
842f3b0f
LP
1905 log_oom();
1906 goto child_fail;
1907 }
1908 }
1909
1910 if (fdset_size(fds) > 0) {
1911 k = fdset_cloexec(fds, false);
1912 if (k < 0) {
1913 log_error("Failed to unset O_CLOEXEC for file descriptors.");
1914 goto child_fail;
1915 }
1916
1917 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
d1826146 1918 (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
d87be9b0
LP
1919 log_oom();
1920 goto child_fail;
1921 }
1922 }
1923
1924 setup_hostname();
1925
354bfd2b
LP
1926 eventfd_read(sync_fd, &x);
1927 close_nointr_nofail(sync_fd);
1928 sync_fd = -1;
1929
f4889f65
LP
1930 if (!strv_isempty(arg_setenv)) {
1931 char **n;
1932
1933 n = strv_env_merge(2, envp, arg_setenv);
1934 if (!n) {
1935 log_oom();
1936 goto child_fail;
1937 }
1938
1939 env_use = n;
1940 } else
1941 env_use = (char**) envp;
1942
5d63309c 1943#ifdef HAVE_SELINUX
82adf6af
LP
1944 if (arg_selinux_context)
1945 if (setexeccon(arg_selinux_context) < 0)
1946 log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
a8828ed9 1947#endif
d87be9b0
LP
1948 if (arg_boot) {
1949 char **a;
1950 size_t l;
88213476 1951
d87be9b0 1952 /* Automatically search for the init system */
0f0dbc46 1953
d87be9b0
LP
1954 l = 1 + argc - optind;
1955 a = newa(char*, l + 1);
1956 memcpy(a + 1, argv + optind, l * sizeof(char*));
0f0dbc46 1957
d87be9b0 1958 a[0] = (char*) "/usr/lib/systemd/systemd";
f4889f65 1959 execve(a[0], a, env_use);
0f0dbc46 1960
d87be9b0 1961 a[0] = (char*) "/lib/systemd/systemd";
f4889f65 1962 execve(a[0], a, env_use);
0f0dbc46 1963
d87be9b0 1964 a[0] = (char*) "/sbin/init";
f4889f65 1965 execve(a[0], a, env_use);
d87be9b0 1966 } else if (argc > optind)
f4889f65 1967 execvpe(argv[optind], argv + optind, env_use);
d87be9b0
LP
1968 else {
1969 chdir(home ? home : "/root");
f4889f65 1970 execle("/bin/bash", "-bash", NULL, env_use);
262d10e6 1971 execle("/bin/sh", "-sh", NULL, env_use);
d87be9b0
LP
1972 }
1973
1974 log_error("execv() failed: %m");
0f0dbc46 1975
d87be9b0
LP
1976 child_fail:
1977 _exit(EXIT_FAILURE);
da5b3bad 1978 }
88213476 1979
842f3b0f
LP
1980 fdset_free(fds);
1981 fds = NULL;
1982
354bfd2b
LP
1983 r = register_machine(pid);
1984 if (r < 0)
1985 goto finish;
1986
aa28aefe
LP
1987 r = move_network_interfaces(pid);
1988 if (r < 0)
1989 goto finish;
1990
354bfd2b
LP
1991 eventfd_write(sync_fd, 1);
1992 close_nointr_nofail(sync_fd);
1993 sync_fd = -1;
1994
04d39279
LP
1995 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1996 if (k < 0) {
1997 r = EXIT_FAILURE;
1998 break;
1999 }
88213476 2000
284c0b91
LP
2001 if (!arg_quiet)
2002 putc('\n', stdout);
04d39279
LP
2003
2004 /* Kill if it is not dead yet anyway */
1f0cd86b
LP
2005 terminate_machine(pid);
2006
2007 /* Redundant, but better safe than sorry */
04d39279 2008 kill(pid, SIGKILL);
a258bf26 2009
05947bef 2010 k = wait_for_terminate(pid, &status);
04d39279
LP
2011 pid = 0;
2012
05947bef 2013 if (k < 0) {
d87be9b0
LP
2014 r = EXIT_FAILURE;
2015 break;
2016 }
a258bf26 2017
d87be9b0 2018 if (status.si_code == CLD_EXITED) {
a5f5f8a0 2019 r = status.si_status;
d87be9b0 2020 if (status.si_status != 0) {
04d39279 2021 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
d87be9b0
LP
2022 break;
2023 }
2024
284c0b91
LP
2025 if (!arg_quiet)
2026 log_debug("Container %s exited successfully.", arg_machine);
d87be9b0
LP
2027 break;
2028 } else if (status.si_code == CLD_KILLED &&
2029 status.si_status == SIGINT) {
284c0b91
LP
2030
2031 if (!arg_quiet)
2032 log_info("Container %s has been shut down.", arg_machine);
d87be9b0
LP
2033 r = 0;
2034 break;
2035 } else if (status.si_code == CLD_KILLED &&
2036 status.si_status == SIGHUP) {
284c0b91
LP
2037
2038 if (!arg_quiet)
2039 log_info("Container %s is being rebooted.", arg_machine);
d87be9b0
LP
2040 continue;
2041 } else if (status.si_code == CLD_KILLED ||
2042 status.si_code == CLD_DUMPED) {
88213476 2043
eb91eb18 2044 log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
d87be9b0
LP
2045 r = EXIT_FAILURE;
2046 break;
2047 } else {
04d39279 2048 log_error("Container %s failed due to unknown reason.", arg_machine);
d87be9b0
LP
2049 r = EXIT_FAILURE;
2050 break;
2051 }
2052 }
88213476
LP
2053
2054finish:
9444b1f2
LP
2055 if (pid > 0)
2056 kill(pid, SIGKILL);
88213476 2057
04d391da 2058 free(arg_directory);
7027ff61 2059 free(arg_machine);
f4889f65 2060 free(arg_setenv);
aa28aefe 2061 free(arg_network_interfaces);
88213476
LP
2062
2063 return r;
2064}