]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn.c
relicense to LGPLv2.1 (with exceptions)
[thirdparty/systemd.git] / src / nspawn.c
CommitLineData
88213476
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <signal.h>
23#include <sched.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/syscall.h>
27#include <sys/mount.h>
28#include <sys/wait.h>
29#include <stdlib.h>
30#include <string.h>
31#include <stdio.h>
32#include <errno.h>
33#include <sys/prctl.h>
34#include <sys/capability.h>
35#include <getopt.h>
a258bf26
LP
36#include <sys/epoll.h>
37#include <termios.h>
38#include <sys/signalfd.h>
687d0825 39#include <grp.h>
5ed27dbd 40#include <linux/fs.h>
88213476 41
81527be1
LP
42#include <systemd/sd-daemon.h>
43
88213476
LP
44#include "log.h"
45#include "util.h"
49e942b2 46#include "mkdir.h"
d7832d2c 47#include "audit.h"
94d82985 48#include "missing.h"
04d391da 49#include "cgroup-util.h"
a258bf26 50#include "strv.h"
a41fe3a2 51#include "loopback-setup.h"
88213476
LP
52
/* Command line settings, filled in by parse_argv(). Objects with static
 * storage duration start out zeroed, i.e. NULL/false. */
static char *arg_directory;        /* -D/--directory: container root (heap copy) */
static char *arg_user;             /* -u/--user: user name or uid (heap copy) */
static bool arg_private_network;   /* --private-network was given */
88213476
LP
56
/* Prints the usage summary to stdout, prefixed with the short program
 * name. Always returns 0.
 *
 * The option and description columns are padded so that the descriptions
 * line up underneath each other. */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               "  -h --help            Show this help\n"
               "  -D --directory=NAME  Root directory for the container\n"
               "  -u --user=USER       Run the command under specified user or uid\n"
               "     --private-network Disable network in container\n",
               program_invocation_short_name);

        return 0;
}
69
70static int parse_argv(int argc, char *argv[]) {
71
a41fe3a2 72 enum {
ff01d048 73 ARG_PRIVATE_NETWORK = 0x100
a41fe3a2
LP
74 };
75
88213476 76 static const struct option options[] = {
ff01d048
LP
77 { "help", no_argument, NULL, 'h' },
78 { "directory", required_argument, NULL, 'D' },
79 { "user", required_argument, NULL, 'u' },
80 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
81 { NULL, 0, NULL, 0 }
88213476
LP
82 };
83
84 int c;
85
86 assert(argc >= 0);
87 assert(argv);
88
687d0825 89 while ((c = getopt_long(argc, argv, "+hD:u:", options, NULL)) >= 0) {
88213476
LP
90
91 switch (c) {
92
93 case 'h':
94 help();
95 return 0;
96
97 case 'D':
98 free(arg_directory);
99 if (!(arg_directory = strdup(optarg))) {
100 log_error("Failed to duplicate root directory.");
101 return -ENOMEM;
102 }
103
104 break;
105
687d0825
MV
106 case 'u':
107 free(arg_user);
108 if (!(arg_user = strdup(optarg))) {
109 log_error("Failed to duplicate user name.");
110 return -ENOMEM;
111 }
112
113 break;
114
ff01d048
LP
115 case ARG_PRIVATE_NETWORK:
116 arg_private_network = true;
a41fe3a2
LP
117 break;
118
88213476
LP
119 case '?':
120 return -EINVAL;
121
122 default:
123 log_error("Unknown option code %c", c);
124 return -EINVAL;
125 }
126 }
127
128 return 1;
129}
130
/* Mounts the API file systems listed in the table below underneath the
 * directory 'dest', then tries to bind mount the host's /etc/localtime and
 * /etc/timezone read-only into it.
 *
 * Returns 0 on success, otherwise the negative errno-style code of the
 * first failure seen. Table processing continues after a failed entry,
 * except on allocation failure, which ends the loop. Entries whose 'fatal'
 * flag is false, and the timezone mounts, are best effort and never affect
 * the result. */
static int mount_all(const char *dest) {

        typedef struct MountPoint {
                const char *what;
                const char *where;
                const char *type;
                const char *options;
                unsigned long flags;
                bool fatal;
        } MountPoint;

        static const MountPoint mount_table[] = {
                { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND, true                       },   /* Bind mount first */
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND, true                       },   /* Bind mount first */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
                { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true      },
                { "/dev/pts",  "/dev/pts",  "bind",  NULL,       MS_BIND, true                       },
                { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
#ifdef HAVE_SELINUX
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND, false },                   /* Bind mount first */
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
#endif
        };

        /* Host files that carry the time zone configuration */
        static const char * const tz_files[] = {
                "/etc/localtime",
                "/etc/timezone"
        };

        unsigned idx;
        int result = 0;
        char *target;

        for (idx = 0; idx < ELEMENTSOF(mount_table); idx++) {
                const MountPoint *m = mount_table + idx;
                int probe;

                if (asprintf(&target, "%s/%s", dest, m->where) < 0) {
                        log_error("Out of memory");

                        if (result == 0)
                                result = -ENOMEM;

                        break;
                }

                probe = path_is_mount_point(target, false);
                if (probe < 0) {
                        log_error("Failed to detect whether %s is a mount point: %s", target, strerror(-probe));
                        free(target);

                        if (result == 0)
                                result = probe;

                        continue;
                }

                /* Best effort: if this fails, the mount() below fails too */
                mkdir_p(target, 0755);

                if (mount(m->what, target, m->type, m->flags, m->options) < 0 && m->fatal) {

                        log_error("mount(%s) failed: %m", target);

                        if (result == 0)
                                result = -errno;
                }

                free(target);
        }

        /* Fix the timezone, if possible */
        for (idx = 0; idx < ELEMENTSOF(tz_files); idx++) {
                const char *src = tz_files[idx];

                if (asprintf(&target, "%s%s", dest, src) < 0)
                        continue;

                /* Bind mount first, then make it r/o */
                if (mount(src, target, "bind", MS_BIND, NULL) >= 0)
                        mount(src, target, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);

                free(target);
        }

        return result;
}
220
a258bf26 221static int copy_devnodes(const char *dest, const char *console) {
88213476
LP
222
223 static const char devnodes[] =
224 "null\0"
225 "zero\0"
226 "full\0"
227 "random\0"
228 "urandom\0"
229 "tty\0"
230 "ptmx\0"
231 "kmsg\0"
232 "rtc0\0";
233
234 const char *d;
235 int r = 0, k;
124640f1 236 mode_t u;
a258bf26
LP
237 struct stat st;
238 char *from = NULL, *to = NULL;
239
240 assert(dest);
241 assert(console);
124640f1
LP
242
243 u = umask(0000);
88213476
LP
244
245 NULSTR_FOREACH(d, devnodes) {
a258bf26 246 from = to = NULL;
88213476
LP
247
248 asprintf(&from, "/dev/%s", d);
249 asprintf(&to, "%s/dev/%s", dest, d);
250
251 if (!from || !to) {
252 log_error("Failed to allocate devnode path");
253
254 free(from);
255 free(to);
256
a258bf26
LP
257 from = to = NULL;
258
88213476
LP
259 if (r == 0)
260 r = -ENOMEM;
261
262 break;
263 }
264
265 if (stat(from, &st) < 0) {
266
267 if (errno != ENOENT) {
268 log_error("Failed to stat %s: %m", from);
88213476
LP
269 if (r == 0)
270 r = -errno;
271 }
272
a258bf26 273 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
88213476 274
a258bf26
LP
275 log_error("%s is not a char or block device, cannot copy.", from);
276 if (r == 0)
277 r = -EIO;
278
279 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
280
281 log_error("mknod(%s) failed: %m", dest);
282 if (r == 0)
283 r = -errno;
88213476
LP
284 }
285
286 free(from);
287 free(to);
288 }
289
a258bf26 290 if (stat(console, &st) < 0) {
88213476 291
a258bf26 292 log_error("Failed to stat %s: %m", console);
88213476 293 if (r == 0)
a258bf26 294 r = -errno;
88213476 295
a258bf26 296 goto finish;
88213476 297
a258bf26 298 } else if (!S_ISCHR(st.st_mode)) {
88213476 299
a258bf26
LP
300 log_error("/dev/console is not a char device.");
301 if (r == 0)
302 r = -EIO;
88213476 303
a258bf26
LP
304 goto finish;
305 }
88213476 306
a258bf26 307 if (asprintf(&to, "%s/dev/console", dest) < 0) {
88213476 308
a258bf26
LP
309 log_error("Out of memory");
310 if (r == 0)
311 r = -ENOMEM;
88213476 312
a258bf26 313 goto finish;
88213476
LP
314 }
315
a258bf26
LP
316 /* We need to bind mount the right tty to /dev/console since
317 * ptys can only exist on pts file systems. To have something
318 * to bind mount things on we create a device node first, that
319 * has the right major/minor (note that the major minor
320 * doesn't actually matter here, since we mount it over
321 * anyway). */
322
323 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0)
324 log_error("mknod for /dev/console failed: %m");
325
326 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
327 log_error("bind mount for /dev/console failed: %m");
328
329 if (r == 0)
330 r = -errno;
331 }
332
333 free(to);
334
335 if ((k = chmod_and_chown(console, 0600, 0, 0)) < 0) {
336 log_error("Failed to correct access mode for TTY: %s", strerror(-k));
337
338 if (r == 0)
339 r = k;
340 }
341
342finish:
124640f1
LP
343 umask(u);
344
88213476
LP
345 return r;
346}
347
348static int drop_capabilities(void) {
349 static const unsigned long retain[] = {
350 CAP_CHOWN,
351 CAP_DAC_OVERRIDE,
352 CAP_DAC_READ_SEARCH,
353 CAP_FOWNER,
354 CAP_FSETID,
355 CAP_IPC_OWNER,
356 CAP_KILL,
357 CAP_LEASE,
358 CAP_LINUX_IMMUTABLE,
359 CAP_NET_BIND_SERVICE,
360 CAP_NET_BROADCAST,
361 CAP_NET_RAW,
362 CAP_SETGID,
363 CAP_SETFCAP,
364 CAP_SETPCAP,
365 CAP_SETUID,
366 CAP_SYS_ADMIN,
367 CAP_SYS_CHROOT,
368 CAP_SYS_NICE,
369 CAP_SYS_PTRACE,
370 CAP_SYS_TTY_CONFIG
371 };
372
373 unsigned long l;
374
64685e0c 375 for (l = 0; l <= cap_last_cap(); l++) {
88213476
LP
376 unsigned i;
377
378 for (i = 0; i < ELEMENTSOF(retain); i++)
379 if (retain[i] == l)
380 break;
381
382 if (i < ELEMENTSOF(retain))
383 continue;
384
385 if (prctl(PR_CAPBSET_DROP, l) < 0) {
88213476
LP
386 log_error("PR_CAPBSET_DROP failed: %m");
387 return -errno;
388 }
389 }
390
391 return 0;
392}
393
/* Checks whether 'path' looks like the root of an OS tree. The existence
 * of <path>/bin/sh is used as the flag for that.
 *
 * Returns 1 if the flag file exists, 0 if access() fails on it, and
 * -ENOMEM if the path string cannot be allocated. */
static int is_os_tree(const char *path) {
        static const char flag_file[] = "/bin/sh";
        size_t prefix_len;
        char *candidate;
        int found;

        prefix_len = strlen(path);

        /* sizeof() covers the suffix including its terminating NUL byte */
        candidate = malloc(prefix_len + sizeof(flag_file));
        if (!candidate)
                return -ENOMEM;

        memcpy(candidate, path, prefix_len);
        memcpy(candidate + prefix_len, flag_file, sizeof(flag_file));

        found = access(candidate, F_OK) >= 0;
        free(candidate);

        return found ? 1 : 0;
}
407
a258bf26 408static int process_pty(int master, sigset_t *mask) {
0c749d50 409
b72491a2 410 char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
a258bf26
LP
411 size_t in_buffer_full = 0, out_buffer_full = 0;
412 struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
413 bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
a258bf26
LP
414 int ep = -1, signal_fd = -1, r;
415
416 fd_nonblock(STDIN_FILENO, 1);
417 fd_nonblock(STDOUT_FILENO, 1);
418 fd_nonblock(master, 1);
419
420 if ((signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) {
421 log_error("signalfd(): %m");
422 r = -errno;
423 goto finish;
424 }
425
426 if ((ep = epoll_create1(EPOLL_CLOEXEC)) < 0) {
427 log_error("Failed to create epoll: %m");
428 r = -errno;
429 goto finish;
430 }
431
432 zero(stdin_ev);
433 stdin_ev.events = EPOLLIN|EPOLLET;
434 stdin_ev.data.fd = STDIN_FILENO;
435
436 zero(stdout_ev);
437 stdout_ev.events = EPOLLOUT|EPOLLET;
438 stdout_ev.data.fd = STDOUT_FILENO;
439
440 zero(master_ev);
441 master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
442 master_ev.data.fd = master;
443
444 zero(signal_ev);
445 signal_ev.events = EPOLLIN;
446 signal_ev.data.fd = signal_fd;
447
448 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
449 epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
450 epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
451 epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
452 log_error("Failed to regiser fds in epoll: %m");
453 r = -errno;
454 goto finish;
455 }
456
fd14078a 457 for (;;) {
a258bf26
LP
458 struct epoll_event ev[16];
459 ssize_t k;
460 int i, nfds;
461
462 if ((nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1)) < 0) {
463
464 if (errno == EINTR || errno == EAGAIN)
465 continue;
466
467 log_error("epoll_wait(): %m");
468 r = -errno;
469 goto finish;
470 }
471
472 assert(nfds >= 1);
473
474 for (i = 0; i < nfds; i++) {
475 if (ev[i].data.fd == STDIN_FILENO) {
476
fd14078a 477 if (ev[i].events & (EPOLLIN|EPOLLHUP))
a258bf26
LP
478 stdin_readable = true;
479
480 } else if (ev[i].data.fd == STDOUT_FILENO) {
481
fd14078a 482 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
a258bf26
LP
483 stdout_writable = true;
484
485 } else if (ev[i].data.fd == master) {
486
fd14078a 487 if (ev[i].events & (EPOLLIN|EPOLLHUP))
a258bf26
LP
488 master_readable = true;
489
fd14078a 490 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
a258bf26
LP
491 master_writable = true;
492
493 } else if (ev[i].data.fd == signal_fd) {
494 struct signalfd_siginfo sfsi;
495 ssize_t n;
496
497 if ((n = read(signal_fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
498
499 if (n >= 0) {
0c749d50 500 log_error("Failed to read from signalfd: invalid block size");
a258bf26
LP
501 r = -EIO;
502 goto finish;
503 }
504
505 if (errno != EINTR && errno != EAGAIN) {
0c749d50 506 log_error("Failed to read from signalfd: %m");
a258bf26
LP
507 r = -errno;
508 goto finish;
509 }
510 } else {
511
512 if (sfsi.ssi_signo == SIGWINCH) {
513 struct winsize ws;
514
515 /* The window size changed, let's forward that. */
a258bf26
LP
516 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
517 ioctl(master, TIOCSWINSZ, &ws);
518 } else {
0c749d50 519 r = 0;
a258bf26
LP
520 goto finish;
521 }
522 }
523 }
524 }
525
526 while ((stdin_readable && in_buffer_full <= 0) ||
527 (master_writable && in_buffer_full > 0) ||
528 (master_readable && out_buffer_full <= 0) ||
529 (stdout_writable && out_buffer_full > 0)) {
530
b72491a2 531 if (stdin_readable && in_buffer_full < LINE_MAX) {
a258bf26 532
b72491a2 533 if ((k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full)) < 0) {
a258bf26 534
fd14078a 535 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 536 stdin_readable = false;
a258bf26
LP
537 else {
538 log_error("read(): %m");
0c749d50 539 r = -errno;
a258bf26
LP
540 goto finish;
541 }
542 } else
543 in_buffer_full += (size_t) k;
a258bf26
LP
544 }
545
546 if (master_writable && in_buffer_full > 0) {
547
548 if ((k = write(master, in_buffer, in_buffer_full)) < 0) {
549
fd14078a 550 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 551 master_writable = false;
fd14078a 552 else {
a258bf26 553 log_error("write(): %m");
0c749d50 554 r = -errno;
a258bf26
LP
555 goto finish;
556 }
557
558 } else {
559 assert(in_buffer_full >= (size_t) k);
560 memmove(in_buffer, in_buffer + k, in_buffer_full - k);
561 in_buffer_full -= k;
562 }
563 }
564
b72491a2 565 if (master_readable && out_buffer_full < LINE_MAX) {
a258bf26 566
b72491a2 567 if ((k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full)) < 0) {
a258bf26 568
fd14078a 569 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 570 master_readable = false;
a258bf26
LP
571 else {
572 log_error("read(): %m");
0c749d50 573 r = -errno;
a258bf26
LP
574 goto finish;
575 }
576 } else
577 out_buffer_full += (size_t) k;
a258bf26
LP
578 }
579
580 if (stdout_writable && out_buffer_full > 0) {
581
582 if ((k = write(STDOUT_FILENO, out_buffer, out_buffer_full)) < 0) {
583
fd14078a 584 if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
a258bf26 585 stdout_writable = false;
fd14078a 586 else {
a258bf26 587 log_error("write(): %m");
0c749d50 588 r = -errno;
a258bf26
LP
589 goto finish;
590 }
591
592 } else {
593 assert(out_buffer_full >= (size_t) k);
594 memmove(out_buffer, out_buffer + k, out_buffer_full - k);
595 out_buffer_full -= k;
596 }
597 }
598 }
fd14078a 599 }
a258bf26
LP
600
601finish:
602 if (ep >= 0)
603 close_nointr_nofail(ep);
604
605 if (signal_fd >= 0)
606 close_nointr_nofail(signal_fd);
607
608 return r;
609}
88213476
LP
610
611int main(int argc, char *argv[]) {
612 pid_t pid = 0;
04d391da
LP
613 int r = EXIT_FAILURE, k;
614 char *oldcg = NULL, *newcg = NULL;
a258bf26
LP
615 int master = -1;
616 const char *console = NULL;
617 struct termios saved_attr, raw_attr;
618 sigset_t mask;
619 bool saved_attr_valid = false;
620 struct winsize ws;
88213476
LP
621
622 log_parse_environment();
623 log_open();
624
625 if ((r = parse_argv(argc, argv)) <= 0)
626 goto finish;
627
628 if (arg_directory) {
629 char *p;
630
631 p = path_make_absolute_cwd(arg_directory);
632 free(arg_directory);
633 arg_directory = p;
634 } else
635 arg_directory = get_current_dir_name();
636
637 if (!arg_directory) {
638 log_error("Failed to determine path");
639 goto finish;
640 }
641
642 path_kill_slashes(arg_directory);
643
644 if (geteuid() != 0) {
645 log_error("Need to be root.");
646 goto finish;
647 }
648
04d391da
LP
649 if (sd_booted() <= 0) {
650 log_error("Not running on a systemd system.");
651 goto finish;
652 }
653
88213476 654 if (path_equal(arg_directory, "/")) {
6df6b939 655 log_error("Spawning container on root directory not supported.");
88213476
LP
656 goto finish;
657 }
658
659 if (is_os_tree(arg_directory) <= 0) {
660 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
661 goto finish;
662 }
663
04d391da
LP
664 if ((k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg)) < 0) {
665 log_error("Failed to determine current cgroup: %s", strerror(-k));
666 goto finish;
667 }
668
669 if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
670 log_error("Failed to allocate cgroup path.");
671 goto finish;
672 }
673
674 if ((k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0)) < 0) {
675 log_error("Failed to create cgroup: %s", strerror(-k));
676 goto finish;
677 }
678
a258bf26
LP
679 if ((master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY)) < 0) {
680 log_error("Failed to acquire pseudo tty: %m");
681 goto finish;
682 }
683
684 if (!(console = ptsname(master))) {
685 log_error("Failed to determine tty name: %m");
686 goto finish;
687 }
688
689 log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
690
691 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
692 ioctl(master, TIOCSWINSZ, &ws);
693
694 if (unlockpt(master) < 0) {
695 log_error("Failed to unlock tty: %m");
696 goto finish;
697 }
698
699 if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) {
700 log_error("Failed to get terminal attributes: %m");
701 goto finish;
702 }
703
704 saved_attr_valid = true;
705
706 raw_attr = saved_attr;
707 cfmakeraw(&raw_attr);
708 raw_attr.c_lflag &= ~ECHO;
709
710 if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
711 log_error("Failed to set terminal attributes: %m");
712 goto finish;
713 }
714
715 assert_se(sigemptyset(&mask) == 0);
716 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
717 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
718
52af2106
LP
719 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
720 if (pid < 0) {
721 if (errno == EINVAL)
722 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
723 else
724 log_error("clone() failed: %m");
725
88213476
LP
726 goto finish;
727 }
728
729 if (pid == 0) {
a258bf26
LP
730 /* child */
731
88213476 732 const char *hn;
687d0825
MV
733 const char *home = NULL;
734 uid_t uid = (uid_t) -1;
735 gid_t gid = (gid_t) -1;
da5b3bad 736 const char *envp[] = {
da5b3bad 737 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
3bb1c6b0 738 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
687d0825
MV
739 NULL, /* TERM */
740 NULL, /* HOME */
741 NULL, /* USER */
742 NULL, /* LOGNAME */
da5b3bad
LP
743 NULL
744 };
88213476 745
3bb1c6b0 746 envp[2] = strv_find_prefix(environ, "TERM=");
a258bf26
LP
747
748 close_nointr_nofail(master);
749
750 close_nointr(STDIN_FILENO);
751 close_nointr(STDOUT_FILENO);
752 close_nointr(STDERR_FILENO);
753
754 close_all_fds(NULL, 0);
755
756 reset_all_signal_handlers();
757
758 assert_se(sigemptyset(&mask) == 0);
759 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
760
761 if (setsid() < 0)
762 goto child_fail;
763
764 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
765 goto child_fail;
88213476 766
f5c1b9ee
LP
767 /* Mark / as private, in case somebody marked it shared */
768 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0)
769 goto child_fail;
770
88213476
LP
771 if (mount_all(arg_directory) < 0)
772 goto child_fail;
773
a258bf26 774 if (copy_devnodes(arg_directory, console) < 0)
88213476
LP
775 goto child_fail;
776
777 if (chdir(arg_directory) < 0) {
778 log_error("chdir(%s) failed: %m", arg_directory);
779 goto child_fail;
780 }
a258bf26
LP
781
782 if (open_terminal("dev/console", O_RDWR) != STDIN_FILENO ||
783 dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
784 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
785 goto child_fail;
786
88213476
LP
787 if (mount(arg_directory, "/", "bind", MS_BIND|MS_MOVE, NULL) < 0) {
788 log_error("mount(MS_MOVE) failed: %m");
789 goto child_fail;
790 }
791
792 if (chroot(".") < 0) {
793 log_error("chroot() failed: %m");
794 goto child_fail;
795 }
796
797 if (chdir("/") < 0) {
798 log_error("chdir() failed: %m");
799 goto child_fail;
800 }
801
4c12626c 802 umask(0022);
a258bf26 803
a41fe3a2
LP
804 loopback_setup();
805
88213476
LP
806 if (drop_capabilities() < 0)
807 goto child_fail;
808
687d0825
MV
809 if (arg_user) {
810
811 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home) < 0) {
812 log_error("get_user_creds() failed: %m");
813 goto child_fail;
814 }
815
816 if (mkdir_parents(home, 0775) < 0) {
817 log_error("mkdir_parents() failed: %m");
818 goto child_fail;
819 }
820
821 if (safe_mkdir(home, 0775, uid, gid) < 0) {
822 log_error("safe_mkdir() failed: %m");
823 goto child_fail;
824 }
825
826 if (initgroups((const char*)arg_user, gid) < 0) {
827 log_error("initgroups() failed: %m");
828 goto child_fail;
829 }
830
5c94603d 831 if (setresgid(gid, gid, gid) < 0) {
687d0825
MV
832 log_error("setregid() failed: %m");
833 goto child_fail;
834 }
835
5c94603d 836 if (setresuid(uid, uid, uid) < 0) {
687d0825
MV
837 log_error("setreuid() failed: %m");
838 goto child_fail;
839 }
840 }
841
3bb1c6b0
LP
842 if ((asprintf((char**)(envp + 3), "HOME=%s", home? home: "/root") < 0) ||
843 (asprintf((char**)(envp + 4), "USER=%s", arg_user? arg_user : "root") < 0) ||
844 (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user? arg_user : "root") < 0)) {
5c94603d 845 log_error("Out of memory");
687d0825
MV
846 goto child_fail;
847 }
848
88213476
LP
849 if ((hn = file_name_from_path(arg_directory)))
850 sethostname(hn, strlen(hn));
851
852 if (argc > optind)
da5b3bad
LP
853 execvpe(argv[optind], argv + optind, (char**) envp);
854 else {
5c94603d 855 chdir(home ? home : "/root");
da5b3bad
LP
856 execle("/bin/bash", "-bash", NULL, (char**) envp);
857 }
88213476
LP
858
859 log_error("execv() failed: %m");
860
861 child_fail:
862 _exit(EXIT_FAILURE);
863 }
864
a258bf26
LP
865 if (process_pty(master, &mask) < 0)
866 goto finish;
867
868 if (saved_attr_valid) {
869 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
870 saved_attr_valid = false;
871 }
872
6df6b939 873 r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid);
88213476
LP
874
875 if (r < 0)
876 r = EXIT_FAILURE;
877
878finish:
a258bf26
LP
879 if (saved_attr_valid)
880 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
881
882 if (master >= 0)
883 close_nointr_nofail(master);
884
04d391da
LP
885 if (oldcg)
886 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
887
888 if (newcg)
889 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
88213476 890
04d391da
LP
891 free(arg_directory);
892 free(oldcg);
893 free(newcg);
88213476
LP
894
895 return r;
896}