]>
Commit | Line | Data |
---|---|---|
88213476 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
9 | under the terms of the GNU Lesser General Public License as published by |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
88213476 LP |
11 | (at your option) any later version. |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 16 | Lesser General Public License for more details. |
88213476 | 17 | |
5430f7f2 | 18 | You should have received a copy of the GNU Lesser General Public License |
88213476 LP |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
20 | ***/ | |
21 | ||
22 | #include <signal.h> | |
23 | #include <sched.h> | |
24 | #include <unistd.h> | |
25 | #include <sys/types.h> | |
26 | #include <sys/syscall.h> | |
27 | #include <sys/mount.h> | |
28 | #include <sys/wait.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <stdio.h> | |
32 | #include <errno.h> | |
33 | #include <sys/prctl.h> | |
34 | #include <sys/capability.h> | |
35 | #include <getopt.h> | |
a258bf26 LP |
36 | #include <sys/epoll.h> |
37 | #include <termios.h> | |
38 | #include <sys/signalfd.h> | |
687d0825 | 39 | #include <grp.h> |
5ed27dbd | 40 | #include <linux/fs.h> |
88213476 | 41 | |
81527be1 LP |
42 | #include <systemd/sd-daemon.h> |
43 | ||
88213476 LP |
44 | #include "log.h" |
45 | #include "util.h" | |
49e942b2 | 46 | #include "mkdir.h" |
d7832d2c | 47 | #include "audit.h" |
94d82985 | 48 | #include "missing.h" |
04d391da | 49 | #include "cgroup-util.h" |
a258bf26 | 50 | #include "strv.h" |
a41fe3a2 | 51 | #include "loopback-setup.h" |
88213476 LP |
52 | |
/* Command line state, filled in by parse_argv(). */

/* Container root directory from -D/--directory; heap-allocated, NULL until set. */
static char *arg_directory = NULL;

/* User name or uid from -u/--user; heap-allocated, NULL means "not given". */
static char *arg_user = NULL;

/* Set by --private-network; main() adds CLONE_NEWNET to the clone flags when true. */
static bool arg_private_network = false;
88213476 LP |
56 | |
/* Prints the usage summary to stdout. Always returns 0. */
static int help(void) {

        /* Synopsis line: the only part that needs formatting. */
        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n",
               program_invocation_short_name);

        /* Description and option list are fixed text. */
        fputs("Spawn a minimal namespace container for debugging, testing and building.\n\n"
              " -h --help Show this help\n"
              " -D --directory=NAME Root directory for the container\n"
              " -u --user=USER Run the command under specified user or uid\n"
              " --private-network Disable network in container\n",
              stdout);

        return 0;
}
69 | ||
70 | static int parse_argv(int argc, char *argv[]) { | |
71 | ||
a41fe3a2 | 72 | enum { |
ff01d048 | 73 | ARG_PRIVATE_NETWORK = 0x100 |
a41fe3a2 LP |
74 | }; |
75 | ||
88213476 | 76 | static const struct option options[] = { |
ff01d048 LP |
77 | { "help", no_argument, NULL, 'h' }, |
78 | { "directory", required_argument, NULL, 'D' }, | |
79 | { "user", required_argument, NULL, 'u' }, | |
80 | { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, | |
81 | { NULL, 0, NULL, 0 } | |
88213476 LP |
82 | }; |
83 | ||
84 | int c; | |
85 | ||
86 | assert(argc >= 0); | |
87 | assert(argv); | |
88 | ||
687d0825 | 89 | while ((c = getopt_long(argc, argv, "+hD:u:", options, NULL)) >= 0) { |
88213476 LP |
90 | |
91 | switch (c) { | |
92 | ||
93 | case 'h': | |
94 | help(); | |
95 | return 0; | |
96 | ||
97 | case 'D': | |
98 | free(arg_directory); | |
99 | if (!(arg_directory = strdup(optarg))) { | |
100 | log_error("Failed to duplicate root directory."); | |
101 | return -ENOMEM; | |
102 | } | |
103 | ||
104 | break; | |
105 | ||
687d0825 MV |
106 | case 'u': |
107 | free(arg_user); | |
108 | if (!(arg_user = strdup(optarg))) { | |
109 | log_error("Failed to duplicate user name."); | |
110 | return -ENOMEM; | |
111 | } | |
112 | ||
113 | break; | |
114 | ||
ff01d048 LP |
115 | case ARG_PRIVATE_NETWORK: |
116 | arg_private_network = true; | |
a41fe3a2 LP |
117 | break; |
118 | ||
88213476 LP |
119 | case '?': |
120 | return -EINVAL; | |
121 | ||
122 | default: | |
123 | log_error("Unknown option code %c", c); | |
124 | return -EINVAL; | |
125 | } | |
126 | } | |
127 | ||
128 | return 1; | |
129 | } | |
130 | ||
/*
 * Mounts the API file systems listed in mount_table below into the
 * container tree rooted at dest, then tries to bind mount the host's
 * /etc/localtime and /etc/timezone read-only into it.
 *
 * All table entries are attempted even if an earlier one fails. Returns 0
 * on success, otherwise the negative errno-style code of the first failure.
 * Failures of entries with fatal == false and of the time zone mounts are
 * ignored.
 */
static int mount_all(const char *dest) {

        typedef struct MountPoint {
                const char *what;
                const char *where;
                const char *type;
                const char *options;
                unsigned long flags;
                bool fatal;             /* if false, a failing mount() is silently ignored */
        } MountPoint;

        static const MountPoint mount_table[] = {
                { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND, true },                      /* Bind mount first */
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND, true },                      /* Bind mount first */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
                { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
                { "/dev/pts",  "/dev/pts",  "bind",  NULL,       MS_BIND, true },
                { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
#ifdef HAVE_SELINUX
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND, false },                      /* Bind mount first */
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
#endif
        };

        unsigned k;
        int r = 0;
        char *where;

        for (k = 0; k < ELEMENTSOF(mount_table); k++) {
                int t;

                if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
                        log_error("Out of memory");

                        if (r == 0)
                                r = -ENOMEM;

                        break;
                }

                if ((t = path_is_mount_point(where, false)) < 0) {
                        log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
                        free(where);

                        if (r == 0)
                                r = t;

                        continue;
                }

                /* Best effort: if this fails the mount() below reports the problem */
                mkdir_p(where, 0755);

                if (mount(mount_table[k].what,
                          where,
                          mount_table[k].type,
                          mount_table[k].flags,
                          mount_table[k].options) < 0 &&
                    mount_table[k].fatal) {

                        /* Capture errno before logging: the logging call may
                         * itself issue system calls that overwrite it. */
                        t = -errno;

                        log_error("mount(%s) failed: %m", where);

                        if (r == 0)
                                r = t;
                }

                free(where);
        }

        /* Fix the timezone, if possible */
        if (asprintf(&where, "%s/etc/localtime", dest) >= 0) {

                /* Bind first, then remount read-only; both steps are best effort */
                if (mount("/etc/localtime", where, "bind", MS_BIND, NULL) >= 0)
                        mount("/etc/localtime", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);

                free(where);
        }

        if (asprintf(&where, "%s/etc/timezone", dest) >= 0) {

                if (mount("/etc/timezone", where, "bind", MS_BIND, NULL) >= 0)
                        mount("/etc/timezone", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);

                free(where);
        }

        return r;
}
220 | ||
a258bf26 | 221 | static int copy_devnodes(const char *dest, const char *console) { |
88213476 LP |
222 | |
223 | static const char devnodes[] = | |
224 | "null\0" | |
225 | "zero\0" | |
226 | "full\0" | |
227 | "random\0" | |
228 | "urandom\0" | |
229 | "tty\0" | |
230 | "ptmx\0" | |
231 | "kmsg\0" | |
232 | "rtc0\0"; | |
233 | ||
234 | const char *d; | |
235 | int r = 0, k; | |
124640f1 | 236 | mode_t u; |
a258bf26 LP |
237 | struct stat st; |
238 | char *from = NULL, *to = NULL; | |
239 | ||
240 | assert(dest); | |
241 | assert(console); | |
124640f1 LP |
242 | |
243 | u = umask(0000); | |
88213476 LP |
244 | |
245 | NULSTR_FOREACH(d, devnodes) { | |
a258bf26 | 246 | from = to = NULL; |
88213476 LP |
247 | |
248 | asprintf(&from, "/dev/%s", d); | |
249 | asprintf(&to, "%s/dev/%s", dest, d); | |
250 | ||
251 | if (!from || !to) { | |
252 | log_error("Failed to allocate devnode path"); | |
253 | ||
254 | free(from); | |
255 | free(to); | |
256 | ||
a258bf26 LP |
257 | from = to = NULL; |
258 | ||
88213476 LP |
259 | if (r == 0) |
260 | r = -ENOMEM; | |
261 | ||
262 | break; | |
263 | } | |
264 | ||
265 | if (stat(from, &st) < 0) { | |
266 | ||
267 | if (errno != ENOENT) { | |
268 | log_error("Failed to stat %s: %m", from); | |
88213476 LP |
269 | if (r == 0) |
270 | r = -errno; | |
271 | } | |
272 | ||
a258bf26 | 273 | } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { |
88213476 | 274 | |
a258bf26 LP |
275 | log_error("%s is not a char or block device, cannot copy.", from); |
276 | if (r == 0) | |
277 | r = -EIO; | |
278 | ||
279 | } else if (mknod(to, st.st_mode, st.st_rdev) < 0) { | |
280 | ||
281 | log_error("mknod(%s) failed: %m", dest); | |
282 | if (r == 0) | |
283 | r = -errno; | |
88213476 LP |
284 | } |
285 | ||
286 | free(from); | |
287 | free(to); | |
288 | } | |
289 | ||
a258bf26 | 290 | if (stat(console, &st) < 0) { |
88213476 | 291 | |
a258bf26 | 292 | log_error("Failed to stat %s: %m", console); |
88213476 | 293 | if (r == 0) |
a258bf26 | 294 | r = -errno; |
88213476 | 295 | |
a258bf26 | 296 | goto finish; |
88213476 | 297 | |
a258bf26 | 298 | } else if (!S_ISCHR(st.st_mode)) { |
88213476 | 299 | |
a258bf26 LP |
300 | log_error("/dev/console is not a char device."); |
301 | if (r == 0) | |
302 | r = -EIO; | |
88213476 | 303 | |
a258bf26 LP |
304 | goto finish; |
305 | } | |
88213476 | 306 | |
a258bf26 | 307 | if (asprintf(&to, "%s/dev/console", dest) < 0) { |
88213476 | 308 | |
a258bf26 LP |
309 | log_error("Out of memory"); |
310 | if (r == 0) | |
311 | r = -ENOMEM; | |
88213476 | 312 | |
a258bf26 | 313 | goto finish; |
88213476 LP |
314 | } |
315 | ||
a258bf26 LP |
316 | /* We need to bind mount the right tty to /dev/console since |
317 | * ptys can only exist on pts file systems. To have something | |
318 | * to bind mount things on we create a device node first, that | |
319 | * has the right major/minor (note that the major minor | |
320 | * doesn't actually matter here, since we mount it over | |
321 | * anyway). */ | |
322 | ||
323 | if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) | |
324 | log_error("mknod for /dev/console failed: %m"); | |
325 | ||
326 | if (mount(console, to, "bind", MS_BIND, NULL) < 0) { | |
327 | log_error("bind mount for /dev/console failed: %m"); | |
328 | ||
329 | if (r == 0) | |
330 | r = -errno; | |
331 | } | |
332 | ||
333 | free(to); | |
334 | ||
335 | if ((k = chmod_and_chown(console, 0600, 0, 0)) < 0) { | |
336 | log_error("Failed to correct access mode for TTY: %s", strerror(-k)); | |
337 | ||
338 | if (r == 0) | |
339 | r = k; | |
340 | } | |
341 | ||
342 | finish: | |
124640f1 LP |
343 | umask(u); |
344 | ||
88213476 LP |
345 | return r; |
346 | } | |
347 | ||
348 | static int drop_capabilities(void) { | |
349 | static const unsigned long retain[] = { | |
350 | CAP_CHOWN, | |
351 | CAP_DAC_OVERRIDE, | |
352 | CAP_DAC_READ_SEARCH, | |
353 | CAP_FOWNER, | |
354 | CAP_FSETID, | |
355 | CAP_IPC_OWNER, | |
356 | CAP_KILL, | |
357 | CAP_LEASE, | |
358 | CAP_LINUX_IMMUTABLE, | |
359 | CAP_NET_BIND_SERVICE, | |
360 | CAP_NET_BROADCAST, | |
361 | CAP_NET_RAW, | |
362 | CAP_SETGID, | |
363 | CAP_SETFCAP, | |
364 | CAP_SETPCAP, | |
365 | CAP_SETUID, | |
366 | CAP_SYS_ADMIN, | |
367 | CAP_SYS_CHROOT, | |
368 | CAP_SYS_NICE, | |
369 | CAP_SYS_PTRACE, | |
370 | CAP_SYS_TTY_CONFIG | |
371 | }; | |
372 | ||
373 | unsigned long l; | |
374 | ||
64685e0c | 375 | for (l = 0; l <= cap_last_cap(); l++) { |
88213476 LP |
376 | unsigned i; |
377 | ||
378 | for (i = 0; i < ELEMENTSOF(retain); i++) | |
379 | if (retain[i] == l) | |
380 | break; | |
381 | ||
382 | if (i < ELEMENTSOF(retain)) | |
383 | continue; | |
384 | ||
385 | if (prctl(PR_CAPBSET_DROP, l) < 0) { | |
88213476 LP |
386 | log_error("PR_CAPBSET_DROP failed: %m"); |
387 | return -errno; | |
388 | } | |
389 | } | |
390 | ||
391 | return 0; | |
392 | } | |
393 | ||
/*
 * Heuristic check whether path is the root of an OS tree: the presence of
 * <path>/bin/sh is used as the marker.
 *
 * Returns 1 if the marker is accessible, 0 if it is not (for whatever
 * reason), and -ENOMEM if the path buffer cannot be allocated.
 */
static int is_os_tree(const char *path) {
        static const char marker[] = "/bin/sh";
        size_t size;
        char *probe;
        int found;

        /* sizeof(marker) already accounts for the terminating NUL */
        size = strlen(path) + sizeof(marker);

        probe = malloc(size);
        if (!probe)
                return -ENOMEM;

        snprintf(probe, size, "%s%s", path, marker);

        found = access(probe, F_OK) >= 0;
        free(probe);

        if (found)
                return 1;

        return 0;
}
407 | ||
a258bf26 | 408 | static int process_pty(int master, sigset_t *mask) { |
0c749d50 | 409 | |
b72491a2 | 410 | char in_buffer[LINE_MAX], out_buffer[LINE_MAX]; |
a258bf26 LP |
411 | size_t in_buffer_full = 0, out_buffer_full = 0; |
412 | struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev; | |
413 | bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false; | |
a258bf26 LP |
414 | int ep = -1, signal_fd = -1, r; |
415 | ||
416 | fd_nonblock(STDIN_FILENO, 1); | |
417 | fd_nonblock(STDOUT_FILENO, 1); | |
418 | fd_nonblock(master, 1); | |
419 | ||
420 | if ((signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) { | |
421 | log_error("signalfd(): %m"); | |
422 | r = -errno; | |
423 | goto finish; | |
424 | } | |
425 | ||
426 | if ((ep = epoll_create1(EPOLL_CLOEXEC)) < 0) { | |
427 | log_error("Failed to create epoll: %m"); | |
428 | r = -errno; | |
429 | goto finish; | |
430 | } | |
431 | ||
432 | zero(stdin_ev); | |
433 | stdin_ev.events = EPOLLIN|EPOLLET; | |
434 | stdin_ev.data.fd = STDIN_FILENO; | |
435 | ||
436 | zero(stdout_ev); | |
437 | stdout_ev.events = EPOLLOUT|EPOLLET; | |
438 | stdout_ev.data.fd = STDOUT_FILENO; | |
439 | ||
440 | zero(master_ev); | |
441 | master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET; | |
442 | master_ev.data.fd = master; | |
443 | ||
444 | zero(signal_ev); | |
445 | signal_ev.events = EPOLLIN; | |
446 | signal_ev.data.fd = signal_fd; | |
447 | ||
448 | if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 || | |
449 | epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 || | |
450 | epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 || | |
451 | epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) { | |
452 | log_error("Failed to regiser fds in epoll: %m"); | |
453 | r = -errno; | |
454 | goto finish; | |
455 | } | |
456 | ||
fd14078a | 457 | for (;;) { |
a258bf26 LP |
458 | struct epoll_event ev[16]; |
459 | ssize_t k; | |
460 | int i, nfds; | |
461 | ||
462 | if ((nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1)) < 0) { | |
463 | ||
464 | if (errno == EINTR || errno == EAGAIN) | |
465 | continue; | |
466 | ||
467 | log_error("epoll_wait(): %m"); | |
468 | r = -errno; | |
469 | goto finish; | |
470 | } | |
471 | ||
472 | assert(nfds >= 1); | |
473 | ||
474 | for (i = 0; i < nfds; i++) { | |
475 | if (ev[i].data.fd == STDIN_FILENO) { | |
476 | ||
fd14078a | 477 | if (ev[i].events & (EPOLLIN|EPOLLHUP)) |
a258bf26 LP |
478 | stdin_readable = true; |
479 | ||
480 | } else if (ev[i].data.fd == STDOUT_FILENO) { | |
481 | ||
fd14078a | 482 | if (ev[i].events & (EPOLLOUT|EPOLLHUP)) |
a258bf26 LP |
483 | stdout_writable = true; |
484 | ||
485 | } else if (ev[i].data.fd == master) { | |
486 | ||
fd14078a | 487 | if (ev[i].events & (EPOLLIN|EPOLLHUP)) |
a258bf26 LP |
488 | master_readable = true; |
489 | ||
fd14078a | 490 | if (ev[i].events & (EPOLLOUT|EPOLLHUP)) |
a258bf26 LP |
491 | master_writable = true; |
492 | ||
493 | } else if (ev[i].data.fd == signal_fd) { | |
494 | struct signalfd_siginfo sfsi; | |
495 | ssize_t n; | |
496 | ||
497 | if ((n = read(signal_fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) { | |
498 | ||
499 | if (n >= 0) { | |
0c749d50 | 500 | log_error("Failed to read from signalfd: invalid block size"); |
a258bf26 LP |
501 | r = -EIO; |
502 | goto finish; | |
503 | } | |
504 | ||
505 | if (errno != EINTR && errno != EAGAIN) { | |
0c749d50 | 506 | log_error("Failed to read from signalfd: %m"); |
a258bf26 LP |
507 | r = -errno; |
508 | goto finish; | |
509 | } | |
510 | } else { | |
511 | ||
512 | if (sfsi.ssi_signo == SIGWINCH) { | |
513 | struct winsize ws; | |
514 | ||
515 | /* The window size changed, let's forward that. */ | |
a258bf26 LP |
516 | if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0) |
517 | ioctl(master, TIOCSWINSZ, &ws); | |
518 | } else { | |
0c749d50 | 519 | r = 0; |
a258bf26 LP |
520 | goto finish; |
521 | } | |
522 | } | |
523 | } | |
524 | } | |
525 | ||
526 | while ((stdin_readable && in_buffer_full <= 0) || | |
527 | (master_writable && in_buffer_full > 0) || | |
528 | (master_readable && out_buffer_full <= 0) || | |
529 | (stdout_writable && out_buffer_full > 0)) { | |
530 | ||
b72491a2 | 531 | if (stdin_readable && in_buffer_full < LINE_MAX) { |
a258bf26 | 532 | |
b72491a2 | 533 | if ((k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full)) < 0) { |
a258bf26 | 534 | |
fd14078a | 535 | if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO) |
a258bf26 | 536 | stdin_readable = false; |
a258bf26 LP |
537 | else { |
538 | log_error("read(): %m"); | |
0c749d50 | 539 | r = -errno; |
a258bf26 LP |
540 | goto finish; |
541 | } | |
542 | } else | |
543 | in_buffer_full += (size_t) k; | |
a258bf26 LP |
544 | } |
545 | ||
546 | if (master_writable && in_buffer_full > 0) { | |
547 | ||
548 | if ((k = write(master, in_buffer, in_buffer_full)) < 0) { | |
549 | ||
fd14078a | 550 | if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO) |
a258bf26 | 551 | master_writable = false; |
fd14078a | 552 | else { |
a258bf26 | 553 | log_error("write(): %m"); |
0c749d50 | 554 | r = -errno; |
a258bf26 LP |
555 | goto finish; |
556 | } | |
557 | ||
558 | } else { | |
559 | assert(in_buffer_full >= (size_t) k); | |
560 | memmove(in_buffer, in_buffer + k, in_buffer_full - k); | |
561 | in_buffer_full -= k; | |
562 | } | |
563 | } | |
564 | ||
b72491a2 | 565 | if (master_readable && out_buffer_full < LINE_MAX) { |
a258bf26 | 566 | |
b72491a2 | 567 | if ((k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full)) < 0) { |
a258bf26 | 568 | |
fd14078a | 569 | if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO) |
a258bf26 | 570 | master_readable = false; |
a258bf26 LP |
571 | else { |
572 | log_error("read(): %m"); | |
0c749d50 | 573 | r = -errno; |
a258bf26 LP |
574 | goto finish; |
575 | } | |
576 | } else | |
577 | out_buffer_full += (size_t) k; | |
a258bf26 LP |
578 | } |
579 | ||
580 | if (stdout_writable && out_buffer_full > 0) { | |
581 | ||
582 | if ((k = write(STDOUT_FILENO, out_buffer, out_buffer_full)) < 0) { | |
583 | ||
fd14078a | 584 | if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO) |
a258bf26 | 585 | stdout_writable = false; |
fd14078a | 586 | else { |
a258bf26 | 587 | log_error("write(): %m"); |
0c749d50 | 588 | r = -errno; |
a258bf26 LP |
589 | goto finish; |
590 | } | |
591 | ||
592 | } else { | |
593 | assert(out_buffer_full >= (size_t) k); | |
594 | memmove(out_buffer, out_buffer + k, out_buffer_full - k); | |
595 | out_buffer_full -= k; | |
596 | } | |
597 | } | |
598 | } | |
fd14078a | 599 | } |
a258bf26 LP |
600 | |
601 | finish: | |
602 | if (ep >= 0) | |
603 | close_nointr_nofail(ep); | |
604 | ||
605 | if (signal_fd >= 0) | |
606 | close_nointr_nofail(signal_fd); | |
607 | ||
608 | return r; | |
609 | } | |
88213476 LP |
610 | |
611 | int main(int argc, char *argv[]) { | |
612 | pid_t pid = 0; | |
04d391da LP |
613 | int r = EXIT_FAILURE, k; |
614 | char *oldcg = NULL, *newcg = NULL; | |
a258bf26 LP |
615 | int master = -1; |
616 | const char *console = NULL; | |
617 | struct termios saved_attr, raw_attr; | |
618 | sigset_t mask; | |
619 | bool saved_attr_valid = false; | |
620 | struct winsize ws; | |
88213476 LP |
621 | |
622 | log_parse_environment(); | |
623 | log_open(); | |
624 | ||
625 | if ((r = parse_argv(argc, argv)) <= 0) | |
626 | goto finish; | |
627 | ||
628 | if (arg_directory) { | |
629 | char *p; | |
630 | ||
631 | p = path_make_absolute_cwd(arg_directory); | |
632 | free(arg_directory); | |
633 | arg_directory = p; | |
634 | } else | |
635 | arg_directory = get_current_dir_name(); | |
636 | ||
637 | if (!arg_directory) { | |
638 | log_error("Failed to determine path"); | |
639 | goto finish; | |
640 | } | |
641 | ||
642 | path_kill_slashes(arg_directory); | |
643 | ||
644 | if (geteuid() != 0) { | |
645 | log_error("Need to be root."); | |
646 | goto finish; | |
647 | } | |
648 | ||
04d391da LP |
649 | if (sd_booted() <= 0) { |
650 | log_error("Not running on a systemd system."); | |
651 | goto finish; | |
652 | } | |
653 | ||
88213476 | 654 | if (path_equal(arg_directory, "/")) { |
6df6b939 | 655 | log_error("Spawning container on root directory not supported."); |
88213476 LP |
656 | goto finish; |
657 | } | |
658 | ||
659 | if (is_os_tree(arg_directory) <= 0) { | |
660 | log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory); | |
661 | goto finish; | |
662 | } | |
663 | ||
04d391da LP |
664 | if ((k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg)) < 0) { |
665 | log_error("Failed to determine current cgroup: %s", strerror(-k)); | |
666 | goto finish; | |
667 | } | |
668 | ||
669 | if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) { | |
670 | log_error("Failed to allocate cgroup path."); | |
671 | goto finish; | |
672 | } | |
673 | ||
674 | if ((k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0)) < 0) { | |
675 | log_error("Failed to create cgroup: %s", strerror(-k)); | |
676 | goto finish; | |
677 | } | |
678 | ||
a258bf26 LP |
679 | if ((master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY)) < 0) { |
680 | log_error("Failed to acquire pseudo tty: %m"); | |
681 | goto finish; | |
682 | } | |
683 | ||
684 | if (!(console = ptsname(master))) { | |
685 | log_error("Failed to determine tty name: %m"); | |
686 | goto finish; | |
687 | } | |
688 | ||
689 | log_info("Spawning namespace container on %s (console is %s).", arg_directory, console); | |
690 | ||
691 | if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0) | |
692 | ioctl(master, TIOCSWINSZ, &ws); | |
693 | ||
694 | if (unlockpt(master) < 0) { | |
695 | log_error("Failed to unlock tty: %m"); | |
696 | goto finish; | |
697 | } | |
698 | ||
699 | if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) { | |
700 | log_error("Failed to get terminal attributes: %m"); | |
701 | goto finish; | |
702 | } | |
703 | ||
704 | saved_attr_valid = true; | |
705 | ||
706 | raw_attr = saved_attr; | |
707 | cfmakeraw(&raw_attr); | |
708 | raw_attr.c_lflag &= ~ECHO; | |
709 | ||
710 | if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) { | |
711 | log_error("Failed to set terminal attributes: %m"); | |
712 | goto finish; | |
713 | } | |
714 | ||
715 | assert_se(sigemptyset(&mask) == 0); | |
716 | sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1); | |
717 | assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0); | |
718 | ||
52af2106 LP |
719 | pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL); |
720 | if (pid < 0) { | |
721 | if (errno == EINVAL) | |
722 | log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m"); | |
723 | else | |
724 | log_error("clone() failed: %m"); | |
725 | ||
88213476 LP |
726 | goto finish; |
727 | } | |
728 | ||
729 | if (pid == 0) { | |
a258bf26 LP |
730 | /* child */ |
731 | ||
88213476 | 732 | const char *hn; |
687d0825 MV |
733 | const char *home = NULL; |
734 | uid_t uid = (uid_t) -1; | |
735 | gid_t gid = (gid_t) -1; | |
da5b3bad | 736 | const char *envp[] = { |
da5b3bad | 737 | "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", |
3bb1c6b0 | 738 | "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ |
687d0825 MV |
739 | NULL, /* TERM */ |
740 | NULL, /* HOME */ | |
741 | NULL, /* USER */ | |
742 | NULL, /* LOGNAME */ | |
da5b3bad LP |
743 | NULL |
744 | }; | |
88213476 | 745 | |
3bb1c6b0 | 746 | envp[2] = strv_find_prefix(environ, "TERM="); |
a258bf26 LP |
747 | |
748 | close_nointr_nofail(master); | |
749 | ||
750 | close_nointr(STDIN_FILENO); | |
751 | close_nointr(STDOUT_FILENO); | |
752 | close_nointr(STDERR_FILENO); | |
753 | ||
754 | close_all_fds(NULL, 0); | |
755 | ||
756 | reset_all_signal_handlers(); | |
757 | ||
758 | assert_se(sigemptyset(&mask) == 0); | |
759 | assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); | |
760 | ||
761 | if (setsid() < 0) | |
762 | goto child_fail; | |
763 | ||
764 | if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) | |
765 | goto child_fail; | |
88213476 | 766 | |
f5c1b9ee LP |
767 | /* Mark / as private, in case somebody marked it shared */ |
768 | if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) | |
769 | goto child_fail; | |
770 | ||
88213476 LP |
771 | if (mount_all(arg_directory) < 0) |
772 | goto child_fail; | |
773 | ||
a258bf26 | 774 | if (copy_devnodes(arg_directory, console) < 0) |
88213476 LP |
775 | goto child_fail; |
776 | ||
777 | if (chdir(arg_directory) < 0) { | |
778 | log_error("chdir(%s) failed: %m", arg_directory); | |
779 | goto child_fail; | |
780 | } | |
a258bf26 LP |
781 | |
782 | if (open_terminal("dev/console", O_RDWR) != STDIN_FILENO || | |
783 | dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO || | |
784 | dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) | |
785 | goto child_fail; | |
786 | ||
88213476 LP |
787 | if (mount(arg_directory, "/", "bind", MS_BIND|MS_MOVE, NULL) < 0) { |
788 | log_error("mount(MS_MOVE) failed: %m"); | |
789 | goto child_fail; | |
790 | } | |
791 | ||
792 | if (chroot(".") < 0) { | |
793 | log_error("chroot() failed: %m"); | |
794 | goto child_fail; | |
795 | } | |
796 | ||
797 | if (chdir("/") < 0) { | |
798 | log_error("chdir() failed: %m"); | |
799 | goto child_fail; | |
800 | } | |
801 | ||
4c12626c | 802 | umask(0022); |
a258bf26 | 803 | |
a41fe3a2 LP |
804 | loopback_setup(); |
805 | ||
88213476 LP |
806 | if (drop_capabilities() < 0) |
807 | goto child_fail; | |
808 | ||
687d0825 MV |
809 | if (arg_user) { |
810 | ||
811 | if (get_user_creds((const char**)&arg_user, &uid, &gid, &home) < 0) { | |
812 | log_error("get_user_creds() failed: %m"); | |
813 | goto child_fail; | |
814 | } | |
815 | ||
816 | if (mkdir_parents(home, 0775) < 0) { | |
817 | log_error("mkdir_parents() failed: %m"); | |
818 | goto child_fail; | |
819 | } | |
820 | ||
821 | if (safe_mkdir(home, 0775, uid, gid) < 0) { | |
822 | log_error("safe_mkdir() failed: %m"); | |
823 | goto child_fail; | |
824 | } | |
825 | ||
826 | if (initgroups((const char*)arg_user, gid) < 0) { | |
827 | log_error("initgroups() failed: %m"); | |
828 | goto child_fail; | |
829 | } | |
830 | ||
5c94603d | 831 | if (setresgid(gid, gid, gid) < 0) { |
687d0825 MV |
832 | log_error("setregid() failed: %m"); |
833 | goto child_fail; | |
834 | } | |
835 | ||
5c94603d | 836 | if (setresuid(uid, uid, uid) < 0) { |
687d0825 MV |
837 | log_error("setreuid() failed: %m"); |
838 | goto child_fail; | |
839 | } | |
840 | } | |
841 | ||
3bb1c6b0 LP |
842 | if ((asprintf((char**)(envp + 3), "HOME=%s", home? home: "/root") < 0) || |
843 | (asprintf((char**)(envp + 4), "USER=%s", arg_user? arg_user : "root") < 0) || | |
844 | (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user? arg_user : "root") < 0)) { | |
5c94603d | 845 | log_error("Out of memory"); |
687d0825 MV |
846 | goto child_fail; |
847 | } | |
848 | ||
88213476 LP |
849 | if ((hn = file_name_from_path(arg_directory))) |
850 | sethostname(hn, strlen(hn)); | |
851 | ||
852 | if (argc > optind) | |
da5b3bad LP |
853 | execvpe(argv[optind], argv + optind, (char**) envp); |
854 | else { | |
5c94603d | 855 | chdir(home ? home : "/root"); |
da5b3bad LP |
856 | execle("/bin/bash", "-bash", NULL, (char**) envp); |
857 | } | |
88213476 LP |
858 | |
859 | log_error("execv() failed: %m"); | |
860 | ||
861 | child_fail: | |
862 | _exit(EXIT_FAILURE); | |
863 | } | |
864 | ||
a258bf26 LP |
865 | if (process_pty(master, &mask) < 0) |
866 | goto finish; | |
867 | ||
868 | if (saved_attr_valid) { | |
869 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
870 | saved_attr_valid = false; | |
871 | } | |
872 | ||
6df6b939 | 873 | r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid); |
88213476 LP |
874 | |
875 | if (r < 0) | |
876 | r = EXIT_FAILURE; | |
877 | ||
878 | finish: | |
a258bf26 LP |
879 | if (saved_attr_valid) |
880 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
881 | ||
882 | if (master >= 0) | |
883 | close_nointr_nofail(master); | |
884 | ||
04d391da LP |
885 | if (oldcg) |
886 | cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0); | |
887 | ||
888 | if (newcg) | |
889 | cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true); | |
88213476 | 890 | |
04d391da LP |
891 | free(arg_directory); |
892 | free(oldcg); | |
893 | free(newcg); | |
88213476 LP |
894 | |
895 | return r; | |
896 | } |