]>
Commit | Line | Data |
---|---|---|
88213476 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <signal.h> | |
23 | #include <sched.h> | |
24 | #include <unistd.h> | |
25 | #include <sys/types.h> | |
26 | #include <sys/syscall.h> | |
27 | #include <sys/mount.h> | |
28 | #include <sys/wait.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <stdio.h> | |
32 | #include <errno.h> | |
33 | #include <sys/prctl.h> | |
34 | #include <sys/capability.h> | |
35 | #include <getopt.h> | |
a258bf26 LP |
36 | #include <sys/epoll.h> |
37 | #include <termios.h> | |
38 | #include <sys/signalfd.h> | |
687d0825 | 39 | #include <grp.h> |
88213476 LP |
40 | |
41 | #include "log.h" | |
42 | #include "util.h" | |
94d82985 | 43 | #include "missing.h" |
04d391da LP |
44 | #include "cgroup-util.h" |
45 | #include "sd-daemon.h" | |
a258bf26 | 46 | #include "strv.h" |
88213476 LP |
47 | |
/* Container root directory; set by -D/--directory in parse_argv(). */
static char *arg_directory = NULL;
/* User name or uid to run the payload as; set by -u/--user in parse_argv(). */
static char *arg_user = NULL;
88213476 LP |
50 | |
/* Prints the usage synopsis and the option summary to stdout.
 * Always returns 0. */
static int help(void) {

        static const char usage[] =
                "%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
                " -h --help Show this help\n"
                " -D --directory=NAME Root directory for the container\n"
                " -u --user=USER Run the command under specified user or uid\n";

        /* The single %s in the template is the name we were invoked as. */
        printf(usage, program_invocation_short_name);

        return 0;
}
62 | ||
63 | static int parse_argv(int argc, char *argv[]) { | |
64 | ||
65 | static const struct option options[] = { | |
66 | { "help", no_argument, NULL, 'h' }, | |
67 | { "directory", required_argument, NULL, 'D' }, | |
687d0825 | 68 | { "user", optional_argument, NULL, 'u' }, |
88213476 LP |
69 | { NULL, 0, NULL, 0 } |
70 | }; | |
71 | ||
72 | int c; | |
73 | ||
74 | assert(argc >= 0); | |
75 | assert(argv); | |
76 | ||
687d0825 | 77 | while ((c = getopt_long(argc, argv, "+hD:u:", options, NULL)) >= 0) { |
88213476 LP |
78 | |
79 | switch (c) { | |
80 | ||
81 | case 'h': | |
82 | help(); | |
83 | return 0; | |
84 | ||
85 | case 'D': | |
86 | free(arg_directory); | |
87 | if (!(arg_directory = strdup(optarg))) { | |
88 | log_error("Failed to duplicate root directory."); | |
89 | return -ENOMEM; | |
90 | } | |
91 | ||
92 | break; | |
93 | ||
687d0825 MV |
94 | case 'u': |
95 | free(arg_user); | |
96 | if (!(arg_user = strdup(optarg))) { | |
97 | log_error("Failed to duplicate user name."); | |
98 | return -ENOMEM; | |
99 | } | |
100 | ||
101 | break; | |
102 | ||
88213476 LP |
103 | case '?': |
104 | return -EINVAL; | |
105 | ||
106 | default: | |
107 | log_error("Unknown option code %c", c); | |
108 | return -EINVAL; | |
109 | } | |
110 | } | |
111 | ||
112 | return 1; | |
113 | } | |
114 | ||
/*
 * Mounts every entry of the built-in mount table below the directory dest,
 * then tries to bind mount the host's /etc/localtime read-only into it.
 *
 * Processing continues after a failed entry; the first error seen is what
 * gets returned. mount() failures of entries whose "fatal" flag is false are
 * ignored, and so are failures of the /etc/localtime mounts.
 *
 * Returns 0 on success or a negative errno-style value.
 */
static int mount_all(const char *dest) {

        typedef struct MountPoint {
                const char *what;
                const char *where;
                const char *type;
                const char *options;
                unsigned long flags;
                bool fatal;
        } MountPoint;

        static const MountPoint mount_table[] = {
                { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND,                      true  },   /* Bind mount first */
                { "/proc/sys", "/proc/sys", "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND,                      true  },   /* Bind mount first */
                { "/sys",      "/sys",      "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
                { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID,                    true  },
                { "/dev/pts",  "/dev/pts",  "bind",  NULL,       MS_BIND,                      true  },
                { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV,           true  },
#ifdef HAVE_SELINUX
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND,                      false },  /* Bind mount first */
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
#endif
        };

        unsigned k;
        int r = 0;
        char *where;

        for (k = 0; k < ELEMENTSOF(mount_table); k++) {
                const MountPoint *m = mount_table + k;
                int t;

                if (asprintf(&where, "%s/%s", dest, m->where) < 0) {
                        log_error("Out of memory");

                        if (r == 0)
                                r = -ENOMEM;

                        break;
                }

                if ((t = path_is_mount_point(where)) < 0) {
                        log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
                        free(where);

                        if (r == 0)
                                r = t;

                        continue;
                }

                mkdir_p(where, 0755);

                if (mount(m->what, where, m->type, m->flags, m->options) < 0 && m->fatal) {
                        /* Capture errno before logging: the logging call may
                         * itself change it. */
                        int saved_errno = errno;

                        log_error("mount(%s) failed: %m", where);

                        if (r == 0)
                                r = -saved_errno;
                }

                free(where);
        }

        /* Fix the timezone, if possible */
        if (asprintf(&where, "%s/%s", dest, "/etc/localtime") >= 0) {
                mount("/etc/localtime", where, "bind", MS_BIND, NULL);
                mount("/etc/localtime", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
                free(where);
        }

        return r;
}
194 | ||
a258bf26 | 195 | static int copy_devnodes(const char *dest, const char *console) { |
88213476 LP |
196 | |
197 | static const char devnodes[] = | |
198 | "null\0" | |
199 | "zero\0" | |
200 | "full\0" | |
201 | "random\0" | |
202 | "urandom\0" | |
203 | "tty\0" | |
204 | "ptmx\0" | |
205 | "kmsg\0" | |
206 | "rtc0\0"; | |
207 | ||
208 | const char *d; | |
209 | int r = 0, k; | |
124640f1 | 210 | mode_t u; |
a258bf26 LP |
211 | struct stat st; |
212 | char *from = NULL, *to = NULL; | |
213 | ||
214 | assert(dest); | |
215 | assert(console); | |
124640f1 LP |
216 | |
217 | u = umask(0000); | |
88213476 LP |
218 | |
219 | NULSTR_FOREACH(d, devnodes) { | |
a258bf26 | 220 | from = to = NULL; |
88213476 LP |
221 | |
222 | asprintf(&from, "/dev/%s", d); | |
223 | asprintf(&to, "%s/dev/%s", dest, d); | |
224 | ||
225 | if (!from || !to) { | |
226 | log_error("Failed to allocate devnode path"); | |
227 | ||
228 | free(from); | |
229 | free(to); | |
230 | ||
a258bf26 LP |
231 | from = to = NULL; |
232 | ||
88213476 LP |
233 | if (r == 0) |
234 | r = -ENOMEM; | |
235 | ||
236 | break; | |
237 | } | |
238 | ||
239 | if (stat(from, &st) < 0) { | |
240 | ||
241 | if (errno != ENOENT) { | |
242 | log_error("Failed to stat %s: %m", from); | |
88213476 LP |
243 | if (r == 0) |
244 | r = -errno; | |
245 | } | |
246 | ||
a258bf26 | 247 | } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { |
88213476 | 248 | |
a258bf26 LP |
249 | log_error("%s is not a char or block device, cannot copy.", from); |
250 | if (r == 0) | |
251 | r = -EIO; | |
252 | ||
253 | } else if (mknod(to, st.st_mode, st.st_rdev) < 0) { | |
254 | ||
255 | log_error("mknod(%s) failed: %m", dest); | |
256 | if (r == 0) | |
257 | r = -errno; | |
88213476 LP |
258 | } |
259 | ||
260 | free(from); | |
261 | free(to); | |
262 | } | |
263 | ||
a258bf26 | 264 | if (stat(console, &st) < 0) { |
88213476 | 265 | |
a258bf26 | 266 | log_error("Failed to stat %s: %m", console); |
88213476 | 267 | if (r == 0) |
a258bf26 | 268 | r = -errno; |
88213476 | 269 | |
a258bf26 | 270 | goto finish; |
88213476 | 271 | |
a258bf26 | 272 | } else if (!S_ISCHR(st.st_mode)) { |
88213476 | 273 | |
a258bf26 LP |
274 | log_error("/dev/console is not a char device."); |
275 | if (r == 0) | |
276 | r = -EIO; | |
88213476 | 277 | |
a258bf26 LP |
278 | goto finish; |
279 | } | |
88213476 | 280 | |
a258bf26 | 281 | if (asprintf(&to, "%s/dev/console", dest) < 0) { |
88213476 | 282 | |
a258bf26 LP |
283 | log_error("Out of memory"); |
284 | if (r == 0) | |
285 | r = -ENOMEM; | |
88213476 | 286 | |
a258bf26 | 287 | goto finish; |
88213476 LP |
288 | } |
289 | ||
a258bf26 LP |
290 | /* We need to bind mount the right tty to /dev/console since |
291 | * ptys can only exist on pts file systems. To have something | |
292 | * to bind mount things on we create a device node first, that | |
293 | * has the right major/minor (note that the major minor | |
294 | * doesn't actually matter here, since we mount it over | |
295 | * anyway). */ | |
296 | ||
297 | if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) | |
298 | log_error("mknod for /dev/console failed: %m"); | |
299 | ||
300 | if (mount(console, to, "bind", MS_BIND, NULL) < 0) { | |
301 | log_error("bind mount for /dev/console failed: %m"); | |
302 | ||
303 | if (r == 0) | |
304 | r = -errno; | |
305 | } | |
306 | ||
307 | free(to); | |
308 | ||
309 | if ((k = chmod_and_chown(console, 0600, 0, 0)) < 0) { | |
310 | log_error("Failed to correct access mode for TTY: %s", strerror(-k)); | |
311 | ||
312 | if (r == 0) | |
313 | r = k; | |
314 | } | |
315 | ||
316 | finish: | |
124640f1 LP |
317 | umask(u); |
318 | ||
88213476 LP |
319 | return r; |
320 | } | |
321 | ||
/*
 * Drops every capability that is not on the retain list from the calling
 * process's capability bounding set.
 *
 * Capability numbers are walked upwards from 0. The walk stops early at the
 * first number the kernel rejects with EINVAL, i.e. one it does not know.
 *
 * Returns 0 on success, or -errno if PR_CAPBSET_DROP fails for any other
 * reason.
 */
static int drop_capabilities(void) {
        static const unsigned long retain[] = {
                CAP_CHOWN,
                CAP_DAC_OVERRIDE,
                CAP_DAC_READ_SEARCH,
                CAP_FOWNER,
                CAP_FSETID,
                CAP_IPC_OWNER,
                CAP_KILL,
                CAP_LEASE,
                CAP_LINUX_IMMUTABLE,
                CAP_NET_BIND_SERVICE,
                CAP_NET_BROADCAST,
                CAP_NET_RAW,
                CAP_SETGID,
                CAP_SETFCAP,
                CAP_SETPCAP,
                CAP_SETUID,
                CAP_SYS_ADMIN,
                CAP_SYS_CHROOT,
                CAP_SYS_NICE,
                CAP_SYS_PTRACE,
                CAP_SYS_TTY_CONFIG
        };

        const unsigned long highest = MAX(63LU, (unsigned long) CAP_LAST_CAP);
        unsigned long cap;

        for (cap = 0; cap <= highest; cap++) {
                bool keep = false;
                unsigned j;

                for (j = 0; j < ELEMENTSOF(retain) && !keep; j++)
                        keep = retain[j] == cap;

                if (keep)
                        continue;

                if (prctl(PR_CAPBSET_DROP, cap) >= 0)
                        continue;

                /* The kernel does not know this capability number: stop
                 * walking instead of treating it as an error. */
                if (errno == EINVAL)
                        break;

                log_error("PR_CAPBSET_DROP failed: %m");
                return -errno;
        }

        return 0;
}
373 | ||
/*
 * Checks whether path looks like the root of an OS installation, using the
 * existence of <path>/bin/sh as the marker.
 *
 * Returns 1 if the marker exists, 0 if it cannot be accessed, and -ENOMEM if
 * the path string could not be allocated.
 */
static int is_os_tree(const char *path) {
        char *shell;
        int found;

        if (asprintf(&shell, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(shell, F_OK) >= 0;
        free(shell);

        return found;
}
387 | ||
a258bf26 LP |
#define BUFFER_SIZE 1024

/* Tells whether a read()/write() errno only means "this direction is not
 * usable right now". The caller then clears its ready flag and waits for the
 * next epoll event rather than giving up. */
static bool io_error_is_soft(int e) {
        return e == EAGAIN || e == EPIPE || e == ECONNRESET || e == EIO;
}

/*
 * Shovels bytes between our stdin/stdout and the pty master until a signal
 * other than SIGWINCH arrives on the signalfd built from mask.
 *
 * stdin is copied to master through in_buffer and master to stdout through
 * out_buffer. All fds are switched to non-blocking mode and watched with
 * edge-triggered epoll; the *_readable / *_writable flags remember that an fd
 * was reported ready until a read()/write() on it says otherwise. SIGWINCH is
 * handled by copying the window size of stdin onto master.
 *
 * Returns 0 when terminated by a signal, or a negative errno-style value if
 * setup or I/O failed.
 */
static int process_pty(int master, sigset_t *mask) {

        char in_buffer[BUFFER_SIZE], out_buffer[BUFFER_SIZE];
        size_t in_buffer_full = 0, out_buffer_full = 0;
        struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
        bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
        int ep = -1, signal_fd = -1, r;

        fd_nonblock(STDIN_FILENO, 1);
        fd_nonblock(STDOUT_FILENO, 1);
        fd_nonblock(master, 1);

        /* Throughout: r is assigned from errno before logging, so that the
         * logging call cannot disturb the value we return. */

        if ((signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) {
                r = -errno;
                log_error("signalfd(): %m");
                goto finish;
        }

        if ((ep = epoll_create1(EPOLL_CLOEXEC)) < 0) {
                r = -errno;
                log_error("Failed to create epoll: %m");
                goto finish;
        }

        zero(stdin_ev);
        stdin_ev.events = EPOLLIN|EPOLLET;
        stdin_ev.data.fd = STDIN_FILENO;

        zero(stdout_ev);
        stdout_ev.events = EPOLLOUT|EPOLLET;
        stdout_ev.data.fd = STDOUT_FILENO;

        zero(master_ev);
        master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
        master_ev.data.fd = master;

        zero(signal_ev);
        signal_ev.events = EPOLLIN;
        signal_ev.data.fd = signal_fd;

        if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
                r = -errno;
                log_error("Failed to register fds in epoll: %m");
                goto finish;
        }

        for (;;) {
                struct epoll_event ev[16];
                ssize_t k;
                int i, nfds;

                if ((nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1)) < 0) {

                        if (errno == EINTR || errno == EAGAIN)
                                continue;

                        r = -errno;
                        log_error("epoll_wait(): %m");
                        goto finish;
                }

                assert(nfds >= 1);

                /* First pass: record which fds became ready and handle
                 * signals. */
                for (i = 0; i < nfds; i++) {
                        if (ev[i].data.fd == STDIN_FILENO) {

                                if (ev[i].events & (EPOLLIN|EPOLLHUP))
                                        stdin_readable = true;

                        } else if (ev[i].data.fd == STDOUT_FILENO) {

                                if (ev[i].events & (EPOLLOUT|EPOLLHUP))
                                        stdout_writable = true;

                        } else if (ev[i].data.fd == master) {

                                if (ev[i].events & (EPOLLIN|EPOLLHUP))
                                        master_readable = true;

                                if (ev[i].events & (EPOLLOUT|EPOLLHUP))
                                        master_writable = true;

                        } else if (ev[i].data.fd == signal_fd) {
                                struct signalfd_siginfo sfsi;
                                ssize_t n;

                                if ((n = read(signal_fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {

                                        if (n >= 0) {
                                                log_error("Failed to read from signalfd: invalid block size");
                                                r = -EIO;
                                                goto finish;
                                        }

                                        if (errno != EINTR && errno != EAGAIN) {
                                                r = -errno;
                                                log_error("Failed to read from signalfd: %m");
                                                goto finish;
                                        }
                                } else {

                                        if (sfsi.ssi_signo == SIGWINCH) {
                                                struct winsize ws;

                                                /* The window size changed, let's forward that. */
                                                if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
                                                        ioctl(master, TIOCSWINSZ, &ws);
                                        } else {
                                                /* Any other signal in the mask ends the loop. */
                                                r = 0;
                                                goto finish;
                                        }
                                }
                        }
                }

                /* Second pass: move data for as long as some step can make
                 * progress. */
                while ((stdin_readable && in_buffer_full <= 0) ||
                       (master_writable && in_buffer_full > 0) ||
                       (master_readable && out_buffer_full <= 0) ||
                       (stdout_writable && out_buffer_full > 0)) {

                        if (stdin_readable && in_buffer_full < BUFFER_SIZE) {

                                k = read(STDIN_FILENO, in_buffer + in_buffer_full, BUFFER_SIZE - in_buffer_full);
                                if (k < 0) {

                                        if (io_error_is_soft(errno))
                                                stdin_readable = false;
                                        else {
                                                r = -errno;
                                                log_error("read(): %m");
                                                goto finish;
                                        }
                                } else if (k == 0) {
                                        /* End of file. Without clearing the
                                         * flag the loop condition would stay
                                         * true on an empty buffer and we
                                         * would spin here forever. */
                                        stdin_readable = false;
                                } else
                                        in_buffer_full += (size_t) k;
                        }

                        if (master_writable && in_buffer_full > 0) {

                                if ((k = write(master, in_buffer, in_buffer_full)) < 0) {

                                        if (io_error_is_soft(errno))
                                                master_writable = false;
                                        else {
                                                r = -errno;
                                                log_error("write(): %m");
                                                goto finish;
                                        }

                                } else {
                                        /* Keep whatever was not written at
                                         * the front of the buffer. */
                                        assert(in_buffer_full >= (size_t) k);
                                        memmove(in_buffer, in_buffer + k, in_buffer_full - k);
                                        in_buffer_full -= k;
                                }
                        }

                        if (master_readable && out_buffer_full < BUFFER_SIZE) {

                                k = read(master, out_buffer + out_buffer_full, BUFFER_SIZE - out_buffer_full);
                                if (k < 0) {

                                        if (io_error_is_soft(errno))
                                                master_readable = false;
                                        else {
                                                r = -errno;
                                                log_error("read(): %m");
                                                goto finish;
                                        }
                                } else if (k == 0) {
                                        /* End of file, see the stdin case. */
                                        master_readable = false;
                                } else
                                        out_buffer_full += (size_t) k;
                        }

                        if (stdout_writable && out_buffer_full > 0) {

                                if ((k = write(STDOUT_FILENO, out_buffer, out_buffer_full)) < 0) {

                                        if (io_error_is_soft(errno))
                                                stdout_writable = false;
                                        else {
                                                r = -errno;
                                                log_error("write(): %m");
                                                goto finish;
                                        }

                                } else {
                                        assert(out_buffer_full >= (size_t) k);
                                        memmove(out_buffer, out_buffer + k, out_buffer_full - k);
                                        out_buffer_full -= k;
                                }
                        }
                }
        }

finish:
        if (ep >= 0)
                close_nointr_nofail(ep);

        if (signal_fd >= 0)
                close_nointr_nofail(signal_fd);

        return r;
}
88213476 LP |
592 | |
593 | int main(int argc, char *argv[]) { | |
594 | pid_t pid = 0; | |
04d391da LP |
595 | int r = EXIT_FAILURE, k; |
596 | char *oldcg = NULL, *newcg = NULL; | |
a258bf26 LP |
597 | int master = -1; |
598 | const char *console = NULL; | |
599 | struct termios saved_attr, raw_attr; | |
600 | sigset_t mask; | |
601 | bool saved_attr_valid = false; | |
602 | struct winsize ws; | |
88213476 LP |
603 | |
604 | log_parse_environment(); | |
605 | log_open(); | |
606 | ||
607 | if ((r = parse_argv(argc, argv)) <= 0) | |
608 | goto finish; | |
609 | ||
610 | if (arg_directory) { | |
611 | char *p; | |
612 | ||
613 | p = path_make_absolute_cwd(arg_directory); | |
614 | free(arg_directory); | |
615 | arg_directory = p; | |
616 | } else | |
617 | arg_directory = get_current_dir_name(); | |
618 | ||
619 | if (!arg_directory) { | |
620 | log_error("Failed to determine path"); | |
621 | goto finish; | |
622 | } | |
623 | ||
624 | path_kill_slashes(arg_directory); | |
625 | ||
626 | if (geteuid() != 0) { | |
627 | log_error("Need to be root."); | |
628 | goto finish; | |
629 | } | |
630 | ||
04d391da LP |
631 | if (sd_booted() <= 0) { |
632 | log_error("Not running on a systemd system."); | |
633 | goto finish; | |
634 | } | |
635 | ||
88213476 | 636 | if (path_equal(arg_directory, "/")) { |
6df6b939 | 637 | log_error("Spawning container on root directory not supported."); |
88213476 LP |
638 | goto finish; |
639 | } | |
640 | ||
641 | if (is_os_tree(arg_directory) <= 0) { | |
642 | log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory); | |
643 | goto finish; | |
644 | } | |
645 | ||
04d391da LP |
646 | if ((k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg)) < 0) { |
647 | log_error("Failed to determine current cgroup: %s", strerror(-k)); | |
648 | goto finish; | |
649 | } | |
650 | ||
651 | if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) { | |
652 | log_error("Failed to allocate cgroup path."); | |
653 | goto finish; | |
654 | } | |
655 | ||
656 | if ((k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0)) < 0) { | |
657 | log_error("Failed to create cgroup: %s", strerror(-k)); | |
658 | goto finish; | |
659 | } | |
660 | ||
a258bf26 LP |
661 | if ((master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY)) < 0) { |
662 | log_error("Failed to acquire pseudo tty: %m"); | |
663 | goto finish; | |
664 | } | |
665 | ||
666 | if (!(console = ptsname(master))) { | |
667 | log_error("Failed to determine tty name: %m"); | |
668 | goto finish; | |
669 | } | |
670 | ||
671 | log_info("Spawning namespace container on %s (console is %s).", arg_directory, console); | |
672 | ||
673 | if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0) | |
674 | ioctl(master, TIOCSWINSZ, &ws); | |
675 | ||
676 | if (unlockpt(master) < 0) { | |
677 | log_error("Failed to unlock tty: %m"); | |
678 | goto finish; | |
679 | } | |
680 | ||
681 | if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) { | |
682 | log_error("Failed to get terminal attributes: %m"); | |
683 | goto finish; | |
684 | } | |
685 | ||
686 | saved_attr_valid = true; | |
687 | ||
688 | raw_attr = saved_attr; | |
689 | cfmakeraw(&raw_attr); | |
690 | raw_attr.c_lflag &= ~ECHO; | |
691 | ||
692 | if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) { | |
693 | log_error("Failed to set terminal attributes: %m"); | |
694 | goto finish; | |
695 | } | |
696 | ||
697 | assert_se(sigemptyset(&mask) == 0); | |
698 | sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1); | |
699 | assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0); | |
700 | ||
64af1b62 | 701 | if ((pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS, NULL)) < 0) { |
88213476 LP |
702 | log_error("clone() failed: %m"); |
703 | goto finish; | |
704 | } | |
705 | ||
706 | if (pid == 0) { | |
a258bf26 LP |
707 | /* child */ |
708 | ||
88213476 | 709 | const char *hn; |
687d0825 MV |
710 | const char *home = NULL; |
711 | uid_t uid = (uid_t) -1; | |
712 | gid_t gid = (gid_t) -1; | |
da5b3bad | 713 | const char *envp[] = { |
da5b3bad | 714 | "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", |
687d0825 MV |
715 | NULL, /* TERM */ |
716 | NULL, /* HOME */ | |
717 | NULL, /* USER */ | |
718 | NULL, /* LOGNAME */ | |
da5b3bad LP |
719 | NULL |
720 | }; | |
88213476 | 721 | |
687d0825 | 722 | envp[1] = strv_find_prefix(environ, "TERM="); |
a258bf26 LP |
723 | |
724 | close_nointr_nofail(master); | |
725 | ||
726 | close_nointr(STDIN_FILENO); | |
727 | close_nointr(STDOUT_FILENO); | |
728 | close_nointr(STDERR_FILENO); | |
729 | ||
730 | close_all_fds(NULL, 0); | |
731 | ||
732 | reset_all_signal_handlers(); | |
733 | ||
734 | assert_se(sigemptyset(&mask) == 0); | |
735 | assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); | |
736 | ||
737 | if (setsid() < 0) | |
738 | goto child_fail; | |
739 | ||
740 | if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) | |
741 | goto child_fail; | |
88213476 | 742 | |
f5c1b9ee LP |
743 | /* Mark / as private, in case somebody marked it shared */ |
744 | if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) | |
745 | goto child_fail; | |
746 | ||
88213476 LP |
747 | if (mount_all(arg_directory) < 0) |
748 | goto child_fail; | |
749 | ||
a258bf26 | 750 | if (copy_devnodes(arg_directory, console) < 0) |
88213476 LP |
751 | goto child_fail; |
752 | ||
753 | if (chdir(arg_directory) < 0) { | |
754 | log_error("chdir(%s) failed: %m", arg_directory); | |
755 | goto child_fail; | |
756 | } | |
a258bf26 LP |
757 | |
758 | if (open_terminal("dev/console", O_RDWR) != STDIN_FILENO || | |
759 | dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO || | |
760 | dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) | |
761 | goto child_fail; | |
762 | ||
88213476 LP |
763 | if (mount(arg_directory, "/", "bind", MS_BIND|MS_MOVE, NULL) < 0) { |
764 | log_error("mount(MS_MOVE) failed: %m"); | |
765 | goto child_fail; | |
766 | } | |
767 | ||
768 | if (chroot(".") < 0) { | |
769 | log_error("chroot() failed: %m"); | |
770 | goto child_fail; | |
771 | } | |
772 | ||
773 | if (chdir("/") < 0) { | |
774 | log_error("chdir() failed: %m"); | |
775 | goto child_fail; | |
776 | } | |
777 | ||
4c12626c | 778 | umask(0022); |
a258bf26 | 779 | |
88213476 LP |
780 | if (drop_capabilities() < 0) |
781 | goto child_fail; | |
782 | ||
687d0825 MV |
783 | if (arg_user) { |
784 | ||
785 | if (get_user_creds((const char**)&arg_user, &uid, &gid, &home) < 0) { | |
786 | log_error("get_user_creds() failed: %m"); | |
787 | goto child_fail; | |
788 | } | |
789 | ||
790 | if (mkdir_parents(home, 0775) < 0) { | |
791 | log_error("mkdir_parents() failed: %m"); | |
792 | goto child_fail; | |
793 | } | |
794 | ||
795 | if (safe_mkdir(home, 0775, uid, gid) < 0) { | |
796 | log_error("safe_mkdir() failed: %m"); | |
797 | goto child_fail; | |
798 | } | |
799 | ||
800 | if (initgroups((const char*)arg_user, gid) < 0) { | |
801 | log_error("initgroups() failed: %m"); | |
802 | goto child_fail; | |
803 | } | |
804 | ||
5c94603d | 805 | if (setresgid(gid, gid, gid) < 0) { |
687d0825 MV |
806 | log_error("setregid() failed: %m"); |
807 | goto child_fail; | |
808 | } | |
809 | ||
5c94603d | 810 | if (setresuid(uid, uid, uid) < 0) { |
687d0825 MV |
811 | log_error("setreuid() failed: %m"); |
812 | goto child_fail; | |
813 | } | |
814 | } | |
815 | ||
816 | if ((asprintf((char**)(envp + 2), "HOME=%s", home? home: "/root") < 0) || | |
817 | (asprintf((char**)(envp + 3), "USER=%s", arg_user? arg_user : "root") < 0) || | |
818 | (asprintf((char**)(envp + 4), "LOGNAME=%s", arg_user? arg_user : "root") < 0)) { | |
5c94603d | 819 | log_error("Out of memory"); |
687d0825 MV |
820 | goto child_fail; |
821 | } | |
822 | ||
88213476 LP |
823 | if ((hn = file_name_from_path(arg_directory))) |
824 | sethostname(hn, strlen(hn)); | |
825 | ||
826 | if (argc > optind) | |
da5b3bad LP |
827 | execvpe(argv[optind], argv + optind, (char**) envp); |
828 | else { | |
5c94603d | 829 | chdir(home ? home : "/root"); |
da5b3bad LP |
830 | execle("/bin/bash", "-bash", NULL, (char**) envp); |
831 | } | |
88213476 LP |
832 | |
833 | log_error("execv() failed: %m"); | |
834 | ||
835 | child_fail: | |
836 | _exit(EXIT_FAILURE); | |
837 | } | |
838 | ||
a258bf26 LP |
839 | if (process_pty(master, &mask) < 0) |
840 | goto finish; | |
841 | ||
842 | if (saved_attr_valid) { | |
843 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
844 | saved_attr_valid = false; | |
845 | } | |
846 | ||
6df6b939 | 847 | r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid); |
88213476 LP |
848 | |
849 | if (r < 0) | |
850 | r = EXIT_FAILURE; | |
851 | ||
852 | finish: | |
a258bf26 LP |
853 | if (saved_attr_valid) |
854 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
855 | ||
856 | if (master >= 0) | |
857 | close_nointr_nofail(master); | |
858 | ||
04d391da LP |
859 | if (oldcg) |
860 | cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0); | |
861 | ||
862 | if (newcg) | |
863 | cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true); | |
88213476 | 864 | |
04d391da LP |
865 | free(arg_directory); |
866 | free(oldcg); | |
867 | free(newcg); | |
88213476 LP |
868 | |
869 | return r; | |
870 | } |