]>
Commit | Line | Data |
---|---|---|
88213476 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
5430f7f2 LP |
9 | under the terms of the GNU Lesser General Public License as published by |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
88213476 LP |
11 | (at your option) any later version. |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
5430f7f2 | 16 | Lesser General Public License for more details. |
88213476 | 17 | |
5430f7f2 | 18 | You should have received a copy of the GNU Lesser General Public License |
88213476 LP |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. |
20 | ***/ | |
21 | ||
22 | #include <signal.h> | |
23 | #include <sched.h> | |
24 | #include <unistd.h> | |
25 | #include <sys/types.h> | |
26 | #include <sys/syscall.h> | |
27 | #include <sys/mount.h> | |
28 | #include <sys/wait.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <stdio.h> | |
32 | #include <errno.h> | |
33 | #include <sys/prctl.h> | |
34 | #include <sys/capability.h> | |
35 | #include <getopt.h> | |
a258bf26 LP |
36 | #include <sys/epoll.h> |
37 | #include <termios.h> | |
38 | #include <sys/signalfd.h> | |
687d0825 | 39 | #include <grp.h> |
5ed27dbd | 40 | #include <linux/fs.h> |
9537eab0 LP |
41 | #include <sys/un.h> |
42 | #include <sys/socket.h> | |
88213476 | 43 | |
81527be1 LP |
44 | #include <systemd/sd-daemon.h> |
45 | ||
88213476 LP |
46 | #include "log.h" |
47 | #include "util.h" | |
49e942b2 | 48 | #include "mkdir.h" |
d7832d2c | 49 | #include "audit.h" |
94d82985 | 50 | #include "missing.h" |
04d391da | 51 | #include "cgroup-util.h" |
a258bf26 | 52 | #include "strv.h" |
9eb977db | 53 | #include "path-util.h" |
a41fe3a2 | 54 | #include "loopback-setup.h" |
57fb9fb5 LP |
55 | #include "sd-id128.h" |
56 | ||
/* How setup_journal() connects the journal directory of the container
 * with the one of the host (chosen via --link-journal= or -j). */
typedef enum LinkJournal {
        LINK_NO    = 0,  /* do not touch the journal at all */
        LINK_AUTO  = 1,  /* default: link up if possible, silently skip otherwise */
        LINK_HOST  = 2,  /* journal lives on the host, bind mounted into the guest */
        LINK_GUEST = 3   /* journal lives in the guest, symlinked from the host */
} LinkJournal;
88213476 LP |
63 | |
64 | static char *arg_directory = NULL; | |
687d0825 | 65 | static char *arg_user = NULL; |
40c32a4a | 66 | static char **arg_controllers = NULL; |
144f0fc0 | 67 | static char *arg_uuid = NULL; |
ff01d048 | 68 | static bool arg_private_network = false; |
bc2f673e | 69 | static bool arg_read_only = false; |
0f0dbc46 | 70 | static bool arg_boot = false; |
57fb9fb5 | 71 | static LinkJournal arg_link_journal = LINK_AUTO; |
5076f0cc LP |
72 | static uint64_t arg_retain = |
73 | (1ULL << CAP_CHOWN) | | |
74 | (1ULL << CAP_DAC_OVERRIDE) | | |
75 | (1ULL << CAP_DAC_READ_SEARCH) | | |
76 | (1ULL << CAP_FOWNER) | | |
77 | (1ULL << CAP_FSETID) | | |
78 | (1ULL << CAP_IPC_OWNER) | | |
79 | (1ULL << CAP_KILL) | | |
80 | (1ULL << CAP_LEASE) | | |
81 | (1ULL << CAP_LINUX_IMMUTABLE) | | |
82 | (1ULL << CAP_NET_BIND_SERVICE) | | |
83 | (1ULL << CAP_NET_BROADCAST) | | |
84 | (1ULL << CAP_NET_RAW) | | |
85 | (1ULL << CAP_SETGID) | | |
86 | (1ULL << CAP_SETFCAP) | | |
87 | (1ULL << CAP_SETPCAP) | | |
88 | (1ULL << CAP_SETUID) | | |
89 | (1ULL << CAP_SYS_ADMIN) | | |
90 | (1ULL << CAP_SYS_CHROOT) | | |
91 | (1ULL << CAP_SYS_NICE) | | |
92 | (1ULL << CAP_SYS_PTRACE) | | |
93 | (1ULL << CAP_SYS_TTY_CONFIG) | | |
94 | (1ULL << CAP_SYS_RESOURCE); | |
88213476 LP |
95 | |
/* Prints the usage synopsis and the option summary to stdout.
 *
 * Always returns 0. Note that -j is documented as "guest" here because
 * that is what parse_argv() actually selects for it (LINK_GUEST). */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               " -h --help Show this help\n"
               " -D --directory=NAME Root directory for the container\n"
               " -b --boot Boot up full system (i.e. invoke init)\n"
               " -u --user=USER Run the command under specified user or uid\n"
               " -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n"
               " --uuid=UUID Set a specific machine UUID for the container\n"
               " --private-network Disable network in container\n"
               " --read-only Mount the root directory read-only\n"
               " --capability=CAP In addition to the default, retain specified capability\n"
               " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
               " -j Equivalent to --link-journal=guest\n",
               program_invocation_short_name);

        return 0;
}
115 | ||
116 | static int parse_argv(int argc, char *argv[]) { | |
117 | ||
a41fe3a2 | 118 | enum { |
144f0fc0 | 119 | ARG_PRIVATE_NETWORK = 0x100, |
bc2f673e | 120 | ARG_UUID, |
5076f0cc | 121 | ARG_READ_ONLY, |
57fb9fb5 LP |
122 | ARG_CAPABILITY, |
123 | ARG_LINK_JOURNAL | |
a41fe3a2 LP |
124 | }; |
125 | ||
88213476 | 126 | static const struct option options[] = { |
ff01d048 LP |
127 | { "help", no_argument, NULL, 'h' }, |
128 | { "directory", required_argument, NULL, 'D' }, | |
129 | { "user", required_argument, NULL, 'u' }, | |
40c32a4a | 130 | { "controllers", required_argument, NULL, 'C' }, |
ff01d048 | 131 | { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, |
0f0dbc46 | 132 | { "boot", no_argument, NULL, 'b' }, |
144f0fc0 | 133 | { "uuid", required_argument, NULL, ARG_UUID }, |
bc2f673e | 134 | { "read-only", no_argument, NULL, ARG_READ_ONLY }, |
5076f0cc | 135 | { "capability", required_argument, NULL, ARG_CAPABILITY }, |
57fb9fb5 | 136 | { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL }, |
ff01d048 | 137 | { NULL, 0, NULL, 0 } |
88213476 LP |
138 | }; |
139 | ||
140 | int c; | |
141 | ||
142 | assert(argc >= 0); | |
143 | assert(argv); | |
144 | ||
57fb9fb5 | 145 | while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) { |
88213476 LP |
146 | |
147 | switch (c) { | |
148 | ||
149 | case 'h': | |
150 | help(); | |
151 | return 0; | |
152 | ||
153 | case 'D': | |
154 | free(arg_directory); | |
3a74cea5 LP |
155 | arg_directory = canonicalize_file_name(optarg); |
156 | if (!arg_directory) { | |
157 | log_error("Failed to canonicalize root directory."); | |
88213476 LP |
158 | return -ENOMEM; |
159 | } | |
160 | ||
161 | break; | |
162 | ||
687d0825 MV |
163 | case 'u': |
164 | free(arg_user); | |
165 | if (!(arg_user = strdup(optarg))) { | |
166 | log_error("Failed to duplicate user name."); | |
167 | return -ENOMEM; | |
168 | } | |
169 | ||
170 | break; | |
171 | ||
40c32a4a LGL |
172 | case 'C': |
173 | strv_free(arg_controllers); | |
174 | arg_controllers = strv_split(optarg, ","); | |
175 | if (!arg_controllers) { | |
176 | log_error("Failed to split controllers list."); | |
177 | return -ENOMEM; | |
178 | } | |
179 | strv_uniq(arg_controllers); | |
180 | ||
181 | break; | |
182 | ||
ff01d048 LP |
183 | case ARG_PRIVATE_NETWORK: |
184 | arg_private_network = true; | |
a41fe3a2 LP |
185 | break; |
186 | ||
0f0dbc46 LP |
187 | case 'b': |
188 | arg_boot = true; | |
189 | break; | |
190 | ||
144f0fc0 LP |
191 | case ARG_UUID: |
192 | arg_uuid = optarg; | |
193 | break; | |
194 | ||
bc2f673e LP |
195 | case ARG_READ_ONLY: |
196 | arg_read_only = true; | |
197 | break; | |
198 | ||
5076f0cc LP |
199 | case ARG_CAPABILITY: { |
200 | char *state, *word; | |
201 | size_t length; | |
202 | ||
203 | FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) { | |
204 | cap_value_t cap; | |
205 | char *t; | |
206 | ||
207 | t = strndup(word, length); | |
208 | if (!t) { | |
209 | log_error("Out of memory."); | |
210 | return -ENOMEM; | |
211 | } | |
212 | ||
213 | if (cap_from_name(t, &cap) < 0) { | |
214 | log_error("Failed to parse capability %s.", t); | |
215 | free(t); | |
216 | return -EINVAL; | |
217 | } | |
218 | ||
219 | free(t); | |
220 | arg_retain |= 1ULL << (uint64_t) cap; | |
221 | } | |
222 | ||
223 | break; | |
224 | } | |
225 | ||
57fb9fb5 LP |
226 | case 'j': |
227 | arg_link_journal = LINK_GUEST; | |
228 | break; | |
229 | ||
230 | case ARG_LINK_JOURNAL: | |
231 | if (streq(optarg, "auto")) | |
232 | arg_link_journal = LINK_AUTO; | |
233 | else if (streq(optarg, "no")) | |
234 | arg_link_journal = LINK_NO; | |
235 | else if (streq(optarg, "guest")) | |
236 | arg_link_journal = LINK_GUEST; | |
237 | else if (streq(optarg, "host")) | |
238 | arg_link_journal = LINK_HOST; | |
239 | else { | |
240 | log_error("Failed to parse link journal mode %s", optarg); | |
241 | return -EINVAL; | |
242 | } | |
243 | ||
244 | break; | |
245 | ||
88213476 LP |
246 | case '?': |
247 | return -EINVAL; | |
248 | ||
249 | default: | |
250 | log_error("Unknown option code %c", c); | |
251 | return -EINVAL; | |
252 | } | |
253 | } | |
254 | ||
255 | return 1; | |
256 | } | |
257 | ||
/* Sets up the API file systems (/proc, /sys, /dev, /run, ...) below the
 * container root "dest".
 *
 * All table entries are attempted even if an earlier one failed; the
 * return value is 0 on success or the first error encountered (negative
 * errno-style). Failures of entries not marked fatal are ignored. */
static int mount_all(const char *dest) {

        typedef struct MountPoint {
                const char *what;
                const char *where;
                const char *type;
                const char *options;
                unsigned long flags;
                bool fatal;
        } MountPoint;

        /* Read-only bind mounts take two steps: the bind mount itself,
         * followed by a remount that adds MS_RDONLY. */
        static const MountPoint mount_table[] = {
                { "proc",            "/proc",           "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV,       true  },
                { "/proc/sys",       "/proc/sys",       "bind",  NULL,       MS_BIND,                            true  },
                { "/proc/sys",       "/proc/sys",       "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT,       true  },
                { "/sys",            "/sys",            "bind",  NULL,       MS_BIND,                            true  },
                { "/sys",            "/sys",            "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT,       true  },
                { "tmpfs",           "/dev",            "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,           true  },
                { "/dev/pts",        "/dev/pts",        "bind",  NULL,       MS_BIND,                            true  },
                { "tmpfs",           "/run",            "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,  true  },
#ifdef HAVE_SELINUX
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind",  NULL,       MS_BIND,                            false },
                { "/sys/fs/selinux", "/sys/fs/selinux", "bind",  NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT,       false },
#endif
        };

        const MountPoint *const end = mount_table + ELEMENTSOF(mount_table);
        const MountPoint *entry;
        int result = 0;

        for (entry = mount_table; entry < end; entry++) {
                char *target;
                int is_mp;

                if (asprintf(&target, "%s/%s", dest, entry->where) < 0) {
                        log_error("Out of memory.");

                        if (result == 0)
                                result = -ENOMEM;

                        /* No point in trying the remaining entries */
                        break;
                }

                /* Only the error case matters here, the actual answer
                 * is not used any further */
                is_mp = path_is_mount_point(target, false);
                if (is_mp < 0) {
                        log_error("Failed to detect whether %s is a mount point: %s", target, strerror(-is_mp));
                        free(target);

                        if (result == 0)
                                result = is_mp;

                        continue;
                }

                /* Best effort, mount() will complain if this failed */
                mkdir_p_label(target, 0755);

                if (mount(entry->what, target, entry->type, entry->flags, entry->options) < 0 &&
                    entry->fatal) {

                        log_error("mount(%s) failed: %m", target);

                        if (result == 0)
                                result = -errno;
                }

                free(target);
        }

        return result;
}
f8440af5 | 331 | |
e58a1277 LP |
/* Makes the host's time zone configuration visible in the container by
 * bind mounting /etc/localtime and /etc/timezone read-only over the
 * files of the same name below "dest".
 *
 * Mount failures are ignored (best effort). Returns 0, or -ENOMEM if a
 * path could not be allocated. */
static int setup_timezone(const char *dest) {
        char *target;
        int bound;

        assert(dest);

        /* /etc/localtime first ... */
        if (asprintf(&target, "%s/etc/localtime", dest) < 0) {
                log_error("Out of memory.");
                return -ENOMEM;
        }

        /* The read-only remount only makes sense if the bind mount worked */
        bound = mount("/etc/localtime", target, "bind", MS_BIND, NULL);
        if (bound >= 0)
                mount("/etc/localtime", target, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);

        free(target);

        /* ... then the same again for /etc/timezone */
        if (asprintf(&target, "%s/etc/timezone", dest) < 0) {
                log_error("Out of memory.");
                return -ENOMEM;
        }

        bound = mount("/etc/timezone", target, "bind", MS_BIND, NULL);
        if (bound >= 0)
                mount("/etc/timezone", target, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);

        free(target);

        return 0;
}
360 | ||
2547bb41 LP |
361 | static int setup_resolv_conf(const char *dest) { |
362 | char *where; | |
363 | ||
364 | assert(dest); | |
365 | ||
366 | if (arg_private_network) | |
367 | return 0; | |
368 | ||
369 | /* Fix resolv.conf, if possible */ | |
370 | if (asprintf(&where, "%s/etc/resolv.conf", dest) < 0) { | |
669241a0 | 371 | log_error("Out of memory."); |
2547bb41 LP |
372 | return -ENOMEM; |
373 | } | |
374 | ||
375 | if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0) | |
376 | mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); | |
377 | ||
378 | free(where); | |
379 | ||
380 | return 0; | |
381 | } | |
382 | ||
e58a1277 | 383 | static int copy_devnodes(const char *dest) { |
88213476 LP |
384 | |
385 | static const char devnodes[] = | |
386 | "null\0" | |
387 | "zero\0" | |
388 | "full\0" | |
389 | "random\0" | |
390 | "urandom\0" | |
391 | "tty\0" | |
392 | "ptmx\0" | |
88213476 LP |
393 | "rtc0\0"; |
394 | ||
395 | const char *d; | |
e58a1277 | 396 | int r = 0; |
124640f1 | 397 | mode_t u; |
a258bf26 LP |
398 | |
399 | assert(dest); | |
124640f1 LP |
400 | |
401 | u = umask(0000); | |
88213476 LP |
402 | |
403 | NULSTR_FOREACH(d, devnodes) { | |
e58a1277 LP |
404 | struct stat st; |
405 | char *from = NULL, *to = NULL; | |
88213476 LP |
406 | |
407 | asprintf(&from, "/dev/%s", d); | |
408 | asprintf(&to, "%s/dev/%s", dest, d); | |
409 | ||
410 | if (!from || !to) { | |
411 | log_error("Failed to allocate devnode path"); | |
412 | ||
413 | free(from); | |
414 | free(to); | |
415 | ||
a258bf26 LP |
416 | from = to = NULL; |
417 | ||
88213476 LP |
418 | if (r == 0) |
419 | r = -ENOMEM; | |
420 | ||
421 | break; | |
422 | } | |
423 | ||
424 | if (stat(from, &st) < 0) { | |
425 | ||
426 | if (errno != ENOENT) { | |
427 | log_error("Failed to stat %s: %m", from); | |
88213476 LP |
428 | if (r == 0) |
429 | r = -errno; | |
430 | } | |
431 | ||
a258bf26 | 432 | } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { |
88213476 | 433 | |
a258bf26 LP |
434 | log_error("%s is not a char or block device, cannot copy.", from); |
435 | if (r == 0) | |
436 | r = -EIO; | |
437 | ||
438 | } else if (mknod(to, st.st_mode, st.st_rdev) < 0) { | |
439 | ||
440 | log_error("mknod(%s) failed: %m", dest); | |
441 | if (r == 0) | |
442 | r = -errno; | |
88213476 LP |
443 | } |
444 | ||
445 | free(from); | |
446 | free(to); | |
447 | } | |
448 | ||
e58a1277 | 449 | umask(u); |
88213476 | 450 | |
e58a1277 LP |
451 | return r; |
452 | } | |
88213476 | 453 | |
e58a1277 LP |
/* Makes the TTY "console" available as /dev/console below "dest".
 *
 * The TTY is first set to mode 0600, owner 0:0. Then a device node is
 * created as bind mount target and the TTY is bind mounted over it.
 *
 * Returns a non-negative value on success and a negative errno-style
 * value on failure. The process umask is restored in either case. */
static int setup_dev_console(const char *dest, const char *console) {
        struct stat tty_st;
        char *node = NULL;
        mode_t saved_umask;
        int r;

        assert(dest);
        assert(console);

        saved_umask = umask(0000);

        if (stat(console, &tty_st) < 0) {
                log_error("Failed to stat %s: %m", console);
                r = -errno;
                goto finish;
        }

        if (!S_ISCHR(tty_st.st_mode)) {
                log_error("/dev/console is not a char device.");
                r = -EIO;
                goto finish;
        }

        r = chmod_and_chown(console, 0600, 0, 0);
        if (r < 0) {
                log_error("Failed to correct access mode for TTY: %s", strerror(-r));
                goto finish;
        }

        if (asprintf(&node, "%s/dev/console", dest) < 0) {
                log_error("Out of memory.");
                r = -ENOMEM;
                goto finish;
        }

        /* PTYs only exist on pts file systems, hence the right TTY has
         * to be bind mounted to /dev/console. The device node created
         * here merely serves as something to mount on; its major/minor
         * is irrelevant since it gets covered by the mount anyway. */
        if (mknod(node, (tty_st.st_mode & ~07777) | 0600, tty_st.st_rdev) < 0) {
                log_error("mknod() for /dev/console failed: %m");
                r = -errno;
                goto finish;
        }

        if (mount(console, node, "bind", MS_BIND, NULL) < 0) {
                log_error("Bind mount for /dev/console failed: %m");
                r = -errno;
                goto finish;
        }

finish:
        free(node);
        umask(saved_umask);

        return r;
}
513 | ||
/* Provides a fake /proc/kmsg for the container below "dest": a FIFO is
 * created as /dev/kmsg, bind mounted over /proc/kmsg, and an open file
 * descriptor to it is queued into "kmsg_socket" so that it stays open
 * while the child runs. Finally the FIFO is removed from /dev again.
 *
 * Returns a non-negative value on success and a negative errno-style
 * value on failure. The process umask is restored in either case. */
static int setup_kmsg(const char *dest, int kmsg_socket) {
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
        } control;
        struct msghdr mh;
        struct cmsghdr *cmsg;
        char *fifo = NULL, *target = NULL;
        mode_t saved_umask;
        int r, fd, sent;

        assert(dest);
        assert(kmsg_socket >= 0);

        saved_umask = umask(0000);

        /* The FIFO is created as /dev/kmsg, but deleted again right
         * after it has been bind mounted to /proc/kmsg. For readers a
         * FIFO behaves much like /proc/kmsg, but for writers it differs
         * from /dev/kmsg: writing blocks as long as nobody reads. To
         * keep containers from deadlocking on that, /dev/kmsg is simply
         * not made available to them. */
        if (asprintf(&fifo, "%s/dev/kmsg", dest) < 0) {
                log_error("Out of memory.");
                r = -ENOMEM;
                goto finish;
        }

        if (asprintf(&target, "%s/proc/kmsg", dest) < 0) {
                log_error("Out of memory.");
                r = -ENOMEM;
                goto finish;
        }

        if (mkfifo(fifo, 0600) < 0) {
                log_error("mkfifo() for /dev/kmsg failed: %m");
                r = -errno;
                goto finish;
        }

        r = chmod_and_chown(fifo, 0600, 0, 0);
        if (r < 0) {
                log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
                goto finish;
        }

        if (mount(fifo, target, "bind", MS_BIND, NULL) < 0) {
                log_error("Bind mount for /proc/kmsg failed: %m");
                r = -errno;
                goto finish;
        }

        fd = open(fifo, O_RDWR|O_NDELAY|O_CLOEXEC);
        if (fd < 0) {
                log_error("Failed to open fifo: %m");
                r = -errno;
                goto finish;
        }

        /* Build a message that carries nothing but the fd (SCM_RIGHTS) */
        zero(mh);
        zero(control);

        mh.msg_control = &control;
        mh.msg_controllen = sizeof(control);

        cmsg = CMSG_FIRSTHDR(&mh);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

        mh.msg_controllen = cmsg->cmsg_len;

        /* Park the fd in the socket; that keeps it open for as long as
         * the child is running */
        sent = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
        close_nointr_nofail(fd);

        if (sent < 0) {
                log_error("Failed to send FIFO fd: %m");
                r = -errno;
                goto finish;
        }

        /* Finally, hide the FIFO from /dev/kmsg */
        unlink(fifo);

finish:
        free(fifo);
        free(target);
        umask(saved_umask);

        return r;
}
609 | ||
3a74cea5 LP |
610 | static int setup_hostname(void) { |
611 | char *hn; | |
612 | int r = 0; | |
613 | ||
9eb977db | 614 | hn = path_get_file_name(arg_directory); |
3a74cea5 LP |
615 | if (hn) { |
616 | hn = strdup(hn); | |
617 | if (!hn) | |
618 | return -ENOMEM; | |
619 | ||
620 | hostname_cleanup(hn); | |
621 | ||
622 | if (!isempty(hn)) | |
623 | if (sethostname(hn, strlen(hn)) < 0) | |
624 | r = -errno; | |
625 | ||
626 | free(hn); | |
627 | } | |
628 | ||
629 | return r; | |
630 | } | |
631 | ||
57fb9fb5 LP |
632 | static int setup_journal(const char *directory) { |
633 | sd_id128_t machine_id; | |
634 | char *p = NULL, *b = NULL, *l, *q = NULL, *d = NULL; | |
635 | int r; | |
636 | ||
637 | if (arg_link_journal == LINK_NO) | |
638 | return 0; | |
639 | ||
640 | p = strappend(directory, "/etc/machine-id"); | |
641 | if (!p) { | |
669241a0 | 642 | log_error("Out of memory."); |
57fb9fb5 LP |
643 | r = -ENOMEM; |
644 | goto finish; | |
645 | } | |
646 | ||
647 | r = read_one_line_file(p, &b); | |
648 | if (r == -ENOENT && arg_link_journal == LINK_AUTO) { | |
649 | r = 0; | |
650 | goto finish; | |
651 | } else if (r < 0) { | |
652 | log_error("Failed to read machine ID: %s", strerror(-r)); | |
653 | return r; | |
654 | } | |
655 | ||
656 | l = strstrip(b); | |
657 | if (isempty(l) && arg_link_journal == LINK_AUTO) { | |
658 | r = 0; | |
659 | goto finish; | |
660 | } | |
661 | ||
662 | /* Verify validaty */ | |
663 | r = sd_id128_from_string(l, &machine_id); | |
664 | if (r < 0) { | |
665 | log_error("Failed to parse machine ID: %s", strerror(-r)); | |
666 | goto finish; | |
667 | } | |
668 | ||
669 | free(p); | |
670 | p = strappend("/var/log/journal/", l); | |
671 | q = strjoin(directory, "/var/log/journal/", l, NULL); | |
672 | if (!p || !q) { | |
669241a0 | 673 | log_error("Out of memory."); |
57fb9fb5 LP |
674 | r = -ENOMEM; |
675 | goto finish; | |
676 | } | |
677 | ||
678 | if (path_is_mount_point(p, false) > 0 || | |
679 | path_is_mount_point(q, false) > 0) { | |
680 | if (arg_link_journal != LINK_AUTO) { | |
681 | log_error("Journal already a mount point, refusing."); | |
682 | r = -EEXIST; | |
683 | goto finish; | |
684 | } | |
685 | ||
686 | r = 0; | |
687 | goto finish; | |
688 | } | |
689 | ||
690 | r = readlink_and_make_absolute(p, &d); | |
691 | if (r >= 0) { | |
692 | if ((arg_link_journal == LINK_GUEST || | |
693 | arg_link_journal == LINK_AUTO) && | |
694 | path_equal(d, q)) { | |
695 | ||
696 | mkdir_p(q, 0755); | |
697 | ||
698 | r = 0; | |
699 | goto finish; | |
700 | } | |
701 | ||
702 | if (unlink(p) < 0) { | |
703 | log_error("Failed to remove symlink %s: %m", p); | |
704 | r = -errno; | |
705 | goto finish; | |
706 | } | |
707 | } else if (r == -EINVAL) { | |
708 | ||
709 | if (arg_link_journal == LINK_GUEST && | |
710 | rmdir(p) < 0) { | |
711 | ||
712 | if (errno == ENOTDIR) | |
713 | log_error("%s already exists and is neither symlink nor directory.", p); | |
714 | else { | |
715 | log_error("Failed to remove %s: %m", p); | |
716 | r = -errno; | |
717 | } | |
718 | ||
719 | goto finish; | |
720 | } | |
721 | } else if (r != -ENOENT) { | |
722 | log_error("readlink(%s) failed: %m", p); | |
723 | goto finish; | |
724 | } | |
725 | ||
726 | if (arg_link_journal == LINK_GUEST) { | |
727 | ||
728 | if (symlink(q, p) < 0) { | |
729 | log_error("Failed to symlink %s to %s: %m", q, p); | |
730 | r = -errno; | |
731 | goto finish; | |
732 | } | |
733 | ||
734 | mkdir_p(q, 0755); | |
735 | ||
736 | r = 0; | |
737 | goto finish; | |
738 | } | |
739 | ||
740 | if (arg_link_journal == LINK_HOST) { | |
741 | r = mkdir_p(p, 0755); | |
742 | if (r < 0) { | |
743 | log_error("Failed to create %s: %m", p); | |
744 | goto finish; | |
745 | } | |
746 | ||
747 | } else if (access(p, F_OK) < 0) { | |
748 | r = 0; | |
749 | goto finish; | |
750 | } | |
751 | ||
752 | if (dir_is_empty(q) == 0) { | |
753 | log_error("%s not empty.", q); | |
754 | r = -ENOTEMPTY; | |
755 | goto finish; | |
756 | } | |
757 | ||
758 | r = mkdir_p(q, 0755); | |
759 | if (r < 0) { | |
760 | log_error("Failed to create %s: %m", q); | |
761 | goto finish; | |
762 | } | |
763 | ||
764 | if (mount(p, q, "bind", MS_BIND, NULL) < 0) { | |
765 | log_error("Failed to bind mount journal from host into guest: %m"); | |
766 | r = -errno; | |
767 | goto finish; | |
768 | } | |
769 | ||
770 | r = 0; | |
771 | ||
772 | finish: | |
773 | free(p); | |
774 | free(q); | |
775 | free(d); | |
776 | free(b); | |
777 | return r; | |
778 | ||
779 | } | |
780 | ||
88213476 | 781 | static int drop_capabilities(void) { |
5076f0cc | 782 | return capability_bounding_set_drop(~arg_retain, false); |
88213476 LP |
783 | } |
784 | ||
/* Checks whether "path" looks like the root of an OS installation. The
 * existence of <path>/bin/sh is used as the indicator.
 *
 * Returns 1 if it exists, 0 if not, and -ENOMEM if the path to check
 * could not be allocated. */
static int is_os_tree(const char *path) {
        char *shell = NULL;
        int found;

        if (asprintf(&shell, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(shell, F_OK) >= 0;
        free(shell);

        return found;
}
798 | ||
/* Shovels data between our stdin/stdout and the pty "master" until a
 * signal from "mask" other than SIGWINCH arrives or an error occurs.
 *
 * stdin, stdout and master are switched to non-blocking mode and
 * watched edge-triggered; the *_readable/*_writable flags remember that
 * an fd is ready until a read()/write() on it says otherwise. SIGWINCH
 * is handled by copying the window size of stdin to the pty.
 *
 * Returns 0 when terminated by a signal and a negative errno-style
 * value on failure. */
static int process_pty(int master, sigset_t *mask) {

        char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
        size_t in_buffer_full = 0, out_buffer_full = 0;
        struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
        bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
        int ep = -1, signal_fd = -1, r;

        fd_nonblock(STDIN_FILENO, 1);
        fd_nonblock(STDOUT_FILENO, 1);
        fd_nonblock(master, 1);

        signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
        if (signal_fd < 0) {
                r = -errno;
                log_error("signalfd(): %m");
                goto finish;
        }

        ep = epoll_create1(EPOLL_CLOEXEC);
        if (ep < 0) {
                r = -errno;
                log_error("Failed to create epoll: %m");
                goto finish;
        }

        zero(stdin_ev);
        stdin_ev.events = EPOLLIN|EPOLLET;
        stdin_ev.data.fd = STDIN_FILENO;

        zero(stdout_ev);
        stdout_ev.events = EPOLLOUT|EPOLLET;
        stdout_ev.data.fd = STDOUT_FILENO;

        zero(master_ev);
        master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
        master_ev.data.fd = master;

        /* The signal fd is the only one that is level-triggered */
        zero(signal_ev);
        signal_ev.events = EPOLLIN;
        signal_ev.data.fd = signal_fd;

        if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
            epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
                r = -errno;
                log_error("Failed to regiser fds in epoll: %m");
                goto finish;
        }

        for (;;) {
                struct epoll_event ev[16];
                ssize_t k;
                int i, nfds;

                nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
                if (nfds < 0) {

                        if (errno == EINTR || errno == EAGAIN)
                                continue;

                        r = -errno;
                        log_error("epoll_wait(): %m");
                        goto finish;
                }

                assert(nfds >= 1);

                /* First, take note of what became ready ... */
                for (i = 0; i < nfds; i++) {
                        if (ev[i].data.fd == STDIN_FILENO) {

                                if (ev[i].events & (EPOLLIN|EPOLLHUP))
                                        stdin_readable = true;

                        } else if (ev[i].data.fd == STDOUT_FILENO) {

                                if (ev[i].events & (EPOLLOUT|EPOLLHUP))
                                        stdout_writable = true;

                        } else if (ev[i].data.fd == master) {

                                if (ev[i].events & (EPOLLIN|EPOLLHUP))
                                        master_readable = true;

                                if (ev[i].events & (EPOLLOUT|EPOLLHUP))
                                        master_writable = true;

                        } else if (ev[i].data.fd == signal_fd) {
                                struct signalfd_siginfo sfsi;
                                ssize_t n;

                                n = read(signal_fd, &sfsi, sizeof(sfsi));
                                if (n != sizeof(sfsi)) {

                                        if (n >= 0) {
                                                log_error("Failed to read from signalfd: invalid block size");
                                                r = -EIO;
                                                goto finish;
                                        }

                                        if (errno != EINTR && errno != EAGAIN) {
                                                r = -errno;
                                                log_error("Failed to read from signalfd: %m");
                                                goto finish;
                                        }
                                } else {

                                        if (sfsi.ssi_signo == SIGWINCH) {
                                                struct winsize ws;

                                                /* The window size changed, let's forward that. */
                                                if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
                                                        ioctl(master, TIOCSWINSZ, &ws);
                                        } else {
                                                /* Any other signal ends the loop */
                                                r = 0;
                                                goto finish;
                                        }
                                }
                        }
                }

                /* ... then move data for as long as any progress is possible */
                while ((stdin_readable && in_buffer_full <= 0) ||
                       (master_writable && in_buffer_full > 0) ||
                       (master_readable && out_buffer_full <= 0) ||
                       (stdout_writable && out_buffer_full > 0)) {

                        if (stdin_readable && in_buffer_full < LINE_MAX) {

                                k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
                                if (k < 0) {

                                        if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                stdin_readable = false;
                                        else {
                                                r = -errno;
                                                log_error("read(): %m");
                                                goto finish;
                                        }
                                } else if (k == 0)
                                        /* End of file: nothing more to read.
                                         * Without this the flag would stay set
                                         * with an empty buffer and this loop
                                         * would spin forever. */
                                        stdin_readable = false;
                                else
                                        in_buffer_full += (size_t) k;
                        }

                        if (master_writable && in_buffer_full > 0) {

                                k = write(master, in_buffer, in_buffer_full);
                                if (k < 0) {

                                        if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                master_writable = false;
                                        else {
                                                r = -errno;
                                                log_error("write(): %m");
                                                goto finish;
                                        }

                                } else {
                                        /* Drop what was written from the front of the buffer */
                                        assert(in_buffer_full >= (size_t) k);
                                        memmove(in_buffer, in_buffer + k, in_buffer_full - k);
                                        in_buffer_full -= k;
                                }
                        }

                        if (master_readable && out_buffer_full < LINE_MAX) {

                                k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
                                if (k < 0) {

                                        if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                master_readable = false;
                                        else {
                                                r = -errno;
                                                log_error("read(): %m");
                                                goto finish;
                                        }
                                } else if (k == 0)
                                        /* End of file, see above */
                                        master_readable = false;
                                else
                                        out_buffer_full += (size_t) k;
                        }

                        if (stdout_writable && out_buffer_full > 0) {

                                k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
                                if (k < 0) {

                                        if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                stdout_writable = false;
                                        else {
                                                r = -errno;
                                                log_error("write(): %m");
                                                goto finish;
                                        }

                                } else {
                                        assert(out_buffer_full >= (size_t) k);
                                        memmove(out_buffer, out_buffer + k, out_buffer_full - k);
                                        out_buffer_full -= k;
                                }
                        }
                }
        }

finish:
        if (ep >= 0)
                close_nointr_nofail(ep);

        if (signal_fd >= 0)
                close_nointr_nofail(signal_fd);

        return r;
}
88213476 LP |
1009 | |
1010 | int main(int argc, char *argv[]) { | |
1011 | pid_t pid = 0; | |
04d391da LP |
1012 | int r = EXIT_FAILURE, k; |
1013 | char *oldcg = NULL, *newcg = NULL; | |
40c32a4a | 1014 | char **controller = NULL; |
a258bf26 LP |
1015 | int master = -1; |
1016 | const char *console = NULL; | |
1017 | struct termios saved_attr, raw_attr; | |
1018 | sigset_t mask; | |
1019 | bool saved_attr_valid = false; | |
1020 | struct winsize ws; | |
e58a1277 | 1021 | int kmsg_socket_pair[2] = { -1, -1 }; |
88213476 LP |
1022 | |
1023 | log_parse_environment(); | |
1024 | log_open(); | |
1025 | ||
db7feb7e LP |
1026 | r = parse_argv(argc, argv); |
1027 | if (r <= 0) | |
88213476 LP |
1028 | goto finish; |
1029 | ||
1030 | if (arg_directory) { | |
1031 | char *p; | |
1032 | ||
1033 | p = path_make_absolute_cwd(arg_directory); | |
1034 | free(arg_directory); | |
1035 | arg_directory = p; | |
1036 | } else | |
1037 | arg_directory = get_current_dir_name(); | |
1038 | ||
1039 | if (!arg_directory) { | |
1040 | log_error("Failed to determine path"); | |
1041 | goto finish; | |
1042 | } | |
1043 | ||
1044 | path_kill_slashes(arg_directory); | |
1045 | ||
1046 | if (geteuid() != 0) { | |
1047 | log_error("Need to be root."); | |
1048 | goto finish; | |
1049 | } | |
1050 | ||
04d391da LP |
1051 | if (sd_booted() <= 0) { |
1052 | log_error("Not running on a systemd system."); | |
1053 | goto finish; | |
1054 | } | |
1055 | ||
88213476 | 1056 | if (path_equal(arg_directory, "/")) { |
6df6b939 | 1057 | log_error("Spawning container on root directory not supported."); |
88213476 LP |
1058 | goto finish; |
1059 | } | |
1060 | ||
1061 | if (is_os_tree(arg_directory) <= 0) { | |
1062 | log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory); | |
1063 | goto finish; | |
1064 | } | |
1065 | ||
db7feb7e LP |
1066 | k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg); |
1067 | if (k < 0) { | |
04d391da LP |
1068 | log_error("Failed to determine current cgroup: %s", strerror(-k)); |
1069 | goto finish; | |
1070 | } | |
1071 | ||
1072 | if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) { | |
1073 | log_error("Failed to allocate cgroup path."); | |
1074 | goto finish; | |
1075 | } | |
1076 | ||
40c32a4a LGL |
1077 | k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0); |
1078 | if (k < 0) { | |
04d391da LP |
1079 | log_error("Failed to create cgroup: %s", strerror(-k)); |
1080 | goto finish; | |
1081 | } | |
1082 | ||
db7feb7e | 1083 | STRV_FOREACH(controller, arg_controllers) { |
40c32a4a LGL |
1084 | k = cg_create_and_attach(*controller, newcg, 0); |
1085 | if (k < 0) | |
1086 | log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k)); | |
1087 | } | |
1088 | ||
db7feb7e LP |
1089 | master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY); |
1090 | if (master < 0) { | |
a258bf26 LP |
1091 | log_error("Failed to acquire pseudo tty: %m"); |
1092 | goto finish; | |
1093 | } | |
1094 | ||
db7feb7e LP |
1095 | console = ptsname(master); |
1096 | if (!console) { | |
a258bf26 LP |
1097 | log_error("Failed to determine tty name: %m"); |
1098 | goto finish; | |
1099 | } | |
1100 | ||
1101 | log_info("Spawning namespace container on %s (console is %s).", arg_directory, console); | |
1102 | ||
1103 | if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0) | |
1104 | ioctl(master, TIOCSWINSZ, &ws); | |
1105 | ||
1106 | if (unlockpt(master) < 0) { | |
1107 | log_error("Failed to unlock tty: %m"); | |
1108 | goto finish; | |
1109 | } | |
1110 | ||
1111 | if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) { | |
1112 | log_error("Failed to get terminal attributes: %m"); | |
1113 | goto finish; | |
1114 | } | |
1115 | ||
1116 | saved_attr_valid = true; | |
1117 | ||
1118 | raw_attr = saved_attr; | |
1119 | cfmakeraw(&raw_attr); | |
1120 | raw_attr.c_lflag &= ~ECHO; | |
1121 | ||
1122 | if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) { | |
1123 | log_error("Failed to set terminal attributes: %m"); | |
1124 | goto finish; | |
1125 | } | |
1126 | ||
e58a1277 LP |
1127 | if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) { |
1128 | log_error("Failed to create kmsg socket pair"); | |
1129 | goto finish; | |
1130 | } | |
1131 | ||
a258bf26 LP |
1132 | assert_se(sigemptyset(&mask) == 0); |
1133 | sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1); | |
1134 | assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0); | |
1135 | ||
52af2106 LP |
1136 | pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL); |
1137 | if (pid < 0) { | |
1138 | if (errno == EINVAL) | |
1139 | log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m"); | |
1140 | else | |
1141 | log_error("clone() failed: %m"); | |
1142 | ||
88213476 LP |
1143 | goto finish; |
1144 | } | |
1145 | ||
1146 | if (pid == 0) { | |
a258bf26 LP |
1147 | /* child */ |
1148 | ||
687d0825 MV |
1149 | const char *home = NULL; |
1150 | uid_t uid = (uid_t) -1; | |
1151 | gid_t gid = (gid_t) -1; | |
da5b3bad | 1152 | const char *envp[] = { |
da5b3bad | 1153 | "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", |
3bb1c6b0 | 1154 | "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ |
687d0825 MV |
1155 | NULL, /* TERM */ |
1156 | NULL, /* HOME */ | |
1157 | NULL, /* USER */ | |
1158 | NULL, /* LOGNAME */ | |
144f0fc0 | 1159 | NULL, /* container_uuid */ |
da5b3bad LP |
1160 | NULL |
1161 | }; | |
88213476 | 1162 | |
3bb1c6b0 | 1163 | envp[2] = strv_find_prefix(environ, "TERM="); |
a258bf26 LP |
1164 | |
1165 | close_nointr_nofail(master); | |
1166 | ||
1167 | close_nointr(STDIN_FILENO); | |
1168 | close_nointr(STDOUT_FILENO); | |
1169 | close_nointr(STDERR_FILENO); | |
1170 | ||
e58a1277 | 1171 | close_all_fds(&kmsg_socket_pair[1], 1); |
a258bf26 LP |
1172 | |
1173 | reset_all_signal_handlers(); | |
1174 | ||
1175 | assert_se(sigemptyset(&mask) == 0); | |
1176 | assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); | |
1177 | ||
db7feb7e LP |
1178 | if (open_terminal(console, O_RDWR) != STDIN_FILENO || |
1179 | dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO || | |
1180 | dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) | |
1181 | goto child_fail; | |
1182 | ||
1183 | if (setsid() < 0) { | |
1184 | log_error("setsid() failed: %m"); | |
a258bf26 | 1185 | goto child_fail; |
db7feb7e | 1186 | } |
a258bf26 | 1187 | |
db7feb7e LP |
1188 | if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { |
1189 | log_error("PR_SET_PDEATHSIG failed: %m"); | |
a258bf26 | 1190 | goto child_fail; |
db7feb7e | 1191 | } |
88213476 | 1192 | |
f5c1b9ee | 1193 | /* Mark / as private, in case somebody marked it shared */ |
db7feb7e LP |
1194 | if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) { |
1195 | log_error("MS_PRIVATE|MS_REC failed: %m"); | |
f5c1b9ee | 1196 | goto child_fail; |
db7feb7e | 1197 | } |
f5c1b9ee | 1198 | |
bc2f673e LP |
1199 | /* Turn directory into bind mount */ |
1200 | if (mount(arg_directory, arg_directory, "bind", MS_BIND, NULL) < 0) { | |
1201 | log_error("Failed to make bind mount."); | |
1202 | goto child_fail; | |
1203 | } | |
1204 | ||
1205 | if (arg_read_only) | |
1206 | if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { | |
1207 | log_error("Failed to make read-only."); | |
1208 | goto child_fail; | |
1209 | } | |
1210 | ||
88213476 LP |
1211 | if (mount_all(arg_directory) < 0) |
1212 | goto child_fail; | |
1213 | ||
e58a1277 LP |
1214 | if (copy_devnodes(arg_directory) < 0) |
1215 | goto child_fail; | |
1216 | ||
1217 | if (setup_dev_console(arg_directory, console) < 0) | |
1218 | goto child_fail; | |
1219 | ||
1220 | if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0) | |
1221 | goto child_fail; | |
1222 | ||
1223 | close_nointr_nofail(kmsg_socket_pair[1]); | |
1224 | ||
1225 | if (setup_timezone(arg_directory) < 0) | |
88213476 LP |
1226 | goto child_fail; |
1227 | ||
2547bb41 LP |
1228 | if (setup_resolv_conf(arg_directory) < 0) |
1229 | goto child_fail; | |
1230 | ||
57fb9fb5 LP |
1231 | if (setup_journal(arg_directory) < 0) |
1232 | goto child_fail; | |
1233 | ||
88213476 LP |
1234 | if (chdir(arg_directory) < 0) { |
1235 | log_error("chdir(%s) failed: %m", arg_directory); | |
1236 | goto child_fail; | |
1237 | } | |
a258bf26 | 1238 | |
bc2f673e LP |
1239 | if (mount(arg_directory, "/", "bind", MS_MOVE, NULL) < 0) { |
1240 | log_error("mount(MS_BIND) failed: %m"); | |
88213476 LP |
1241 | goto child_fail; |
1242 | } | |
1243 | ||
1244 | if (chroot(".") < 0) { | |
1245 | log_error("chroot() failed: %m"); | |
1246 | goto child_fail; | |
1247 | } | |
1248 | ||
1249 | if (chdir("/") < 0) { | |
1250 | log_error("chdir() failed: %m"); | |
1251 | goto child_fail; | |
1252 | } | |
1253 | ||
4c12626c | 1254 | umask(0022); |
a258bf26 | 1255 | |
a41fe3a2 LP |
1256 | loopback_setup(); |
1257 | ||
ec8927ca LP |
1258 | if (drop_capabilities() < 0) { |
1259 | log_error("drop_capabilities() failed: %m"); | |
88213476 | 1260 | goto child_fail; |
ec8927ca | 1261 | } |
88213476 | 1262 | |
687d0825 MV |
1263 | if (arg_user) { |
1264 | ||
d05c5031 | 1265 | if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) { |
687d0825 MV |
1266 | log_error("get_user_creds() failed: %m"); |
1267 | goto child_fail; | |
1268 | } | |
1269 | ||
d2e54fae KS |
1270 | if (mkdir_parents_label(home, 0775) < 0) { |
1271 | log_error("mkdir_parents_label() failed: %m"); | |
687d0825 MV |
1272 | goto child_fail; |
1273 | } | |
1274 | ||
d2e54fae KS |
1275 | if (mkdir_safe_label(home, 0775, uid, gid) < 0) { |
1276 | log_error("mkdir_safe_label() failed: %m"); | |
687d0825 MV |
1277 | goto child_fail; |
1278 | } | |
1279 | ||
1280 | if (initgroups((const char*)arg_user, gid) < 0) { | |
1281 | log_error("initgroups() failed: %m"); | |
1282 | goto child_fail; | |
1283 | } | |
1284 | ||
5c94603d | 1285 | if (setresgid(gid, gid, gid) < 0) { |
687d0825 MV |
1286 | log_error("setregid() failed: %m"); |
1287 | goto child_fail; | |
1288 | } | |
1289 | ||
5c94603d | 1290 | if (setresuid(uid, uid, uid) < 0) { |
687d0825 MV |
1291 | log_error("setreuid() failed: %m"); |
1292 | goto child_fail; | |
1293 | } | |
1294 | } | |
1295 | ||
144f0fc0 LP |
1296 | if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) || |
1297 | (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) || | |
1298 | (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) { | |
669241a0 | 1299 | log_error("Out of memory."); |
687d0825 | 1300 | goto child_fail; |
144f0fc0 LP |
1301 | } |
1302 | ||
1303 | if (arg_uuid) { | |
1304 | if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) { | |
669241a0 | 1305 | log_error("Out of memory."); |
144f0fc0 LP |
1306 | goto child_fail; |
1307 | } | |
687d0825 MV |
1308 | } |
1309 | ||
3a74cea5 | 1310 | setup_hostname(); |
88213476 | 1311 | |
0f0dbc46 LP |
1312 | if (arg_boot) { |
1313 | char **a; | |
1314 | size_t l; | |
1315 | ||
1316 | /* Automatically search for the init system */ | |
1317 | ||
1318 | l = 1 + argc - optind; | |
1319 | a = newa(char*, l + 1); | |
1320 | memcpy(a + 1, argv + optind, l * sizeof(char*)); | |
1321 | ||
1322 | a[0] = (char*) "/usr/lib/systemd/systemd"; | |
1323 | execve(a[0], a, (char**) envp); | |
1324 | ||
1325 | a[0] = (char*) "/lib/systemd/systemd"; | |
1326 | execve(a[0], a, (char**) envp); | |
1327 | ||
1328 | a[0] = (char*) "/sbin/init"; | |
1329 | execve(a[0], a, (char**) envp); | |
1330 | } else if (argc > optind) | |
da5b3bad LP |
1331 | execvpe(argv[optind], argv + optind, (char**) envp); |
1332 | else { | |
5c94603d | 1333 | chdir(home ? home : "/root"); |
da5b3bad LP |
1334 | execle("/bin/bash", "-bash", NULL, (char**) envp); |
1335 | } | |
88213476 LP |
1336 | |
1337 | log_error("execv() failed: %m"); | |
1338 | ||
1339 | child_fail: | |
1340 | _exit(EXIT_FAILURE); | |
1341 | } | |
1342 | ||
a258bf26 LP |
1343 | if (process_pty(master, &mask) < 0) |
1344 | goto finish; | |
1345 | ||
1346 | if (saved_attr_valid) { | |
1347 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
1348 | saved_attr_valid = false; | |
1349 | } | |
1350 | ||
6df6b939 | 1351 | r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid); |
88213476 LP |
1352 | |
1353 | if (r < 0) | |
1354 | r = EXIT_FAILURE; | |
1355 | ||
1356 | finish: | |
a258bf26 LP |
1357 | if (saved_attr_valid) |
1358 | tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); | |
1359 | ||
1360 | if (master >= 0) | |
1361 | close_nointr_nofail(master); | |
1362 | ||
e58a1277 LP |
1363 | close_pipe(kmsg_socket_pair); |
1364 | ||
04d391da LP |
1365 | if (oldcg) |
1366 | cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0); | |
1367 | ||
1368 | if (newcg) | |
1369 | cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true); | |
88213476 | 1370 | |
04d391da | 1371 | free(arg_directory); |
40c32a4a | 1372 | strv_free(arg_controllers); |
04d391da LP |
1373 | free(oldcg); |
1374 | free(newcg); | |
88213476 LP |
1375 | |
1376 | return r; | |
1377 | } |