]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-mount.c
detect-virt: do not try to read all of /proc/cpuinfo
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
e83bebef 2
4f5dd394 3#include <sys/mount.h>
07630cea 4#include <linux/magic.h>
e83bebef 5
b5efdb8a 6#include "alloc-util.h"
4f5dd394 7#include "escape.h"
0996ef00
CB
8#include "fd-util.h"
9#include "fileio.h"
f4f15635 10#include "fs-util.h"
e83bebef 11#include "label.h"
4f5dd394 12#include "mkdir.h"
4349cd7c 13#include "mount-util.h"
6bedfcbb
LP
14#include "nspawn-mount.h"
15#include "parse-util.h"
4f5dd394
LP
16#include "path-util.h"
17#include "rm-rf.h"
e83bebef 18#include "set.h"
8fcde012 19#include "stat-util.h"
07630cea 20#include "string-util.h"
4f5dd394 21#include "strv.h"
ee104e11 22#include "user-util.h"
4f5dd394 23#include "util.h"
e83bebef 24
88614c8a 25CustomMount* custom_mount_add(CustomMount **l, size_t *n, CustomMountType t) {
e83bebef
LP
26 CustomMount *c, *ret;
27
28 assert(l);
29 assert(n);
30 assert(t >= 0);
31 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
32
aa484f35 33 c = reallocarray(*l, *n + 1, sizeof(CustomMount));
e83bebef
LP
34 if (!c)
35 return NULL;
36
37 *l = c;
38 ret = *l + *n;
39 (*n)++;
40
41 *ret = (CustomMount) { .type = t };
42
43 return ret;
44}
45
88614c8a
LP
46void custom_mount_free_all(CustomMount *l, size_t n) {
47 size_t i;
e83bebef
LP
48
49 for (i = 0; i < n; i++) {
50 CustomMount *m = l + i;
51
52 free(m->source);
53 free(m->destination);
54 free(m->options);
55
56 if (m->work_dir) {
57 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
58 free(m->work_dir);
59 }
60
c7a4890c
LP
61 if (m->rm_rf_tmpdir) {
62 (void) rm_rf(m->rm_rf_tmpdir, REMOVE_ROOT|REMOVE_PHYSICAL);
63 free(m->rm_rf_tmpdir);
64 }
65
e83bebef
LP
66 strv_free(m->lower);
67 }
68
69 free(l);
70}
71
86c0dd4a 72static int custom_mount_compare(const void *a, const void *b) {
e83bebef
LP
73 const CustomMount *x = a, *y = b;
74 int r;
75
76 r = path_compare(x->destination, y->destination);
77 if (r != 0)
78 return r;
79
80 if (x->type < y->type)
81 return -1;
82 if (x->type > y->type)
83 return 1;
84
85 return 0;
86}
87
86c0dd4a
LP
/* Checks whether p is usable as a mount source: an absolute path, optionally preceded by a single
 * '+' character (which resolve_source_path() treats as "relative to the container root"). */
static bool source_path_is_valid(const char *p) {
        assert(p);

        return path_is_absolute(*p == '+' ? p + 1 : p);
}
96
/* Returns a newly allocated copy of source. If source begins with '+', that character is dropped
 * and the rest is prefixed with dest via prefix_root(); otherwise source is duplicated verbatim.
 * Returns NULL if source is NULL or if the allocation fails. */
static char *resolve_source_path(const char *dest, const char *source) {

        if (!source)
                return NULL;

        return source[0] == '+' ? prefix_root(dest, source + 1) : strdup(source);
}
107
88614c8a
LP
108int custom_mount_prepare_all(const char *dest, CustomMount *l, size_t n) {
109 size_t i;
86c0dd4a
LP
110 int r;
111
112 /* Prepare all custom mounts. This will make source we know all temporary directories. This is called in the
113 * parent process, so that we know the temporary directories to remove on exit before we fork off the
114 * children. */
115
116 assert(l || n == 0);
117
118 /* Order the custom mounts, and make sure we have a working directory */
119 qsort_safe(l, n, sizeof(CustomMount), custom_mount_compare);
120
121 for (i = 0; i < n; i++) {
122 CustomMount *m = l + i;
123
124 if (m->source) {
125 char *s;
126
127 s = resolve_source_path(dest, m->source);
128 if (!s)
129 return log_oom();
130
10af01a5 131 free_and_replace(m->source, s);
c7a4890c
LP
132 } else {
133 /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
134
135 m->rm_rf_tmpdir = strdup("/var/tmp/nspawn-temp-XXXXXX");
136 if (!m->rm_rf_tmpdir)
137 return log_oom();
138
139 if (!mkdtemp(m->rm_rf_tmpdir)) {
140 m->rm_rf_tmpdir = mfree(m->rm_rf_tmpdir);
141 return log_error_errno(errno, "Failed to acquire temporary directory: %m");
142 }
143
144 m->source = strjoin(m->rm_rf_tmpdir, "/src");
145 if (!m->source)
146 return log_oom();
147
148 if (mkdir(m->source, 0755) < 0)
149 return log_error_errno(errno, "Failed to create %s: %m", m->source);
86c0dd4a
LP
150 }
151
152 if (m->type == CUSTOM_MOUNT_OVERLAY) {
153 char **j;
154
155 STRV_FOREACH(j, m->lower) {
156 char *s;
157
158 s = resolve_source_path(dest, *j);
159 if (!s)
160 return log_oom();
161
10af01a5 162 free_and_replace(*j, s);
86c0dd4a
LP
163 }
164
165 if (m->work_dir) {
166 char *s;
167
168 s = resolve_source_path(dest, m->work_dir);
169 if (!s)
170 return log_oom();
171
10af01a5 172 free_and_replace(m->work_dir, s);
86c0dd4a
LP
173 } else {
174 assert(m->source);
175
176 r = tempfn_random(m->source, NULL, &m->work_dir);
177 if (r < 0)
178 return log_error_errno(r, "Failed to acquire working directory: %m");
179 }
180
181 (void) mkdir_label(m->work_dir, 0700);
182 }
183 }
184
185 return 0;
186}
187
88614c8a 188int bind_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
e83bebef
LP
189 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
190 const char *p = s;
191 CustomMount *m;
192 int r;
193
194 assert(l);
195 assert(n);
196
197 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
198 if (r < 0)
199 return r;
200 if (r == 0)
201 return -EINVAL;
e83bebef 202 if (r == 1) {
86c0dd4a 203 destination = strdup(source[0] == '+' ? source+1 : source);
e83bebef
LP
204 if (!destination)
205 return -ENOMEM;
206 }
e83bebef
LP
207 if (r == 2 && !isempty(p)) {
208 opts = strdup(p);
209 if (!opts)
210 return -ENOMEM;
211 }
212
c7a4890c
LP
213 if (isempty(source))
214 source = NULL;
215 else if (!source_path_is_valid(source))
e83bebef 216 return -EINVAL;
c7a4890c 217
e83bebef
LP
218 if (!path_is_absolute(destination))
219 return -EINVAL;
220
221 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
222 if (!m)
48cbe5f8 223 return -ENOMEM;
e83bebef
LP
224
225 m->source = source;
226 m->destination = destination;
227 m->read_only = read_only;
228 m->options = opts;
229
230 source = destination = opts = NULL;
231 return 0;
232}
233
88614c8a 234int tmpfs_mount_parse(CustomMount **l, size_t *n, const char *s) {
e83bebef
LP
235 _cleanup_free_ char *path = NULL, *opts = NULL;
236 const char *p = s;
237 CustomMount *m;
238 int r;
239
240 assert(l);
241 assert(n);
242 assert(s);
243
244 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
245 if (r < 0)
246 return r;
247 if (r == 0)
248 return -EINVAL;
249
250 if (isempty(p))
251 opts = strdup("mode=0755");
252 else
253 opts = strdup(p);
254 if (!opts)
255 return -ENOMEM;
256
257 if (!path_is_absolute(path))
258 return -EINVAL;
259
260 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
261 if (!m)
262 return -ENOMEM;
263
1cc6c93a
YW
264 m->destination = TAKE_PTR(path);
265 m->options = TAKE_PTR(opts);
e83bebef 266
e83bebef
LP
267 return 0;
268}
269
88614c8a 270int overlay_mount_parse(CustomMount **l, size_t *n, const char *s, bool read_only) {
ad85779a
LP
271 _cleanup_free_ char *upper = NULL, *destination = NULL;
272 _cleanup_strv_free_ char **lower = NULL;
273 CustomMount *m;
86c0dd4a 274 int k;
ad85779a 275
86c0dd4a
LP
276 k = strv_split_extract(&lower, s, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
277 if (k < 0)
278 return k;
ad85779a
LP
279 if (k < 2)
280 return -EADDRNOTAVAIL;
281 if (k == 2) {
86c0dd4a
LP
282 /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
283 * we'll also define the destination mount point the same as the upper. */
284
285 if (!source_path_is_valid(lower[0]) ||
286 !source_path_is_valid(lower[1]))
287 return -EINVAL;
288
ae2a15bc 289 upper = TAKE_PTR(lower[1]);
ad85779a 290
86c0dd4a 291 destination = strdup(upper[0] == '+' ? upper+1 : upper); /* take the destination without "+" prefix */
ad85779a
LP
292 if (!destination)
293 return -ENOMEM;
ad85779a 294 } else {
c7a4890c 295 char **i;
86c0dd4a
LP
296
297 /* If more than two parameters are specified, the last one is the destination, the second to last one
298 * the "upper", and all before that the "lower" directories. */
299
ad85779a 300 destination = lower[k - 1];
ae2a15bc 301 upper = TAKE_PTR(lower[k - 2]);
86c0dd4a 302
c7a4890c
LP
303 STRV_FOREACH(i, lower)
304 if (!source_path_is_valid(*i))
305 return -EINVAL;
306
307 /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
308 * in /var/tmp */
309 if (isempty(upper))
310 upper = NULL;
311 else if (!source_path_is_valid(upper))
312 return -EINVAL;
313
86c0dd4a
LP
314 if (!path_is_absolute(destination))
315 return -EINVAL;
ad85779a
LP
316 }
317
318 m = custom_mount_add(l, n, CUSTOM_MOUNT_OVERLAY);
319 if (!m)
320 return -ENOMEM;
321
1cc6c93a
YW
322 m->destination = TAKE_PTR(destination);
323 m->source = TAKE_PTR(upper);
324 m->lower = TAKE_PTR(lower);
ad85779a
LP
325 m->read_only = read_only;
326
ad85779a
LP
327 return 0;
328}
329
04029482 330int tmpfs_patch_options(
e83bebef 331 const char *options,
2fa017f1 332 uid_t uid_shift,
e83bebef
LP
333 const char *selinux_apifs_context,
334 char **ret) {
335
336 char *buf = NULL;
337
2fa017f1 338 if (uid_shift != UID_INVALID) {
9aa2169e 339 if (asprintf(&buf, "%s%suid=" UID_FMT ",gid=" UID_FMT,
87e4e28d 340 strempty(options), options ? "," : "",
9aa2169e 341 uid_shift, uid_shift) < 0)
e83bebef
LP
342 return -ENOMEM;
343
344 options = buf;
345 }
346
349cc4a5 347#if HAVE_SELINUX
e83bebef
LP
348 if (selinux_apifs_context) {
349 char *t;
350
87e4e28d 351 t = strjoin(strempty(options), options ? "," : "",
9aa2169e
ZJS
352 "context=\"", selinux_apifs_context, "\"");
353 free(buf);
354 if (!t)
e83bebef 355 return -ENOMEM;
e83bebef 356
e83bebef
LP
357 buf = t;
358 }
359#endif
360
0996ef00
CB
361 if (!buf && options) {
362 buf = strdup(options);
363 if (!buf)
364 return -ENOMEM;
365 }
e83bebef 366 *ret = buf;
0996ef00 367
e83bebef
LP
368 return !!buf;
369}
370
4f086aab 371int mount_sysfs(const char *dest, MountSettingsMask mount_settings) {
d8fc6a00 372 const char *full, *top, *x;
d1678248 373 int r;
4f086aab 374 unsigned long extra_flags = 0;
d8fc6a00
LP
375
376 top = prefix_roota(dest, "/sys");
40fd52f2 377 r = path_is_fs_type(top, SYSFS_MAGIC);
d1678248
ILG
378 if (r < 0)
379 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
380 /* /sys might already be mounted as sysfs by the outer child in the
381 * !netns case. In this case, it's all good. Don't touch it because we
382 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
383 */
384 if (r > 0)
385 return 0;
386
d8fc6a00
LP
387 full = prefix_roota(top, "/full");
388
389 (void) mkdir(full, 0755);
390
4f086aab
SU
391 if (mount_settings & MOUNT_APPLY_APIVFS_RO)
392 extra_flags |= MS_RDONLY;
393
60e76d48 394 r = mount_verbose(LOG_ERR, "sysfs", full, "sysfs",
4f086aab 395 MS_NOSUID|MS_NOEXEC|MS_NODEV|extra_flags, NULL);
60e76d48
ZJS
396 if (r < 0)
397 return r;
d8fc6a00
LP
398
399 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
400 _cleanup_free_ char *from = NULL, *to = NULL;
401
402 from = prefix_root(full, x);
403 if (!from)
404 return log_oom();
405
406 to = prefix_root(top, x);
407 if (!to)
408 return log_oom();
409
410 (void) mkdir(to, 0755);
411
60e76d48
ZJS
412 r = mount_verbose(LOG_ERR, from, to, NULL, MS_BIND, NULL);
413 if (r < 0)
414 return r;
d8fc6a00 415
60e76d48 416 r = mount_verbose(LOG_ERR, NULL, to, NULL,
4f086aab 417 MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
60e76d48
ZJS
418 if (r < 0)
419 return r;
d8fc6a00
LP
420 }
421
60e76d48
ZJS
422 r = umount_verbose(full);
423 if (r < 0)
424 return r;
d8fc6a00
LP
425
426 if (rmdir(full) < 0)
427 return log_error_errno(errno, "Failed to remove %s: %m", full);
428
0996ef00
CB
429 /* Create mountpoint for cgroups. Otherwise we are not allowed since we
430 * remount /sys read-only.
431 */
677a72cd
LS
432 x = prefix_roota(top, "/fs/cgroup");
433 (void) mkdir_p(x, 0755);
d8fc6a00 434
60e76d48 435 return mount_verbose(LOG_ERR, NULL, top, NULL,
4f086aab 436 MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT|extra_flags, NULL);
d8fc6a00
LP
437}
438
9c0fad5f 439static int mkdir_userns(const char *path, mode_t mode, uid_t uid_shift) {
63eae723
EV
440 int r;
441
442 assert(path);
443
dae8b82e
ZJS
444 r = mkdir_errno_wrapper(path, mode);
445 if (r < 0 && r != -EEXIST)
446 return r;
63eae723 447
9c0fad5f 448 if (uid_shift == UID_INVALID)
acbbf69b
LP
449 return 0;
450
dae8b82e 451 if (lchown(path, uid_shift, uid_shift) < 0)
acbbf69b 452 return -errno;
63eae723
EV
453
454 return 0;
455}
456
/* Like mkdir_userns(), but creates all missing parent directories of "path" too. If "prefix" is
 * given, "path" must be located below it (otherwise -ENOTDIR is returned), and parent directories
 * that are themselves a prefix of "prefix" are skipped rather than created or chown()ed. */
static int mkdir_userns_p(const char *prefix, const char *path, mode_t mode, uid_t uid_shift) {
        const char *cursor;
        int r;

        assert(path);

        if (prefix && !path_startswith(path, prefix))
                return -ENOTDIR;

        /* Scratch space for the parent paths, each of which is shorter than "path" itself */
        char parent[strlen(path) + 1];

        /* Walk the path component by component, creating every parent except the last component */
        cursor = path + strspn(path, "/");
        for (;;) {
                const char *end;
                size_t len;

                end = cursor + strcspn(cursor, "/");   /* end of the current component */
                cursor = end + strspn(end, "/");       /* start of the next component */

                /* Nothing follows? Then the current component is the last one and we're done here */
                if (!*cursor)
                        break;

                len = end - path;
                memcpy(parent, path, len);
                parent[len] = 0;

                if (!prefix || !path_startswith(prefix, parent)) {
                        r = mkdir_userns(parent, mode, uid_shift);
                        if (r < 0)
                                return r;
                }
        }

        return mkdir_userns(path, mode, uid_shift);
}
491
e83bebef 492int mount_all(const char *dest,
4f086aab 493 MountSettingsMask mount_settings,
2fa017f1 494 uid_t uid_shift,
e83bebef
LP
495 const char *selinux_apifs_context) {
496
d4b653c5
LP
497#define PROC_INACCESSIBLE(path) \
498 { NULL, (path), NULL, NULL, MS_BIND, \
499 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_INACCESSIBLE_REG }, /* Bind mount first ... */ \
500 { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
501 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
502
503#define PROC_READ_ONLY(path) \
504 { (path), (path), NULL, NULL, MS_BIND, \
505 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
506 { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
507 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
508
e83bebef
LP
509 typedef struct MountPoint {
510 const char *what;
511 const char *where;
512 const char *type;
513 const char *options;
514 unsigned long flags;
4f086aab 515 MountSettingsMask mount_settings;
e83bebef
LP
516 } MountPoint;
517
518 static const MountPoint mount_table[] = {
d4b653c5
LP
519 /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing) */
520 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
521 MOUNT_FATAL|MOUNT_IN_USERNS },
522
523 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND,
524 MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */
525
526 { "/proc/sys/net", "/proc/sys/net", NULL, NULL, MS_BIND,
527 MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO|MOUNT_APPLY_APIVFS_NETNS }, /* (except for this) */
528
529 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
530 MOUNT_FATAL|MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* ... then, make it r/o */
531
532 /* Make these files inaccessible to container payloads: they potentially leak information about kernel
533 * internals or the host's execution environment to the container */
534 PROC_INACCESSIBLE("/proc/kallsyms"),
535 PROC_INACCESSIBLE("/proc/kcore"),
536 PROC_INACCESSIBLE("/proc/keys"),
537 PROC_INACCESSIBLE("/proc/sysrq-trigger"),
538 PROC_INACCESSIBLE("/proc/timer_list"),
539
540 /* Make these directories read-only to container payloads: they show hardware information, and in some
541 * cases contain tunables the container really shouldn't have access to. */
542 PROC_READ_ONLY("/proc/acpi"),
543 PROC_READ_ONLY("/proc/apm"),
544 PROC_READ_ONLY("/proc/asound"),
545 PROC_READ_ONLY("/proc/bus"),
546 PROC_READ_ONLY("/proc/fs"),
547 PROC_READ_ONLY("/proc/irq"),
548 PROC_READ_ONLY("/proc/scsi"),
549
550 /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing) */
551 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
552 MOUNT_FATAL },
03d0f4b5 553 { "tmpfs", "/sys", "tmpfs", "mode=555", MS_NOSUID|MS_NOEXEC|MS_NODEV,
d4b653c5
LP
554 MOUNT_FATAL|MOUNT_APPLY_APIVFS_NETNS },
555 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,
556 MOUNT_FATAL|MOUNT_APPLY_APIVFS_RO }, /* skipped if above was mounted */
557 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
558 MOUNT_FATAL }, /* skipped if above was mounted */
559 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
560 MOUNT_FATAL },
561 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
562 MOUNT_FATAL },
563 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
564 MOUNT_FATAL },
565
349cc4a5 566#if HAVE_SELINUX
d4b653c5
LP
567 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,
568 0 }, /* Bind mount first */
569 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT,
570 0 }, /* Then, make it r/o */
e83bebef
LP
571#endif
572 };
573
d4b653c5 574 _cleanup_(unlink_and_freep) char *inaccessible = NULL;
4f086aab
SU
575 bool use_userns = (mount_settings & MOUNT_USE_USERNS);
576 bool netns = (mount_settings & MOUNT_APPLY_APIVFS_NETNS);
577 bool ro = (mount_settings & MOUNT_APPLY_APIVFS_RO);
578 bool in_userns = (mount_settings & MOUNT_IN_USERNS);
d4b653c5 579 size_t k;
88614c8a 580 int r;
e83bebef
LP
581
582 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
583 _cleanup_free_ char *where = NULL, *options = NULL;
d4b653c5 584 const char *o, *what;
4f086aab
SU
585 bool fatal = (mount_table[k].mount_settings & MOUNT_FATAL);
586
587 if (in_userns != (bool)(mount_table[k].mount_settings & MOUNT_IN_USERNS))
588 continue;
e83bebef 589
4f086aab 590 if (!netns && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_NETNS))
d1678248
ILG
591 continue;
592
4f086aab 593 if (!ro && (bool)(mount_table[k].mount_settings & MOUNT_APPLY_APIVFS_RO))
e83bebef
LP
594 continue;
595
cb638b5e 596 r = chase_symlinks(mount_table[k].where, dest, CHASE_NONEXISTENT|CHASE_PREFIX_ROOT, &where);
8ce48cf0 597 if (r < 0)
ec57bd42 598 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, mount_table[k].where);
e83bebef 599
d4b653c5
LP
600 if (mount_table[k].mount_settings & MOUNT_INACCESSIBLE_REG) {
601
602 if (!inaccessible) {
603 _cleanup_free_ char *np = NULL;
604
605 r = tempfn_random_child(NULL, "inaccessible", &np);
606 if (r < 0)
607 return log_error_errno(r, "Failed to generate inaccessible file node path: %m");
608
609 r = touch_file(np, false, USEC_INFINITY, UID_INVALID, GID_INVALID, 0000);
610 if (r < 0)
611 return log_error_errno(r, "Failed to create inaccessible file node '%s': %m", np);
612
613 inaccessible = TAKE_PTR(np);
614 }
615
616 what = inaccessible;
617 } else
618 what = mount_table[k].what;
619
8ce48cf0 620 r = path_is_mount_point(where, NULL, 0);
e83bebef
LP
621 if (r < 0 && r != -ENOENT)
622 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
623
624 /* Skip this entry if it is not a remount. */
d4b653c5 625 if (what && r > 0)
e83bebef
LP
626 continue;
627
9c0fad5f 628 r = mkdir_userns_p(dest, where, 0755, (use_userns && !in_userns) ? uid_shift : UID_INVALID);
920a7899 629 if (r < 0 && r != -EEXIST) {
4f13e534 630 if (fatal && r != -EROFS)
e83bebef
LP
631 return log_error_errno(r, "Failed to create directory %s: %m", where);
632
201b13c8 633 log_debug_errno(r, "Failed to create directory %s: %m", where);
4f13e534
LT
634 /* If we failed mkdir() or chown() due to the root
635 * directory being read only, attempt to mount this fs
636 * anyway and let mount_verbose log any errors */
637 if (r != -EROFS)
638 continue;
e83bebef
LP
639 }
640
641 o = mount_table[k].options;
642 if (streq_ptr(mount_table[k].type, "tmpfs")) {
2fa017f1 643 r = tmpfs_patch_options(o, in_userns ? 0 : uid_shift, selinux_apifs_context, &options);
e83bebef
LP
644 if (r < 0)
645 return log_oom();
646 if (r > 0)
647 o = options;
648 }
649
4f086aab 650 r = mount_verbose(fatal ? LOG_ERR : LOG_DEBUG,
d4b653c5 651 what,
60e76d48
ZJS
652 where,
653 mount_table[k].type,
654 mount_table[k].flags,
655 o);
4f086aab 656 if (r < 0 && fatal)
60e76d48 657 return r;
e83bebef
LP
658 }
659
660 return 0;
661}
662
e83bebef 663static int mount_bind(const char *dest, CustomMount *m) {
68cf43c3 664
72d967df 665 _cleanup_free_ char *where = NULL;
68cf43c3 666 struct stat source_st, dest_st;
e83bebef
LP
667 int r;
668
86c0dd4a 669 assert(dest);
e83bebef
LP
670 assert(m);
671
e83bebef
LP
672 if (stat(m->source, &source_st) < 0)
673 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
674
cb638b5e 675 r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
68cf43c3 676 if (r < 0)
ec57bd42 677 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
8ce48cf0
LP
678 if (r > 0) { /* Path exists already? */
679
680 if (stat(where, &dest_st) < 0)
681 return log_error_errno(errno, "Failed to stat %s: %m", where);
e83bebef 682
e83bebef
LP
683 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
684 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
685 return -EINVAL;
686 }
687
688 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
689 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
690 return -EINVAL;
691 }
692
8ce48cf0 693 } else { /* Path doesn't exist yet? */
e83bebef
LP
694 r = mkdir_parents_label(where, 0755);
695 if (r < 0)
696 return log_error_errno(r, "Failed to make parents of %s: %m", where);
b97e83cb
BN
697
698 /* Create the mount point. Any non-directory file can be
699 * mounted on any non-directory file (regular, fifo, socket,
700 * char, block).
701 */
702 if (S_ISDIR(source_st.st_mode))
703 r = mkdir_label(where, 0755);
704 else
705 r = touch(where);
706 if (r < 0)
707 return log_error_errno(r, "Failed to create mount point %s: %m", where);
708
8ce48cf0 709 }
e83bebef 710
72d967df 711 r = mount_verbose(LOG_ERR, m->source, where, NULL, MS_BIND | MS_REC, m->options);
60e76d48
ZJS
712 if (r < 0)
713 return r;
e83bebef
LP
714
715 if (m->read_only) {
6b7c9f8b 716 r = bind_remount_recursive(where, true, NULL);
e83bebef
LP
717 if (r < 0)
718 return log_error_errno(r, "Read-only bind mount failed: %m");
719 }
720
721 return 0;
722}
723
724static int mount_tmpfs(
725 const char *dest,
726 CustomMount *m,
727 bool userns, uid_t uid_shift, uid_t uid_range,
728 const char *selinux_apifs_context) {
729
68cf43c3
LP
730 const char *options;
731 _cleanup_free_ char *buf = NULL, *where = NULL;
e83bebef
LP
732 int r;
733
734 assert(dest);
735 assert(m);
736
cb638b5e 737 r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
68cf43c3 738 if (r < 0)
ec57bd42 739 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
8ce48cf0
LP
740 if (r == 0) { /* Doesn't exist yet? */
741 r = mkdir_p_label(where, 0755);
742 if (r < 0)
743 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
744 }
e83bebef 745
2fa017f1 746 r = tmpfs_patch_options(m->options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
e83bebef
LP
747 if (r < 0)
748 return log_oom();
749 options = r > 0 ? buf : m->options;
750
60e76d48 751 return mount_verbose(LOG_ERR, "tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options);
e83bebef
LP
752}
753
86c0dd4a 754static char *joined_and_escaped_lower_dirs(char **lower) {
e83bebef
LP
755 _cleanup_strv_free_ char **sv = NULL;
756
757 sv = strv_copy(lower);
758 if (!sv)
759 return NULL;
760
761 strv_reverse(sv);
762
763 if (!strv_shell_escape(sv, ",:"))
764 return NULL;
765
766 return strv_join(sv, ":");
767}
768
769static int mount_overlay(const char *dest, CustomMount *m) {
68cf43c3 770
86c0dd4a 771 _cleanup_free_ char *lower = NULL, *where = NULL, *escaped_source = NULL;
68cf43c3 772 const char *options;
e83bebef
LP
773 int r;
774
775 assert(dest);
776 assert(m);
777
cb638b5e 778 r = chase_symlinks(m->destination, dest, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &where);
68cf43c3 779 if (r < 0)
ec57bd42 780 return log_error_errno(r, "Failed to resolve %s/%s: %m", dest, m->destination);
8ce48cf0
LP
781 if (r == 0) { /* Doesn't exist yet? */
782 r = mkdir_label(where, 0755);
783 if (r < 0)
784 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
785 }
e83bebef
LP
786
787 (void) mkdir_p_label(m->source, 0755);
788
789 lower = joined_and_escaped_lower_dirs(m->lower);
790 if (!lower)
791 return log_oom();
792
86c0dd4a
LP
793 escaped_source = shell_escape(m->source, ",:");
794 if (!escaped_source)
795 return log_oom();
e83bebef 796
86c0dd4a 797 if (m->read_only)
e83bebef 798 options = strjoina("lowerdir=", escaped_source, ":", lower);
86c0dd4a
LP
799 else {
800 _cleanup_free_ char *escaped_work_dir = NULL;
e83bebef 801
e83bebef
LP
802 escaped_work_dir = shell_escape(m->work_dir, ",:");
803 if (!escaped_work_dir)
804 return log_oom();
805
806 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
807 }
808
60e76d48 809 return mount_verbose(LOG_ERR, "overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options);
e83bebef
LP
810}
811
812int mount_custom(
813 const char *dest,
88614c8a 814 CustomMount *mounts, size_t n,
e83bebef
LP
815 bool userns, uid_t uid_shift, uid_t uid_range,
816 const char *selinux_apifs_context) {
817
88614c8a 818 size_t i;
e83bebef
LP
819 int r;
820
821 assert(dest);
822
823 for (i = 0; i < n; i++) {
824 CustomMount *m = mounts + i;
825
826 switch (m->type) {
827
828 case CUSTOM_MOUNT_BIND:
829 r = mount_bind(dest, m);
830 break;
831
832 case CUSTOM_MOUNT_TMPFS:
833 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
834 break;
835
836 case CUSTOM_MOUNT_OVERLAY:
837 r = mount_overlay(dest, m);
838 break;
839
840 default:
841 assert_not_reached("Unknown custom mount type");
842 }
843
844 if (r < 0)
845 return r;
846 }
847
848 return 0;
849}
850
e83bebef
LP
851int setup_volatile_state(
852 const char *directory,
853 VolatileMode mode,
854 bool userns, uid_t uid_shift, uid_t uid_range,
855 const char *selinux_apifs_context) {
856
857 _cleanup_free_ char *buf = NULL;
858 const char *p, *options;
859 int r;
860
861 assert(directory);
862
863 if (mode != VOLATILE_STATE)
864 return 0;
865
866 /* --volatile=state means we simply overmount /var
867 with a tmpfs, and the rest read-only. */
868
6b7c9f8b 869 r = bind_remount_recursive(directory, true, NULL);
e83bebef
LP
870 if (r < 0)
871 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
872
873 p = prefix_roota(directory, "/var");
874 r = mkdir(p, 0755);
875 if (r < 0 && errno != EEXIST)
876 return log_error_errno(errno, "Failed to create %s: %m", directory);
877
878 options = "mode=755";
2fa017f1 879 r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
e83bebef
LP
880 if (r < 0)
881 return log_oom();
882 if (r > 0)
883 options = buf;
884
60e76d48 885 return mount_verbose(LOG_ERR, "tmpfs", p, "tmpfs", MS_STRICTATIME, options);
e83bebef
LP
886}
887
888int setup_volatile(
889 const char *directory,
890 VolatileMode mode,
891 bool userns, uid_t uid_shift, uid_t uid_range,
892 const char *selinux_apifs_context) {
893
894 bool tmpfs_mounted = false, bind_mounted = false;
895 char template[] = "/tmp/nspawn-volatile-XXXXXX";
896 _cleanup_free_ char *buf = NULL;
897 const char *f, *t, *options;
898 int r;
899
900 assert(directory);
901
902 if (mode != VOLATILE_YES)
903 return 0;
904
905 /* --volatile=yes means we mount a tmpfs to the root dir, and
906 the original /usr to use inside it, and that read-only. */
907
908 if (!mkdtemp(template))
909 return log_error_errno(errno, "Failed to create temporary directory: %m");
910
911 options = "mode=755";
2fa017f1 912 r = tmpfs_patch_options(options, uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &buf);
e83bebef
LP
913 if (r < 0)
914 return log_oom();
915 if (r > 0)
916 options = buf;
917
60e76d48
ZJS
918 r = mount_verbose(LOG_ERR, "tmpfs", template, "tmpfs", MS_STRICTATIME, options);
919 if (r < 0)
e83bebef 920 goto fail;
e83bebef
LP
921
922 tmpfs_mounted = true;
923
924 f = prefix_roota(directory, "/usr");
925 t = prefix_roota(template, "/usr");
926
927 r = mkdir(t, 0755);
928 if (r < 0 && errno != EEXIST) {
929 r = log_error_errno(errno, "Failed to create %s: %m", t);
930 goto fail;
931 }
932
60e76d48
ZJS
933 r = mount_verbose(LOG_ERR, f, t, NULL, MS_BIND|MS_REC, NULL);
934 if (r < 0)
e83bebef 935 goto fail;
e83bebef
LP
936
937 bind_mounted = true;
938
6b7c9f8b 939 r = bind_remount_recursive(t, true, NULL);
e83bebef
LP
940 if (r < 0) {
941 log_error_errno(r, "Failed to remount %s read-only: %m", t);
942 goto fail;
943 }
944
60e76d48
ZJS
945 r = mount_verbose(LOG_ERR, template, directory, NULL, MS_MOVE, NULL);
946 if (r < 0)
e83bebef 947 goto fail;
e83bebef
LP
948
949 (void) rmdir(template);
950
951 return 0;
952
953fail:
954 if (bind_mounted)
60e76d48 955 (void) umount_verbose(t);
e83bebef
LP
956
957 if (tmpfs_mounted)
60e76d48 958 (void) umount_verbose(template);
e83bebef
LP
959 (void) rmdir(template);
960 return r;
961}
b53ede69
PW
962
963/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
964int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
965 _cleanup_free_ char *root_new = NULL, *root_old = NULL;
966 const char *p = s;
967 int r;
968
969 assert(pivot_root_new);
970 assert(pivot_root_old);
971
972 r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
973 if (r < 0)
974 return r;
975 if (r == 0)
976 return -EINVAL;
977
978 if (isempty(p))
979 root_old = NULL;
980 else {
981 root_old = strdup(p);
982 if (!root_old)
983 return -ENOMEM;
984 }
985
986 if (!path_is_absolute(root_new))
987 return -EINVAL;
988 if (root_old && !path_is_absolute(root_old))
989 return -EINVAL;
990
991 free_and_replace(*pivot_root_new, root_new);
992 free_and_replace(*pivot_root_old, root_old);
993
994 return 0;
995}
996
997int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
998 _cleanup_free_ char *directory_pivot_root_new = NULL;
999 _cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
1000 char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
1001 bool remove_pivot_tmp = false;
1002 int r;
1003
1004 assert(directory);
1005
1006 if (!pivot_root_new)
1007 return 0;
1008
1009 /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1010 * If pivot_root_old is NULL, the existing / disappears.
1011 * This requires a temporary directory, pivot_tmp, which is
1012 * not a child of either.
1013 *
1014 * This is typically used for OSTree-style containers, where
1015 * the root partition contains several sysroots which could be
1016 * run. Normally, one would be chosen by the bootloader and
1017 * pivoted to / by initramfs.
1018 *
1019 * For example, for an OSTree deployment, pivot_root_new
1020 * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1021 * code doesn’t do the /var mount which OSTree expects: use
1022 * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1023 *
1024 * So in the OSTree case, we’ll end up with something like:
1025 * - directory = /tmp/nspawn-root-123456
1026 * - pivot_root_new = /ostree/deploy/os/deploy/123abc
1027 * - pivot_root_old = /sysroot
1028 * - directory_pivot_root_new =
1029 * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1030 * - pivot_tmp = /tmp/nspawn-pivot-123456
1031 * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1032 *
1033 * Requires all file systems at directory and below to be mounted
1034 * MS_PRIVATE or MS_SLAVE so they can be moved.
1035 */
1036 directory_pivot_root_new = prefix_root(directory, pivot_root_new);
1037
1038 /* Remount directory_pivot_root_new to make it movable. */
1039 r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
1040 if (r < 0)
1041 goto done;
1042
1043 if (pivot_root_old) {
1044 if (!mkdtemp(pivot_tmp)) {
1045 r = log_error_errno(errno, "Failed to create temporary directory: %m");
1046 goto done;
1047 }
1048
1049 remove_pivot_tmp = true;
1050 pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
1051
1052 r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
1053 if (r < 0)
1054 goto done;
1055
1056 r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
1057 if (r < 0)
1058 goto done;
1059
1060 r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
1061 if (r < 0)
1062 goto done;
1063 } else {
1064 r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
1065 if (r < 0)
1066 goto done;
1067 }
1068
1069done:
1070 if (remove_pivot_tmp)
1071 (void) rmdir(pivot_tmp);
1072
1073 return r;
1074}