]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-mount.c
nspawn: order includes
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
CommitLineData
e83bebef
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2015 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22#include <sys/mount.h>
23
24#include "util.h"
25#include "rm-rf.h"
26#include "strv.h"
27#include "path-util.h"
28#include "mkdir.h"
29#include "label.h"
30#include "set.h"
31#include "cgroup-util.h"
32
e83bebef
LP
33#include "nspawn-mount.h"
34
35CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
36 CustomMount *c, *ret;
37
38 assert(l);
39 assert(n);
40 assert(t >= 0);
41 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
42
43 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
44 if (!c)
45 return NULL;
46
47 *l = c;
48 ret = *l + *n;
49 (*n)++;
50
51 *ret = (CustomMount) { .type = t };
52
53 return ret;
54}
55
56void custom_mount_free_all(CustomMount *l, unsigned n) {
57 unsigned i;
58
59 for (i = 0; i < n; i++) {
60 CustomMount *m = l + i;
61
62 free(m->source);
63 free(m->destination);
64 free(m->options);
65
66 if (m->work_dir) {
67 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
68 free(m->work_dir);
69 }
70
71 strv_free(m->lower);
72 }
73
74 free(l);
75}
76
77int custom_mount_compare(const void *a, const void *b) {
78 const CustomMount *x = a, *y = b;
79 int r;
80
81 r = path_compare(x->destination, y->destination);
82 if (r != 0)
83 return r;
84
85 if (x->type < y->type)
86 return -1;
87 if (x->type > y->type)
88 return 1;
89
90 return 0;
91}
92
93int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
94 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
95 const char *p = s;
96 CustomMount *m;
97 int r;
98
99 assert(l);
100 assert(n);
101
102 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
103 if (r < 0)
104 return r;
105 if (r == 0)
106 return -EINVAL;
107
108 if (r == 1) {
109 destination = strdup(source);
110 if (!destination)
111 return -ENOMEM;
112 }
113
114 if (r == 2 && !isempty(p)) {
115 opts = strdup(p);
116 if (!opts)
117 return -ENOMEM;
118 }
119
120 if (!path_is_absolute(source))
121 return -EINVAL;
122
123 if (!path_is_absolute(destination))
124 return -EINVAL;
125
126 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
127 if (!m)
128 return log_oom();
129
130 m->source = source;
131 m->destination = destination;
132 m->read_only = read_only;
133 m->options = opts;
134
135 source = destination = opts = NULL;
136 return 0;
137}
138
139int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
140 _cleanup_free_ char *path = NULL, *opts = NULL;
141 const char *p = s;
142 CustomMount *m;
143 int r;
144
145 assert(l);
146 assert(n);
147 assert(s);
148
149 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
150 if (r < 0)
151 return r;
152 if (r == 0)
153 return -EINVAL;
154
155 if (isempty(p))
156 opts = strdup("mode=0755");
157 else
158 opts = strdup(p);
159 if (!opts)
160 return -ENOMEM;
161
162 if (!path_is_absolute(path))
163 return -EINVAL;
164
165 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
166 if (!m)
167 return -ENOMEM;
168
169 m->destination = path;
170 m->options = opts;
171
172 path = opts = NULL;
173 return 0;
174}
175
176static int tmpfs_patch_options(
177 const char *options,
178 bool userns, uid_t uid_shift, uid_t uid_range,
179 const char *selinux_apifs_context,
180 char **ret) {
181
182 char *buf = NULL;
183
184 if (userns && uid_shift != 0) {
185 assert(uid_shift != UID_INVALID);
186
187 if (options)
188 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
189 else
190 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
191 if (!buf)
192 return -ENOMEM;
193
194 options = buf;
195 }
196
197#ifdef HAVE_SELINUX
198 if (selinux_apifs_context) {
199 char *t;
200
201 if (options)
202 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
203 else
204 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
205 if (!t) {
206 free(buf);
207 return -ENOMEM;
208 }
209
210 free(buf);
211 buf = t;
212 }
213#endif
214
215 *ret = buf;
216 return !!buf;
217}
218
219int mount_all(const char *dest,
220 bool userns, uid_t uid_shift, uid_t uid_range,
221 const char *selinux_apifs_context) {
222
223 typedef struct MountPoint {
224 const char *what;
225 const char *where;
226 const char *type;
227 const char *options;
228 unsigned long flags;
229 bool fatal;
230 bool userns;
231 } MountPoint;
232
233 static const MountPoint mount_table[] = {
234 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
235 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
236 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
237 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
238 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
239 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
240 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
241 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
242#ifdef HAVE_SELINUX
243 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
244 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
245#endif
246 };
247
248 unsigned k;
249 int r;
250
251 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
252 _cleanup_free_ char *where = NULL, *options = NULL;
253 const char *o;
254
255 if (userns != mount_table[k].userns)
256 continue;
257
258 where = prefix_root(dest, mount_table[k].where);
259 if (!where)
260 return log_oom();
261
262 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
263 if (r < 0 && r != -ENOENT)
264 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
265
266 /* Skip this entry if it is not a remount. */
267 if (mount_table[k].what && r > 0)
268 continue;
269
270 r = mkdir_p(where, 0755);
271 if (r < 0) {
272 if (mount_table[k].fatal)
273 return log_error_errno(r, "Failed to create directory %s: %m", where);
274
275 log_warning_errno(r, "Failed to create directory %s: %m", where);
276 continue;
277 }
278
279 o = mount_table[k].options;
280 if (streq_ptr(mount_table[k].type, "tmpfs")) {
281 r = tmpfs_patch_options(o, userns, uid_shift, uid_range, selinux_apifs_context, &options);
282 if (r < 0)
283 return log_oom();
284 if (r > 0)
285 o = options;
286 }
287
288 if (mount(mount_table[k].what,
289 where,
290 mount_table[k].type,
291 mount_table[k].flags,
292 o) < 0) {
293
294 if (mount_table[k].fatal)
295 return log_error_errno(errno, "mount(%s) failed: %m", where);
296
297 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
298 }
299 }
300
301 return 0;
302}
303
304static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
305 const char *p = options;
306 unsigned long flags = *mount_flags;
307 char *opts = NULL;
308
309 assert(options);
310
311 for (;;) {
312 _cleanup_free_ char *word = NULL;
313 int r = extract_first_word(&p, &word, ",", 0);
314 if (r < 0)
315 return log_error_errno(r, "Failed to extract mount option: %m");
316 if (r == 0)
317 break;
318
319 if (streq(word, "rbind"))
320 flags |= MS_REC;
321 else if (streq(word, "norbind"))
322 flags &= ~MS_REC;
323 else {
324 log_error("Invalid bind mount option: %s", word);
325 return -EINVAL;
326 }
327 }
328
329 *mount_flags = flags;
330 /* in the future mount_opts will hold string options for mount(2) */
331 *mount_opts = opts;
332
333 return 0;
334}
335
336static int mount_bind(const char *dest, CustomMount *m) {
337 struct stat source_st, dest_st;
338 const char *where;
339 unsigned long mount_flags = MS_BIND | MS_REC;
340 _cleanup_free_ char *mount_opts = NULL;
341 int r;
342
343 assert(m);
344
345 if (m->options) {
346 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
347 if (r < 0)
348 return r;
349 }
350
351 if (stat(m->source, &source_st) < 0)
352 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
353
354 where = prefix_roota(dest, m->destination);
355
356 if (stat(where, &dest_st) >= 0) {
357 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
358 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
359 return -EINVAL;
360 }
361
362 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
363 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
364 return -EINVAL;
365 }
366
367 } else if (errno == ENOENT) {
368 r = mkdir_parents_label(where, 0755);
369 if (r < 0)
370 return log_error_errno(r, "Failed to make parents of %s: %m", where);
371 } else {
372 log_error_errno(errno, "Failed to stat %s: %m", where);
373 return -errno;
374 }
375
376 /* Create the mount point. Any non-directory file can be
377 * mounted on any non-directory file (regular, fifo, socket,
378 * char, block).
379 */
380 if (S_ISDIR(source_st.st_mode))
381 r = mkdir_label(where, 0755);
382 else
383 r = touch(where);
384 if (r < 0 && r != -EEXIST)
385 return log_error_errno(r, "Failed to create mount point %s: %m", where);
386
387 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
388 return log_error_errno(errno, "mount(%s) failed: %m", where);
389
390 if (m->read_only) {
391 r = bind_remount_recursive(where, true);
392 if (r < 0)
393 return log_error_errno(r, "Read-only bind mount failed: %m");
394 }
395
396 return 0;
397}
398
399static int mount_tmpfs(
400 const char *dest,
401 CustomMount *m,
402 bool userns, uid_t uid_shift, uid_t uid_range,
403 const char *selinux_apifs_context) {
404
405 const char *where, *options;
406 _cleanup_free_ char *buf = NULL;
407 int r;
408
409 assert(dest);
410 assert(m);
411
412 where = prefix_roota(dest, m->destination);
413
414 r = mkdir_p_label(where, 0755);
415 if (r < 0 && r != -EEXIST)
416 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
417
418 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
419 if (r < 0)
420 return log_oom();
421 options = r > 0 ? buf : m->options;
422
423 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
424 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
425
426 return 0;
427}
428
429static char *joined_and_escaped_lower_dirs(char * const *lower) {
430 _cleanup_strv_free_ char **sv = NULL;
431
432 sv = strv_copy(lower);
433 if (!sv)
434 return NULL;
435
436 strv_reverse(sv);
437
438 if (!strv_shell_escape(sv, ",:"))
439 return NULL;
440
441 return strv_join(sv, ":");
442}
443
444static int mount_overlay(const char *dest, CustomMount *m) {
445 _cleanup_free_ char *lower = NULL;
446 const char *where, *options;
447 int r;
448
449 assert(dest);
450 assert(m);
451
452 where = prefix_roota(dest, m->destination);
453
454 r = mkdir_label(where, 0755);
455 if (r < 0 && r != -EEXIST)
456 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
457
458 (void) mkdir_p_label(m->source, 0755);
459
460 lower = joined_and_escaped_lower_dirs(m->lower);
461 if (!lower)
462 return log_oom();
463
464 if (m->read_only) {
465 _cleanup_free_ char *escaped_source = NULL;
466
467 escaped_source = shell_escape(m->source, ",:");
468 if (!escaped_source)
469 return log_oom();
470
471 options = strjoina("lowerdir=", escaped_source, ":", lower);
472 } else {
473 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
474
475 assert(m->work_dir);
476 (void) mkdir_label(m->work_dir, 0700);
477
478 escaped_source = shell_escape(m->source, ",:");
479 if (!escaped_source)
480 return log_oom();
481 escaped_work_dir = shell_escape(m->work_dir, ",:");
482 if (!escaped_work_dir)
483 return log_oom();
484
485 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
486 }
487
488 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
489 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
490
491 return 0;
492}
493
494int mount_custom(
495 const char *dest,
496 CustomMount *mounts, unsigned n,
497 bool userns, uid_t uid_shift, uid_t uid_range,
498 const char *selinux_apifs_context) {
499
500 unsigned i;
501 int r;
502
503 assert(dest);
504
505 for (i = 0; i < n; i++) {
506 CustomMount *m = mounts + i;
507
508 switch (m->type) {
509
510 case CUSTOM_MOUNT_BIND:
511 r = mount_bind(dest, m);
512 break;
513
514 case CUSTOM_MOUNT_TMPFS:
515 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
516 break;
517
518 case CUSTOM_MOUNT_OVERLAY:
519 r = mount_overlay(dest, m);
520 break;
521
522 default:
523 assert_not_reached("Unknown custom mount type");
524 }
525
526 if (r < 0)
527 return r;
528 }
529
530 return 0;
531}
532
533static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
534 char *to;
535 int r;
536
ee30f6ac 537 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
e83bebef
LP
538
539 r = path_is_mount_point(to, 0);
540 if (r < 0 && r != -ENOENT)
541 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
542 if (r > 0)
543 return 0;
544
545 mkdir_p(to, 0755);
546
547 /* The superblock mount options of the mount point need to be
548 * identical to the hosts', and hence writable... */
549 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
550 return log_error_errno(errno, "Failed to mount to %s: %m", to);
551
552 /* ... hence let's only make the bind mount read-only, not the
553 * superblock. */
554 if (read_only) {
555 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
556 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
557 }
558 return 1;
559}
560
561static int mount_legacy_cgroups(
562 const char *dest,
563 bool userns, uid_t uid_shift, uid_t uid_range,
564 const char *selinux_apifs_context) {
565
566 _cleanup_set_free_free_ Set *controllers = NULL;
567 const char *cgroup_root;
568 int r;
569
570 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
571
572 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
573 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
574 if (r < 0)
575 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
576 if (r == 0) {
577 _cleanup_free_ char *options = NULL;
578
579 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
580 if (r < 0)
581 return log_oom();
582
583 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
584 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
585 }
586
587 if (cg_unified() > 0)
588 goto skip_controllers;
589
590 controllers = set_new(&string_hash_ops);
591 if (!controllers)
592 return log_oom();
593
594 r = cg_kernel_controllers(controllers);
595 if (r < 0)
596 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
597
598 for (;;) {
599 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
600
601 controller = set_steal_first(controllers);
602 if (!controller)
603 break;
604
605 origin = prefix_root("/sys/fs/cgroup/", controller);
606 if (!origin)
607 return log_oom();
608
609 r = readlink_malloc(origin, &combined);
610 if (r == -EINVAL) {
611 /* Not a symbolic link, but directly a single cgroup hierarchy */
612
613 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
614 if (r < 0)
615 return r;
616
617 } else if (r < 0)
618 return log_error_errno(r, "Failed to read link %s: %m", origin);
619 else {
620 _cleanup_free_ char *target = NULL;
621
622 target = prefix_root(dest, origin);
623 if (!target)
624 return log_oom();
625
626 /* A symbolic link, a combination of controllers in one hierarchy */
627
628 if (!filename_is_valid(combined)) {
629 log_warning("Ignoring invalid combined hierarchy %s.", combined);
630 continue;
631 }
632
633 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
634 if (r < 0)
635 return r;
636
637 r = symlink_idempotent(combined, target);
638 if (r == -EINVAL) {
639 log_error("Invalid existing symlink for combined hierarchy");
640 return r;
641 }
642 if (r < 0)
643 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
644 }
645 }
646
647skip_controllers:
648 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
649 if (r < 0)
650 return r;
651
652 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
653 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
654
655 return 0;
656}
657
658static int mount_unified_cgroups(const char *dest) {
659 const char *p;
660 int r;
661
662 assert(dest);
663
664 p = strjoina(dest, "/sys/fs/cgroup");
665
666 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
667 if (r < 0)
668 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
669 if (r > 0) {
670 p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
671 if (access(p, F_OK) >= 0)
672 return 0;
673 if (errno != ENOENT)
674 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
675
676 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
677 return -EINVAL;
678 }
679
680 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
681 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
682
683 return 0;
684}
685
/* Mounts the cgroup tree below "dest": the unified hierarchy if
 * unified_requested is set, the legacy per-controller layout otherwise.
 * The user namespace and SELinux parameters only matter for the legacy
 * layout. Returns the result of the selected helper. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        if (!unified_requested)
                return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);

        return mount_unified_cgroups(dest);
}
697
698int mount_systemd_cgroup_writable(
699 const char *dest,
700 bool unified_requested) {
701
702 _cleanup_free_ char *own_cgroup_path = NULL;
703 const char *systemd_root, *systemd_own;
704 int r;
705
706 assert(dest);
707
708 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
709 if (r < 0)
710 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
711
712 /* If we are living in the top-level, then there's nothing to do... */
713 if (path_equal(own_cgroup_path, "/"))
714 return 0;
715
716 if (unified_requested) {
717 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
718 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
719 } else {
720 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
721 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
722 }
723
724 /* Make our own cgroup a (writable) bind mount */
725 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
726 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
727
728 /* And then remount the systemd cgroup root read-only */
729 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
730 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
731
732 return 0;
733}
734
735int setup_volatile_state(
736 const char *directory,
737 VolatileMode mode,
738 bool userns, uid_t uid_shift, uid_t uid_range,
739 const char *selinux_apifs_context) {
740
741 _cleanup_free_ char *buf = NULL;
742 const char *p, *options;
743 int r;
744
745 assert(directory);
746
747 if (mode != VOLATILE_STATE)
748 return 0;
749
750 /* --volatile=state means we simply overmount /var
751 with a tmpfs, and the rest read-only. */
752
753 r = bind_remount_recursive(directory, true);
754 if (r < 0)
755 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
756
757 p = prefix_roota(directory, "/var");
758 r = mkdir(p, 0755);
759 if (r < 0 && errno != EEXIST)
760 return log_error_errno(errno, "Failed to create %s: %m", directory);
761
762 options = "mode=755";
763 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
764 if (r < 0)
765 return log_oom();
766 if (r > 0)
767 options = buf;
768
769 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
770 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
771
772 return 0;
773}
774
775int setup_volatile(
776 const char *directory,
777 VolatileMode mode,
778 bool userns, uid_t uid_shift, uid_t uid_range,
779 const char *selinux_apifs_context) {
780
781 bool tmpfs_mounted = false, bind_mounted = false;
782 char template[] = "/tmp/nspawn-volatile-XXXXXX";
783 _cleanup_free_ char *buf = NULL;
784 const char *f, *t, *options;
785 int r;
786
787 assert(directory);
788
789 if (mode != VOLATILE_YES)
790 return 0;
791
792 /* --volatile=yes means we mount a tmpfs to the root dir, and
793 the original /usr to use inside it, and that read-only. */
794
795 if (!mkdtemp(template))
796 return log_error_errno(errno, "Failed to create temporary directory: %m");
797
798 options = "mode=755";
799 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
800 if (r < 0)
801 return log_oom();
802 if (r > 0)
803 options = buf;
804
805 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
806 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
807 goto fail;
808 }
809
810 tmpfs_mounted = true;
811
812 f = prefix_roota(directory, "/usr");
813 t = prefix_roota(template, "/usr");
814
815 r = mkdir(t, 0755);
816 if (r < 0 && errno != EEXIST) {
817 r = log_error_errno(errno, "Failed to create %s: %m", t);
818 goto fail;
819 }
820
821 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
822 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
823 goto fail;
824 }
825
826 bind_mounted = true;
827
828 r = bind_remount_recursive(t, true);
829 if (r < 0) {
830 log_error_errno(r, "Failed to remount %s read-only: %m", t);
831 goto fail;
832 }
833
834 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
835 r = log_error_errno(errno, "Failed to move root mount: %m");
836 goto fail;
837 }
838
839 (void) rmdir(template);
840
841 return 0;
842
843fail:
844 if (bind_mounted)
845 (void) umount(t);
846
847 if (tmpfs_mounted)
848 (void) umount(template);
849 (void) rmdir(template);
850 return r;
851}
852
853VolatileMode volatile_mode_from_string(const char *s) {
854 int b;
855
856 if (isempty(s))
857 return _VOLATILE_MODE_INVALID;
858
859 b = parse_boolean(s);
860 if (b > 0)
861 return VOLATILE_YES;
862 if (b == 0)
863 return VOLATILE_NO;
864
865 if (streq(s, "state"))
866 return VOLATILE_STATE;
867
868 return _VOLATILE_MODE_INVALID;
869}