]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
nspawn: split out mount related functions into a new nspawn-mount.c file
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23
24 #include "util.h"
25 #include "rm-rf.h"
26 #include "strv.h"
27 #include "path-util.h"
28 #include "mkdir.h"
29 #include "label.h"
30 #include "set.h"
31 #include "cgroup-util.h"
32
33 #include "nspawn.h"
34 #include "nspawn-mount.h"
35
36 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
37 CustomMount *c, *ret;
38
39 assert(l);
40 assert(n);
41 assert(t >= 0);
42 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
43
44 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
45 if (!c)
46 return NULL;
47
48 *l = c;
49 ret = *l + *n;
50 (*n)++;
51
52 *ret = (CustomMount) { .type = t };
53
54 return ret;
55 }
56
57 void custom_mount_free_all(CustomMount *l, unsigned n) {
58 unsigned i;
59
60 for (i = 0; i < n; i++) {
61 CustomMount *m = l + i;
62
63 free(m->source);
64 free(m->destination);
65 free(m->options);
66
67 if (m->work_dir) {
68 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
69 free(m->work_dir);
70 }
71
72 strv_free(m->lower);
73 }
74
75 free(l);
76 }
77
78 int custom_mount_compare(const void *a, const void *b) {
79 const CustomMount *x = a, *y = b;
80 int r;
81
82 r = path_compare(x->destination, y->destination);
83 if (r != 0)
84 return r;
85
86 if (x->type < y->type)
87 return -1;
88 if (x->type > y->type)
89 return 1;
90
91 return 0;
92 }
93
94 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
95 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
96 const char *p = s;
97 CustomMount *m;
98 int r;
99
100 assert(l);
101 assert(n);
102
103 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
104 if (r < 0)
105 return r;
106 if (r == 0)
107 return -EINVAL;
108
109 if (r == 1) {
110 destination = strdup(source);
111 if (!destination)
112 return -ENOMEM;
113 }
114
115 if (r == 2 && !isempty(p)) {
116 opts = strdup(p);
117 if (!opts)
118 return -ENOMEM;
119 }
120
121 if (!path_is_absolute(source))
122 return -EINVAL;
123
124 if (!path_is_absolute(destination))
125 return -EINVAL;
126
127 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
128 if (!m)
129 return log_oom();
130
131 m->source = source;
132 m->destination = destination;
133 m->read_only = read_only;
134 m->options = opts;
135
136 source = destination = opts = NULL;
137 return 0;
138 }
139
140 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
141 _cleanup_free_ char *path = NULL, *opts = NULL;
142 const char *p = s;
143 CustomMount *m;
144 int r;
145
146 assert(l);
147 assert(n);
148 assert(s);
149
150 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
151 if (r < 0)
152 return r;
153 if (r == 0)
154 return -EINVAL;
155
156 if (isempty(p))
157 opts = strdup("mode=0755");
158 else
159 opts = strdup(p);
160 if (!opts)
161 return -ENOMEM;
162
163 if (!path_is_absolute(path))
164 return -EINVAL;
165
166 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
167 if (!m)
168 return -ENOMEM;
169
170 m->destination = path;
171 m->options = opts;
172
173 path = opts = NULL;
174 return 0;
175 }
176
177 static int tmpfs_patch_options(
178 const char *options,
179 bool userns, uid_t uid_shift, uid_t uid_range,
180 const char *selinux_apifs_context,
181 char **ret) {
182
183 char *buf = NULL;
184
185 if (userns && uid_shift != 0) {
186 assert(uid_shift != UID_INVALID);
187
188 if (options)
189 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
190 else
191 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
192 if (!buf)
193 return -ENOMEM;
194
195 options = buf;
196 }
197
198 #ifdef HAVE_SELINUX
199 if (selinux_apifs_context) {
200 char *t;
201
202 if (options)
203 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
204 else
205 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
206 if (!t) {
207 free(buf);
208 return -ENOMEM;
209 }
210
211 free(buf);
212 buf = t;
213 }
214 #endif
215
216 *ret = buf;
217 return !!buf;
218 }
219
220 int mount_all(const char *dest,
221 bool userns, uid_t uid_shift, uid_t uid_range,
222 const char *selinux_apifs_context) {
223
224 typedef struct MountPoint {
225 const char *what;
226 const char *where;
227 const char *type;
228 const char *options;
229 unsigned long flags;
230 bool fatal;
231 bool userns;
232 } MountPoint;
233
234 static const MountPoint mount_table[] = {
235 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
236 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
237 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
238 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
239 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
240 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
241 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
242 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
243 #ifdef HAVE_SELINUX
244 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
245 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
246 #endif
247 };
248
249 unsigned k;
250 int r;
251
252 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
253 _cleanup_free_ char *where = NULL, *options = NULL;
254 const char *o;
255
256 if (userns != mount_table[k].userns)
257 continue;
258
259 where = prefix_root(dest, mount_table[k].where);
260 if (!where)
261 return log_oom();
262
263 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
264 if (r < 0 && r != -ENOENT)
265 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
266
267 /* Skip this entry if it is not a remount. */
268 if (mount_table[k].what && r > 0)
269 continue;
270
271 r = mkdir_p(where, 0755);
272 if (r < 0) {
273 if (mount_table[k].fatal)
274 return log_error_errno(r, "Failed to create directory %s: %m", where);
275
276 log_warning_errno(r, "Failed to create directory %s: %m", where);
277 continue;
278 }
279
280 o = mount_table[k].options;
281 if (streq_ptr(mount_table[k].type, "tmpfs")) {
282 r = tmpfs_patch_options(o, userns, uid_shift, uid_range, selinux_apifs_context, &options);
283 if (r < 0)
284 return log_oom();
285 if (r > 0)
286 o = options;
287 }
288
289 if (mount(mount_table[k].what,
290 where,
291 mount_table[k].type,
292 mount_table[k].flags,
293 o) < 0) {
294
295 if (mount_table[k].fatal)
296 return log_error_errno(errno, "mount(%s) failed: %m", where);
297
298 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
299 }
300 }
301
302 return 0;
303 }
304
305 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
306 const char *p = options;
307 unsigned long flags = *mount_flags;
308 char *opts = NULL;
309
310 assert(options);
311
312 for (;;) {
313 _cleanup_free_ char *word = NULL;
314 int r = extract_first_word(&p, &word, ",", 0);
315 if (r < 0)
316 return log_error_errno(r, "Failed to extract mount option: %m");
317 if (r == 0)
318 break;
319
320 if (streq(word, "rbind"))
321 flags |= MS_REC;
322 else if (streq(word, "norbind"))
323 flags &= ~MS_REC;
324 else {
325 log_error("Invalid bind mount option: %s", word);
326 return -EINVAL;
327 }
328 }
329
330 *mount_flags = flags;
331 /* in the future mount_opts will hold string options for mount(2) */
332 *mount_opts = opts;
333
334 return 0;
335 }
336
337 static int mount_bind(const char *dest, CustomMount *m) {
338 struct stat source_st, dest_st;
339 const char *where;
340 unsigned long mount_flags = MS_BIND | MS_REC;
341 _cleanup_free_ char *mount_opts = NULL;
342 int r;
343
344 assert(m);
345
346 if (m->options) {
347 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
348 if (r < 0)
349 return r;
350 }
351
352 if (stat(m->source, &source_st) < 0)
353 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
354
355 where = prefix_roota(dest, m->destination);
356
357 if (stat(where, &dest_st) >= 0) {
358 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
359 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
360 return -EINVAL;
361 }
362
363 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
364 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
365 return -EINVAL;
366 }
367
368 } else if (errno == ENOENT) {
369 r = mkdir_parents_label(where, 0755);
370 if (r < 0)
371 return log_error_errno(r, "Failed to make parents of %s: %m", where);
372 } else {
373 log_error_errno(errno, "Failed to stat %s: %m", where);
374 return -errno;
375 }
376
377 /* Create the mount point. Any non-directory file can be
378 * mounted on any non-directory file (regular, fifo, socket,
379 * char, block).
380 */
381 if (S_ISDIR(source_st.st_mode))
382 r = mkdir_label(where, 0755);
383 else
384 r = touch(where);
385 if (r < 0 && r != -EEXIST)
386 return log_error_errno(r, "Failed to create mount point %s: %m", where);
387
388 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
389 return log_error_errno(errno, "mount(%s) failed: %m", where);
390
391 if (m->read_only) {
392 r = bind_remount_recursive(where, true);
393 if (r < 0)
394 return log_error_errno(r, "Read-only bind mount failed: %m");
395 }
396
397 return 0;
398 }
399
400 static int mount_tmpfs(
401 const char *dest,
402 CustomMount *m,
403 bool userns, uid_t uid_shift, uid_t uid_range,
404 const char *selinux_apifs_context) {
405
406 const char *where, *options;
407 _cleanup_free_ char *buf = NULL;
408 int r;
409
410 assert(dest);
411 assert(m);
412
413 where = prefix_roota(dest, m->destination);
414
415 r = mkdir_p_label(where, 0755);
416 if (r < 0 && r != -EEXIST)
417 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
418
419 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
420 if (r < 0)
421 return log_oom();
422 options = r > 0 ? buf : m->options;
423
424 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
425 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
426
427 return 0;
428 }
429
430 static char *joined_and_escaped_lower_dirs(char * const *lower) {
431 _cleanup_strv_free_ char **sv = NULL;
432
433 sv = strv_copy(lower);
434 if (!sv)
435 return NULL;
436
437 strv_reverse(sv);
438
439 if (!strv_shell_escape(sv, ",:"))
440 return NULL;
441
442 return strv_join(sv, ":");
443 }
444
445 static int mount_overlay(const char *dest, CustomMount *m) {
446 _cleanup_free_ char *lower = NULL;
447 const char *where, *options;
448 int r;
449
450 assert(dest);
451 assert(m);
452
453 where = prefix_roota(dest, m->destination);
454
455 r = mkdir_label(where, 0755);
456 if (r < 0 && r != -EEXIST)
457 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
458
459 (void) mkdir_p_label(m->source, 0755);
460
461 lower = joined_and_escaped_lower_dirs(m->lower);
462 if (!lower)
463 return log_oom();
464
465 if (m->read_only) {
466 _cleanup_free_ char *escaped_source = NULL;
467
468 escaped_source = shell_escape(m->source, ",:");
469 if (!escaped_source)
470 return log_oom();
471
472 options = strjoina("lowerdir=", escaped_source, ":", lower);
473 } else {
474 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
475
476 assert(m->work_dir);
477 (void) mkdir_label(m->work_dir, 0700);
478
479 escaped_source = shell_escape(m->source, ",:");
480 if (!escaped_source)
481 return log_oom();
482 escaped_work_dir = shell_escape(m->work_dir, ",:");
483 if (!escaped_work_dir)
484 return log_oom();
485
486 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
487 }
488
489 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
490 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
491
492 return 0;
493 }
494
495 int mount_custom(
496 const char *dest,
497 CustomMount *mounts, unsigned n,
498 bool userns, uid_t uid_shift, uid_t uid_range,
499 const char *selinux_apifs_context) {
500
501 unsigned i;
502 int r;
503
504 assert(dest);
505
506 for (i = 0; i < n; i++) {
507 CustomMount *m = mounts + i;
508
509 switch (m->type) {
510
511 case CUSTOM_MOUNT_BIND:
512 r = mount_bind(dest, m);
513 break;
514
515 case CUSTOM_MOUNT_TMPFS:
516 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
517 break;
518
519 case CUSTOM_MOUNT_OVERLAY:
520 r = mount_overlay(dest, m);
521 break;
522
523 default:
524 assert_not_reached("Unknown custom mount type");
525 }
526
527 if (r < 0)
528 return r;
529 }
530
531 return 0;
532 }
533
534 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
535 char *to;
536 int r;
537
538 to = strjoina(dest, "/sys/fs/cgroup/", hierarchy);
539
540 r = path_is_mount_point(to, 0);
541 if (r < 0 && r != -ENOENT)
542 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
543 if (r > 0)
544 return 0;
545
546 mkdir_p(to, 0755);
547
548 /* The superblock mount options of the mount point need to be
549 * identical to the hosts', and hence writable... */
550 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
551 return log_error_errno(errno, "Failed to mount to %s: %m", to);
552
553 /* ... hence let's only make the bind mount read-only, not the
554 * superblock. */
555 if (read_only) {
556 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
557 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
558 }
559 return 1;
560 }
561
562 static int mount_legacy_cgroups(
563 const char *dest,
564 bool userns, uid_t uid_shift, uid_t uid_range,
565 const char *selinux_apifs_context) {
566
567 _cleanup_set_free_free_ Set *controllers = NULL;
568 const char *cgroup_root;
569 int r;
570
571 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
572
573 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
574 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
575 if (r < 0)
576 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
577 if (r == 0) {
578 _cleanup_free_ char *options = NULL;
579
580 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
581 if (r < 0)
582 return log_oom();
583
584 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
585 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
586 }
587
588 if (cg_unified() > 0)
589 goto skip_controllers;
590
591 controllers = set_new(&string_hash_ops);
592 if (!controllers)
593 return log_oom();
594
595 r = cg_kernel_controllers(controllers);
596 if (r < 0)
597 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
598
599 for (;;) {
600 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
601
602 controller = set_steal_first(controllers);
603 if (!controller)
604 break;
605
606 origin = prefix_root("/sys/fs/cgroup/", controller);
607 if (!origin)
608 return log_oom();
609
610 r = readlink_malloc(origin, &combined);
611 if (r == -EINVAL) {
612 /* Not a symbolic link, but directly a single cgroup hierarchy */
613
614 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
615 if (r < 0)
616 return r;
617
618 } else if (r < 0)
619 return log_error_errno(r, "Failed to read link %s: %m", origin);
620 else {
621 _cleanup_free_ char *target = NULL;
622
623 target = prefix_root(dest, origin);
624 if (!target)
625 return log_oom();
626
627 /* A symbolic link, a combination of controllers in one hierarchy */
628
629 if (!filename_is_valid(combined)) {
630 log_warning("Ignoring invalid combined hierarchy %s.", combined);
631 continue;
632 }
633
634 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
635 if (r < 0)
636 return r;
637
638 r = symlink_idempotent(combined, target);
639 if (r == -EINVAL) {
640 log_error("Invalid existing symlink for combined hierarchy");
641 return r;
642 }
643 if (r < 0)
644 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
645 }
646 }
647
648 skip_controllers:
649 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
650 if (r < 0)
651 return r;
652
653 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
654 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
655
656 return 0;
657 }
658
659 static int mount_unified_cgroups(const char *dest) {
660 const char *p;
661 int r;
662
663 assert(dest);
664
665 p = strjoina(dest, "/sys/fs/cgroup");
666
667 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
668 if (r < 0)
669 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
670 if (r > 0) {
671 p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
672 if (access(p, F_OK) >= 0)
673 return 0;
674 if (errno != ENOENT)
675 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
676
677 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
678 return -EINVAL;
679 }
680
681 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
682 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
683
684 return 0;
685 }
686
/* Sets up the cgroup file systems below dest: the unified hierarchy if
 * unified_requested is true, the legacy per-controller layout otherwise. The
 * remaining parameters are only used for the legacy layout. Returns whatever the
 * selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        if (!unified_requested)
                return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);

        return mount_unified_cgroups(dest);
}
698
699 int mount_systemd_cgroup_writable(
700 const char *dest,
701 bool unified_requested) {
702
703 _cleanup_free_ char *own_cgroup_path = NULL;
704 const char *systemd_root, *systemd_own;
705 int r;
706
707 assert(dest);
708
709 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
710 if (r < 0)
711 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
712
713 /* If we are living in the top-level, then there's nothing to do... */
714 if (path_equal(own_cgroup_path, "/"))
715 return 0;
716
717 if (unified_requested) {
718 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
719 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
720 } else {
721 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
722 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
723 }
724
725 /* Make our own cgroup a (writable) bind mount */
726 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
727 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
728
729 /* And then remount the systemd cgroup root read-only */
730 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
731 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
732
733 return 0;
734 }
735
736 int setup_volatile_state(
737 const char *directory,
738 VolatileMode mode,
739 bool userns, uid_t uid_shift, uid_t uid_range,
740 const char *selinux_apifs_context) {
741
742 _cleanup_free_ char *buf = NULL;
743 const char *p, *options;
744 int r;
745
746 assert(directory);
747
748 if (mode != VOLATILE_STATE)
749 return 0;
750
751 /* --volatile=state means we simply overmount /var
752 with a tmpfs, and the rest read-only. */
753
754 r = bind_remount_recursive(directory, true);
755 if (r < 0)
756 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
757
758 p = prefix_roota(directory, "/var");
759 r = mkdir(p, 0755);
760 if (r < 0 && errno != EEXIST)
761 return log_error_errno(errno, "Failed to create %s: %m", directory);
762
763 options = "mode=755";
764 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
765 if (r < 0)
766 return log_oom();
767 if (r > 0)
768 options = buf;
769
770 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
771 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
772
773 return 0;
774 }
775
776 int setup_volatile(
777 const char *directory,
778 VolatileMode mode,
779 bool userns, uid_t uid_shift, uid_t uid_range,
780 const char *selinux_apifs_context) {
781
782 bool tmpfs_mounted = false, bind_mounted = false;
783 char template[] = "/tmp/nspawn-volatile-XXXXXX";
784 _cleanup_free_ char *buf = NULL;
785 const char *f, *t, *options;
786 int r;
787
788 assert(directory);
789
790 if (mode != VOLATILE_YES)
791 return 0;
792
793 /* --volatile=yes means we mount a tmpfs to the root dir, and
794 the original /usr to use inside it, and that read-only. */
795
796 if (!mkdtemp(template))
797 return log_error_errno(errno, "Failed to create temporary directory: %m");
798
799 options = "mode=755";
800 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
801 if (r < 0)
802 return log_oom();
803 if (r > 0)
804 options = buf;
805
806 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
807 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
808 goto fail;
809 }
810
811 tmpfs_mounted = true;
812
813 f = prefix_roota(directory, "/usr");
814 t = prefix_roota(template, "/usr");
815
816 r = mkdir(t, 0755);
817 if (r < 0 && errno != EEXIST) {
818 r = log_error_errno(errno, "Failed to create %s: %m", t);
819 goto fail;
820 }
821
822 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
823 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
824 goto fail;
825 }
826
827 bind_mounted = true;
828
829 r = bind_remount_recursive(t, true);
830 if (r < 0) {
831 log_error_errno(r, "Failed to remount %s read-only: %m", t);
832 goto fail;
833 }
834
835 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
836 r = log_error_errno(errno, "Failed to move root mount: %m");
837 goto fail;
838 }
839
840 (void) rmdir(template);
841
842 return 0;
843
844 fail:
845 if (bind_mounted)
846 (void) umount(t);
847
848 if (tmpfs_mounted)
849 (void) umount(template);
850 (void) rmdir(template);
851 return r;
852 }
853
854 VolatileMode volatile_mode_from_string(const char *s) {
855 int b;
856
857 if (isempty(s))
858 return _VOLATILE_MODE_INVALID;
859
860 b = parse_boolean(s);
861 if (b > 0)
862 return VOLATILE_YES;
863 if (b == 0)
864 return VOLATILE_NO;
865
866 if (streq(s, "state"))
867 return VOLATILE_STATE;
868
869 return _VOLATILE_MODE_INVALID;
870 }