]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
Merge pull request #1428 from franciozzy/tagenhance
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23
24 #include "util.h"
25 #include "rm-rf.h"
26 #include "strv.h"
27 #include "path-util.h"
28 #include "mkdir.h"
29 #include "label.h"
30 #include "set.h"
31 #include "cgroup-util.h"
32
33 #include "nspawn-mount.h"
34
35 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
36 CustomMount *c, *ret;
37
38 assert(l);
39 assert(n);
40 assert(t >= 0);
41 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
42
43 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
44 if (!c)
45 return NULL;
46
47 *l = c;
48 ret = *l + *n;
49 (*n)++;
50
51 *ret = (CustomMount) { .type = t };
52
53 return ret;
54 }
55
56 void custom_mount_free_all(CustomMount *l, unsigned n) {
57 unsigned i;
58
59 for (i = 0; i < n; i++) {
60 CustomMount *m = l + i;
61
62 free(m->source);
63 free(m->destination);
64 free(m->options);
65
66 if (m->work_dir) {
67 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
68 free(m->work_dir);
69 }
70
71 strv_free(m->lower);
72 }
73
74 free(l);
75 }
76
77 int custom_mount_compare(const void *a, const void *b) {
78 const CustomMount *x = a, *y = b;
79 int r;
80
81 r = path_compare(x->destination, y->destination);
82 if (r != 0)
83 return r;
84
85 if (x->type < y->type)
86 return -1;
87 if (x->type > y->type)
88 return 1;
89
90 return 0;
91 }
92
93 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
94 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
95 const char *p = s;
96 CustomMount *m;
97 int r;
98
99 assert(l);
100 assert(n);
101
102 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
103 if (r < 0)
104 return r;
105 if (r == 0)
106 return -EINVAL;
107
108 if (r == 1) {
109 destination = strdup(source);
110 if (!destination)
111 return -ENOMEM;
112 }
113
114 if (r == 2 && !isempty(p)) {
115 opts = strdup(p);
116 if (!opts)
117 return -ENOMEM;
118 }
119
120 if (!path_is_absolute(source))
121 return -EINVAL;
122
123 if (!path_is_absolute(destination))
124 return -EINVAL;
125
126 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
127 if (!m)
128 return log_oom();
129
130 m->source = source;
131 m->destination = destination;
132 m->read_only = read_only;
133 m->options = opts;
134
135 source = destination = opts = NULL;
136 return 0;
137 }
138
139 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
140 _cleanup_free_ char *path = NULL, *opts = NULL;
141 const char *p = s;
142 CustomMount *m;
143 int r;
144
145 assert(l);
146 assert(n);
147 assert(s);
148
149 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
150 if (r < 0)
151 return r;
152 if (r == 0)
153 return -EINVAL;
154
155 if (isempty(p))
156 opts = strdup("mode=0755");
157 else
158 opts = strdup(p);
159 if (!opts)
160 return -ENOMEM;
161
162 if (!path_is_absolute(path))
163 return -EINVAL;
164
165 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
166 if (!m)
167 return -ENOMEM;
168
169 m->destination = path;
170 m->options = opts;
171
172 path = opts = NULL;
173 return 0;
174 }
175
176 static int tmpfs_patch_options(
177 const char *options,
178 bool userns, uid_t uid_shift, uid_t uid_range,
179 const char *selinux_apifs_context,
180 char **ret) {
181
182 char *buf = NULL;
183
184 if (userns && uid_shift != 0) {
185 assert(uid_shift != UID_INVALID);
186
187 if (options)
188 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
189 else
190 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
191 if (!buf)
192 return -ENOMEM;
193
194 options = buf;
195 }
196
197 #ifdef HAVE_SELINUX
198 if (selinux_apifs_context) {
199 char *t;
200
201 if (options)
202 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
203 else
204 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
205 if (!t) {
206 free(buf);
207 return -ENOMEM;
208 }
209
210 free(buf);
211 buf = t;
212 }
213 #endif
214
215 *ret = buf;
216 return !!buf;
217 }
218
219 int mount_sysfs(const char *dest) {
220 const char *full, *top, *x;
221
222 top = prefix_roota(dest, "/sys");
223 full = prefix_roota(top, "/full");
224
225 (void) mkdir(full, 0755);
226
227 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
228 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
229
230 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
231 _cleanup_free_ char *from = NULL, *to = NULL;
232
233 from = prefix_root(full, x);
234 if (!from)
235 return log_oom();
236
237 to = prefix_root(top, x);
238 if (!to)
239 return log_oom();
240
241 (void) mkdir(to, 0755);
242
243 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
244 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
245
246 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
247 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
248 }
249
250 if (umount(full) < 0)
251 return log_error_errno(errno, "Failed to unmount %s: %m", full);
252
253 if (rmdir(full) < 0)
254 return log_error_errno(errno, "Failed to remove %s: %m", full);
255
256 x = prefix_roota(top, "/fs/kdbus");
257 (void) mkdir(x, 0755);
258
259 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
260 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
261
262 return 0;
263 }
264
265 int mount_all(const char *dest,
266 bool use_userns, bool in_userns,
267 uid_t uid_shift, uid_t uid_range,
268 const char *selinux_apifs_context) {
269
270 typedef struct MountPoint {
271 const char *what;
272 const char *where;
273 const char *type;
274 const char *options;
275 unsigned long flags;
276 bool fatal;
277 bool userns;
278 } MountPoint;
279
280 static const MountPoint mount_table[] = {
281 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
282 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
283 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
284 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
285 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
286 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
287 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
288 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
289 #ifdef HAVE_SELINUX
290 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
291 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
292 #endif
293 };
294
295 unsigned k;
296 int r;
297
298 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
299 _cleanup_free_ char *where = NULL, *options = NULL;
300 const char *o;
301
302 if (in_userns != mount_table[k].userns)
303 continue;
304
305 where = prefix_root(dest, mount_table[k].where);
306 if (!where)
307 return log_oom();
308
309 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
310 if (r < 0 && r != -ENOENT)
311 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
312
313 /* Skip this entry if it is not a remount. */
314 if (mount_table[k].what && r > 0)
315 continue;
316
317 r = mkdir_p(where, 0755);
318 if (r < 0) {
319 if (mount_table[k].fatal)
320 return log_error_errno(r, "Failed to create directory %s: %m", where);
321
322 log_warning_errno(r, "Failed to create directory %s: %m", where);
323 continue;
324 }
325
326 o = mount_table[k].options;
327 if (streq_ptr(mount_table[k].type, "tmpfs")) {
328 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
329 if (r < 0)
330 return log_oom();
331 if (r > 0)
332 o = options;
333 }
334
335 if (mount(mount_table[k].what,
336 where,
337 mount_table[k].type,
338 mount_table[k].flags,
339 o) < 0) {
340
341 if (mount_table[k].fatal)
342 return log_error_errno(errno, "mount(%s) failed: %m", where);
343
344 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
345 }
346 }
347
348 return 0;
349 }
350
351 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
352 const char *p = options;
353 unsigned long flags = *mount_flags;
354 char *opts = NULL;
355
356 assert(options);
357
358 for (;;) {
359 _cleanup_free_ char *word = NULL;
360 int r = extract_first_word(&p, &word, ",", 0);
361 if (r < 0)
362 return log_error_errno(r, "Failed to extract mount option: %m");
363 if (r == 0)
364 break;
365
366 if (streq(word, "rbind"))
367 flags |= MS_REC;
368 else if (streq(word, "norbind"))
369 flags &= ~MS_REC;
370 else {
371 log_error("Invalid bind mount option: %s", word);
372 return -EINVAL;
373 }
374 }
375
376 *mount_flags = flags;
377 /* in the future mount_opts will hold string options for mount(2) */
378 *mount_opts = opts;
379
380 return 0;
381 }
382
383 static int mount_bind(const char *dest, CustomMount *m) {
384 struct stat source_st, dest_st;
385 const char *where;
386 unsigned long mount_flags = MS_BIND | MS_REC;
387 _cleanup_free_ char *mount_opts = NULL;
388 int r;
389
390 assert(m);
391
392 if (m->options) {
393 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
394 if (r < 0)
395 return r;
396 }
397
398 if (stat(m->source, &source_st) < 0)
399 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
400
401 where = prefix_roota(dest, m->destination);
402
403 if (stat(where, &dest_st) >= 0) {
404 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
405 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
406 return -EINVAL;
407 }
408
409 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
410 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
411 return -EINVAL;
412 }
413
414 } else if (errno == ENOENT) {
415 r = mkdir_parents_label(where, 0755);
416 if (r < 0)
417 return log_error_errno(r, "Failed to make parents of %s: %m", where);
418 } else {
419 log_error_errno(errno, "Failed to stat %s: %m", where);
420 return -errno;
421 }
422
423 /* Create the mount point. Any non-directory file can be
424 * mounted on any non-directory file (regular, fifo, socket,
425 * char, block).
426 */
427 if (S_ISDIR(source_st.st_mode))
428 r = mkdir_label(where, 0755);
429 else
430 r = touch(where);
431 if (r < 0 && r != -EEXIST)
432 return log_error_errno(r, "Failed to create mount point %s: %m", where);
433
434 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
435 return log_error_errno(errno, "mount(%s) failed: %m", where);
436
437 if (m->read_only) {
438 r = bind_remount_recursive(where, true);
439 if (r < 0)
440 return log_error_errno(r, "Read-only bind mount failed: %m");
441 }
442
443 return 0;
444 }
445
446 static int mount_tmpfs(
447 const char *dest,
448 CustomMount *m,
449 bool userns, uid_t uid_shift, uid_t uid_range,
450 const char *selinux_apifs_context) {
451
452 const char *where, *options;
453 _cleanup_free_ char *buf = NULL;
454 int r;
455
456 assert(dest);
457 assert(m);
458
459 where = prefix_roota(dest, m->destination);
460
461 r = mkdir_p_label(where, 0755);
462 if (r < 0 && r != -EEXIST)
463 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
464
465 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
466 if (r < 0)
467 return log_oom();
468 options = r > 0 ? buf : m->options;
469
470 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
471 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
472
473 return 0;
474 }
475
476 static char *joined_and_escaped_lower_dirs(char * const *lower) {
477 _cleanup_strv_free_ char **sv = NULL;
478
479 sv = strv_copy(lower);
480 if (!sv)
481 return NULL;
482
483 strv_reverse(sv);
484
485 if (!strv_shell_escape(sv, ",:"))
486 return NULL;
487
488 return strv_join(sv, ":");
489 }
490
491 static int mount_overlay(const char *dest, CustomMount *m) {
492 _cleanup_free_ char *lower = NULL;
493 const char *where, *options;
494 int r;
495
496 assert(dest);
497 assert(m);
498
499 where = prefix_roota(dest, m->destination);
500
501 r = mkdir_label(where, 0755);
502 if (r < 0 && r != -EEXIST)
503 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
504
505 (void) mkdir_p_label(m->source, 0755);
506
507 lower = joined_and_escaped_lower_dirs(m->lower);
508 if (!lower)
509 return log_oom();
510
511 if (m->read_only) {
512 _cleanup_free_ char *escaped_source = NULL;
513
514 escaped_source = shell_escape(m->source, ",:");
515 if (!escaped_source)
516 return log_oom();
517
518 options = strjoina("lowerdir=", escaped_source, ":", lower);
519 } else {
520 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
521
522 assert(m->work_dir);
523 (void) mkdir_label(m->work_dir, 0700);
524
525 escaped_source = shell_escape(m->source, ",:");
526 if (!escaped_source)
527 return log_oom();
528 escaped_work_dir = shell_escape(m->work_dir, ",:");
529 if (!escaped_work_dir)
530 return log_oom();
531
532 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
533 }
534
535 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
536 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
537
538 return 0;
539 }
540
541 int mount_custom(
542 const char *dest,
543 CustomMount *mounts, unsigned n,
544 bool userns, uid_t uid_shift, uid_t uid_range,
545 const char *selinux_apifs_context) {
546
547 unsigned i;
548 int r;
549
550 assert(dest);
551
552 for (i = 0; i < n; i++) {
553 CustomMount *m = mounts + i;
554
555 switch (m->type) {
556
557 case CUSTOM_MOUNT_BIND:
558 r = mount_bind(dest, m);
559 break;
560
561 case CUSTOM_MOUNT_TMPFS:
562 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
563 break;
564
565 case CUSTOM_MOUNT_OVERLAY:
566 r = mount_overlay(dest, m);
567 break;
568
569 default:
570 assert_not_reached("Unknown custom mount type");
571 }
572
573 if (r < 0)
574 return r;
575 }
576
577 return 0;
578 }
579
580 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
581 char *to;
582 int r;
583
584 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
585
586 r = path_is_mount_point(to, 0);
587 if (r < 0 && r != -ENOENT)
588 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
589 if (r > 0)
590 return 0;
591
592 mkdir_p(to, 0755);
593
594 /* The superblock mount options of the mount point need to be
595 * identical to the hosts', and hence writable... */
596 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
597 return log_error_errno(errno, "Failed to mount to %s: %m", to);
598
599 /* ... hence let's only make the bind mount read-only, not the
600 * superblock. */
601 if (read_only) {
602 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
603 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
604 }
605 return 1;
606 }
607
608 static int mount_legacy_cgroups(
609 const char *dest,
610 bool userns, uid_t uid_shift, uid_t uid_range,
611 const char *selinux_apifs_context) {
612
613 _cleanup_set_free_free_ Set *controllers = NULL;
614 const char *cgroup_root;
615 int r;
616
617 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
618
619 (void) mkdir_p(cgroup_root, 0755);
620
621 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
622 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
623 if (r < 0)
624 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
625 if (r == 0) {
626 _cleanup_free_ char *options = NULL;
627
628 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
629 if (r < 0)
630 return log_oom();
631
632 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
633 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
634 }
635
636 if (cg_unified() > 0)
637 goto skip_controllers;
638
639 controllers = set_new(&string_hash_ops);
640 if (!controllers)
641 return log_oom();
642
643 r = cg_kernel_controllers(controllers);
644 if (r < 0)
645 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
646
647 for (;;) {
648 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
649
650 controller = set_steal_first(controllers);
651 if (!controller)
652 break;
653
654 origin = prefix_root("/sys/fs/cgroup/", controller);
655 if (!origin)
656 return log_oom();
657
658 r = readlink_malloc(origin, &combined);
659 if (r == -EINVAL) {
660 /* Not a symbolic link, but directly a single cgroup hierarchy */
661
662 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
663 if (r < 0)
664 return r;
665
666 } else if (r < 0)
667 return log_error_errno(r, "Failed to read link %s: %m", origin);
668 else {
669 _cleanup_free_ char *target = NULL;
670
671 target = prefix_root(dest, origin);
672 if (!target)
673 return log_oom();
674
675 /* A symbolic link, a combination of controllers in one hierarchy */
676
677 if (!filename_is_valid(combined)) {
678 log_warning("Ignoring invalid combined hierarchy %s.", combined);
679 continue;
680 }
681
682 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
683 if (r < 0)
684 return r;
685
686 r = symlink_idempotent(combined, target);
687 if (r == -EINVAL) {
688 log_error("Invalid existing symlink for combined hierarchy");
689 return r;
690 }
691 if (r < 0)
692 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
693 }
694 }
695
696 skip_controllers:
697 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
698 if (r < 0)
699 return r;
700
701 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
702 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
703
704 return 0;
705 }
706
707 static int mount_unified_cgroups(const char *dest) {
708 const char *p;
709 int r;
710
711 assert(dest);
712
713 p = strjoina(dest, "/sys/fs/cgroup");
714
715 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
716 if (r < 0)
717 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
718 if (r > 0) {
719 p = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
720 if (access(p, F_OK) >= 0)
721 return 0;
722 if (errno != ENOENT)
723 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
724
725 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
726 return -EINVAL;
727 }
728
729 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
730 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
731
732 return 0;
733 }
734
/* Mounts the cgroup hierarchies below 'dest': the unified one if 'unified_requested' is true,
 * the legacy ones otherwise. The remaining parameters are only relevant for the legacy setup.
 * Returns whatever the respective setup function returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
746
747 int mount_systemd_cgroup_writable(
748 const char *dest,
749 bool unified_requested) {
750
751 _cleanup_free_ char *own_cgroup_path = NULL;
752 const char *systemd_root, *systemd_own;
753 int r;
754
755 assert(dest);
756
757 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
758 if (r < 0)
759 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
760
761 /* If we are living in the top-level, then there's nothing to do... */
762 if (path_equal(own_cgroup_path, "/"))
763 return 0;
764
765 if (unified_requested) {
766 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
767 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
768 } else {
769 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
770 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
771 }
772
773 /* Make our own cgroup a (writable) bind mount */
774 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
775 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
776
777 /* And then remount the systemd cgroup root read-only */
778 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
779 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
780
781 return 0;
782 }
783
784 int setup_volatile_state(
785 const char *directory,
786 VolatileMode mode,
787 bool userns, uid_t uid_shift, uid_t uid_range,
788 const char *selinux_apifs_context) {
789
790 _cleanup_free_ char *buf = NULL;
791 const char *p, *options;
792 int r;
793
794 assert(directory);
795
796 if (mode != VOLATILE_STATE)
797 return 0;
798
799 /* --volatile=state means we simply overmount /var
800 with a tmpfs, and the rest read-only. */
801
802 r = bind_remount_recursive(directory, true);
803 if (r < 0)
804 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
805
806 p = prefix_roota(directory, "/var");
807 r = mkdir(p, 0755);
808 if (r < 0 && errno != EEXIST)
809 return log_error_errno(errno, "Failed to create %s: %m", directory);
810
811 options = "mode=755";
812 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
813 if (r < 0)
814 return log_oom();
815 if (r > 0)
816 options = buf;
817
818 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
819 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
820
821 return 0;
822 }
823
824 int setup_volatile(
825 const char *directory,
826 VolatileMode mode,
827 bool userns, uid_t uid_shift, uid_t uid_range,
828 const char *selinux_apifs_context) {
829
830 bool tmpfs_mounted = false, bind_mounted = false;
831 char template[] = "/tmp/nspawn-volatile-XXXXXX";
832 _cleanup_free_ char *buf = NULL;
833 const char *f, *t, *options;
834 int r;
835
836 assert(directory);
837
838 if (mode != VOLATILE_YES)
839 return 0;
840
841 /* --volatile=yes means we mount a tmpfs to the root dir, and
842 the original /usr to use inside it, and that read-only. */
843
844 if (!mkdtemp(template))
845 return log_error_errno(errno, "Failed to create temporary directory: %m");
846
847 options = "mode=755";
848 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
849 if (r < 0)
850 return log_oom();
851 if (r > 0)
852 options = buf;
853
854 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
855 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
856 goto fail;
857 }
858
859 tmpfs_mounted = true;
860
861 f = prefix_roota(directory, "/usr");
862 t = prefix_roota(template, "/usr");
863
864 r = mkdir(t, 0755);
865 if (r < 0 && errno != EEXIST) {
866 r = log_error_errno(errno, "Failed to create %s: %m", t);
867 goto fail;
868 }
869
870 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
871 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
872 goto fail;
873 }
874
875 bind_mounted = true;
876
877 r = bind_remount_recursive(t, true);
878 if (r < 0) {
879 log_error_errno(r, "Failed to remount %s read-only: %m", t);
880 goto fail;
881 }
882
883 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
884 r = log_error_errno(errno, "Failed to move root mount: %m");
885 goto fail;
886 }
887
888 (void) rmdir(template);
889
890 return 0;
891
892 fail:
893 if (bind_mounted)
894 (void) umount(t);
895
896 if (tmpfs_mounted)
897 (void) umount(template);
898 (void) rmdir(template);
899 return r;
900 }
901
902 VolatileMode volatile_mode_from_string(const char *s) {
903 int b;
904
905 if (isempty(s))
906 return _VOLATILE_MODE_INVALID;
907
908 b = parse_boolean(s);
909 if (b > 0)
910 return VOLATILE_YES;
911 if (b == 0)
912 return VOLATILE_NO;
913
914 if (streq(s, "state"))
915 return VOLATILE_STATE;
916
917 return _VOLATILE_MODE_INVALID;
918 }