]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
nspawn: create /sys/fs/cgroup for unified hierarchy as well
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23
24 #include "util.h"
25 #include "rm-rf.h"
26 #include "strv.h"
27 #include "path-util.h"
28 #include "mkdir.h"
29 #include "label.h"
30 #include "set.h"
31 #include "cgroup-util.h"
32
33 #include "nspawn-mount.h"
34
35 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
36 CustomMount *c, *ret;
37
38 assert(l);
39 assert(n);
40 assert(t >= 0);
41 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
42
43 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
44 if (!c)
45 return NULL;
46
47 *l = c;
48 ret = *l + *n;
49 (*n)++;
50
51 *ret = (CustomMount) { .type = t };
52
53 return ret;
54 }
55
56 void custom_mount_free_all(CustomMount *l, unsigned n) {
57 unsigned i;
58
59 for (i = 0; i < n; i++) {
60 CustomMount *m = l + i;
61
62 free(m->source);
63 free(m->destination);
64 free(m->options);
65
66 if (m->work_dir) {
67 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
68 free(m->work_dir);
69 }
70
71 strv_free(m->lower);
72 }
73
74 free(l);
75 }
76
77 int custom_mount_compare(const void *a, const void *b) {
78 const CustomMount *x = a, *y = b;
79 int r;
80
81 r = path_compare(x->destination, y->destination);
82 if (r != 0)
83 return r;
84
85 if (x->type < y->type)
86 return -1;
87 if (x->type > y->type)
88 return 1;
89
90 return 0;
91 }
92
93 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
94 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
95 const char *p = s;
96 CustomMount *m;
97 int r;
98
99 assert(l);
100 assert(n);
101
102 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
103 if (r < 0)
104 return r;
105 if (r == 0)
106 return -EINVAL;
107
108 if (r == 1) {
109 destination = strdup(source);
110 if (!destination)
111 return -ENOMEM;
112 }
113
114 if (r == 2 && !isempty(p)) {
115 opts = strdup(p);
116 if (!opts)
117 return -ENOMEM;
118 }
119
120 if (!path_is_absolute(source))
121 return -EINVAL;
122
123 if (!path_is_absolute(destination))
124 return -EINVAL;
125
126 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
127 if (!m)
128 return log_oom();
129
130 m->source = source;
131 m->destination = destination;
132 m->read_only = read_only;
133 m->options = opts;
134
135 source = destination = opts = NULL;
136 return 0;
137 }
138
139 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
140 _cleanup_free_ char *path = NULL, *opts = NULL;
141 const char *p = s;
142 CustomMount *m;
143 int r;
144
145 assert(l);
146 assert(n);
147 assert(s);
148
149 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
150 if (r < 0)
151 return r;
152 if (r == 0)
153 return -EINVAL;
154
155 if (isempty(p))
156 opts = strdup("mode=0755");
157 else
158 opts = strdup(p);
159 if (!opts)
160 return -ENOMEM;
161
162 if (!path_is_absolute(path))
163 return -EINVAL;
164
165 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
166 if (!m)
167 return -ENOMEM;
168
169 m->destination = path;
170 m->options = opts;
171
172 path = opts = NULL;
173 return 0;
174 }
175
176 static int tmpfs_patch_options(
177 const char *options,
178 bool userns, uid_t uid_shift, uid_t uid_range,
179 const char *selinux_apifs_context,
180 char **ret) {
181
182 char *buf = NULL;
183
184 if (userns && uid_shift != 0) {
185 assert(uid_shift != UID_INVALID);
186
187 if (options)
188 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
189 else
190 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
191 if (!buf)
192 return -ENOMEM;
193
194 options = buf;
195 }
196
197 #ifdef HAVE_SELINUX
198 if (selinux_apifs_context) {
199 char *t;
200
201 if (options)
202 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
203 else
204 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
205 if (!t) {
206 free(buf);
207 return -ENOMEM;
208 }
209
210 free(buf);
211 buf = t;
212 }
213 #endif
214
215 *ret = buf;
216 return !!buf;
217 }
218
219 int mount_sysfs(const char *dest) {
220 const char *full, *top, *x;
221
222 top = prefix_roota(dest, "/sys");
223 full = prefix_roota(top, "/full");
224
225 (void) mkdir(full, 0755);
226
227 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
228 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
229
230 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
231 _cleanup_free_ char *from = NULL, *to = NULL;
232
233 from = prefix_root(full, x);
234 if (!from)
235 return log_oom();
236
237 to = prefix_root(top, x);
238 if (!to)
239 return log_oom();
240
241 (void) mkdir(to, 0755);
242
243 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
244 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
245
246 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
247 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
248 }
249
250 if (umount(full) < 0)
251 return log_error_errno(errno, "Failed to unmount %s: %m", full);
252
253 if (rmdir(full) < 0)
254 return log_error_errno(errno, "Failed to remove %s: %m", full);
255
256 x = prefix_roota(top, "/fs/kdbus");
257 (void) mkdir(x, 0755);
258
259 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
260 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
261
262 return 0;
263 }
264
265 int mount_all(const char *dest,
266 bool use_userns, bool in_userns,
267 uid_t uid_shift, uid_t uid_range,
268 const char *selinux_apifs_context) {
269
270 typedef struct MountPoint {
271 const char *what;
272 const char *where;
273 const char *type;
274 const char *options;
275 unsigned long flags;
276 bool fatal;
277 bool userns;
278 } MountPoint;
279
280 static const MountPoint mount_table[] = {
281 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true },
282 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true }, /* Bind mount first */
283 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true }, /* Then, make it r/o */
284 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false },
285 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false },
286 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
287 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false },
288 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false },
289 #ifdef HAVE_SELINUX
290 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false }, /* Bind mount first */
291 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false }, /* Then, make it r/o */
292 #endif
293 };
294
295 unsigned k;
296 int r;
297
298 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
299 _cleanup_free_ char *where = NULL, *options = NULL;
300 const char *o;
301
302 if (in_userns != mount_table[k].userns)
303 continue;
304
305 where = prefix_root(dest, mount_table[k].where);
306 if (!where)
307 return log_oom();
308
309 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
310 if (r < 0 && r != -ENOENT)
311 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
312
313 /* Skip this entry if it is not a remount. */
314 if (mount_table[k].what && r > 0)
315 continue;
316
317 r = mkdir_p(where, 0755);
318 if (r < 0) {
319 if (mount_table[k].fatal)
320 return log_error_errno(r, "Failed to create directory %s: %m", where);
321
322 log_warning_errno(r, "Failed to create directory %s: %m", where);
323 continue;
324 }
325
326 o = mount_table[k].options;
327 if (streq_ptr(mount_table[k].type, "tmpfs")) {
328 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
329 if (r < 0)
330 return log_oom();
331 if (r > 0)
332 o = options;
333 }
334
335 if (mount(mount_table[k].what,
336 where,
337 mount_table[k].type,
338 mount_table[k].flags,
339 o) < 0) {
340
341 if (mount_table[k].fatal)
342 return log_error_errno(errno, "mount(%s) failed: %m", where);
343
344 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
345 }
346 }
347
348 return 0;
349 }
350
351 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
352 const char *p = options;
353 unsigned long flags = *mount_flags;
354 char *opts = NULL;
355
356 assert(options);
357
358 for (;;) {
359 _cleanup_free_ char *word = NULL;
360 int r = extract_first_word(&p, &word, ",", 0);
361 if (r < 0)
362 return log_error_errno(r, "Failed to extract mount option: %m");
363 if (r == 0)
364 break;
365
366 if (streq(word, "rbind"))
367 flags |= MS_REC;
368 else if (streq(word, "norbind"))
369 flags &= ~MS_REC;
370 else {
371 log_error("Invalid bind mount option: %s", word);
372 return -EINVAL;
373 }
374 }
375
376 *mount_flags = flags;
377 /* in the future mount_opts will hold string options for mount(2) */
378 *mount_opts = opts;
379
380 return 0;
381 }
382
383 static int mount_bind(const char *dest, CustomMount *m) {
384 struct stat source_st, dest_st;
385 const char *where;
386 unsigned long mount_flags = MS_BIND | MS_REC;
387 _cleanup_free_ char *mount_opts = NULL;
388 int r;
389
390 assert(m);
391
392 if (m->options) {
393 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
394 if (r < 0)
395 return r;
396 }
397
398 if (stat(m->source, &source_st) < 0)
399 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
400
401 where = prefix_roota(dest, m->destination);
402
403 if (stat(where, &dest_st) >= 0) {
404 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
405 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
406 return -EINVAL;
407 }
408
409 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
410 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
411 return -EINVAL;
412 }
413
414 } else if (errno == ENOENT) {
415 r = mkdir_parents_label(where, 0755);
416 if (r < 0)
417 return log_error_errno(r, "Failed to make parents of %s: %m", where);
418 } else {
419 log_error_errno(errno, "Failed to stat %s: %m", where);
420 return -errno;
421 }
422
423 /* Create the mount point. Any non-directory file can be
424 * mounted on any non-directory file (regular, fifo, socket,
425 * char, block).
426 */
427 if (S_ISDIR(source_st.st_mode))
428 r = mkdir_label(where, 0755);
429 else
430 r = touch(where);
431 if (r < 0 && r != -EEXIST)
432 return log_error_errno(r, "Failed to create mount point %s: %m", where);
433
434 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
435 return log_error_errno(errno, "mount(%s) failed: %m", where);
436
437 if (m->read_only) {
438 r = bind_remount_recursive(where, true);
439 if (r < 0)
440 return log_error_errno(r, "Read-only bind mount failed: %m");
441 }
442
443 return 0;
444 }
445
446 static int mount_tmpfs(
447 const char *dest,
448 CustomMount *m,
449 bool userns, uid_t uid_shift, uid_t uid_range,
450 const char *selinux_apifs_context) {
451
452 const char *where, *options;
453 _cleanup_free_ char *buf = NULL;
454 int r;
455
456 assert(dest);
457 assert(m);
458
459 where = prefix_roota(dest, m->destination);
460
461 r = mkdir_p_label(where, 0755);
462 if (r < 0 && r != -EEXIST)
463 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
464
465 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
466 if (r < 0)
467 return log_oom();
468 options = r > 0 ? buf : m->options;
469
470 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
471 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
472
473 return 0;
474 }
475
476 static char *joined_and_escaped_lower_dirs(char * const *lower) {
477 _cleanup_strv_free_ char **sv = NULL;
478
479 sv = strv_copy(lower);
480 if (!sv)
481 return NULL;
482
483 strv_reverse(sv);
484
485 if (!strv_shell_escape(sv, ",:"))
486 return NULL;
487
488 return strv_join(sv, ":");
489 }
490
491 static int mount_overlay(const char *dest, CustomMount *m) {
492 _cleanup_free_ char *lower = NULL;
493 const char *where, *options;
494 int r;
495
496 assert(dest);
497 assert(m);
498
499 where = prefix_roota(dest, m->destination);
500
501 r = mkdir_label(where, 0755);
502 if (r < 0 && r != -EEXIST)
503 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
504
505 (void) mkdir_p_label(m->source, 0755);
506
507 lower = joined_and_escaped_lower_dirs(m->lower);
508 if (!lower)
509 return log_oom();
510
511 if (m->read_only) {
512 _cleanup_free_ char *escaped_source = NULL;
513
514 escaped_source = shell_escape(m->source, ",:");
515 if (!escaped_source)
516 return log_oom();
517
518 options = strjoina("lowerdir=", escaped_source, ":", lower);
519 } else {
520 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
521
522 assert(m->work_dir);
523 (void) mkdir_label(m->work_dir, 0700);
524
525 escaped_source = shell_escape(m->source, ",:");
526 if (!escaped_source)
527 return log_oom();
528 escaped_work_dir = shell_escape(m->work_dir, ",:");
529 if (!escaped_work_dir)
530 return log_oom();
531
532 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
533 }
534
535 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
536 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
537
538 return 0;
539 }
540
541 int mount_custom(
542 const char *dest,
543 CustomMount *mounts, unsigned n,
544 bool userns, uid_t uid_shift, uid_t uid_range,
545 const char *selinux_apifs_context) {
546
547 unsigned i;
548 int r;
549
550 assert(dest);
551
552 for (i = 0; i < n; i++) {
553 CustomMount *m = mounts + i;
554
555 switch (m->type) {
556
557 case CUSTOM_MOUNT_BIND:
558 r = mount_bind(dest, m);
559 break;
560
561 case CUSTOM_MOUNT_TMPFS:
562 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
563 break;
564
565 case CUSTOM_MOUNT_OVERLAY:
566 r = mount_overlay(dest, m);
567 break;
568
569 default:
570 assert_not_reached("Unknown custom mount type");
571 }
572
573 if (r < 0)
574 return r;
575 }
576
577 return 0;
578 }
579
580 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
581 char *to;
582 int r;
583
584 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
585
586 r = path_is_mount_point(to, 0);
587 if (r < 0 && r != -ENOENT)
588 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
589 if (r > 0)
590 return 0;
591
592 mkdir_p(to, 0755);
593
594 /* The superblock mount options of the mount point need to be
595 * identical to the hosts', and hence writable... */
596 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
597 return log_error_errno(errno, "Failed to mount to %s: %m", to);
598
599 /* ... hence let's only make the bind mount read-only, not the
600 * superblock. */
601 if (read_only) {
602 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
603 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
604 }
605 return 1;
606 }
607
608 static int mount_legacy_cgroups(
609 const char *dest,
610 bool userns, uid_t uid_shift, uid_t uid_range,
611 const char *selinux_apifs_context) {
612
613 _cleanup_set_free_free_ Set *controllers = NULL;
614 const char *cgroup_root;
615 int r;
616
617 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
618
619 (void) mkdir_p(cgroup_root, 0755);
620
621 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
622 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
623 if (r < 0)
624 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
625 if (r == 0) {
626 _cleanup_free_ char *options = NULL;
627
628 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
629 if (r < 0)
630 return log_oom();
631
632 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
633 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
634 }
635
636 if (cg_unified() > 0)
637 goto skip_controllers;
638
639 controllers = set_new(&string_hash_ops);
640 if (!controllers)
641 return log_oom();
642
643 r = cg_kernel_controllers(controllers);
644 if (r < 0)
645 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
646
647 for (;;) {
648 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
649
650 controller = set_steal_first(controllers);
651 if (!controller)
652 break;
653
654 origin = prefix_root("/sys/fs/cgroup/", controller);
655 if (!origin)
656 return log_oom();
657
658 r = readlink_malloc(origin, &combined);
659 if (r == -EINVAL) {
660 /* Not a symbolic link, but directly a single cgroup hierarchy */
661
662 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
663 if (r < 0)
664 return r;
665
666 } else if (r < 0)
667 return log_error_errno(r, "Failed to read link %s: %m", origin);
668 else {
669 _cleanup_free_ char *target = NULL;
670
671 target = prefix_root(dest, origin);
672 if (!target)
673 return log_oom();
674
675 /* A symbolic link, a combination of controllers in one hierarchy */
676
677 if (!filename_is_valid(combined)) {
678 log_warning("Ignoring invalid combined hierarchy %s.", combined);
679 continue;
680 }
681
682 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
683 if (r < 0)
684 return r;
685
686 r = symlink_idempotent(combined, target);
687 if (r == -EINVAL) {
688 log_error("Invalid existing symlink for combined hierarchy");
689 return r;
690 }
691 if (r < 0)
692 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
693 }
694 }
695
696 skip_controllers:
697 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
698 if (r < 0)
699 return r;
700
701 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
702 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
703
704 return 0;
705 }
706
707 static int mount_unified_cgroups(const char *dest) {
708 const char *p;
709 int r;
710
711 assert(dest);
712
713 p = prefix_roota(dest, "/sys/fs/cgroup");
714
715 (void) mkdir_p(p, 0755);
716
717 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
718 if (r < 0)
719 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
720 if (r > 0) {
721 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
722 if (access(p, F_OK) >= 0)
723 return 0;
724 if (errno != ENOENT)
725 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
726
727 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
728 return -EINVAL;
729 }
730
731 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
732 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
733
734 return 0;
735 }
736
/* Mounts the cgroup tree below dest: the unified hierarchy if unified_requested is set, the legacy
 * hierarchies otherwise. The remaining parameters are only used for the legacy setup. Returns whatever
 * the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
748
749 int mount_systemd_cgroup_writable(
750 const char *dest,
751 bool unified_requested) {
752
753 _cleanup_free_ char *own_cgroup_path = NULL;
754 const char *systemd_root, *systemd_own;
755 int r;
756
757 assert(dest);
758
759 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
760 if (r < 0)
761 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
762
763 /* If we are living in the top-level, then there's nothing to do... */
764 if (path_equal(own_cgroup_path, "/"))
765 return 0;
766
767 if (unified_requested) {
768 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
769 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
770 } else {
771 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
772 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
773 }
774
775 /* Make our own cgroup a (writable) bind mount */
776 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
777 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
778
779 /* And then remount the systemd cgroup root read-only */
780 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
781 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
782
783 return 0;
784 }
785
786 int setup_volatile_state(
787 const char *directory,
788 VolatileMode mode,
789 bool userns, uid_t uid_shift, uid_t uid_range,
790 const char *selinux_apifs_context) {
791
792 _cleanup_free_ char *buf = NULL;
793 const char *p, *options;
794 int r;
795
796 assert(directory);
797
798 if (mode != VOLATILE_STATE)
799 return 0;
800
801 /* --volatile=state means we simply overmount /var
802 with a tmpfs, and the rest read-only. */
803
804 r = bind_remount_recursive(directory, true);
805 if (r < 0)
806 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
807
808 p = prefix_roota(directory, "/var");
809 r = mkdir(p, 0755);
810 if (r < 0 && errno != EEXIST)
811 return log_error_errno(errno, "Failed to create %s: %m", directory);
812
813 options = "mode=755";
814 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
815 if (r < 0)
816 return log_oom();
817 if (r > 0)
818 options = buf;
819
820 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
821 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
822
823 return 0;
824 }
825
826 int setup_volatile(
827 const char *directory,
828 VolatileMode mode,
829 bool userns, uid_t uid_shift, uid_t uid_range,
830 const char *selinux_apifs_context) {
831
832 bool tmpfs_mounted = false, bind_mounted = false;
833 char template[] = "/tmp/nspawn-volatile-XXXXXX";
834 _cleanup_free_ char *buf = NULL;
835 const char *f, *t, *options;
836 int r;
837
838 assert(directory);
839
840 if (mode != VOLATILE_YES)
841 return 0;
842
843 /* --volatile=yes means we mount a tmpfs to the root dir, and
844 the original /usr to use inside it, and that read-only. */
845
846 if (!mkdtemp(template))
847 return log_error_errno(errno, "Failed to create temporary directory: %m");
848
849 options = "mode=755";
850 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
851 if (r < 0)
852 return log_oom();
853 if (r > 0)
854 options = buf;
855
856 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
857 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
858 goto fail;
859 }
860
861 tmpfs_mounted = true;
862
863 f = prefix_roota(directory, "/usr");
864 t = prefix_roota(template, "/usr");
865
866 r = mkdir(t, 0755);
867 if (r < 0 && errno != EEXIST) {
868 r = log_error_errno(errno, "Failed to create %s: %m", t);
869 goto fail;
870 }
871
872 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
873 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
874 goto fail;
875 }
876
877 bind_mounted = true;
878
879 r = bind_remount_recursive(t, true);
880 if (r < 0) {
881 log_error_errno(r, "Failed to remount %s read-only: %m", t);
882 goto fail;
883 }
884
885 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
886 r = log_error_errno(errno, "Failed to move root mount: %m");
887 goto fail;
888 }
889
890 (void) rmdir(template);
891
892 return 0;
893
894 fail:
895 if (bind_mounted)
896 (void) umount(t);
897
898 if (tmpfs_mounted)
899 (void) umount(template);
900 (void) rmdir(template);
901 return r;
902 }
903
904 VolatileMode volatile_mode_from_string(const char *s) {
905 int b;
906
907 if (isempty(s))
908 return _VOLATILE_MODE_INVALID;
909
910 b = parse_boolean(s);
911 if (b > 0)
912 return VOLATILE_YES;
913 if (b == 0)
914 return VOLATILE_NO;
915
916 if (streq(s, "state"))
917 return VOLATILE_STATE;
918
919 return _VOLATILE_MODE_INVALID;
920 }