]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
Merge pull request #1653 from keszybz/lz4-compress-time
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <linux/magic.h>
24
25 #include "util.h"
26 #include "rm-rf.h"
27 #include "strv.h"
28 #include "path-util.h"
29 #include "mkdir.h"
30 #include "label.h"
31 #include "set.h"
32 #include "cgroup-util.h"
33
34 #include "nspawn-mount.h"
35
36 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
37 CustomMount *c, *ret;
38
39 assert(l);
40 assert(n);
41 assert(t >= 0);
42 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
43
44 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
45 if (!c)
46 return NULL;
47
48 *l = c;
49 ret = *l + *n;
50 (*n)++;
51
52 *ret = (CustomMount) { .type = t };
53
54 return ret;
55 }
56
57 void custom_mount_free_all(CustomMount *l, unsigned n) {
58 unsigned i;
59
60 for (i = 0; i < n; i++) {
61 CustomMount *m = l + i;
62
63 free(m->source);
64 free(m->destination);
65 free(m->options);
66
67 if (m->work_dir) {
68 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
69 free(m->work_dir);
70 }
71
72 strv_free(m->lower);
73 }
74
75 free(l);
76 }
77
78 int custom_mount_compare(const void *a, const void *b) {
79 const CustomMount *x = a, *y = b;
80 int r;
81
82 r = path_compare(x->destination, y->destination);
83 if (r != 0)
84 return r;
85
86 if (x->type < y->type)
87 return -1;
88 if (x->type > y->type)
89 return 1;
90
91 return 0;
92 }
93
94 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
95 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
96 const char *p = s;
97 CustomMount *m;
98 int r;
99
100 assert(l);
101 assert(n);
102
103 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
104 if (r < 0)
105 return r;
106 if (r == 0)
107 return -EINVAL;
108
109 if (r == 1) {
110 destination = strdup(source);
111 if (!destination)
112 return -ENOMEM;
113 }
114
115 if (r == 2 && !isempty(p)) {
116 opts = strdup(p);
117 if (!opts)
118 return -ENOMEM;
119 }
120
121 if (!path_is_absolute(source))
122 return -EINVAL;
123
124 if (!path_is_absolute(destination))
125 return -EINVAL;
126
127 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
128 if (!m)
129 return log_oom();
130
131 m->source = source;
132 m->destination = destination;
133 m->read_only = read_only;
134 m->options = opts;
135
136 source = destination = opts = NULL;
137 return 0;
138 }
139
140 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
141 _cleanup_free_ char *path = NULL, *opts = NULL;
142 const char *p = s;
143 CustomMount *m;
144 int r;
145
146 assert(l);
147 assert(n);
148 assert(s);
149
150 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
151 if (r < 0)
152 return r;
153 if (r == 0)
154 return -EINVAL;
155
156 if (isempty(p))
157 opts = strdup("mode=0755");
158 else
159 opts = strdup(p);
160 if (!opts)
161 return -ENOMEM;
162
163 if (!path_is_absolute(path))
164 return -EINVAL;
165
166 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
167 if (!m)
168 return -ENOMEM;
169
170 m->destination = path;
171 m->options = opts;
172
173 path = opts = NULL;
174 return 0;
175 }
176
177 static int tmpfs_patch_options(
178 const char *options,
179 bool userns, uid_t uid_shift, uid_t uid_range,
180 const char *selinux_apifs_context,
181 char **ret) {
182
183 char *buf = NULL;
184
185 if (userns && uid_shift != 0) {
186 assert(uid_shift != UID_INVALID);
187
188 if (options)
189 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
190 else
191 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
192 if (!buf)
193 return -ENOMEM;
194
195 options = buf;
196 }
197
198 #ifdef HAVE_SELINUX
199 if (selinux_apifs_context) {
200 char *t;
201
202 if (options)
203 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
204 else
205 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
206 if (!t) {
207 free(buf);
208 return -ENOMEM;
209 }
210
211 free(buf);
212 buf = t;
213 }
214 #endif
215
216 *ret = buf;
217 return !!buf;
218 }
219
220 int mount_sysfs(const char *dest) {
221 const char *full, *top, *x;
222 int r;
223
224 top = prefix_roota(dest, "/sys");
225 r = path_check_fstype(top, SYSFS_MAGIC);
226 if (r < 0)
227 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
228 /* /sys might already be mounted as sysfs by the outer child in the
229 * !netns case. In this case, it's all good. Don't touch it because we
230 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
231 */
232 if (r > 0)
233 return 0;
234
235 full = prefix_roota(top, "/full");
236
237 (void) mkdir(full, 0755);
238
239 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
240 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
241
242 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
243 _cleanup_free_ char *from = NULL, *to = NULL;
244
245 from = prefix_root(full, x);
246 if (!from)
247 return log_oom();
248
249 to = prefix_root(top, x);
250 if (!to)
251 return log_oom();
252
253 (void) mkdir(to, 0755);
254
255 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
256 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
257
258 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
259 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
260 }
261
262 if (umount(full) < 0)
263 return log_error_errno(errno, "Failed to unmount %s: %m", full);
264
265 if (rmdir(full) < 0)
266 return log_error_errno(errno, "Failed to remove %s: %m", full);
267
268 x = prefix_roota(top, "/fs/kdbus");
269 (void) mkdir(x, 0755);
270
271 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
272 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
273
274 return 0;
275 }
276
277 int mount_all(const char *dest,
278 bool use_userns, bool in_userns,
279 bool use_netns,
280 uid_t uid_shift, uid_t uid_range,
281 const char *selinux_apifs_context) {
282
283 typedef struct MountPoint {
284 const char *what;
285 const char *where;
286 const char *type;
287 const char *options;
288 unsigned long flags;
289 bool fatal;
290 bool in_userns;
291 bool use_netns;
292 } MountPoint;
293
294 static const MountPoint mount_table[] = {
295 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
296 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
297 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
298 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
299 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
300 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
301 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
302 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
303 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
304 #ifdef HAVE_SELINUX
305 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
306 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
307 #endif
308 };
309
310 unsigned k;
311 int r;
312
313 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
314 _cleanup_free_ char *where = NULL, *options = NULL;
315 const char *o;
316
317 if (in_userns != mount_table[k].in_userns)
318 continue;
319
320 if (!use_netns && mount_table[k].use_netns)
321 continue;
322
323 where = prefix_root(dest, mount_table[k].where);
324 if (!where)
325 return log_oom();
326
327 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
328 if (r < 0 && r != -ENOENT)
329 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
330
331 /* Skip this entry if it is not a remount. */
332 if (mount_table[k].what && r > 0)
333 continue;
334
335 r = mkdir_p(where, 0755);
336 if (r < 0) {
337 if (mount_table[k].fatal)
338 return log_error_errno(r, "Failed to create directory %s: %m", where);
339
340 log_warning_errno(r, "Failed to create directory %s: %m", where);
341 continue;
342 }
343
344 o = mount_table[k].options;
345 if (streq_ptr(mount_table[k].type, "tmpfs")) {
346 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
347 if (r < 0)
348 return log_oom();
349 if (r > 0)
350 o = options;
351 }
352
353 if (mount(mount_table[k].what,
354 where,
355 mount_table[k].type,
356 mount_table[k].flags,
357 o) < 0) {
358
359 if (mount_table[k].fatal)
360 return log_error_errno(errno, "mount(%s) failed: %m", where);
361
362 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
363 }
364 }
365
366 return 0;
367 }
368
369 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
370 const char *p = options;
371 unsigned long flags = *mount_flags;
372 char *opts = NULL;
373
374 assert(options);
375
376 for (;;) {
377 _cleanup_free_ char *word = NULL;
378 int r = extract_first_word(&p, &word, ",", 0);
379 if (r < 0)
380 return log_error_errno(r, "Failed to extract mount option: %m");
381 if (r == 0)
382 break;
383
384 if (streq(word, "rbind"))
385 flags |= MS_REC;
386 else if (streq(word, "norbind"))
387 flags &= ~MS_REC;
388 else {
389 log_error("Invalid bind mount option: %s", word);
390 return -EINVAL;
391 }
392 }
393
394 *mount_flags = flags;
395 /* in the future mount_opts will hold string options for mount(2) */
396 *mount_opts = opts;
397
398 return 0;
399 }
400
401 static int mount_bind(const char *dest, CustomMount *m) {
402 struct stat source_st, dest_st;
403 const char *where;
404 unsigned long mount_flags = MS_BIND | MS_REC;
405 _cleanup_free_ char *mount_opts = NULL;
406 int r;
407
408 assert(m);
409
410 if (m->options) {
411 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
412 if (r < 0)
413 return r;
414 }
415
416 if (stat(m->source, &source_st) < 0)
417 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
418
419 where = prefix_roota(dest, m->destination);
420
421 if (stat(where, &dest_st) >= 0) {
422 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
423 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
424 return -EINVAL;
425 }
426
427 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
428 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
429 return -EINVAL;
430 }
431
432 } else if (errno == ENOENT) {
433 r = mkdir_parents_label(where, 0755);
434 if (r < 0)
435 return log_error_errno(r, "Failed to make parents of %s: %m", where);
436 } else {
437 log_error_errno(errno, "Failed to stat %s: %m", where);
438 return -errno;
439 }
440
441 /* Create the mount point. Any non-directory file can be
442 * mounted on any non-directory file (regular, fifo, socket,
443 * char, block).
444 */
445 if (S_ISDIR(source_st.st_mode))
446 r = mkdir_label(where, 0755);
447 else
448 r = touch(where);
449 if (r < 0 && r != -EEXIST)
450 return log_error_errno(r, "Failed to create mount point %s: %m", where);
451
452 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
453 return log_error_errno(errno, "mount(%s) failed: %m", where);
454
455 if (m->read_only) {
456 r = bind_remount_recursive(where, true);
457 if (r < 0)
458 return log_error_errno(r, "Read-only bind mount failed: %m");
459 }
460
461 return 0;
462 }
463
464 static int mount_tmpfs(
465 const char *dest,
466 CustomMount *m,
467 bool userns, uid_t uid_shift, uid_t uid_range,
468 const char *selinux_apifs_context) {
469
470 const char *where, *options;
471 _cleanup_free_ char *buf = NULL;
472 int r;
473
474 assert(dest);
475 assert(m);
476
477 where = prefix_roota(dest, m->destination);
478
479 r = mkdir_p_label(where, 0755);
480 if (r < 0 && r != -EEXIST)
481 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
482
483 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
484 if (r < 0)
485 return log_oom();
486 options = r > 0 ? buf : m->options;
487
488 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
489 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
490
491 return 0;
492 }
493
494 static char *joined_and_escaped_lower_dirs(char * const *lower) {
495 _cleanup_strv_free_ char **sv = NULL;
496
497 sv = strv_copy(lower);
498 if (!sv)
499 return NULL;
500
501 strv_reverse(sv);
502
503 if (!strv_shell_escape(sv, ",:"))
504 return NULL;
505
506 return strv_join(sv, ":");
507 }
508
509 static int mount_overlay(const char *dest, CustomMount *m) {
510 _cleanup_free_ char *lower = NULL;
511 const char *where, *options;
512 int r;
513
514 assert(dest);
515 assert(m);
516
517 where = prefix_roota(dest, m->destination);
518
519 r = mkdir_label(where, 0755);
520 if (r < 0 && r != -EEXIST)
521 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
522
523 (void) mkdir_p_label(m->source, 0755);
524
525 lower = joined_and_escaped_lower_dirs(m->lower);
526 if (!lower)
527 return log_oom();
528
529 if (m->read_only) {
530 _cleanup_free_ char *escaped_source = NULL;
531
532 escaped_source = shell_escape(m->source, ",:");
533 if (!escaped_source)
534 return log_oom();
535
536 options = strjoina("lowerdir=", escaped_source, ":", lower);
537 } else {
538 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
539
540 assert(m->work_dir);
541 (void) mkdir_label(m->work_dir, 0700);
542
543 escaped_source = shell_escape(m->source, ",:");
544 if (!escaped_source)
545 return log_oom();
546 escaped_work_dir = shell_escape(m->work_dir, ",:");
547 if (!escaped_work_dir)
548 return log_oom();
549
550 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
551 }
552
553 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
554 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
555
556 return 0;
557 }
558
559 int mount_custom(
560 const char *dest,
561 CustomMount *mounts, unsigned n,
562 bool userns, uid_t uid_shift, uid_t uid_range,
563 const char *selinux_apifs_context) {
564
565 unsigned i;
566 int r;
567
568 assert(dest);
569
570 for (i = 0; i < n; i++) {
571 CustomMount *m = mounts + i;
572
573 switch (m->type) {
574
575 case CUSTOM_MOUNT_BIND:
576 r = mount_bind(dest, m);
577 break;
578
579 case CUSTOM_MOUNT_TMPFS:
580 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
581 break;
582
583 case CUSTOM_MOUNT_OVERLAY:
584 r = mount_overlay(dest, m);
585 break;
586
587 default:
588 assert_not_reached("Unknown custom mount type");
589 }
590
591 if (r < 0)
592 return r;
593 }
594
595 return 0;
596 }
597
598 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
599 char *to;
600 int r;
601
602 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
603
604 r = path_is_mount_point(to, 0);
605 if (r < 0 && r != -ENOENT)
606 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
607 if (r > 0)
608 return 0;
609
610 mkdir_p(to, 0755);
611
612 /* The superblock mount options of the mount point need to be
613 * identical to the hosts', and hence writable... */
614 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
615 return log_error_errno(errno, "Failed to mount to %s: %m", to);
616
617 /* ... hence let's only make the bind mount read-only, not the
618 * superblock. */
619 if (read_only) {
620 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
621 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
622 }
623 return 1;
624 }
625
626 static int mount_legacy_cgroups(
627 const char *dest,
628 bool userns, uid_t uid_shift, uid_t uid_range,
629 const char *selinux_apifs_context) {
630
631 _cleanup_set_free_free_ Set *controllers = NULL;
632 const char *cgroup_root;
633 int r;
634
635 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
636
637 (void) mkdir_p(cgroup_root, 0755);
638
639 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
640 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
641 if (r < 0)
642 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
643 if (r == 0) {
644 _cleanup_free_ char *options = NULL;
645
646 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
647 if (r < 0)
648 return log_oom();
649
650 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
651 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
652 }
653
654 if (cg_unified() > 0)
655 goto skip_controllers;
656
657 controllers = set_new(&string_hash_ops);
658 if (!controllers)
659 return log_oom();
660
661 r = cg_kernel_controllers(controllers);
662 if (r < 0)
663 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
664
665 for (;;) {
666 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
667
668 controller = set_steal_first(controllers);
669 if (!controller)
670 break;
671
672 origin = prefix_root("/sys/fs/cgroup/", controller);
673 if (!origin)
674 return log_oom();
675
676 r = readlink_malloc(origin, &combined);
677 if (r == -EINVAL) {
678 /* Not a symbolic link, but directly a single cgroup hierarchy */
679
680 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
681 if (r < 0)
682 return r;
683
684 } else if (r < 0)
685 return log_error_errno(r, "Failed to read link %s: %m", origin);
686 else {
687 _cleanup_free_ char *target = NULL;
688
689 target = prefix_root(dest, origin);
690 if (!target)
691 return log_oom();
692
693 /* A symbolic link, a combination of controllers in one hierarchy */
694
695 if (!filename_is_valid(combined)) {
696 log_warning("Ignoring invalid combined hierarchy %s.", combined);
697 continue;
698 }
699
700 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
701 if (r < 0)
702 return r;
703
704 r = symlink_idempotent(combined, target);
705 if (r == -EINVAL) {
706 log_error("Invalid existing symlink for combined hierarchy");
707 return r;
708 }
709 if (r < 0)
710 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
711 }
712 }
713
714 skip_controllers:
715 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
716 if (r < 0)
717 return r;
718
719 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
720 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
721
722 return 0;
723 }
724
725 static int mount_unified_cgroups(const char *dest) {
726 const char *p;
727 int r;
728
729 assert(dest);
730
731 p = prefix_roota(dest, "/sys/fs/cgroup");
732
733 (void) mkdir_p(p, 0755);
734
735 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
736 if (r < 0)
737 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
738 if (r > 0) {
739 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
740 if (access(p, F_OK) >= 0)
741 return 0;
742 if (errno != ENOENT)
743 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
744
745 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
746 return -EINVAL;
747 }
748
749 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
750 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
751
752 return 0;
753 }
754
/* Sets up the cgroup mounts below dest: the unified hierarchy if unified_requested is true, the
 * legacy per-controller hierarchies otherwise. The remaining parameters are only used for the
 * legacy setup. Returns whatever the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
766
767 int mount_systemd_cgroup_writable(
768 const char *dest,
769 bool unified_requested) {
770
771 _cleanup_free_ char *own_cgroup_path = NULL;
772 const char *systemd_root, *systemd_own;
773 int r;
774
775 assert(dest);
776
777 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
778 if (r < 0)
779 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
780
781 /* If we are living in the top-level, then there's nothing to do... */
782 if (path_equal(own_cgroup_path, "/"))
783 return 0;
784
785 if (unified_requested) {
786 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
787 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
788 } else {
789 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
790 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
791 }
792
793 /* Make our own cgroup a (writable) bind mount */
794 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
795 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
796
797 /* And then remount the systemd cgroup root read-only */
798 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
799 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
800
801 return 0;
802 }
803
804 int setup_volatile_state(
805 const char *directory,
806 VolatileMode mode,
807 bool userns, uid_t uid_shift, uid_t uid_range,
808 const char *selinux_apifs_context) {
809
810 _cleanup_free_ char *buf = NULL;
811 const char *p, *options;
812 int r;
813
814 assert(directory);
815
816 if (mode != VOLATILE_STATE)
817 return 0;
818
819 /* --volatile=state means we simply overmount /var
820 with a tmpfs, and the rest read-only. */
821
822 r = bind_remount_recursive(directory, true);
823 if (r < 0)
824 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
825
826 p = prefix_roota(directory, "/var");
827 r = mkdir(p, 0755);
828 if (r < 0 && errno != EEXIST)
829 return log_error_errno(errno, "Failed to create %s: %m", directory);
830
831 options = "mode=755";
832 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
833 if (r < 0)
834 return log_oom();
835 if (r > 0)
836 options = buf;
837
838 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
839 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
840
841 return 0;
842 }
843
844 int setup_volatile(
845 const char *directory,
846 VolatileMode mode,
847 bool userns, uid_t uid_shift, uid_t uid_range,
848 const char *selinux_apifs_context) {
849
850 bool tmpfs_mounted = false, bind_mounted = false;
851 char template[] = "/tmp/nspawn-volatile-XXXXXX";
852 _cleanup_free_ char *buf = NULL;
853 const char *f, *t, *options;
854 int r;
855
856 assert(directory);
857
858 if (mode != VOLATILE_YES)
859 return 0;
860
861 /* --volatile=yes means we mount a tmpfs to the root dir, and
862 the original /usr to use inside it, and that read-only. */
863
864 if (!mkdtemp(template))
865 return log_error_errno(errno, "Failed to create temporary directory: %m");
866
867 options = "mode=755";
868 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
869 if (r < 0)
870 return log_oom();
871 if (r > 0)
872 options = buf;
873
874 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
875 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
876 goto fail;
877 }
878
879 tmpfs_mounted = true;
880
881 f = prefix_roota(directory, "/usr");
882 t = prefix_roota(template, "/usr");
883
884 r = mkdir(t, 0755);
885 if (r < 0 && errno != EEXIST) {
886 r = log_error_errno(errno, "Failed to create %s: %m", t);
887 goto fail;
888 }
889
890 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
891 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
892 goto fail;
893 }
894
895 bind_mounted = true;
896
897 r = bind_remount_recursive(t, true);
898 if (r < 0) {
899 log_error_errno(r, "Failed to remount %s read-only: %m", t);
900 goto fail;
901 }
902
903 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
904 r = log_error_errno(errno, "Failed to move root mount: %m");
905 goto fail;
906 }
907
908 (void) rmdir(template);
909
910 return 0;
911
912 fail:
913 if (bind_mounted)
914 (void) umount(t);
915
916 if (tmpfs_mounted)
917 (void) umount(template);
918 (void) rmdir(template);
919 return r;
920 }
921
922 VolatileMode volatile_mode_from_string(const char *s) {
923 int b;
924
925 if (isempty(s))
926 return _VOLATILE_MODE_INVALID;
927
928 b = parse_boolean(s);
929 if (b > 0)
930 return VOLATILE_YES;
931 if (b == 0)
932 return VOLATILE_NO;
933
934 if (streq(s, "state"))
935 return VOLATILE_STATE;
936
937 return _VOLATILE_MODE_INVALID;
938 }