]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
util-lib: split out globbing related calls into glob-util.[ch]
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <linux/magic.h>
24
25 #include "cgroup-util.h"
26 #include "escape.h"
27 #include "fs-util.h"
28 #include "label.h"
29 #include "mkdir.h"
30 #include "mount-util.h"
31 #include "nspawn-mount.h"
32 #include "parse-util.h"
33 #include "path-util.h"
34 #include "rm-rf.h"
35 #include "set.h"
36 #include "stat-util.h"
37 #include "string-util.h"
38 #include "strv.h"
39 #include "user-util.h"
40 #include "util.h"
41
42 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
43 CustomMount *c, *ret;
44
45 assert(l);
46 assert(n);
47 assert(t >= 0);
48 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
49
50 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
51 if (!c)
52 return NULL;
53
54 *l = c;
55 ret = *l + *n;
56 (*n)++;
57
58 *ret = (CustomMount) { .type = t };
59
60 return ret;
61 }
62
63 void custom_mount_free_all(CustomMount *l, unsigned n) {
64 unsigned i;
65
66 for (i = 0; i < n; i++) {
67 CustomMount *m = l + i;
68
69 free(m->source);
70 free(m->destination);
71 free(m->options);
72
73 if (m->work_dir) {
74 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
75 free(m->work_dir);
76 }
77
78 strv_free(m->lower);
79 }
80
81 free(l);
82 }
83
84 int custom_mount_compare(const void *a, const void *b) {
85 const CustomMount *x = a, *y = b;
86 int r;
87
88 r = path_compare(x->destination, y->destination);
89 if (r != 0)
90 return r;
91
92 if (x->type < y->type)
93 return -1;
94 if (x->type > y->type)
95 return 1;
96
97 return 0;
98 }
99
100 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
101 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
102 const char *p = s;
103 CustomMount *m;
104 int r;
105
106 assert(l);
107 assert(n);
108
109 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
110 if (r < 0)
111 return r;
112 if (r == 0)
113 return -EINVAL;
114
115 if (r == 1) {
116 destination = strdup(source);
117 if (!destination)
118 return -ENOMEM;
119 }
120
121 if (r == 2 && !isempty(p)) {
122 opts = strdup(p);
123 if (!opts)
124 return -ENOMEM;
125 }
126
127 if (!path_is_absolute(source))
128 return -EINVAL;
129
130 if (!path_is_absolute(destination))
131 return -EINVAL;
132
133 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
134 if (!m)
135 return log_oom();
136
137 m->source = source;
138 m->destination = destination;
139 m->read_only = read_only;
140 m->options = opts;
141
142 source = destination = opts = NULL;
143 return 0;
144 }
145
146 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
147 _cleanup_free_ char *path = NULL, *opts = NULL;
148 const char *p = s;
149 CustomMount *m;
150 int r;
151
152 assert(l);
153 assert(n);
154 assert(s);
155
156 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
157 if (r < 0)
158 return r;
159 if (r == 0)
160 return -EINVAL;
161
162 if (isempty(p))
163 opts = strdup("mode=0755");
164 else
165 opts = strdup(p);
166 if (!opts)
167 return -ENOMEM;
168
169 if (!path_is_absolute(path))
170 return -EINVAL;
171
172 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
173 if (!m)
174 return -ENOMEM;
175
176 m->destination = path;
177 m->options = opts;
178
179 path = opts = NULL;
180 return 0;
181 }
182
183 static int tmpfs_patch_options(
184 const char *options,
185 bool userns, uid_t uid_shift, uid_t uid_range,
186 const char *selinux_apifs_context,
187 char **ret) {
188
189 char *buf = NULL;
190
191 if (userns && uid_shift != 0) {
192 assert(uid_shift != UID_INVALID);
193
194 if (options)
195 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
196 else
197 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
198 if (!buf)
199 return -ENOMEM;
200
201 options = buf;
202 }
203
204 #ifdef HAVE_SELINUX
205 if (selinux_apifs_context) {
206 char *t;
207
208 if (options)
209 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
210 else
211 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
212 if (!t) {
213 free(buf);
214 return -ENOMEM;
215 }
216
217 free(buf);
218 buf = t;
219 }
220 #endif
221
222 *ret = buf;
223 return !!buf;
224 }
225
226 int mount_sysfs(const char *dest) {
227 const char *full, *top, *x;
228 int r;
229
230 top = prefix_roota(dest, "/sys");
231 r = path_check_fstype(top, SYSFS_MAGIC);
232 if (r < 0)
233 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
234 /* /sys might already be mounted as sysfs by the outer child in the
235 * !netns case. In this case, it's all good. Don't touch it because we
236 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
237 */
238 if (r > 0)
239 return 0;
240
241 full = prefix_roota(top, "/full");
242
243 (void) mkdir(full, 0755);
244
245 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
246 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
247
248 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
249 _cleanup_free_ char *from = NULL, *to = NULL;
250
251 from = prefix_root(full, x);
252 if (!from)
253 return log_oom();
254
255 to = prefix_root(top, x);
256 if (!to)
257 return log_oom();
258
259 (void) mkdir(to, 0755);
260
261 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
262 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
263
264 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
265 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
266 }
267
268 if (umount(full) < 0)
269 return log_error_errno(errno, "Failed to unmount %s: %m", full);
270
271 if (rmdir(full) < 0)
272 return log_error_errno(errno, "Failed to remove %s: %m", full);
273
274 x = prefix_roota(top, "/fs/kdbus");
275 (void) mkdir(x, 0755);
276
277 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
278 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
279
280 return 0;
281 }
282
283 int mount_all(const char *dest,
284 bool use_userns, bool in_userns,
285 bool use_netns,
286 uid_t uid_shift, uid_t uid_range,
287 const char *selinux_apifs_context) {
288
289 typedef struct MountPoint {
290 const char *what;
291 const char *where;
292 const char *type;
293 const char *options;
294 unsigned long flags;
295 bool fatal;
296 bool in_userns;
297 bool use_netns;
298 } MountPoint;
299
300 static const MountPoint mount_table[] = {
301 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
302 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
303 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
304 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
305 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
306 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
307 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
308 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
309 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
310 #ifdef HAVE_SELINUX
311 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
312 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
313 #endif
314 };
315
316 unsigned k;
317 int r;
318
319 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
320 _cleanup_free_ char *where = NULL, *options = NULL;
321 const char *o;
322
323 if (in_userns != mount_table[k].in_userns)
324 continue;
325
326 if (!use_netns && mount_table[k].use_netns)
327 continue;
328
329 where = prefix_root(dest, mount_table[k].where);
330 if (!where)
331 return log_oom();
332
333 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
334 if (r < 0 && r != -ENOENT)
335 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
336
337 /* Skip this entry if it is not a remount. */
338 if (mount_table[k].what && r > 0)
339 continue;
340
341 r = mkdir_p(where, 0755);
342 if (r < 0) {
343 if (mount_table[k].fatal)
344 return log_error_errno(r, "Failed to create directory %s: %m", where);
345
346 log_warning_errno(r, "Failed to create directory %s: %m", where);
347 continue;
348 }
349
350 o = mount_table[k].options;
351 if (streq_ptr(mount_table[k].type, "tmpfs")) {
352 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
353 if (r < 0)
354 return log_oom();
355 if (r > 0)
356 o = options;
357 }
358
359 if (mount(mount_table[k].what,
360 where,
361 mount_table[k].type,
362 mount_table[k].flags,
363 o) < 0) {
364
365 if (mount_table[k].fatal)
366 return log_error_errno(errno, "mount(%s) failed: %m", where);
367
368 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
369 }
370 }
371
372 return 0;
373 }
374
375 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
376 const char *p = options;
377 unsigned long flags = *mount_flags;
378 char *opts = NULL;
379
380 assert(options);
381
382 for (;;) {
383 _cleanup_free_ char *word = NULL;
384 int r = extract_first_word(&p, &word, ",", 0);
385 if (r < 0)
386 return log_error_errno(r, "Failed to extract mount option: %m");
387 if (r == 0)
388 break;
389
390 if (streq(word, "rbind"))
391 flags |= MS_REC;
392 else if (streq(word, "norbind"))
393 flags &= ~MS_REC;
394 else {
395 log_error("Invalid bind mount option: %s", word);
396 return -EINVAL;
397 }
398 }
399
400 *mount_flags = flags;
401 /* in the future mount_opts will hold string options for mount(2) */
402 *mount_opts = opts;
403
404 return 0;
405 }
406
407 static int mount_bind(const char *dest, CustomMount *m) {
408 struct stat source_st, dest_st;
409 const char *where;
410 unsigned long mount_flags = MS_BIND | MS_REC;
411 _cleanup_free_ char *mount_opts = NULL;
412 int r;
413
414 assert(m);
415
416 if (m->options) {
417 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
418 if (r < 0)
419 return r;
420 }
421
422 if (stat(m->source, &source_st) < 0)
423 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
424
425 where = prefix_roota(dest, m->destination);
426
427 if (stat(where, &dest_st) >= 0) {
428 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
429 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
430 return -EINVAL;
431 }
432
433 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
434 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
435 return -EINVAL;
436 }
437
438 } else if (errno == ENOENT) {
439 r = mkdir_parents_label(where, 0755);
440 if (r < 0)
441 return log_error_errno(r, "Failed to make parents of %s: %m", where);
442 } else {
443 log_error_errno(errno, "Failed to stat %s: %m", where);
444 return -errno;
445 }
446
447 /* Create the mount point. Any non-directory file can be
448 * mounted on any non-directory file (regular, fifo, socket,
449 * char, block).
450 */
451 if (S_ISDIR(source_st.st_mode))
452 r = mkdir_label(where, 0755);
453 else
454 r = touch(where);
455 if (r < 0 && r != -EEXIST)
456 return log_error_errno(r, "Failed to create mount point %s: %m", where);
457
458 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
459 return log_error_errno(errno, "mount(%s) failed: %m", where);
460
461 if (m->read_only) {
462 r = bind_remount_recursive(where, true);
463 if (r < 0)
464 return log_error_errno(r, "Read-only bind mount failed: %m");
465 }
466
467 return 0;
468 }
469
470 static int mount_tmpfs(
471 const char *dest,
472 CustomMount *m,
473 bool userns, uid_t uid_shift, uid_t uid_range,
474 const char *selinux_apifs_context) {
475
476 const char *where, *options;
477 _cleanup_free_ char *buf = NULL;
478 int r;
479
480 assert(dest);
481 assert(m);
482
483 where = prefix_roota(dest, m->destination);
484
485 r = mkdir_p_label(where, 0755);
486 if (r < 0 && r != -EEXIST)
487 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
488
489 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
490 if (r < 0)
491 return log_oom();
492 options = r > 0 ? buf : m->options;
493
494 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
495 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
496
497 return 0;
498 }
499
500 static char *joined_and_escaped_lower_dirs(char * const *lower) {
501 _cleanup_strv_free_ char **sv = NULL;
502
503 sv = strv_copy(lower);
504 if (!sv)
505 return NULL;
506
507 strv_reverse(sv);
508
509 if (!strv_shell_escape(sv, ",:"))
510 return NULL;
511
512 return strv_join(sv, ":");
513 }
514
515 static int mount_overlay(const char *dest, CustomMount *m) {
516 _cleanup_free_ char *lower = NULL;
517 const char *where, *options;
518 int r;
519
520 assert(dest);
521 assert(m);
522
523 where = prefix_roota(dest, m->destination);
524
525 r = mkdir_label(where, 0755);
526 if (r < 0 && r != -EEXIST)
527 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
528
529 (void) mkdir_p_label(m->source, 0755);
530
531 lower = joined_and_escaped_lower_dirs(m->lower);
532 if (!lower)
533 return log_oom();
534
535 if (m->read_only) {
536 _cleanup_free_ char *escaped_source = NULL;
537
538 escaped_source = shell_escape(m->source, ",:");
539 if (!escaped_source)
540 return log_oom();
541
542 options = strjoina("lowerdir=", escaped_source, ":", lower);
543 } else {
544 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
545
546 assert(m->work_dir);
547 (void) mkdir_label(m->work_dir, 0700);
548
549 escaped_source = shell_escape(m->source, ",:");
550 if (!escaped_source)
551 return log_oom();
552 escaped_work_dir = shell_escape(m->work_dir, ",:");
553 if (!escaped_work_dir)
554 return log_oom();
555
556 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
557 }
558
559 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
560 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
561
562 return 0;
563 }
564
565 int mount_custom(
566 const char *dest,
567 CustomMount *mounts, unsigned n,
568 bool userns, uid_t uid_shift, uid_t uid_range,
569 const char *selinux_apifs_context) {
570
571 unsigned i;
572 int r;
573
574 assert(dest);
575
576 for (i = 0; i < n; i++) {
577 CustomMount *m = mounts + i;
578
579 switch (m->type) {
580
581 case CUSTOM_MOUNT_BIND:
582 r = mount_bind(dest, m);
583 break;
584
585 case CUSTOM_MOUNT_TMPFS:
586 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
587 break;
588
589 case CUSTOM_MOUNT_OVERLAY:
590 r = mount_overlay(dest, m);
591 break;
592
593 default:
594 assert_not_reached("Unknown custom mount type");
595 }
596
597 if (r < 0)
598 return r;
599 }
600
601 return 0;
602 }
603
604 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
605 char *to;
606 int r;
607
608 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
609
610 r = path_is_mount_point(to, 0);
611 if (r < 0 && r != -ENOENT)
612 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
613 if (r > 0)
614 return 0;
615
616 mkdir_p(to, 0755);
617
618 /* The superblock mount options of the mount point need to be
619 * identical to the hosts', and hence writable... */
620 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
621 return log_error_errno(errno, "Failed to mount to %s: %m", to);
622
623 /* ... hence let's only make the bind mount read-only, not the
624 * superblock. */
625 if (read_only) {
626 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
627 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
628 }
629 return 1;
630 }
631
632 static int mount_legacy_cgroups(
633 const char *dest,
634 bool userns, uid_t uid_shift, uid_t uid_range,
635 const char *selinux_apifs_context) {
636
637 _cleanup_set_free_free_ Set *controllers = NULL;
638 const char *cgroup_root;
639 int r;
640
641 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
642
643 (void) mkdir_p(cgroup_root, 0755);
644
645 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
646 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
647 if (r < 0)
648 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
649 if (r == 0) {
650 _cleanup_free_ char *options = NULL;
651
652 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
653 if (r < 0)
654 return log_oom();
655
656 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
657 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
658 }
659
660 if (cg_unified() > 0)
661 goto skip_controllers;
662
663 controllers = set_new(&string_hash_ops);
664 if (!controllers)
665 return log_oom();
666
667 r = cg_kernel_controllers(controllers);
668 if (r < 0)
669 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
670
671 for (;;) {
672 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
673
674 controller = set_steal_first(controllers);
675 if (!controller)
676 break;
677
678 origin = prefix_root("/sys/fs/cgroup/", controller);
679 if (!origin)
680 return log_oom();
681
682 r = readlink_malloc(origin, &combined);
683 if (r == -EINVAL) {
684 /* Not a symbolic link, but directly a single cgroup hierarchy */
685
686 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
687 if (r < 0)
688 return r;
689
690 } else if (r < 0)
691 return log_error_errno(r, "Failed to read link %s: %m", origin);
692 else {
693 _cleanup_free_ char *target = NULL;
694
695 target = prefix_root(dest, origin);
696 if (!target)
697 return log_oom();
698
699 /* A symbolic link, a combination of controllers in one hierarchy */
700
701 if (!filename_is_valid(combined)) {
702 log_warning("Ignoring invalid combined hierarchy %s.", combined);
703 continue;
704 }
705
706 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
707 if (r < 0)
708 return r;
709
710 r = symlink_idempotent(combined, target);
711 if (r == -EINVAL) {
712 log_error("Invalid existing symlink for combined hierarchy");
713 return r;
714 }
715 if (r < 0)
716 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
717 }
718 }
719
720 skip_controllers:
721 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
722 if (r < 0)
723 return r;
724
725 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
726 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
727
728 return 0;
729 }
730
731 static int mount_unified_cgroups(const char *dest) {
732 const char *p;
733 int r;
734
735 assert(dest);
736
737 p = prefix_roota(dest, "/sys/fs/cgroup");
738
739 (void) mkdir_p(p, 0755);
740
741 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
742 if (r < 0)
743 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
744 if (r > 0) {
745 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
746 if (access(p, F_OK) >= 0)
747 return 0;
748 if (errno != ENOENT)
749 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
750
751 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
752 return -EINVAL;
753 }
754
755 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
756 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
757
758 return 0;
759 }
760
/* Mounts the cgroup tree below 'dest': the unified hierarchy if
 * 'unified_requested' is set, the legacy per-controller layout otherwise.
 * Returns the result of the respective helper. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        if (!unified_requested)
                return mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);

        return mount_unified_cgroups(dest);
}
772
773 int mount_systemd_cgroup_writable(
774 const char *dest,
775 bool unified_requested) {
776
777 _cleanup_free_ char *own_cgroup_path = NULL;
778 const char *systemd_root, *systemd_own;
779 int r;
780
781 assert(dest);
782
783 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
784 if (r < 0)
785 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
786
787 /* If we are living in the top-level, then there's nothing to do... */
788 if (path_equal(own_cgroup_path, "/"))
789 return 0;
790
791 if (unified_requested) {
792 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
793 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
794 } else {
795 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
796 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
797 }
798
799 /* Make our own cgroup a (writable) bind mount */
800 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
801 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
802
803 /* And then remount the systemd cgroup root read-only */
804 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
805 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
806
807 return 0;
808 }
809
810 int setup_volatile_state(
811 const char *directory,
812 VolatileMode mode,
813 bool userns, uid_t uid_shift, uid_t uid_range,
814 const char *selinux_apifs_context) {
815
816 _cleanup_free_ char *buf = NULL;
817 const char *p, *options;
818 int r;
819
820 assert(directory);
821
822 if (mode != VOLATILE_STATE)
823 return 0;
824
825 /* --volatile=state means we simply overmount /var
826 with a tmpfs, and the rest read-only. */
827
828 r = bind_remount_recursive(directory, true);
829 if (r < 0)
830 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
831
832 p = prefix_roota(directory, "/var");
833 r = mkdir(p, 0755);
834 if (r < 0 && errno != EEXIST)
835 return log_error_errno(errno, "Failed to create %s: %m", directory);
836
837 options = "mode=755";
838 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
839 if (r < 0)
840 return log_oom();
841 if (r > 0)
842 options = buf;
843
844 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
845 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
846
847 return 0;
848 }
849
850 int setup_volatile(
851 const char *directory,
852 VolatileMode mode,
853 bool userns, uid_t uid_shift, uid_t uid_range,
854 const char *selinux_apifs_context) {
855
856 bool tmpfs_mounted = false, bind_mounted = false;
857 char template[] = "/tmp/nspawn-volatile-XXXXXX";
858 _cleanup_free_ char *buf = NULL;
859 const char *f, *t, *options;
860 int r;
861
862 assert(directory);
863
864 if (mode != VOLATILE_YES)
865 return 0;
866
867 /* --volatile=yes means we mount a tmpfs to the root dir, and
868 the original /usr to use inside it, and that read-only. */
869
870 if (!mkdtemp(template))
871 return log_error_errno(errno, "Failed to create temporary directory: %m");
872
873 options = "mode=755";
874 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
875 if (r < 0)
876 return log_oom();
877 if (r > 0)
878 options = buf;
879
880 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
881 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
882 goto fail;
883 }
884
885 tmpfs_mounted = true;
886
887 f = prefix_roota(directory, "/usr");
888 t = prefix_roota(template, "/usr");
889
890 r = mkdir(t, 0755);
891 if (r < 0 && errno != EEXIST) {
892 r = log_error_errno(errno, "Failed to create %s: %m", t);
893 goto fail;
894 }
895
896 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
897 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
898 goto fail;
899 }
900
901 bind_mounted = true;
902
903 r = bind_remount_recursive(t, true);
904 if (r < 0) {
905 log_error_errno(r, "Failed to remount %s read-only: %m", t);
906 goto fail;
907 }
908
909 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
910 r = log_error_errno(errno, "Failed to move root mount: %m");
911 goto fail;
912 }
913
914 (void) rmdir(template);
915
916 return 0;
917
918 fail:
919 if (bind_mounted)
920 (void) umount(t);
921
922 if (tmpfs_mounted)
923 (void) umount(template);
924 (void) rmdir(template);
925 return r;
926 }
927
928 VolatileMode volatile_mode_from_string(const char *s) {
929 int b;
930
931 if (isempty(s))
932 return _VOLATILE_MODE_INVALID;
933
934 b = parse_boolean(s);
935 if (b > 0)
936 return VOLATILE_YES;
937 if (b == 0)
938 return VOLATILE_NO;
939
940 if (streq(s, "state"))
941 return VOLATILE_STATE;
942
943 return _VOLATILE_MODE_INVALID;
944 }