]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
util-lib: move mount related utility calls to mount-util.[ch]
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <linux/magic.h>
24
25 #include "cgroup-util.h"
26 #include "escape.h"
27 #include "label.h"
28 #include "mkdir.h"
29 #include "mount-util.h"
30 #include "nspawn-mount.h"
31 #include "parse-util.h"
32 #include "path-util.h"
33 #include "rm-rf.h"
34 #include "set.h"
35 #include "string-util.h"
36 #include "strv.h"
37 #include "util.h"
38
39 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
40 CustomMount *c, *ret;
41
42 assert(l);
43 assert(n);
44 assert(t >= 0);
45 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
46
47 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
48 if (!c)
49 return NULL;
50
51 *l = c;
52 ret = *l + *n;
53 (*n)++;
54
55 *ret = (CustomMount) { .type = t };
56
57 return ret;
58 }
59
60 void custom_mount_free_all(CustomMount *l, unsigned n) {
61 unsigned i;
62
63 for (i = 0; i < n; i++) {
64 CustomMount *m = l + i;
65
66 free(m->source);
67 free(m->destination);
68 free(m->options);
69
70 if (m->work_dir) {
71 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
72 free(m->work_dir);
73 }
74
75 strv_free(m->lower);
76 }
77
78 free(l);
79 }
80
81 int custom_mount_compare(const void *a, const void *b) {
82 const CustomMount *x = a, *y = b;
83 int r;
84
85 r = path_compare(x->destination, y->destination);
86 if (r != 0)
87 return r;
88
89 if (x->type < y->type)
90 return -1;
91 if (x->type > y->type)
92 return 1;
93
94 return 0;
95 }
96
97 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
98 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
99 const char *p = s;
100 CustomMount *m;
101 int r;
102
103 assert(l);
104 assert(n);
105
106 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
107 if (r < 0)
108 return r;
109 if (r == 0)
110 return -EINVAL;
111
112 if (r == 1) {
113 destination = strdup(source);
114 if (!destination)
115 return -ENOMEM;
116 }
117
118 if (r == 2 && !isempty(p)) {
119 opts = strdup(p);
120 if (!opts)
121 return -ENOMEM;
122 }
123
124 if (!path_is_absolute(source))
125 return -EINVAL;
126
127 if (!path_is_absolute(destination))
128 return -EINVAL;
129
130 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
131 if (!m)
132 return log_oom();
133
134 m->source = source;
135 m->destination = destination;
136 m->read_only = read_only;
137 m->options = opts;
138
139 source = destination = opts = NULL;
140 return 0;
141 }
142
143 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
144 _cleanup_free_ char *path = NULL, *opts = NULL;
145 const char *p = s;
146 CustomMount *m;
147 int r;
148
149 assert(l);
150 assert(n);
151 assert(s);
152
153 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
154 if (r < 0)
155 return r;
156 if (r == 0)
157 return -EINVAL;
158
159 if (isempty(p))
160 opts = strdup("mode=0755");
161 else
162 opts = strdup(p);
163 if (!opts)
164 return -ENOMEM;
165
166 if (!path_is_absolute(path))
167 return -EINVAL;
168
169 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
170 if (!m)
171 return -ENOMEM;
172
173 m->destination = path;
174 m->options = opts;
175
176 path = opts = NULL;
177 return 0;
178 }
179
180 static int tmpfs_patch_options(
181 const char *options,
182 bool userns, uid_t uid_shift, uid_t uid_range,
183 const char *selinux_apifs_context,
184 char **ret) {
185
186 char *buf = NULL;
187
188 if (userns && uid_shift != 0) {
189 assert(uid_shift != UID_INVALID);
190
191 if (options)
192 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
193 else
194 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
195 if (!buf)
196 return -ENOMEM;
197
198 options = buf;
199 }
200
201 #ifdef HAVE_SELINUX
202 if (selinux_apifs_context) {
203 char *t;
204
205 if (options)
206 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
207 else
208 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
209 if (!t) {
210 free(buf);
211 return -ENOMEM;
212 }
213
214 free(buf);
215 buf = t;
216 }
217 #endif
218
219 *ret = buf;
220 return !!buf;
221 }
222
223 int mount_sysfs(const char *dest) {
224 const char *full, *top, *x;
225 int r;
226
227 top = prefix_roota(dest, "/sys");
228 r = path_check_fstype(top, SYSFS_MAGIC);
229 if (r < 0)
230 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
231 /* /sys might already be mounted as sysfs by the outer child in the
232 * !netns case. In this case, it's all good. Don't touch it because we
233 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
234 */
235 if (r > 0)
236 return 0;
237
238 full = prefix_roota(top, "/full");
239
240 (void) mkdir(full, 0755);
241
242 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
243 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
244
245 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
246 _cleanup_free_ char *from = NULL, *to = NULL;
247
248 from = prefix_root(full, x);
249 if (!from)
250 return log_oom();
251
252 to = prefix_root(top, x);
253 if (!to)
254 return log_oom();
255
256 (void) mkdir(to, 0755);
257
258 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
259 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
260
261 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
262 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
263 }
264
265 if (umount(full) < 0)
266 return log_error_errno(errno, "Failed to unmount %s: %m", full);
267
268 if (rmdir(full) < 0)
269 return log_error_errno(errno, "Failed to remove %s: %m", full);
270
271 x = prefix_roota(top, "/fs/kdbus");
272 (void) mkdir(x, 0755);
273
274 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
275 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
276
277 return 0;
278 }
279
280 int mount_all(const char *dest,
281 bool use_userns, bool in_userns,
282 bool use_netns,
283 uid_t uid_shift, uid_t uid_range,
284 const char *selinux_apifs_context) {
285
286 typedef struct MountPoint {
287 const char *what;
288 const char *where;
289 const char *type;
290 const char *options;
291 unsigned long flags;
292 bool fatal;
293 bool in_userns;
294 bool use_netns;
295 } MountPoint;
296
297 static const MountPoint mount_table[] = {
298 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
299 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
300 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
301 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
302 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
303 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
304 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
305 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
306 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
307 #ifdef HAVE_SELINUX
308 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
309 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
310 #endif
311 };
312
313 unsigned k;
314 int r;
315
316 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
317 _cleanup_free_ char *where = NULL, *options = NULL;
318 const char *o;
319
320 if (in_userns != mount_table[k].in_userns)
321 continue;
322
323 if (!use_netns && mount_table[k].use_netns)
324 continue;
325
326 where = prefix_root(dest, mount_table[k].where);
327 if (!where)
328 return log_oom();
329
330 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
331 if (r < 0 && r != -ENOENT)
332 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
333
334 /* Skip this entry if it is not a remount. */
335 if (mount_table[k].what && r > 0)
336 continue;
337
338 r = mkdir_p(where, 0755);
339 if (r < 0) {
340 if (mount_table[k].fatal)
341 return log_error_errno(r, "Failed to create directory %s: %m", where);
342
343 log_warning_errno(r, "Failed to create directory %s: %m", where);
344 continue;
345 }
346
347 o = mount_table[k].options;
348 if (streq_ptr(mount_table[k].type, "tmpfs")) {
349 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
350 if (r < 0)
351 return log_oom();
352 if (r > 0)
353 o = options;
354 }
355
356 if (mount(mount_table[k].what,
357 where,
358 mount_table[k].type,
359 mount_table[k].flags,
360 o) < 0) {
361
362 if (mount_table[k].fatal)
363 return log_error_errno(errno, "mount(%s) failed: %m", where);
364
365 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
366 }
367 }
368
369 return 0;
370 }
371
372 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
373 const char *p = options;
374 unsigned long flags = *mount_flags;
375 char *opts = NULL;
376
377 assert(options);
378
379 for (;;) {
380 _cleanup_free_ char *word = NULL;
381 int r = extract_first_word(&p, &word, ",", 0);
382 if (r < 0)
383 return log_error_errno(r, "Failed to extract mount option: %m");
384 if (r == 0)
385 break;
386
387 if (streq(word, "rbind"))
388 flags |= MS_REC;
389 else if (streq(word, "norbind"))
390 flags &= ~MS_REC;
391 else {
392 log_error("Invalid bind mount option: %s", word);
393 return -EINVAL;
394 }
395 }
396
397 *mount_flags = flags;
398 /* in the future mount_opts will hold string options for mount(2) */
399 *mount_opts = opts;
400
401 return 0;
402 }
403
404 static int mount_bind(const char *dest, CustomMount *m) {
405 struct stat source_st, dest_st;
406 const char *where;
407 unsigned long mount_flags = MS_BIND | MS_REC;
408 _cleanup_free_ char *mount_opts = NULL;
409 int r;
410
411 assert(m);
412
413 if (m->options) {
414 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
415 if (r < 0)
416 return r;
417 }
418
419 if (stat(m->source, &source_st) < 0)
420 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
421
422 where = prefix_roota(dest, m->destination);
423
424 if (stat(where, &dest_st) >= 0) {
425 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
426 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
427 return -EINVAL;
428 }
429
430 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
431 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
432 return -EINVAL;
433 }
434
435 } else if (errno == ENOENT) {
436 r = mkdir_parents_label(where, 0755);
437 if (r < 0)
438 return log_error_errno(r, "Failed to make parents of %s: %m", where);
439 } else {
440 log_error_errno(errno, "Failed to stat %s: %m", where);
441 return -errno;
442 }
443
444 /* Create the mount point. Any non-directory file can be
445 * mounted on any non-directory file (regular, fifo, socket,
446 * char, block).
447 */
448 if (S_ISDIR(source_st.st_mode))
449 r = mkdir_label(where, 0755);
450 else
451 r = touch(where);
452 if (r < 0 && r != -EEXIST)
453 return log_error_errno(r, "Failed to create mount point %s: %m", where);
454
455 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
456 return log_error_errno(errno, "mount(%s) failed: %m", where);
457
458 if (m->read_only) {
459 r = bind_remount_recursive(where, true);
460 if (r < 0)
461 return log_error_errno(r, "Read-only bind mount failed: %m");
462 }
463
464 return 0;
465 }
466
467 static int mount_tmpfs(
468 const char *dest,
469 CustomMount *m,
470 bool userns, uid_t uid_shift, uid_t uid_range,
471 const char *selinux_apifs_context) {
472
473 const char *where, *options;
474 _cleanup_free_ char *buf = NULL;
475 int r;
476
477 assert(dest);
478 assert(m);
479
480 where = prefix_roota(dest, m->destination);
481
482 r = mkdir_p_label(where, 0755);
483 if (r < 0 && r != -EEXIST)
484 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
485
486 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
487 if (r < 0)
488 return log_oom();
489 options = r > 0 ? buf : m->options;
490
491 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
492 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
493
494 return 0;
495 }
496
497 static char *joined_and_escaped_lower_dirs(char * const *lower) {
498 _cleanup_strv_free_ char **sv = NULL;
499
500 sv = strv_copy(lower);
501 if (!sv)
502 return NULL;
503
504 strv_reverse(sv);
505
506 if (!strv_shell_escape(sv, ",:"))
507 return NULL;
508
509 return strv_join(sv, ":");
510 }
511
512 static int mount_overlay(const char *dest, CustomMount *m) {
513 _cleanup_free_ char *lower = NULL;
514 const char *where, *options;
515 int r;
516
517 assert(dest);
518 assert(m);
519
520 where = prefix_roota(dest, m->destination);
521
522 r = mkdir_label(where, 0755);
523 if (r < 0 && r != -EEXIST)
524 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
525
526 (void) mkdir_p_label(m->source, 0755);
527
528 lower = joined_and_escaped_lower_dirs(m->lower);
529 if (!lower)
530 return log_oom();
531
532 if (m->read_only) {
533 _cleanup_free_ char *escaped_source = NULL;
534
535 escaped_source = shell_escape(m->source, ",:");
536 if (!escaped_source)
537 return log_oom();
538
539 options = strjoina("lowerdir=", escaped_source, ":", lower);
540 } else {
541 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
542
543 assert(m->work_dir);
544 (void) mkdir_label(m->work_dir, 0700);
545
546 escaped_source = shell_escape(m->source, ",:");
547 if (!escaped_source)
548 return log_oom();
549 escaped_work_dir = shell_escape(m->work_dir, ",:");
550 if (!escaped_work_dir)
551 return log_oom();
552
553 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
554 }
555
556 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
557 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
558
559 return 0;
560 }
561
562 int mount_custom(
563 const char *dest,
564 CustomMount *mounts, unsigned n,
565 bool userns, uid_t uid_shift, uid_t uid_range,
566 const char *selinux_apifs_context) {
567
568 unsigned i;
569 int r;
570
571 assert(dest);
572
573 for (i = 0; i < n; i++) {
574 CustomMount *m = mounts + i;
575
576 switch (m->type) {
577
578 case CUSTOM_MOUNT_BIND:
579 r = mount_bind(dest, m);
580 break;
581
582 case CUSTOM_MOUNT_TMPFS:
583 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
584 break;
585
586 case CUSTOM_MOUNT_OVERLAY:
587 r = mount_overlay(dest, m);
588 break;
589
590 default:
591 assert_not_reached("Unknown custom mount type");
592 }
593
594 if (r < 0)
595 return r;
596 }
597
598 return 0;
599 }
600
601 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
602 char *to;
603 int r;
604
605 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
606
607 r = path_is_mount_point(to, 0);
608 if (r < 0 && r != -ENOENT)
609 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
610 if (r > 0)
611 return 0;
612
613 mkdir_p(to, 0755);
614
615 /* The superblock mount options of the mount point need to be
616 * identical to the hosts', and hence writable... */
617 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
618 return log_error_errno(errno, "Failed to mount to %s: %m", to);
619
620 /* ... hence let's only make the bind mount read-only, not the
621 * superblock. */
622 if (read_only) {
623 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
624 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
625 }
626 return 1;
627 }
628
629 static int mount_legacy_cgroups(
630 const char *dest,
631 bool userns, uid_t uid_shift, uid_t uid_range,
632 const char *selinux_apifs_context) {
633
634 _cleanup_set_free_free_ Set *controllers = NULL;
635 const char *cgroup_root;
636 int r;
637
638 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
639
640 (void) mkdir_p(cgroup_root, 0755);
641
642 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
643 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
644 if (r < 0)
645 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
646 if (r == 0) {
647 _cleanup_free_ char *options = NULL;
648
649 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
650 if (r < 0)
651 return log_oom();
652
653 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
654 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
655 }
656
657 if (cg_unified() > 0)
658 goto skip_controllers;
659
660 controllers = set_new(&string_hash_ops);
661 if (!controllers)
662 return log_oom();
663
664 r = cg_kernel_controllers(controllers);
665 if (r < 0)
666 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
667
668 for (;;) {
669 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
670
671 controller = set_steal_first(controllers);
672 if (!controller)
673 break;
674
675 origin = prefix_root("/sys/fs/cgroup/", controller);
676 if (!origin)
677 return log_oom();
678
679 r = readlink_malloc(origin, &combined);
680 if (r == -EINVAL) {
681 /* Not a symbolic link, but directly a single cgroup hierarchy */
682
683 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
684 if (r < 0)
685 return r;
686
687 } else if (r < 0)
688 return log_error_errno(r, "Failed to read link %s: %m", origin);
689 else {
690 _cleanup_free_ char *target = NULL;
691
692 target = prefix_root(dest, origin);
693 if (!target)
694 return log_oom();
695
696 /* A symbolic link, a combination of controllers in one hierarchy */
697
698 if (!filename_is_valid(combined)) {
699 log_warning("Ignoring invalid combined hierarchy %s.", combined);
700 continue;
701 }
702
703 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
704 if (r < 0)
705 return r;
706
707 r = symlink_idempotent(combined, target);
708 if (r == -EINVAL) {
709 log_error("Invalid existing symlink for combined hierarchy");
710 return r;
711 }
712 if (r < 0)
713 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
714 }
715 }
716
717 skip_controllers:
718 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
719 if (r < 0)
720 return r;
721
722 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
723 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
724
725 return 0;
726 }
727
728 static int mount_unified_cgroups(const char *dest) {
729 const char *p;
730 int r;
731
732 assert(dest);
733
734 p = prefix_roota(dest, "/sys/fs/cgroup");
735
736 (void) mkdir_p(p, 0755);
737
738 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
739 if (r < 0)
740 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
741 if (r > 0) {
742 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
743 if (access(p, F_OK) >= 0)
744 return 0;
745 if (errno != ENOENT)
746 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
747
748 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
749 return -EINVAL;
750 }
751
752 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
753 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
754
755 return 0;
756 }
757
/* Mounts the cgroup tree beneath dest: the unified hierarchy if unified_requested is true, the legacy
 * per-controller hierarchies otherwise. Returns whatever the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
769
770 int mount_systemd_cgroup_writable(
771 const char *dest,
772 bool unified_requested) {
773
774 _cleanup_free_ char *own_cgroup_path = NULL;
775 const char *systemd_root, *systemd_own;
776 int r;
777
778 assert(dest);
779
780 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
781 if (r < 0)
782 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
783
784 /* If we are living in the top-level, then there's nothing to do... */
785 if (path_equal(own_cgroup_path, "/"))
786 return 0;
787
788 if (unified_requested) {
789 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
790 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
791 } else {
792 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
793 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
794 }
795
796 /* Make our own cgroup a (writable) bind mount */
797 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
798 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
799
800 /* And then remount the systemd cgroup root read-only */
801 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
802 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
803
804 return 0;
805 }
806
807 int setup_volatile_state(
808 const char *directory,
809 VolatileMode mode,
810 bool userns, uid_t uid_shift, uid_t uid_range,
811 const char *selinux_apifs_context) {
812
813 _cleanup_free_ char *buf = NULL;
814 const char *p, *options;
815 int r;
816
817 assert(directory);
818
819 if (mode != VOLATILE_STATE)
820 return 0;
821
822 /* --volatile=state means we simply overmount /var
823 with a tmpfs, and the rest read-only. */
824
825 r = bind_remount_recursive(directory, true);
826 if (r < 0)
827 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
828
829 p = prefix_roota(directory, "/var");
830 r = mkdir(p, 0755);
831 if (r < 0 && errno != EEXIST)
832 return log_error_errno(errno, "Failed to create %s: %m", directory);
833
834 options = "mode=755";
835 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
836 if (r < 0)
837 return log_oom();
838 if (r > 0)
839 options = buf;
840
841 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
842 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
843
844 return 0;
845 }
846
847 int setup_volatile(
848 const char *directory,
849 VolatileMode mode,
850 bool userns, uid_t uid_shift, uid_t uid_range,
851 const char *selinux_apifs_context) {
852
853 bool tmpfs_mounted = false, bind_mounted = false;
854 char template[] = "/tmp/nspawn-volatile-XXXXXX";
855 _cleanup_free_ char *buf = NULL;
856 const char *f, *t, *options;
857 int r;
858
859 assert(directory);
860
861 if (mode != VOLATILE_YES)
862 return 0;
863
864 /* --volatile=yes means we mount a tmpfs to the root dir, and
865 the original /usr to use inside it, and that read-only. */
866
867 if (!mkdtemp(template))
868 return log_error_errno(errno, "Failed to create temporary directory: %m");
869
870 options = "mode=755";
871 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
872 if (r < 0)
873 return log_oom();
874 if (r > 0)
875 options = buf;
876
877 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
878 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
879 goto fail;
880 }
881
882 tmpfs_mounted = true;
883
884 f = prefix_roota(directory, "/usr");
885 t = prefix_roota(template, "/usr");
886
887 r = mkdir(t, 0755);
888 if (r < 0 && errno != EEXIST) {
889 r = log_error_errno(errno, "Failed to create %s: %m", t);
890 goto fail;
891 }
892
893 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
894 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
895 goto fail;
896 }
897
898 bind_mounted = true;
899
900 r = bind_remount_recursive(t, true);
901 if (r < 0) {
902 log_error_errno(r, "Failed to remount %s read-only: %m", t);
903 goto fail;
904 }
905
906 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
907 r = log_error_errno(errno, "Failed to move root mount: %m");
908 goto fail;
909 }
910
911 (void) rmdir(template);
912
913 return 0;
914
915 fail:
916 if (bind_mounted)
917 (void) umount(t);
918
919 if (tmpfs_mounted)
920 (void) umount(template);
921 (void) rmdir(template);
922 return r;
923 }
924
925 VolatileMode volatile_mode_from_string(const char *s) {
926 int b;
927
928 if (isempty(s))
929 return _VOLATILE_MODE_INVALID;
930
931 b = parse_boolean(s);
932 if (b > 0)
933 return VOLATILE_YES;
934 if (b == 0)
935 return VOLATILE_NO;
936
937 if (streq(s, "state"))
938 return VOLATILE_STATE;
939
940 return _VOLATILE_MODE_INVALID;
941 }