]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-mount.c
util-lib: move mount related utility calls to mount-util.[ch]
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
CommitLineData
e83bebef
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
4f5dd394 22#include <sys/mount.h>
07630cea 23#include <linux/magic.h>
e83bebef 24
4f5dd394
LP
25#include "cgroup-util.h"
26#include "escape.h"
e83bebef 27#include "label.h"
4f5dd394 28#include "mkdir.h"
4349cd7c 29#include "mount-util.h"
6bedfcbb
LP
30#include "nspawn-mount.h"
31#include "parse-util.h"
4f5dd394
LP
32#include "path-util.h"
33#include "rm-rf.h"
e83bebef 34#include "set.h"
07630cea 35#include "string-util.h"
4f5dd394
LP
36#include "strv.h"
37#include "util.h"
e83bebef
LP
38
39CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
40 CustomMount *c, *ret;
41
42 assert(l);
43 assert(n);
44 assert(t >= 0);
45 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
46
47 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
48 if (!c)
49 return NULL;
50
51 *l = c;
52 ret = *l + *n;
53 (*n)++;
54
55 *ret = (CustomMount) { .type = t };
56
57 return ret;
58}
59
60void custom_mount_free_all(CustomMount *l, unsigned n) {
61 unsigned i;
62
63 for (i = 0; i < n; i++) {
64 CustomMount *m = l + i;
65
66 free(m->source);
67 free(m->destination);
68 free(m->options);
69
70 if (m->work_dir) {
71 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
72 free(m->work_dir);
73 }
74
75 strv_free(m->lower);
76 }
77
78 free(l);
79}
80
81int custom_mount_compare(const void *a, const void *b) {
82 const CustomMount *x = a, *y = b;
83 int r;
84
85 r = path_compare(x->destination, y->destination);
86 if (r != 0)
87 return r;
88
89 if (x->type < y->type)
90 return -1;
91 if (x->type > y->type)
92 return 1;
93
94 return 0;
95}
96
97int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
98 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
99 const char *p = s;
100 CustomMount *m;
101 int r;
102
103 assert(l);
104 assert(n);
105
106 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
107 if (r < 0)
108 return r;
109 if (r == 0)
110 return -EINVAL;
111
112 if (r == 1) {
113 destination = strdup(source);
114 if (!destination)
115 return -ENOMEM;
116 }
117
118 if (r == 2 && !isempty(p)) {
119 opts = strdup(p);
120 if (!opts)
121 return -ENOMEM;
122 }
123
124 if (!path_is_absolute(source))
125 return -EINVAL;
126
127 if (!path_is_absolute(destination))
128 return -EINVAL;
129
130 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
131 if (!m)
132 return log_oom();
133
134 m->source = source;
135 m->destination = destination;
136 m->read_only = read_only;
137 m->options = opts;
138
139 source = destination = opts = NULL;
140 return 0;
141}
142
143int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
144 _cleanup_free_ char *path = NULL, *opts = NULL;
145 const char *p = s;
146 CustomMount *m;
147 int r;
148
149 assert(l);
150 assert(n);
151 assert(s);
152
153 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
154 if (r < 0)
155 return r;
156 if (r == 0)
157 return -EINVAL;
158
159 if (isempty(p))
160 opts = strdup("mode=0755");
161 else
162 opts = strdup(p);
163 if (!opts)
164 return -ENOMEM;
165
166 if (!path_is_absolute(path))
167 return -EINVAL;
168
169 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
170 if (!m)
171 return -ENOMEM;
172
173 m->destination = path;
174 m->options = opts;
175
176 path = opts = NULL;
177 return 0;
178}
179
180static int tmpfs_patch_options(
181 const char *options,
182 bool userns, uid_t uid_shift, uid_t uid_range,
183 const char *selinux_apifs_context,
184 char **ret) {
185
186 char *buf = NULL;
187
188 if (userns && uid_shift != 0) {
189 assert(uid_shift != UID_INVALID);
190
191 if (options)
192 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
193 else
194 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
195 if (!buf)
196 return -ENOMEM;
197
198 options = buf;
199 }
200
201#ifdef HAVE_SELINUX
202 if (selinux_apifs_context) {
203 char *t;
204
205 if (options)
206 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
207 else
208 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
209 if (!t) {
210 free(buf);
211 return -ENOMEM;
212 }
213
214 free(buf);
215 buf = t;
216 }
217#endif
218
219 *ret = buf;
220 return !!buf;
221}
222
d8fc6a00
LP
223int mount_sysfs(const char *dest) {
224 const char *full, *top, *x;
d1678248 225 int r;
d8fc6a00
LP
226
227 top = prefix_roota(dest, "/sys");
d1678248
ILG
228 r = path_check_fstype(top, SYSFS_MAGIC);
229 if (r < 0)
230 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
231 /* /sys might already be mounted as sysfs by the outer child in the
232 * !netns case. In this case, it's all good. Don't touch it because we
233 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
234 */
235 if (r > 0)
236 return 0;
237
d8fc6a00
LP
238 full = prefix_roota(top, "/full");
239
240 (void) mkdir(full, 0755);
241
242 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
243 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
244
245 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
246 _cleanup_free_ char *from = NULL, *to = NULL;
247
248 from = prefix_root(full, x);
249 if (!from)
250 return log_oom();
251
252 to = prefix_root(top, x);
253 if (!to)
254 return log_oom();
255
256 (void) mkdir(to, 0755);
257
258 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
259 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
260
261 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
262 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
263 }
264
265 if (umount(full) < 0)
266 return log_error_errno(errno, "Failed to unmount %s: %m", full);
267
268 if (rmdir(full) < 0)
269 return log_error_errno(errno, "Failed to remove %s: %m", full);
270
271 x = prefix_roota(top, "/fs/kdbus");
272 (void) mkdir(x, 0755);
273
274 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
275 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
276
277 return 0;
278}
279
e83bebef 280int mount_all(const char *dest,
403af78c 281 bool use_userns, bool in_userns,
d1678248 282 bool use_netns,
403af78c 283 uid_t uid_shift, uid_t uid_range,
e83bebef
LP
284 const char *selinux_apifs_context) {
285
286 typedef struct MountPoint {
287 const char *what;
288 const char *where;
289 const char *type;
290 const char *options;
291 unsigned long flags;
292 bool fatal;
d1678248
ILG
293 bool in_userns;
294 bool use_netns;
e83bebef
LP
295 } MountPoint;
296
297 static const MountPoint mount_table[] = {
d1678248
ILG
298 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
299 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
300 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
301 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
302 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
303 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
304 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
305 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
306 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
e83bebef 307#ifdef HAVE_SELINUX
d1678248
ILG
308 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
309 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
e83bebef
LP
310#endif
311 };
312
313 unsigned k;
314 int r;
315
316 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
317 _cleanup_free_ char *where = NULL, *options = NULL;
318 const char *o;
319
d1678248
ILG
320 if (in_userns != mount_table[k].in_userns)
321 continue;
322
323 if (!use_netns && mount_table[k].use_netns)
e83bebef
LP
324 continue;
325
326 where = prefix_root(dest, mount_table[k].where);
327 if (!where)
328 return log_oom();
329
330 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
331 if (r < 0 && r != -ENOENT)
332 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
333
334 /* Skip this entry if it is not a remount. */
335 if (mount_table[k].what && r > 0)
336 continue;
337
338 r = mkdir_p(where, 0755);
339 if (r < 0) {
340 if (mount_table[k].fatal)
341 return log_error_errno(r, "Failed to create directory %s: %m", where);
342
343 log_warning_errno(r, "Failed to create directory %s: %m", where);
344 continue;
345 }
346
347 o = mount_table[k].options;
348 if (streq_ptr(mount_table[k].type, "tmpfs")) {
403af78c 349 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
e83bebef
LP
350 if (r < 0)
351 return log_oom();
352 if (r > 0)
353 o = options;
354 }
355
356 if (mount(mount_table[k].what,
357 where,
358 mount_table[k].type,
359 mount_table[k].flags,
360 o) < 0) {
361
362 if (mount_table[k].fatal)
363 return log_error_errno(errno, "mount(%s) failed: %m", where);
364
365 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
366 }
367 }
368
369 return 0;
370}
371
372static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
373 const char *p = options;
374 unsigned long flags = *mount_flags;
375 char *opts = NULL;
376
377 assert(options);
378
379 for (;;) {
380 _cleanup_free_ char *word = NULL;
381 int r = extract_first_word(&p, &word, ",", 0);
382 if (r < 0)
383 return log_error_errno(r, "Failed to extract mount option: %m");
384 if (r == 0)
385 break;
386
387 if (streq(word, "rbind"))
388 flags |= MS_REC;
389 else if (streq(word, "norbind"))
390 flags &= ~MS_REC;
391 else {
392 log_error("Invalid bind mount option: %s", word);
393 return -EINVAL;
394 }
395 }
396
397 *mount_flags = flags;
398 /* in the future mount_opts will hold string options for mount(2) */
399 *mount_opts = opts;
400
401 return 0;
402}
403
404static int mount_bind(const char *dest, CustomMount *m) {
405 struct stat source_st, dest_st;
406 const char *where;
407 unsigned long mount_flags = MS_BIND | MS_REC;
408 _cleanup_free_ char *mount_opts = NULL;
409 int r;
410
411 assert(m);
412
413 if (m->options) {
414 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
415 if (r < 0)
416 return r;
417 }
418
419 if (stat(m->source, &source_st) < 0)
420 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
421
422 where = prefix_roota(dest, m->destination);
423
424 if (stat(where, &dest_st) >= 0) {
425 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
426 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
427 return -EINVAL;
428 }
429
430 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
431 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
432 return -EINVAL;
433 }
434
435 } else if (errno == ENOENT) {
436 r = mkdir_parents_label(where, 0755);
437 if (r < 0)
438 return log_error_errno(r, "Failed to make parents of %s: %m", where);
439 } else {
440 log_error_errno(errno, "Failed to stat %s: %m", where);
441 return -errno;
442 }
443
444 /* Create the mount point. Any non-directory file can be
445 * mounted on any non-directory file (regular, fifo, socket,
446 * char, block).
447 */
448 if (S_ISDIR(source_st.st_mode))
449 r = mkdir_label(where, 0755);
450 else
451 r = touch(where);
452 if (r < 0 && r != -EEXIST)
453 return log_error_errno(r, "Failed to create mount point %s: %m", where);
454
455 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
456 return log_error_errno(errno, "mount(%s) failed: %m", where);
457
458 if (m->read_only) {
459 r = bind_remount_recursive(where, true);
460 if (r < 0)
461 return log_error_errno(r, "Read-only bind mount failed: %m");
462 }
463
464 return 0;
465}
466
467static int mount_tmpfs(
468 const char *dest,
469 CustomMount *m,
470 bool userns, uid_t uid_shift, uid_t uid_range,
471 const char *selinux_apifs_context) {
472
473 const char *where, *options;
474 _cleanup_free_ char *buf = NULL;
475 int r;
476
477 assert(dest);
478 assert(m);
479
480 where = prefix_roota(dest, m->destination);
481
482 r = mkdir_p_label(where, 0755);
483 if (r < 0 && r != -EEXIST)
484 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
485
486 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
487 if (r < 0)
488 return log_oom();
489 options = r > 0 ? buf : m->options;
490
491 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
492 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
493
494 return 0;
495}
496
497static char *joined_and_escaped_lower_dirs(char * const *lower) {
498 _cleanup_strv_free_ char **sv = NULL;
499
500 sv = strv_copy(lower);
501 if (!sv)
502 return NULL;
503
504 strv_reverse(sv);
505
506 if (!strv_shell_escape(sv, ",:"))
507 return NULL;
508
509 return strv_join(sv, ":");
510}
511
512static int mount_overlay(const char *dest, CustomMount *m) {
513 _cleanup_free_ char *lower = NULL;
514 const char *where, *options;
515 int r;
516
517 assert(dest);
518 assert(m);
519
520 where = prefix_roota(dest, m->destination);
521
522 r = mkdir_label(where, 0755);
523 if (r < 0 && r != -EEXIST)
524 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
525
526 (void) mkdir_p_label(m->source, 0755);
527
528 lower = joined_and_escaped_lower_dirs(m->lower);
529 if (!lower)
530 return log_oom();
531
532 if (m->read_only) {
533 _cleanup_free_ char *escaped_source = NULL;
534
535 escaped_source = shell_escape(m->source, ",:");
536 if (!escaped_source)
537 return log_oom();
538
539 options = strjoina("lowerdir=", escaped_source, ":", lower);
540 } else {
541 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
542
543 assert(m->work_dir);
544 (void) mkdir_label(m->work_dir, 0700);
545
546 escaped_source = shell_escape(m->source, ",:");
547 if (!escaped_source)
548 return log_oom();
549 escaped_work_dir = shell_escape(m->work_dir, ",:");
550 if (!escaped_work_dir)
551 return log_oom();
552
553 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
554 }
555
556 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
557 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
558
559 return 0;
560}
561
562int mount_custom(
563 const char *dest,
564 CustomMount *mounts, unsigned n,
565 bool userns, uid_t uid_shift, uid_t uid_range,
566 const char *selinux_apifs_context) {
567
568 unsigned i;
569 int r;
570
571 assert(dest);
572
573 for (i = 0; i < n; i++) {
574 CustomMount *m = mounts + i;
575
576 switch (m->type) {
577
578 case CUSTOM_MOUNT_BIND:
579 r = mount_bind(dest, m);
580 break;
581
582 case CUSTOM_MOUNT_TMPFS:
583 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
584 break;
585
586 case CUSTOM_MOUNT_OVERLAY:
587 r = mount_overlay(dest, m);
588 break;
589
590 default:
591 assert_not_reached("Unknown custom mount type");
592 }
593
594 if (r < 0)
595 return r;
596 }
597
598 return 0;
599}
600
601static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
602 char *to;
603 int r;
604
ee30f6ac 605 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
e83bebef
LP
606
607 r = path_is_mount_point(to, 0);
608 if (r < 0 && r != -ENOENT)
609 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
610 if (r > 0)
611 return 0;
612
613 mkdir_p(to, 0755);
614
615 /* The superblock mount options of the mount point need to be
616 * identical to the hosts', and hence writable... */
617 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
618 return log_error_errno(errno, "Failed to mount to %s: %m", to);
619
620 /* ... hence let's only make the bind mount read-only, not the
621 * superblock. */
622 if (read_only) {
623 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
624 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
625 }
626 return 1;
627}
628
629static int mount_legacy_cgroups(
630 const char *dest,
631 bool userns, uid_t uid_shift, uid_t uid_range,
632 const char *selinux_apifs_context) {
633
634 _cleanup_set_free_free_ Set *controllers = NULL;
635 const char *cgroup_root;
636 int r;
637
638 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
639
d8fc6a00
LP
640 (void) mkdir_p(cgroup_root, 0755);
641
e83bebef
LP
642 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
643 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
644 if (r < 0)
645 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
646 if (r == 0) {
647 _cleanup_free_ char *options = NULL;
648
649 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
650 if (r < 0)
651 return log_oom();
652
653 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
654 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
655 }
656
657 if (cg_unified() > 0)
658 goto skip_controllers;
659
660 controllers = set_new(&string_hash_ops);
661 if (!controllers)
662 return log_oom();
663
664 r = cg_kernel_controllers(controllers);
665 if (r < 0)
666 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
667
668 for (;;) {
669 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
670
671 controller = set_steal_first(controllers);
672 if (!controller)
673 break;
674
675 origin = prefix_root("/sys/fs/cgroup/", controller);
676 if (!origin)
677 return log_oom();
678
679 r = readlink_malloc(origin, &combined);
680 if (r == -EINVAL) {
681 /* Not a symbolic link, but directly a single cgroup hierarchy */
682
683 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
684 if (r < 0)
685 return r;
686
687 } else if (r < 0)
688 return log_error_errno(r, "Failed to read link %s: %m", origin);
689 else {
690 _cleanup_free_ char *target = NULL;
691
692 target = prefix_root(dest, origin);
693 if (!target)
694 return log_oom();
695
696 /* A symbolic link, a combination of controllers in one hierarchy */
697
698 if (!filename_is_valid(combined)) {
699 log_warning("Ignoring invalid combined hierarchy %s.", combined);
700 continue;
701 }
702
703 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
704 if (r < 0)
705 return r;
706
707 r = symlink_idempotent(combined, target);
708 if (r == -EINVAL) {
709 log_error("Invalid existing symlink for combined hierarchy");
710 return r;
711 }
712 if (r < 0)
713 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
714 }
715 }
716
717skip_controllers:
718 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
719 if (r < 0)
720 return r;
721
722 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
723 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
724
725 return 0;
726}
727
728static int mount_unified_cgroups(const char *dest) {
729 const char *p;
730 int r;
731
732 assert(dest);
733
88e10572
MT
734 p = prefix_roota(dest, "/sys/fs/cgroup");
735
736 (void) mkdir_p(p, 0755);
e83bebef
LP
737
738 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
739 if (r < 0)
740 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
741 if (r > 0) {
88e10572 742 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
e83bebef
LP
743 if (access(p, F_OK) >= 0)
744 return 0;
745 if (errno != ENOENT)
746 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
747
748 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
749 return -EINVAL;
750 }
751
752 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
753 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
754
755 return 0;
756}
757
/* Mounts the cgroup tree below dest, either as unified hierarchy or in the
 * legacy per-controller layout, depending on unified_requested. The user
 * namespace and SELinux parameters only matter for the legacy layout.
 * Returns whatever the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
769
770int mount_systemd_cgroup_writable(
771 const char *dest,
772 bool unified_requested) {
773
774 _cleanup_free_ char *own_cgroup_path = NULL;
775 const char *systemd_root, *systemd_own;
776 int r;
777
778 assert(dest);
779
780 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
781 if (r < 0)
782 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
783
784 /* If we are living in the top-level, then there's nothing to do... */
785 if (path_equal(own_cgroup_path, "/"))
786 return 0;
787
788 if (unified_requested) {
789 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
790 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
791 } else {
792 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
793 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
794 }
795
796 /* Make our own cgroup a (writable) bind mount */
797 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
798 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
799
800 /* And then remount the systemd cgroup root read-only */
801 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
802 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
803
804 return 0;
805}
806
807int setup_volatile_state(
808 const char *directory,
809 VolatileMode mode,
810 bool userns, uid_t uid_shift, uid_t uid_range,
811 const char *selinux_apifs_context) {
812
813 _cleanup_free_ char *buf = NULL;
814 const char *p, *options;
815 int r;
816
817 assert(directory);
818
819 if (mode != VOLATILE_STATE)
820 return 0;
821
822 /* --volatile=state means we simply overmount /var
823 with a tmpfs, and the rest read-only. */
824
825 r = bind_remount_recursive(directory, true);
826 if (r < 0)
827 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
828
829 p = prefix_roota(directory, "/var");
830 r = mkdir(p, 0755);
831 if (r < 0 && errno != EEXIST)
832 return log_error_errno(errno, "Failed to create %s: %m", directory);
833
834 options = "mode=755";
835 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
836 if (r < 0)
837 return log_oom();
838 if (r > 0)
839 options = buf;
840
841 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
842 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
843
844 return 0;
845}
846
847int setup_volatile(
848 const char *directory,
849 VolatileMode mode,
850 bool userns, uid_t uid_shift, uid_t uid_range,
851 const char *selinux_apifs_context) {
852
853 bool tmpfs_mounted = false, bind_mounted = false;
854 char template[] = "/tmp/nspawn-volatile-XXXXXX";
855 _cleanup_free_ char *buf = NULL;
856 const char *f, *t, *options;
857 int r;
858
859 assert(directory);
860
861 if (mode != VOLATILE_YES)
862 return 0;
863
864 /* --volatile=yes means we mount a tmpfs to the root dir, and
865 the original /usr to use inside it, and that read-only. */
866
867 if (!mkdtemp(template))
868 return log_error_errno(errno, "Failed to create temporary directory: %m");
869
870 options = "mode=755";
871 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
872 if (r < 0)
873 return log_oom();
874 if (r > 0)
875 options = buf;
876
877 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
878 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
879 goto fail;
880 }
881
882 tmpfs_mounted = true;
883
884 f = prefix_roota(directory, "/usr");
885 t = prefix_roota(template, "/usr");
886
887 r = mkdir(t, 0755);
888 if (r < 0 && errno != EEXIST) {
889 r = log_error_errno(errno, "Failed to create %s: %m", t);
890 goto fail;
891 }
892
893 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
894 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
895 goto fail;
896 }
897
898 bind_mounted = true;
899
900 r = bind_remount_recursive(t, true);
901 if (r < 0) {
902 log_error_errno(r, "Failed to remount %s read-only: %m", t);
903 goto fail;
904 }
905
906 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
907 r = log_error_errno(errno, "Failed to move root mount: %m");
908 goto fail;
909 }
910
911 (void) rmdir(template);
912
913 return 0;
914
915fail:
916 if (bind_mounted)
917 (void) umount(t);
918
919 if (tmpfs_mounted)
920 (void) umount(template);
921 (void) rmdir(template);
922 return r;
923}
924
925VolatileMode volatile_mode_from_string(const char *s) {
926 int b;
927
928 if (isempty(s))
929 return _VOLATILE_MODE_INVALID;
930
931 b = parse_boolean(s);
932 if (b > 0)
933 return VOLATILE_YES;
934 if (b == 0)
935 return VOLATILE_NO;
936
937 if (streq(s, "state"))
938 return VOLATILE_STATE;
939
940 return _VOLATILE_MODE_INVALID;
941}