]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-mount.c
util: move string_is_safe() to string-util.[ch]
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
CommitLineData
e83bebef
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
4f5dd394 22#include <sys/mount.h>
07630cea 23#include <linux/magic.h>
e83bebef 24
4f5dd394
LP
25#include "cgroup-util.h"
26#include "escape.h"
f4f15635 27#include "fs-util.h"
e83bebef 28#include "label.h"
4f5dd394 29#include "mkdir.h"
4349cd7c 30#include "mount-util.h"
6bedfcbb
LP
31#include "nspawn-mount.h"
32#include "parse-util.h"
4f5dd394
LP
33#include "path-util.h"
34#include "rm-rf.h"
e83bebef 35#include "set.h"
07630cea 36#include "string-util.h"
4f5dd394
LP
37#include "strv.h"
38#include "util.h"
e83bebef
LP
39
40CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
41 CustomMount *c, *ret;
42
43 assert(l);
44 assert(n);
45 assert(t >= 0);
46 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
47
48 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
49 if (!c)
50 return NULL;
51
52 *l = c;
53 ret = *l + *n;
54 (*n)++;
55
56 *ret = (CustomMount) { .type = t };
57
58 return ret;
59}
60
61void custom_mount_free_all(CustomMount *l, unsigned n) {
62 unsigned i;
63
64 for (i = 0; i < n; i++) {
65 CustomMount *m = l + i;
66
67 free(m->source);
68 free(m->destination);
69 free(m->options);
70
71 if (m->work_dir) {
72 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
73 free(m->work_dir);
74 }
75
76 strv_free(m->lower);
77 }
78
79 free(l);
80}
81
82int custom_mount_compare(const void *a, const void *b) {
83 const CustomMount *x = a, *y = b;
84 int r;
85
86 r = path_compare(x->destination, y->destination);
87 if (r != 0)
88 return r;
89
90 if (x->type < y->type)
91 return -1;
92 if (x->type > y->type)
93 return 1;
94
95 return 0;
96}
97
98int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
99 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
100 const char *p = s;
101 CustomMount *m;
102 int r;
103
104 assert(l);
105 assert(n);
106
107 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
108 if (r < 0)
109 return r;
110 if (r == 0)
111 return -EINVAL;
112
113 if (r == 1) {
114 destination = strdup(source);
115 if (!destination)
116 return -ENOMEM;
117 }
118
119 if (r == 2 && !isempty(p)) {
120 opts = strdup(p);
121 if (!opts)
122 return -ENOMEM;
123 }
124
125 if (!path_is_absolute(source))
126 return -EINVAL;
127
128 if (!path_is_absolute(destination))
129 return -EINVAL;
130
131 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
132 if (!m)
133 return log_oom();
134
135 m->source = source;
136 m->destination = destination;
137 m->read_only = read_only;
138 m->options = opts;
139
140 source = destination = opts = NULL;
141 return 0;
142}
143
144int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
145 _cleanup_free_ char *path = NULL, *opts = NULL;
146 const char *p = s;
147 CustomMount *m;
148 int r;
149
150 assert(l);
151 assert(n);
152 assert(s);
153
154 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
155 if (r < 0)
156 return r;
157 if (r == 0)
158 return -EINVAL;
159
160 if (isempty(p))
161 opts = strdup("mode=0755");
162 else
163 opts = strdup(p);
164 if (!opts)
165 return -ENOMEM;
166
167 if (!path_is_absolute(path))
168 return -EINVAL;
169
170 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
171 if (!m)
172 return -ENOMEM;
173
174 m->destination = path;
175 m->options = opts;
176
177 path = opts = NULL;
178 return 0;
179}
180
181static int tmpfs_patch_options(
182 const char *options,
183 bool userns, uid_t uid_shift, uid_t uid_range,
184 const char *selinux_apifs_context,
185 char **ret) {
186
187 char *buf = NULL;
188
189 if (userns && uid_shift != 0) {
190 assert(uid_shift != UID_INVALID);
191
192 if (options)
193 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
194 else
195 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
196 if (!buf)
197 return -ENOMEM;
198
199 options = buf;
200 }
201
202#ifdef HAVE_SELINUX
203 if (selinux_apifs_context) {
204 char *t;
205
206 if (options)
207 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
208 else
209 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
210 if (!t) {
211 free(buf);
212 return -ENOMEM;
213 }
214
215 free(buf);
216 buf = t;
217 }
218#endif
219
220 *ret = buf;
221 return !!buf;
222}
223
d8fc6a00
LP
224int mount_sysfs(const char *dest) {
225 const char *full, *top, *x;
d1678248 226 int r;
d8fc6a00
LP
227
228 top = prefix_roota(dest, "/sys");
d1678248
ILG
229 r = path_check_fstype(top, SYSFS_MAGIC);
230 if (r < 0)
231 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
232 /* /sys might already be mounted as sysfs by the outer child in the
233 * !netns case. In this case, it's all good. Don't touch it because we
234 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
235 */
236 if (r > 0)
237 return 0;
238
d8fc6a00
LP
239 full = prefix_roota(top, "/full");
240
241 (void) mkdir(full, 0755);
242
243 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
244 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
245
246 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
247 _cleanup_free_ char *from = NULL, *to = NULL;
248
249 from = prefix_root(full, x);
250 if (!from)
251 return log_oom();
252
253 to = prefix_root(top, x);
254 if (!to)
255 return log_oom();
256
257 (void) mkdir(to, 0755);
258
259 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
260 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
261
262 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
263 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
264 }
265
266 if (umount(full) < 0)
267 return log_error_errno(errno, "Failed to unmount %s: %m", full);
268
269 if (rmdir(full) < 0)
270 return log_error_errno(errno, "Failed to remove %s: %m", full);
271
272 x = prefix_roota(top, "/fs/kdbus");
273 (void) mkdir(x, 0755);
274
275 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
276 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
277
278 return 0;
279}
280
e83bebef 281int mount_all(const char *dest,
403af78c 282 bool use_userns, bool in_userns,
d1678248 283 bool use_netns,
403af78c 284 uid_t uid_shift, uid_t uid_range,
e83bebef
LP
285 const char *selinux_apifs_context) {
286
287 typedef struct MountPoint {
288 const char *what;
289 const char *where;
290 const char *type;
291 const char *options;
292 unsigned long flags;
293 bool fatal;
d1678248
ILG
294 bool in_userns;
295 bool use_netns;
e83bebef
LP
296 } MountPoint;
297
298 static const MountPoint mount_table[] = {
d1678248
ILG
299 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
300 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
301 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
302 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
303 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
304 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
305 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
306 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
307 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
e83bebef 308#ifdef HAVE_SELINUX
d1678248
ILG
309 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
310 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
e83bebef
LP
311#endif
312 };
313
314 unsigned k;
315 int r;
316
317 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
318 _cleanup_free_ char *where = NULL, *options = NULL;
319 const char *o;
320
d1678248
ILG
321 if (in_userns != mount_table[k].in_userns)
322 continue;
323
324 if (!use_netns && mount_table[k].use_netns)
e83bebef
LP
325 continue;
326
327 where = prefix_root(dest, mount_table[k].where);
328 if (!where)
329 return log_oom();
330
331 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
332 if (r < 0 && r != -ENOENT)
333 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
334
335 /* Skip this entry if it is not a remount. */
336 if (mount_table[k].what && r > 0)
337 continue;
338
339 r = mkdir_p(where, 0755);
340 if (r < 0) {
341 if (mount_table[k].fatal)
342 return log_error_errno(r, "Failed to create directory %s: %m", where);
343
344 log_warning_errno(r, "Failed to create directory %s: %m", where);
345 continue;
346 }
347
348 o = mount_table[k].options;
349 if (streq_ptr(mount_table[k].type, "tmpfs")) {
403af78c 350 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
e83bebef
LP
351 if (r < 0)
352 return log_oom();
353 if (r > 0)
354 o = options;
355 }
356
357 if (mount(mount_table[k].what,
358 where,
359 mount_table[k].type,
360 mount_table[k].flags,
361 o) < 0) {
362
363 if (mount_table[k].fatal)
364 return log_error_errno(errno, "mount(%s) failed: %m", where);
365
366 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
367 }
368 }
369
370 return 0;
371}
372
373static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
374 const char *p = options;
375 unsigned long flags = *mount_flags;
376 char *opts = NULL;
377
378 assert(options);
379
380 for (;;) {
381 _cleanup_free_ char *word = NULL;
382 int r = extract_first_word(&p, &word, ",", 0);
383 if (r < 0)
384 return log_error_errno(r, "Failed to extract mount option: %m");
385 if (r == 0)
386 break;
387
388 if (streq(word, "rbind"))
389 flags |= MS_REC;
390 else if (streq(word, "norbind"))
391 flags &= ~MS_REC;
392 else {
393 log_error("Invalid bind mount option: %s", word);
394 return -EINVAL;
395 }
396 }
397
398 *mount_flags = flags;
399 /* in the future mount_opts will hold string options for mount(2) */
400 *mount_opts = opts;
401
402 return 0;
403}
404
405static int mount_bind(const char *dest, CustomMount *m) {
406 struct stat source_st, dest_st;
407 const char *where;
408 unsigned long mount_flags = MS_BIND | MS_REC;
409 _cleanup_free_ char *mount_opts = NULL;
410 int r;
411
412 assert(m);
413
414 if (m->options) {
415 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
416 if (r < 0)
417 return r;
418 }
419
420 if (stat(m->source, &source_st) < 0)
421 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
422
423 where = prefix_roota(dest, m->destination);
424
425 if (stat(where, &dest_st) >= 0) {
426 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
427 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
428 return -EINVAL;
429 }
430
431 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
432 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
433 return -EINVAL;
434 }
435
436 } else if (errno == ENOENT) {
437 r = mkdir_parents_label(where, 0755);
438 if (r < 0)
439 return log_error_errno(r, "Failed to make parents of %s: %m", where);
440 } else {
441 log_error_errno(errno, "Failed to stat %s: %m", where);
442 return -errno;
443 }
444
445 /* Create the mount point. Any non-directory file can be
446 * mounted on any non-directory file (regular, fifo, socket,
447 * char, block).
448 */
449 if (S_ISDIR(source_st.st_mode))
450 r = mkdir_label(where, 0755);
451 else
452 r = touch(where);
453 if (r < 0 && r != -EEXIST)
454 return log_error_errno(r, "Failed to create mount point %s: %m", where);
455
456 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
457 return log_error_errno(errno, "mount(%s) failed: %m", where);
458
459 if (m->read_only) {
460 r = bind_remount_recursive(where, true);
461 if (r < 0)
462 return log_error_errno(r, "Read-only bind mount failed: %m");
463 }
464
465 return 0;
466}
467
468static int mount_tmpfs(
469 const char *dest,
470 CustomMount *m,
471 bool userns, uid_t uid_shift, uid_t uid_range,
472 const char *selinux_apifs_context) {
473
474 const char *where, *options;
475 _cleanup_free_ char *buf = NULL;
476 int r;
477
478 assert(dest);
479 assert(m);
480
481 where = prefix_roota(dest, m->destination);
482
483 r = mkdir_p_label(where, 0755);
484 if (r < 0 && r != -EEXIST)
485 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
486
487 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
488 if (r < 0)
489 return log_oom();
490 options = r > 0 ? buf : m->options;
491
492 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
493 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
494
495 return 0;
496}
497
498static char *joined_and_escaped_lower_dirs(char * const *lower) {
499 _cleanup_strv_free_ char **sv = NULL;
500
501 sv = strv_copy(lower);
502 if (!sv)
503 return NULL;
504
505 strv_reverse(sv);
506
507 if (!strv_shell_escape(sv, ",:"))
508 return NULL;
509
510 return strv_join(sv, ":");
511}
512
513static int mount_overlay(const char *dest, CustomMount *m) {
514 _cleanup_free_ char *lower = NULL;
515 const char *where, *options;
516 int r;
517
518 assert(dest);
519 assert(m);
520
521 where = prefix_roota(dest, m->destination);
522
523 r = mkdir_label(where, 0755);
524 if (r < 0 && r != -EEXIST)
525 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
526
527 (void) mkdir_p_label(m->source, 0755);
528
529 lower = joined_and_escaped_lower_dirs(m->lower);
530 if (!lower)
531 return log_oom();
532
533 if (m->read_only) {
534 _cleanup_free_ char *escaped_source = NULL;
535
536 escaped_source = shell_escape(m->source, ",:");
537 if (!escaped_source)
538 return log_oom();
539
540 options = strjoina("lowerdir=", escaped_source, ":", lower);
541 } else {
542 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
543
544 assert(m->work_dir);
545 (void) mkdir_label(m->work_dir, 0700);
546
547 escaped_source = shell_escape(m->source, ",:");
548 if (!escaped_source)
549 return log_oom();
550 escaped_work_dir = shell_escape(m->work_dir, ",:");
551 if (!escaped_work_dir)
552 return log_oom();
553
554 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
555 }
556
557 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
558 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
559
560 return 0;
561}
562
563int mount_custom(
564 const char *dest,
565 CustomMount *mounts, unsigned n,
566 bool userns, uid_t uid_shift, uid_t uid_range,
567 const char *selinux_apifs_context) {
568
569 unsigned i;
570 int r;
571
572 assert(dest);
573
574 for (i = 0; i < n; i++) {
575 CustomMount *m = mounts + i;
576
577 switch (m->type) {
578
579 case CUSTOM_MOUNT_BIND:
580 r = mount_bind(dest, m);
581 break;
582
583 case CUSTOM_MOUNT_TMPFS:
584 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
585 break;
586
587 case CUSTOM_MOUNT_OVERLAY:
588 r = mount_overlay(dest, m);
589 break;
590
591 default:
592 assert_not_reached("Unknown custom mount type");
593 }
594
595 if (r < 0)
596 return r;
597 }
598
599 return 0;
600}
601
602static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
603 char *to;
604 int r;
605
ee30f6ac 606 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
e83bebef
LP
607
608 r = path_is_mount_point(to, 0);
609 if (r < 0 && r != -ENOENT)
610 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
611 if (r > 0)
612 return 0;
613
614 mkdir_p(to, 0755);
615
616 /* The superblock mount options of the mount point need to be
617 * identical to the hosts', and hence writable... */
618 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
619 return log_error_errno(errno, "Failed to mount to %s: %m", to);
620
621 /* ... hence let's only make the bind mount read-only, not the
622 * superblock. */
623 if (read_only) {
624 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
625 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
626 }
627 return 1;
628}
629
630static int mount_legacy_cgroups(
631 const char *dest,
632 bool userns, uid_t uid_shift, uid_t uid_range,
633 const char *selinux_apifs_context) {
634
635 _cleanup_set_free_free_ Set *controllers = NULL;
636 const char *cgroup_root;
637 int r;
638
639 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
640
d8fc6a00
LP
641 (void) mkdir_p(cgroup_root, 0755);
642
e83bebef
LP
643 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
644 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
645 if (r < 0)
646 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
647 if (r == 0) {
648 _cleanup_free_ char *options = NULL;
649
650 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
651 if (r < 0)
652 return log_oom();
653
654 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
655 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
656 }
657
658 if (cg_unified() > 0)
659 goto skip_controllers;
660
661 controllers = set_new(&string_hash_ops);
662 if (!controllers)
663 return log_oom();
664
665 r = cg_kernel_controllers(controllers);
666 if (r < 0)
667 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
668
669 for (;;) {
670 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
671
672 controller = set_steal_first(controllers);
673 if (!controller)
674 break;
675
676 origin = prefix_root("/sys/fs/cgroup/", controller);
677 if (!origin)
678 return log_oom();
679
680 r = readlink_malloc(origin, &combined);
681 if (r == -EINVAL) {
682 /* Not a symbolic link, but directly a single cgroup hierarchy */
683
684 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
685 if (r < 0)
686 return r;
687
688 } else if (r < 0)
689 return log_error_errno(r, "Failed to read link %s: %m", origin);
690 else {
691 _cleanup_free_ char *target = NULL;
692
693 target = prefix_root(dest, origin);
694 if (!target)
695 return log_oom();
696
697 /* A symbolic link, a combination of controllers in one hierarchy */
698
699 if (!filename_is_valid(combined)) {
700 log_warning("Ignoring invalid combined hierarchy %s.", combined);
701 continue;
702 }
703
704 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
705 if (r < 0)
706 return r;
707
708 r = symlink_idempotent(combined, target);
709 if (r == -EINVAL) {
710 log_error("Invalid existing symlink for combined hierarchy");
711 return r;
712 }
713 if (r < 0)
714 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
715 }
716 }
717
718skip_controllers:
719 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
720 if (r < 0)
721 return r;
722
723 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
724 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
725
726 return 0;
727}
728
729static int mount_unified_cgroups(const char *dest) {
730 const char *p;
731 int r;
732
733 assert(dest);
734
88e10572
MT
735 p = prefix_roota(dest, "/sys/fs/cgroup");
736
737 (void) mkdir_p(p, 0755);
e83bebef
LP
738
739 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
740 if (r < 0)
741 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
742 if (r > 0) {
88e10572 743 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
e83bebef
LP
744 if (access(p, F_OK) >= 0)
745 return 0;
746 if (errno != ENOENT)
747 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
748
749 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
750 return -EINVAL;
751 }
752
753 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
754 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
755
756 return 0;
757}
758
/* Mounts the cgroup tree below dest: the unified hierarchy if
 * unified_requested is set, the legacy per-controller layout otherwise.
 * Returns whatever the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
770
771int mount_systemd_cgroup_writable(
772 const char *dest,
773 bool unified_requested) {
774
775 _cleanup_free_ char *own_cgroup_path = NULL;
776 const char *systemd_root, *systemd_own;
777 int r;
778
779 assert(dest);
780
781 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
782 if (r < 0)
783 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
784
785 /* If we are living in the top-level, then there's nothing to do... */
786 if (path_equal(own_cgroup_path, "/"))
787 return 0;
788
789 if (unified_requested) {
790 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
791 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
792 } else {
793 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
794 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
795 }
796
797 /* Make our own cgroup a (writable) bind mount */
798 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
799 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
800
801 /* And then remount the systemd cgroup root read-only */
802 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
803 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
804
805 return 0;
806}
807
808int setup_volatile_state(
809 const char *directory,
810 VolatileMode mode,
811 bool userns, uid_t uid_shift, uid_t uid_range,
812 const char *selinux_apifs_context) {
813
814 _cleanup_free_ char *buf = NULL;
815 const char *p, *options;
816 int r;
817
818 assert(directory);
819
820 if (mode != VOLATILE_STATE)
821 return 0;
822
823 /* --volatile=state means we simply overmount /var
824 with a tmpfs, and the rest read-only. */
825
826 r = bind_remount_recursive(directory, true);
827 if (r < 0)
828 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
829
830 p = prefix_roota(directory, "/var");
831 r = mkdir(p, 0755);
832 if (r < 0 && errno != EEXIST)
833 return log_error_errno(errno, "Failed to create %s: %m", directory);
834
835 options = "mode=755";
836 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
837 if (r < 0)
838 return log_oom();
839 if (r > 0)
840 options = buf;
841
842 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
843 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
844
845 return 0;
846}
847
848int setup_volatile(
849 const char *directory,
850 VolatileMode mode,
851 bool userns, uid_t uid_shift, uid_t uid_range,
852 const char *selinux_apifs_context) {
853
854 bool tmpfs_mounted = false, bind_mounted = false;
855 char template[] = "/tmp/nspawn-volatile-XXXXXX";
856 _cleanup_free_ char *buf = NULL;
857 const char *f, *t, *options;
858 int r;
859
860 assert(directory);
861
862 if (mode != VOLATILE_YES)
863 return 0;
864
865 /* --volatile=yes means we mount a tmpfs to the root dir, and
866 the original /usr to use inside it, and that read-only. */
867
868 if (!mkdtemp(template))
869 return log_error_errno(errno, "Failed to create temporary directory: %m");
870
871 options = "mode=755";
872 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
873 if (r < 0)
874 return log_oom();
875 if (r > 0)
876 options = buf;
877
878 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
879 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
880 goto fail;
881 }
882
883 tmpfs_mounted = true;
884
885 f = prefix_roota(directory, "/usr");
886 t = prefix_roota(template, "/usr");
887
888 r = mkdir(t, 0755);
889 if (r < 0 && errno != EEXIST) {
890 r = log_error_errno(errno, "Failed to create %s: %m", t);
891 goto fail;
892 }
893
894 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
895 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
896 goto fail;
897 }
898
899 bind_mounted = true;
900
901 r = bind_remount_recursive(t, true);
902 if (r < 0) {
903 log_error_errno(r, "Failed to remount %s read-only: %m", t);
904 goto fail;
905 }
906
907 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
908 r = log_error_errno(errno, "Failed to move root mount: %m");
909 goto fail;
910 }
911
912 (void) rmdir(template);
913
914 return 0;
915
916fail:
917 if (bind_mounted)
918 (void) umount(t);
919
920 if (tmpfs_mounted)
921 (void) umount(template);
922 (void) rmdir(template);
923 return r;
924}
925
926VolatileMode volatile_mode_from_string(const char *s) {
927 int b;
928
929 if (isempty(s))
930 return _VOLATILE_MODE_INVALID;
931
932 b = parse_boolean(s);
933 if (b > 0)
934 return VOLATILE_YES;
935 if (b == 0)
936 return VOLATILE_NO;
937
938 if (streq(s, "state"))
939 return VOLATILE_STATE;
940
941 return _VOLATILE_MODE_INVALID;
942}