]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/nspawn/nspawn-mount.c
util-lib: split string parsing related calls from util.[ch] into parse-util.[ch]
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
CommitLineData
e83bebef
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
4f5dd394 22#include <sys/mount.h>
07630cea 23#include <linux/magic.h>
e83bebef 24
4f5dd394
LP
25#include "cgroup-util.h"
26#include "escape.h"
e83bebef 27#include "label.h"
4f5dd394 28#include "mkdir.h"
6bedfcbb
LP
29#include "nspawn-mount.h"
30#include "parse-util.h"
4f5dd394
LP
31#include "path-util.h"
32#include "rm-rf.h"
e83bebef 33#include "set.h"
07630cea 34#include "string-util.h"
4f5dd394
LP
35#include "strv.h"
36#include "util.h"
e83bebef
LP
37
38CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
39 CustomMount *c, *ret;
40
41 assert(l);
42 assert(n);
43 assert(t >= 0);
44 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
45
46 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
47 if (!c)
48 return NULL;
49
50 *l = c;
51 ret = *l + *n;
52 (*n)++;
53
54 *ret = (CustomMount) { .type = t };
55
56 return ret;
57}
58
59void custom_mount_free_all(CustomMount *l, unsigned n) {
60 unsigned i;
61
62 for (i = 0; i < n; i++) {
63 CustomMount *m = l + i;
64
65 free(m->source);
66 free(m->destination);
67 free(m->options);
68
69 if (m->work_dir) {
70 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
71 free(m->work_dir);
72 }
73
74 strv_free(m->lower);
75 }
76
77 free(l);
78}
79
80int custom_mount_compare(const void *a, const void *b) {
81 const CustomMount *x = a, *y = b;
82 int r;
83
84 r = path_compare(x->destination, y->destination);
85 if (r != 0)
86 return r;
87
88 if (x->type < y->type)
89 return -1;
90 if (x->type > y->type)
91 return 1;
92
93 return 0;
94}
95
96int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
97 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
98 const char *p = s;
99 CustomMount *m;
100 int r;
101
102 assert(l);
103 assert(n);
104
105 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
106 if (r < 0)
107 return r;
108 if (r == 0)
109 return -EINVAL;
110
111 if (r == 1) {
112 destination = strdup(source);
113 if (!destination)
114 return -ENOMEM;
115 }
116
117 if (r == 2 && !isempty(p)) {
118 opts = strdup(p);
119 if (!opts)
120 return -ENOMEM;
121 }
122
123 if (!path_is_absolute(source))
124 return -EINVAL;
125
126 if (!path_is_absolute(destination))
127 return -EINVAL;
128
129 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
130 if (!m)
131 return log_oom();
132
133 m->source = source;
134 m->destination = destination;
135 m->read_only = read_only;
136 m->options = opts;
137
138 source = destination = opts = NULL;
139 return 0;
140}
141
142int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
143 _cleanup_free_ char *path = NULL, *opts = NULL;
144 const char *p = s;
145 CustomMount *m;
146 int r;
147
148 assert(l);
149 assert(n);
150 assert(s);
151
152 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
153 if (r < 0)
154 return r;
155 if (r == 0)
156 return -EINVAL;
157
158 if (isempty(p))
159 opts = strdup("mode=0755");
160 else
161 opts = strdup(p);
162 if (!opts)
163 return -ENOMEM;
164
165 if (!path_is_absolute(path))
166 return -EINVAL;
167
168 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
169 if (!m)
170 return -ENOMEM;
171
172 m->destination = path;
173 m->options = opts;
174
175 path = opts = NULL;
176 return 0;
177}
178
179static int tmpfs_patch_options(
180 const char *options,
181 bool userns, uid_t uid_shift, uid_t uid_range,
182 const char *selinux_apifs_context,
183 char **ret) {
184
185 char *buf = NULL;
186
187 if (userns && uid_shift != 0) {
188 assert(uid_shift != UID_INVALID);
189
190 if (options)
191 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
192 else
193 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
194 if (!buf)
195 return -ENOMEM;
196
197 options = buf;
198 }
199
200#ifdef HAVE_SELINUX
201 if (selinux_apifs_context) {
202 char *t;
203
204 if (options)
205 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
206 else
207 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
208 if (!t) {
209 free(buf);
210 return -ENOMEM;
211 }
212
213 free(buf);
214 buf = t;
215 }
216#endif
217
218 *ret = buf;
219 return !!buf;
220}
221
d8fc6a00
LP
222int mount_sysfs(const char *dest) {
223 const char *full, *top, *x;
d1678248 224 int r;
d8fc6a00
LP
225
226 top = prefix_roota(dest, "/sys");
d1678248
ILG
227 r = path_check_fstype(top, SYSFS_MAGIC);
228 if (r < 0)
229 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
230 /* /sys might already be mounted as sysfs by the outer child in the
231 * !netns case. In this case, it's all good. Don't touch it because we
232 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
233 */
234 if (r > 0)
235 return 0;
236
d8fc6a00
LP
237 full = prefix_roota(top, "/full");
238
239 (void) mkdir(full, 0755);
240
241 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
242 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
243
244 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
245 _cleanup_free_ char *from = NULL, *to = NULL;
246
247 from = prefix_root(full, x);
248 if (!from)
249 return log_oom();
250
251 to = prefix_root(top, x);
252 if (!to)
253 return log_oom();
254
255 (void) mkdir(to, 0755);
256
257 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
258 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
259
260 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
261 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
262 }
263
264 if (umount(full) < 0)
265 return log_error_errno(errno, "Failed to unmount %s: %m", full);
266
267 if (rmdir(full) < 0)
268 return log_error_errno(errno, "Failed to remove %s: %m", full);
269
270 x = prefix_roota(top, "/fs/kdbus");
271 (void) mkdir(x, 0755);
272
273 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
274 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
275
276 return 0;
277}
278
e83bebef 279int mount_all(const char *dest,
403af78c 280 bool use_userns, bool in_userns,
d1678248 281 bool use_netns,
403af78c 282 uid_t uid_shift, uid_t uid_range,
e83bebef
LP
283 const char *selinux_apifs_context) {
284
285 typedef struct MountPoint {
286 const char *what;
287 const char *where;
288 const char *type;
289 const char *options;
290 unsigned long flags;
291 bool fatal;
d1678248
ILG
292 bool in_userns;
293 bool use_netns;
e83bebef
LP
294 } MountPoint;
295
296 static const MountPoint mount_table[] = {
d1678248
ILG
297 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
298 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
299 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
300 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
301 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
302 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
303 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
304 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
305 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
e83bebef 306#ifdef HAVE_SELINUX
d1678248
ILG
307 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
308 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
e83bebef
LP
309#endif
310 };
311
312 unsigned k;
313 int r;
314
315 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
316 _cleanup_free_ char *where = NULL, *options = NULL;
317 const char *o;
318
d1678248
ILG
319 if (in_userns != mount_table[k].in_userns)
320 continue;
321
322 if (!use_netns && mount_table[k].use_netns)
e83bebef
LP
323 continue;
324
325 where = prefix_root(dest, mount_table[k].where);
326 if (!where)
327 return log_oom();
328
329 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
330 if (r < 0 && r != -ENOENT)
331 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
332
333 /* Skip this entry if it is not a remount. */
334 if (mount_table[k].what && r > 0)
335 continue;
336
337 r = mkdir_p(where, 0755);
338 if (r < 0) {
339 if (mount_table[k].fatal)
340 return log_error_errno(r, "Failed to create directory %s: %m", where);
341
342 log_warning_errno(r, "Failed to create directory %s: %m", where);
343 continue;
344 }
345
346 o = mount_table[k].options;
347 if (streq_ptr(mount_table[k].type, "tmpfs")) {
403af78c 348 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
e83bebef
LP
349 if (r < 0)
350 return log_oom();
351 if (r > 0)
352 o = options;
353 }
354
355 if (mount(mount_table[k].what,
356 where,
357 mount_table[k].type,
358 mount_table[k].flags,
359 o) < 0) {
360
361 if (mount_table[k].fatal)
362 return log_error_errno(errno, "mount(%s) failed: %m", where);
363
364 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
365 }
366 }
367
368 return 0;
369}
370
371static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
372 const char *p = options;
373 unsigned long flags = *mount_flags;
374 char *opts = NULL;
375
376 assert(options);
377
378 for (;;) {
379 _cleanup_free_ char *word = NULL;
380 int r = extract_first_word(&p, &word, ",", 0);
381 if (r < 0)
382 return log_error_errno(r, "Failed to extract mount option: %m");
383 if (r == 0)
384 break;
385
386 if (streq(word, "rbind"))
387 flags |= MS_REC;
388 else if (streq(word, "norbind"))
389 flags &= ~MS_REC;
390 else {
391 log_error("Invalid bind mount option: %s", word);
392 return -EINVAL;
393 }
394 }
395
396 *mount_flags = flags;
397 /* in the future mount_opts will hold string options for mount(2) */
398 *mount_opts = opts;
399
400 return 0;
401}
402
403static int mount_bind(const char *dest, CustomMount *m) {
404 struct stat source_st, dest_st;
405 const char *where;
406 unsigned long mount_flags = MS_BIND | MS_REC;
407 _cleanup_free_ char *mount_opts = NULL;
408 int r;
409
410 assert(m);
411
412 if (m->options) {
413 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
414 if (r < 0)
415 return r;
416 }
417
418 if (stat(m->source, &source_st) < 0)
419 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
420
421 where = prefix_roota(dest, m->destination);
422
423 if (stat(where, &dest_st) >= 0) {
424 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
425 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
426 return -EINVAL;
427 }
428
429 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
430 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
431 return -EINVAL;
432 }
433
434 } else if (errno == ENOENT) {
435 r = mkdir_parents_label(where, 0755);
436 if (r < 0)
437 return log_error_errno(r, "Failed to make parents of %s: %m", where);
438 } else {
439 log_error_errno(errno, "Failed to stat %s: %m", where);
440 return -errno;
441 }
442
443 /* Create the mount point. Any non-directory file can be
444 * mounted on any non-directory file (regular, fifo, socket,
445 * char, block).
446 */
447 if (S_ISDIR(source_st.st_mode))
448 r = mkdir_label(where, 0755);
449 else
450 r = touch(where);
451 if (r < 0 && r != -EEXIST)
452 return log_error_errno(r, "Failed to create mount point %s: %m", where);
453
454 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
455 return log_error_errno(errno, "mount(%s) failed: %m", where);
456
457 if (m->read_only) {
458 r = bind_remount_recursive(where, true);
459 if (r < 0)
460 return log_error_errno(r, "Read-only bind mount failed: %m");
461 }
462
463 return 0;
464}
465
466static int mount_tmpfs(
467 const char *dest,
468 CustomMount *m,
469 bool userns, uid_t uid_shift, uid_t uid_range,
470 const char *selinux_apifs_context) {
471
472 const char *where, *options;
473 _cleanup_free_ char *buf = NULL;
474 int r;
475
476 assert(dest);
477 assert(m);
478
479 where = prefix_roota(dest, m->destination);
480
481 r = mkdir_p_label(where, 0755);
482 if (r < 0 && r != -EEXIST)
483 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
484
485 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
486 if (r < 0)
487 return log_oom();
488 options = r > 0 ? buf : m->options;
489
490 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
491 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
492
493 return 0;
494}
495
496static char *joined_and_escaped_lower_dirs(char * const *lower) {
497 _cleanup_strv_free_ char **sv = NULL;
498
499 sv = strv_copy(lower);
500 if (!sv)
501 return NULL;
502
503 strv_reverse(sv);
504
505 if (!strv_shell_escape(sv, ",:"))
506 return NULL;
507
508 return strv_join(sv, ":");
509}
510
511static int mount_overlay(const char *dest, CustomMount *m) {
512 _cleanup_free_ char *lower = NULL;
513 const char *where, *options;
514 int r;
515
516 assert(dest);
517 assert(m);
518
519 where = prefix_roota(dest, m->destination);
520
521 r = mkdir_label(where, 0755);
522 if (r < 0 && r != -EEXIST)
523 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
524
525 (void) mkdir_p_label(m->source, 0755);
526
527 lower = joined_and_escaped_lower_dirs(m->lower);
528 if (!lower)
529 return log_oom();
530
531 if (m->read_only) {
532 _cleanup_free_ char *escaped_source = NULL;
533
534 escaped_source = shell_escape(m->source, ",:");
535 if (!escaped_source)
536 return log_oom();
537
538 options = strjoina("lowerdir=", escaped_source, ":", lower);
539 } else {
540 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
541
542 assert(m->work_dir);
543 (void) mkdir_label(m->work_dir, 0700);
544
545 escaped_source = shell_escape(m->source, ",:");
546 if (!escaped_source)
547 return log_oom();
548 escaped_work_dir = shell_escape(m->work_dir, ",:");
549 if (!escaped_work_dir)
550 return log_oom();
551
552 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
553 }
554
555 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
556 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
557
558 return 0;
559}
560
561int mount_custom(
562 const char *dest,
563 CustomMount *mounts, unsigned n,
564 bool userns, uid_t uid_shift, uid_t uid_range,
565 const char *selinux_apifs_context) {
566
567 unsigned i;
568 int r;
569
570 assert(dest);
571
572 for (i = 0; i < n; i++) {
573 CustomMount *m = mounts + i;
574
575 switch (m->type) {
576
577 case CUSTOM_MOUNT_BIND:
578 r = mount_bind(dest, m);
579 break;
580
581 case CUSTOM_MOUNT_TMPFS:
582 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
583 break;
584
585 case CUSTOM_MOUNT_OVERLAY:
586 r = mount_overlay(dest, m);
587 break;
588
589 default:
590 assert_not_reached("Unknown custom mount type");
591 }
592
593 if (r < 0)
594 return r;
595 }
596
597 return 0;
598}
599
600static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
601 char *to;
602 int r;
603
ee30f6ac 604 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
e83bebef
LP
605
606 r = path_is_mount_point(to, 0);
607 if (r < 0 && r != -ENOENT)
608 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
609 if (r > 0)
610 return 0;
611
612 mkdir_p(to, 0755);
613
614 /* The superblock mount options of the mount point need to be
615 * identical to the hosts', and hence writable... */
616 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
617 return log_error_errno(errno, "Failed to mount to %s: %m", to);
618
619 /* ... hence let's only make the bind mount read-only, not the
620 * superblock. */
621 if (read_only) {
622 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
623 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
624 }
625 return 1;
626}
627
628static int mount_legacy_cgroups(
629 const char *dest,
630 bool userns, uid_t uid_shift, uid_t uid_range,
631 const char *selinux_apifs_context) {
632
633 _cleanup_set_free_free_ Set *controllers = NULL;
634 const char *cgroup_root;
635 int r;
636
637 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
638
d8fc6a00
LP
639 (void) mkdir_p(cgroup_root, 0755);
640
e83bebef
LP
641 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
642 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
643 if (r < 0)
644 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
645 if (r == 0) {
646 _cleanup_free_ char *options = NULL;
647
648 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
649 if (r < 0)
650 return log_oom();
651
652 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
653 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
654 }
655
656 if (cg_unified() > 0)
657 goto skip_controllers;
658
659 controllers = set_new(&string_hash_ops);
660 if (!controllers)
661 return log_oom();
662
663 r = cg_kernel_controllers(controllers);
664 if (r < 0)
665 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
666
667 for (;;) {
668 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
669
670 controller = set_steal_first(controllers);
671 if (!controller)
672 break;
673
674 origin = prefix_root("/sys/fs/cgroup/", controller);
675 if (!origin)
676 return log_oom();
677
678 r = readlink_malloc(origin, &combined);
679 if (r == -EINVAL) {
680 /* Not a symbolic link, but directly a single cgroup hierarchy */
681
682 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
683 if (r < 0)
684 return r;
685
686 } else if (r < 0)
687 return log_error_errno(r, "Failed to read link %s: %m", origin);
688 else {
689 _cleanup_free_ char *target = NULL;
690
691 target = prefix_root(dest, origin);
692 if (!target)
693 return log_oom();
694
695 /* A symbolic link, a combination of controllers in one hierarchy */
696
697 if (!filename_is_valid(combined)) {
698 log_warning("Ignoring invalid combined hierarchy %s.", combined);
699 continue;
700 }
701
702 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
703 if (r < 0)
704 return r;
705
706 r = symlink_idempotent(combined, target);
707 if (r == -EINVAL) {
708 log_error("Invalid existing symlink for combined hierarchy");
709 return r;
710 }
711 if (r < 0)
712 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
713 }
714 }
715
716skip_controllers:
717 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
718 if (r < 0)
719 return r;
720
721 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
722 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
723
724 return 0;
725}
726
727static int mount_unified_cgroups(const char *dest) {
728 const char *p;
729 int r;
730
731 assert(dest);
732
88e10572
MT
733 p = prefix_roota(dest, "/sys/fs/cgroup");
734
735 (void) mkdir_p(p, 0755);
e83bebef
LP
736
737 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
738 if (r < 0)
739 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
740 if (r > 0) {
88e10572 741 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
e83bebef
LP
742 if (access(p, F_OK) >= 0)
743 return 0;
744 if (errno != ENOENT)
745 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
746
747 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
748 return -EINVAL;
749 }
750
751 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
752 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
753
754 return 0;
755}
756
/* Mounts the cgroup tree underneath dest: the unified hierarchy if unified_requested is
 * true, the legacy per-controller layout otherwise. Returns the result of the selected
 * helper. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
768
769int mount_systemd_cgroup_writable(
770 const char *dest,
771 bool unified_requested) {
772
773 _cleanup_free_ char *own_cgroup_path = NULL;
774 const char *systemd_root, *systemd_own;
775 int r;
776
777 assert(dest);
778
779 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
780 if (r < 0)
781 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
782
783 /* If we are living in the top-level, then there's nothing to do... */
784 if (path_equal(own_cgroup_path, "/"))
785 return 0;
786
787 if (unified_requested) {
788 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
789 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
790 } else {
791 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
792 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
793 }
794
795 /* Make our own cgroup a (writable) bind mount */
796 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
797 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
798
799 /* And then remount the systemd cgroup root read-only */
800 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
801 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
802
803 return 0;
804}
805
806int setup_volatile_state(
807 const char *directory,
808 VolatileMode mode,
809 bool userns, uid_t uid_shift, uid_t uid_range,
810 const char *selinux_apifs_context) {
811
812 _cleanup_free_ char *buf = NULL;
813 const char *p, *options;
814 int r;
815
816 assert(directory);
817
818 if (mode != VOLATILE_STATE)
819 return 0;
820
821 /* --volatile=state means we simply overmount /var
822 with a tmpfs, and the rest read-only. */
823
824 r = bind_remount_recursive(directory, true);
825 if (r < 0)
826 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
827
828 p = prefix_roota(directory, "/var");
829 r = mkdir(p, 0755);
830 if (r < 0 && errno != EEXIST)
831 return log_error_errno(errno, "Failed to create %s: %m", directory);
832
833 options = "mode=755";
834 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
835 if (r < 0)
836 return log_oom();
837 if (r > 0)
838 options = buf;
839
840 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
841 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
842
843 return 0;
844}
845
846int setup_volatile(
847 const char *directory,
848 VolatileMode mode,
849 bool userns, uid_t uid_shift, uid_t uid_range,
850 const char *selinux_apifs_context) {
851
852 bool tmpfs_mounted = false, bind_mounted = false;
853 char template[] = "/tmp/nspawn-volatile-XXXXXX";
854 _cleanup_free_ char *buf = NULL;
855 const char *f, *t, *options;
856 int r;
857
858 assert(directory);
859
860 if (mode != VOLATILE_YES)
861 return 0;
862
863 /* --volatile=yes means we mount a tmpfs to the root dir, and
864 the original /usr to use inside it, and that read-only. */
865
866 if (!mkdtemp(template))
867 return log_error_errno(errno, "Failed to create temporary directory: %m");
868
869 options = "mode=755";
870 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
871 if (r < 0)
872 return log_oom();
873 if (r > 0)
874 options = buf;
875
876 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
877 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
878 goto fail;
879 }
880
881 tmpfs_mounted = true;
882
883 f = prefix_roota(directory, "/usr");
884 t = prefix_roota(template, "/usr");
885
886 r = mkdir(t, 0755);
887 if (r < 0 && errno != EEXIST) {
888 r = log_error_errno(errno, "Failed to create %s: %m", t);
889 goto fail;
890 }
891
892 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
893 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
894 goto fail;
895 }
896
897 bind_mounted = true;
898
899 r = bind_remount_recursive(t, true);
900 if (r < 0) {
901 log_error_errno(r, "Failed to remount %s read-only: %m", t);
902 goto fail;
903 }
904
905 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
906 r = log_error_errno(errno, "Failed to move root mount: %m");
907 goto fail;
908 }
909
910 (void) rmdir(template);
911
912 return 0;
913
914fail:
915 if (bind_mounted)
916 (void) umount(t);
917
918 if (tmpfs_mounted)
919 (void) umount(template);
920 (void) rmdir(template);
921 return r;
922}
923
924VolatileMode volatile_mode_from_string(const char *s) {
925 int b;
926
927 if (isempty(s))
928 return _VOLATILE_MODE_INVALID;
929
930 b = parse_boolean(s);
931 if (b > 0)
932 return VOLATILE_YES;
933 if (b == 0)
934 return VOLATILE_NO;
935
936 if (streq(s, "state"))
937 return VOLATILE_STATE;
938
939 return _VOLATILE_MODE_INVALID;
940}