]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/nspawn/nspawn-mount.c
util-lib: split our string related calls from util.[ch] into its own file string...
[thirdparty/systemd.git] / src / nspawn / nspawn-mount.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2015 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <linux/magic.h>
24
25 #include "cgroup-util.h"
26 #include "escape.h"
27 #include "label.h"
28 #include "mkdir.h"
29 #include "path-util.h"
30 #include "rm-rf.h"
31 #include "set.h"
32 #include "string-util.h"
33 #include "strv.h"
34 #include "util.h"
35 #include "nspawn-mount.h"
36
37 CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
38 CustomMount *c, *ret;
39
40 assert(l);
41 assert(n);
42 assert(t >= 0);
43 assert(t < _CUSTOM_MOUNT_TYPE_MAX);
44
45 c = realloc(*l, (*n + 1) * sizeof(CustomMount));
46 if (!c)
47 return NULL;
48
49 *l = c;
50 ret = *l + *n;
51 (*n)++;
52
53 *ret = (CustomMount) { .type = t };
54
55 return ret;
56 }
57
58 void custom_mount_free_all(CustomMount *l, unsigned n) {
59 unsigned i;
60
61 for (i = 0; i < n; i++) {
62 CustomMount *m = l + i;
63
64 free(m->source);
65 free(m->destination);
66 free(m->options);
67
68 if (m->work_dir) {
69 (void) rm_rf(m->work_dir, REMOVE_ROOT|REMOVE_PHYSICAL);
70 free(m->work_dir);
71 }
72
73 strv_free(m->lower);
74 }
75
76 free(l);
77 }
78
79 int custom_mount_compare(const void *a, const void *b) {
80 const CustomMount *x = a, *y = b;
81 int r;
82
83 r = path_compare(x->destination, y->destination);
84 if (r != 0)
85 return r;
86
87 if (x->type < y->type)
88 return -1;
89 if (x->type > y->type)
90 return 1;
91
92 return 0;
93 }
94
95 int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
96 _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
97 const char *p = s;
98 CustomMount *m;
99 int r;
100
101 assert(l);
102 assert(n);
103
104 r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
105 if (r < 0)
106 return r;
107 if (r == 0)
108 return -EINVAL;
109
110 if (r == 1) {
111 destination = strdup(source);
112 if (!destination)
113 return -ENOMEM;
114 }
115
116 if (r == 2 && !isempty(p)) {
117 opts = strdup(p);
118 if (!opts)
119 return -ENOMEM;
120 }
121
122 if (!path_is_absolute(source))
123 return -EINVAL;
124
125 if (!path_is_absolute(destination))
126 return -EINVAL;
127
128 m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
129 if (!m)
130 return log_oom();
131
132 m->source = source;
133 m->destination = destination;
134 m->read_only = read_only;
135 m->options = opts;
136
137 source = destination = opts = NULL;
138 return 0;
139 }
140
141 int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
142 _cleanup_free_ char *path = NULL, *opts = NULL;
143 const char *p = s;
144 CustomMount *m;
145 int r;
146
147 assert(l);
148 assert(n);
149 assert(s);
150
151 r = extract_first_word(&p, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
152 if (r < 0)
153 return r;
154 if (r == 0)
155 return -EINVAL;
156
157 if (isempty(p))
158 opts = strdup("mode=0755");
159 else
160 opts = strdup(p);
161 if (!opts)
162 return -ENOMEM;
163
164 if (!path_is_absolute(path))
165 return -EINVAL;
166
167 m = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
168 if (!m)
169 return -ENOMEM;
170
171 m->destination = path;
172 m->options = opts;
173
174 path = opts = NULL;
175 return 0;
176 }
177
178 static int tmpfs_patch_options(
179 const char *options,
180 bool userns, uid_t uid_shift, uid_t uid_range,
181 const char *selinux_apifs_context,
182 char **ret) {
183
184 char *buf = NULL;
185
186 if (userns && uid_shift != 0) {
187 assert(uid_shift != UID_INVALID);
188
189 if (options)
190 (void) asprintf(&buf, "%s,uid=" UID_FMT ",gid=" UID_FMT, options, uid_shift, uid_shift);
191 else
192 (void) asprintf(&buf, "uid=" UID_FMT ",gid=" UID_FMT, uid_shift, uid_shift);
193 if (!buf)
194 return -ENOMEM;
195
196 options = buf;
197 }
198
199 #ifdef HAVE_SELINUX
200 if (selinux_apifs_context) {
201 char *t;
202
203 if (options)
204 t = strjoin(options, ",context=\"", selinux_apifs_context, "\"", NULL);
205 else
206 t = strjoin("context=\"", selinux_apifs_context, "\"", NULL);
207 if (!t) {
208 free(buf);
209 return -ENOMEM;
210 }
211
212 free(buf);
213 buf = t;
214 }
215 #endif
216
217 *ret = buf;
218 return !!buf;
219 }
220
221 int mount_sysfs(const char *dest) {
222 const char *full, *top, *x;
223 int r;
224
225 top = prefix_roota(dest, "/sys");
226 r = path_check_fstype(top, SYSFS_MAGIC);
227 if (r < 0)
228 return log_error_errno(r, "Failed to determine filesystem type of %s: %m", top);
229 /* /sys might already be mounted as sysfs by the outer child in the
230 * !netns case. In this case, it's all good. Don't touch it because we
231 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
232 */
233 if (r > 0)
234 return 0;
235
236 full = prefix_roota(top, "/full");
237
238 (void) mkdir(full, 0755);
239
240 if (mount("sysfs", full, "sysfs", MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) < 0)
241 return log_error_errno(errno, "Failed to mount sysfs to %s: %m", full);
242
243 FOREACH_STRING(x, "block", "bus", "class", "dev", "devices", "kernel") {
244 _cleanup_free_ char *from = NULL, *to = NULL;
245
246 from = prefix_root(full, x);
247 if (!from)
248 return log_oom();
249
250 to = prefix_root(top, x);
251 if (!to)
252 return log_oom();
253
254 (void) mkdir(to, 0755);
255
256 if (mount(from, to, NULL, MS_BIND, NULL) < 0)
257 return log_error_errno(errno, "Failed to mount /sys/%s into place: %m", x);
258
259 if (mount(NULL, to, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
260 return log_error_errno(errno, "Failed to mount /sys/%s read-only: %m", x);
261 }
262
263 if (umount(full) < 0)
264 return log_error_errno(errno, "Failed to unmount %s: %m", full);
265
266 if (rmdir(full) < 0)
267 return log_error_errno(errno, "Failed to remove %s: %m", full);
268
269 x = prefix_roota(top, "/fs/kdbus");
270 (void) mkdir(x, 0755);
271
272 if (mount(NULL, top, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, NULL) < 0)
273 return log_error_errno(errno, "Failed to make %s read-only: %m", top);
274
275 return 0;
276 }
277
278 int mount_all(const char *dest,
279 bool use_userns, bool in_userns,
280 bool use_netns,
281 uid_t uid_shift, uid_t uid_range,
282 const char *selinux_apifs_context) {
283
284 typedef struct MountPoint {
285 const char *what;
286 const char *where;
287 const char *type;
288 const char *options;
289 unsigned long flags;
290 bool fatal;
291 bool in_userns;
292 bool use_netns;
293 } MountPoint;
294
295 static const MountPoint mount_table[] = {
296 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true, true, false },
297 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true, true, false }, /* Bind mount first */
298 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true, true, false }, /* Then, make it r/o */
299 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, true },
300 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true, false, false },
301 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true, false, false },
302 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
303 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true, false, false },
304 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME, true, false, false },
305 #ifdef HAVE_SELINUX
306 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false, false, false }, /* Bind mount first */
307 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, false, false, false }, /* Then, make it r/o */
308 #endif
309 };
310
311 unsigned k;
312 int r;
313
314 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
315 _cleanup_free_ char *where = NULL, *options = NULL;
316 const char *o;
317
318 if (in_userns != mount_table[k].in_userns)
319 continue;
320
321 if (!use_netns && mount_table[k].use_netns)
322 continue;
323
324 where = prefix_root(dest, mount_table[k].where);
325 if (!where)
326 return log_oom();
327
328 r = path_is_mount_point(where, AT_SYMLINK_FOLLOW);
329 if (r < 0 && r != -ENOENT)
330 return log_error_errno(r, "Failed to detect whether %s is a mount point: %m", where);
331
332 /* Skip this entry if it is not a remount. */
333 if (mount_table[k].what && r > 0)
334 continue;
335
336 r = mkdir_p(where, 0755);
337 if (r < 0) {
338 if (mount_table[k].fatal)
339 return log_error_errno(r, "Failed to create directory %s: %m", where);
340
341 log_warning_errno(r, "Failed to create directory %s: %m", where);
342 continue;
343 }
344
345 o = mount_table[k].options;
346 if (streq_ptr(mount_table[k].type, "tmpfs")) {
347 r = tmpfs_patch_options(o, use_userns, uid_shift, uid_range, selinux_apifs_context, &options);
348 if (r < 0)
349 return log_oom();
350 if (r > 0)
351 o = options;
352 }
353
354 if (mount(mount_table[k].what,
355 where,
356 mount_table[k].type,
357 mount_table[k].flags,
358 o) < 0) {
359
360 if (mount_table[k].fatal)
361 return log_error_errno(errno, "mount(%s) failed: %m", where);
362
363 log_warning_errno(errno, "mount(%s) failed, ignoring: %m", where);
364 }
365 }
366
367 return 0;
368 }
369
370 static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
371 const char *p = options;
372 unsigned long flags = *mount_flags;
373 char *opts = NULL;
374
375 assert(options);
376
377 for (;;) {
378 _cleanup_free_ char *word = NULL;
379 int r = extract_first_word(&p, &word, ",", 0);
380 if (r < 0)
381 return log_error_errno(r, "Failed to extract mount option: %m");
382 if (r == 0)
383 break;
384
385 if (streq(word, "rbind"))
386 flags |= MS_REC;
387 else if (streq(word, "norbind"))
388 flags &= ~MS_REC;
389 else {
390 log_error("Invalid bind mount option: %s", word);
391 return -EINVAL;
392 }
393 }
394
395 *mount_flags = flags;
396 /* in the future mount_opts will hold string options for mount(2) */
397 *mount_opts = opts;
398
399 return 0;
400 }
401
402 static int mount_bind(const char *dest, CustomMount *m) {
403 struct stat source_st, dest_st;
404 const char *where;
405 unsigned long mount_flags = MS_BIND | MS_REC;
406 _cleanup_free_ char *mount_opts = NULL;
407 int r;
408
409 assert(m);
410
411 if (m->options) {
412 r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
413 if (r < 0)
414 return r;
415 }
416
417 if (stat(m->source, &source_st) < 0)
418 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
419
420 where = prefix_roota(dest, m->destination);
421
422 if (stat(where, &dest_st) >= 0) {
423 if (S_ISDIR(source_st.st_mode) && !S_ISDIR(dest_st.st_mode)) {
424 log_error("Cannot bind mount directory %s on file %s.", m->source, where);
425 return -EINVAL;
426 }
427
428 if (!S_ISDIR(source_st.st_mode) && S_ISDIR(dest_st.st_mode)) {
429 log_error("Cannot bind mount file %s on directory %s.", m->source, where);
430 return -EINVAL;
431 }
432
433 } else if (errno == ENOENT) {
434 r = mkdir_parents_label(where, 0755);
435 if (r < 0)
436 return log_error_errno(r, "Failed to make parents of %s: %m", where);
437 } else {
438 log_error_errno(errno, "Failed to stat %s: %m", where);
439 return -errno;
440 }
441
442 /* Create the mount point. Any non-directory file can be
443 * mounted on any non-directory file (regular, fifo, socket,
444 * char, block).
445 */
446 if (S_ISDIR(source_st.st_mode))
447 r = mkdir_label(where, 0755);
448 else
449 r = touch(where);
450 if (r < 0 && r != -EEXIST)
451 return log_error_errno(r, "Failed to create mount point %s: %m", where);
452
453 if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
454 return log_error_errno(errno, "mount(%s) failed: %m", where);
455
456 if (m->read_only) {
457 r = bind_remount_recursive(where, true);
458 if (r < 0)
459 return log_error_errno(r, "Read-only bind mount failed: %m");
460 }
461
462 return 0;
463 }
464
465 static int mount_tmpfs(
466 const char *dest,
467 CustomMount *m,
468 bool userns, uid_t uid_shift, uid_t uid_range,
469 const char *selinux_apifs_context) {
470
471 const char *where, *options;
472 _cleanup_free_ char *buf = NULL;
473 int r;
474
475 assert(dest);
476 assert(m);
477
478 where = prefix_roota(dest, m->destination);
479
480 r = mkdir_p_label(where, 0755);
481 if (r < 0 && r != -EEXIST)
482 return log_error_errno(r, "Creating mount point for tmpfs %s failed: %m", where);
483
484 r = tmpfs_patch_options(m->options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
485 if (r < 0)
486 return log_oom();
487 options = r > 0 ? buf : m->options;
488
489 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, options) < 0)
490 return log_error_errno(errno, "tmpfs mount to %s failed: %m", where);
491
492 return 0;
493 }
494
495 static char *joined_and_escaped_lower_dirs(char * const *lower) {
496 _cleanup_strv_free_ char **sv = NULL;
497
498 sv = strv_copy(lower);
499 if (!sv)
500 return NULL;
501
502 strv_reverse(sv);
503
504 if (!strv_shell_escape(sv, ",:"))
505 return NULL;
506
507 return strv_join(sv, ":");
508 }
509
510 static int mount_overlay(const char *dest, CustomMount *m) {
511 _cleanup_free_ char *lower = NULL;
512 const char *where, *options;
513 int r;
514
515 assert(dest);
516 assert(m);
517
518 where = prefix_roota(dest, m->destination);
519
520 r = mkdir_label(where, 0755);
521 if (r < 0 && r != -EEXIST)
522 return log_error_errno(r, "Creating mount point for overlay %s failed: %m", where);
523
524 (void) mkdir_p_label(m->source, 0755);
525
526 lower = joined_and_escaped_lower_dirs(m->lower);
527 if (!lower)
528 return log_oom();
529
530 if (m->read_only) {
531 _cleanup_free_ char *escaped_source = NULL;
532
533 escaped_source = shell_escape(m->source, ",:");
534 if (!escaped_source)
535 return log_oom();
536
537 options = strjoina("lowerdir=", escaped_source, ":", lower);
538 } else {
539 _cleanup_free_ char *escaped_source = NULL, *escaped_work_dir = NULL;
540
541 assert(m->work_dir);
542 (void) mkdir_label(m->work_dir, 0700);
543
544 escaped_source = shell_escape(m->source, ",:");
545 if (!escaped_source)
546 return log_oom();
547 escaped_work_dir = shell_escape(m->work_dir, ",:");
548 if (!escaped_work_dir)
549 return log_oom();
550
551 options = strjoina("lowerdir=", lower, ",upperdir=", escaped_source, ",workdir=", escaped_work_dir);
552 }
553
554 if (mount("overlay", where, "overlay", m->read_only ? MS_RDONLY : 0, options) < 0)
555 return log_error_errno(errno, "overlay mount to %s failed: %m", where);
556
557 return 0;
558 }
559
560 int mount_custom(
561 const char *dest,
562 CustomMount *mounts, unsigned n,
563 bool userns, uid_t uid_shift, uid_t uid_range,
564 const char *selinux_apifs_context) {
565
566 unsigned i;
567 int r;
568
569 assert(dest);
570
571 for (i = 0; i < n; i++) {
572 CustomMount *m = mounts + i;
573
574 switch (m->type) {
575
576 case CUSTOM_MOUNT_BIND:
577 r = mount_bind(dest, m);
578 break;
579
580 case CUSTOM_MOUNT_TMPFS:
581 r = mount_tmpfs(dest, m, userns, uid_shift, uid_range, selinux_apifs_context);
582 break;
583
584 case CUSTOM_MOUNT_OVERLAY:
585 r = mount_overlay(dest, m);
586 break;
587
588 default:
589 assert_not_reached("Unknown custom mount type");
590 }
591
592 if (r < 0)
593 return r;
594 }
595
596 return 0;
597 }
598
599 static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
600 char *to;
601 int r;
602
603 to = strjoina(strempty(dest), "/sys/fs/cgroup/", hierarchy);
604
605 r = path_is_mount_point(to, 0);
606 if (r < 0 && r != -ENOENT)
607 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", to);
608 if (r > 0)
609 return 0;
610
611 mkdir_p(to, 0755);
612
613 /* The superblock mount options of the mount point need to be
614 * identical to the hosts', and hence writable... */
615 if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0)
616 return log_error_errno(errno, "Failed to mount to %s: %m", to);
617
618 /* ... hence let's only make the bind mount read-only, not the
619 * superblock. */
620 if (read_only) {
621 if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
622 return log_error_errno(errno, "Failed to remount %s read-only: %m", to);
623 }
624 return 1;
625 }
626
627 static int mount_legacy_cgroups(
628 const char *dest,
629 bool userns, uid_t uid_shift, uid_t uid_range,
630 const char *selinux_apifs_context) {
631
632 _cleanup_set_free_free_ Set *controllers = NULL;
633 const char *cgroup_root;
634 int r;
635
636 cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
637
638 (void) mkdir_p(cgroup_root, 0755);
639
640 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
641 r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
642 if (r < 0)
643 return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
644 if (r == 0) {
645 _cleanup_free_ char *options = NULL;
646
647 r = tmpfs_patch_options("mode=755", userns, uid_shift, uid_range, selinux_apifs_context, &options);
648 if (r < 0)
649 return log_oom();
650
651 if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
652 return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
653 }
654
655 if (cg_unified() > 0)
656 goto skip_controllers;
657
658 controllers = set_new(&string_hash_ops);
659 if (!controllers)
660 return log_oom();
661
662 r = cg_kernel_controllers(controllers);
663 if (r < 0)
664 return log_error_errno(r, "Failed to determine cgroup controllers: %m");
665
666 for (;;) {
667 _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL;
668
669 controller = set_steal_first(controllers);
670 if (!controller)
671 break;
672
673 origin = prefix_root("/sys/fs/cgroup/", controller);
674 if (!origin)
675 return log_oom();
676
677 r = readlink_malloc(origin, &combined);
678 if (r == -EINVAL) {
679 /* Not a symbolic link, but directly a single cgroup hierarchy */
680
681 r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
682 if (r < 0)
683 return r;
684
685 } else if (r < 0)
686 return log_error_errno(r, "Failed to read link %s: %m", origin);
687 else {
688 _cleanup_free_ char *target = NULL;
689
690 target = prefix_root(dest, origin);
691 if (!target)
692 return log_oom();
693
694 /* A symbolic link, a combination of controllers in one hierarchy */
695
696 if (!filename_is_valid(combined)) {
697 log_warning("Ignoring invalid combined hierarchy %s.", combined);
698 continue;
699 }
700
701 r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
702 if (r < 0)
703 return r;
704
705 r = symlink_idempotent(combined, target);
706 if (r == -EINVAL) {
707 log_error("Invalid existing symlink for combined hierarchy");
708 return r;
709 }
710 if (r < 0)
711 return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m");
712 }
713 }
714
715 skip_controllers:
716 r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
717 if (r < 0)
718 return r;
719
720 if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
721 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
722
723 return 0;
724 }
725
726 static int mount_unified_cgroups(const char *dest) {
727 const char *p;
728 int r;
729
730 assert(dest);
731
732 p = prefix_roota(dest, "/sys/fs/cgroup");
733
734 (void) mkdir_p(p, 0755);
735
736 r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
737 if (r < 0)
738 return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
739 if (r > 0) {
740 p = prefix_roota(dest, "/sys/fs/cgroup/cgroup.procs");
741 if (access(p, F_OK) >= 0)
742 return 0;
743 if (errno != ENOENT)
744 return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
745
746 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
747 return -EINVAL;
748 }
749
750 if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
751 return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
752
753 return 0;
754 }
755
/* Sets up the cgroup tree below dest: the unified hierarchy if
 * unified_requested is true, the legacy hierarchies otherwise. Returns
 * whatever the selected helper returns. */
int mount_cgroups(
                const char *dest,
                bool unified_requested,
                bool userns, uid_t uid_shift, uid_t uid_range,
                const char *selinux_apifs_context) {

        return unified_requested
                ? mount_unified_cgroups(dest)
                : mount_legacy_cgroups(dest, userns, uid_shift, uid_range, selinux_apifs_context);
}
767
768 int mount_systemd_cgroup_writable(
769 const char *dest,
770 bool unified_requested) {
771
772 _cleanup_free_ char *own_cgroup_path = NULL;
773 const char *systemd_root, *systemd_own;
774 int r;
775
776 assert(dest);
777
778 r = cg_pid_get_path(NULL, 0, &own_cgroup_path);
779 if (r < 0)
780 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
781
782 /* If we are living in the top-level, then there's nothing to do... */
783 if (path_equal(own_cgroup_path, "/"))
784 return 0;
785
786 if (unified_requested) {
787 systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
788 systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
789 } else {
790 systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
791 systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
792 }
793
794 /* Make our own cgroup a (writable) bind mount */
795 if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0)
796 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
797
798 /* And then remount the systemd cgroup root read-only */
799 if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
800 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
801
802 return 0;
803 }
804
805 int setup_volatile_state(
806 const char *directory,
807 VolatileMode mode,
808 bool userns, uid_t uid_shift, uid_t uid_range,
809 const char *selinux_apifs_context) {
810
811 _cleanup_free_ char *buf = NULL;
812 const char *p, *options;
813 int r;
814
815 assert(directory);
816
817 if (mode != VOLATILE_STATE)
818 return 0;
819
820 /* --volatile=state means we simply overmount /var
821 with a tmpfs, and the rest read-only. */
822
823 r = bind_remount_recursive(directory, true);
824 if (r < 0)
825 return log_error_errno(r, "Failed to remount %s read-only: %m", directory);
826
827 p = prefix_roota(directory, "/var");
828 r = mkdir(p, 0755);
829 if (r < 0 && errno != EEXIST)
830 return log_error_errno(errno, "Failed to create %s: %m", directory);
831
832 options = "mode=755";
833 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
834 if (r < 0)
835 return log_oom();
836 if (r > 0)
837 options = buf;
838
839 if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, options) < 0)
840 return log_error_errno(errno, "Failed to mount tmpfs to /var: %m");
841
842 return 0;
843 }
844
845 int setup_volatile(
846 const char *directory,
847 VolatileMode mode,
848 bool userns, uid_t uid_shift, uid_t uid_range,
849 const char *selinux_apifs_context) {
850
851 bool tmpfs_mounted = false, bind_mounted = false;
852 char template[] = "/tmp/nspawn-volatile-XXXXXX";
853 _cleanup_free_ char *buf = NULL;
854 const char *f, *t, *options;
855 int r;
856
857 assert(directory);
858
859 if (mode != VOLATILE_YES)
860 return 0;
861
862 /* --volatile=yes means we mount a tmpfs to the root dir, and
863 the original /usr to use inside it, and that read-only. */
864
865 if (!mkdtemp(template))
866 return log_error_errno(errno, "Failed to create temporary directory: %m");
867
868 options = "mode=755";
869 r = tmpfs_patch_options(options, userns, uid_shift, uid_range, selinux_apifs_context, &buf);
870 if (r < 0)
871 return log_oom();
872 if (r > 0)
873 options = buf;
874
875 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME, options) < 0) {
876 r = log_error_errno(errno, "Failed to mount tmpfs for root directory: %m");
877 goto fail;
878 }
879
880 tmpfs_mounted = true;
881
882 f = prefix_roota(directory, "/usr");
883 t = prefix_roota(template, "/usr");
884
885 r = mkdir(t, 0755);
886 if (r < 0 && errno != EEXIST) {
887 r = log_error_errno(errno, "Failed to create %s: %m", t);
888 goto fail;
889 }
890
891 if (mount(f, t, NULL, MS_BIND|MS_REC, NULL) < 0) {
892 r = log_error_errno(errno, "Failed to create /usr bind mount: %m");
893 goto fail;
894 }
895
896 bind_mounted = true;
897
898 r = bind_remount_recursive(t, true);
899 if (r < 0) {
900 log_error_errno(r, "Failed to remount %s read-only: %m", t);
901 goto fail;
902 }
903
904 if (mount(template, directory, NULL, MS_MOVE, NULL) < 0) {
905 r = log_error_errno(errno, "Failed to move root mount: %m");
906 goto fail;
907 }
908
909 (void) rmdir(template);
910
911 return 0;
912
913 fail:
914 if (bind_mounted)
915 (void) umount(t);
916
917 if (tmpfs_mounted)
918 (void) umount(template);
919 (void) rmdir(template);
920 return r;
921 }
922
923 VolatileMode volatile_mode_from_string(const char *s) {
924 int b;
925
926 if (isempty(s))
927 return _VOLATILE_MODE_INVALID;
928
929 b = parse_boolean(s);
930 if (b > 0)
931 return VOLATILE_YES;
932 if (b == 0)
933 return VOLATILE_NO;
934
935 if (streq(s, "state"))
936 return VOLATILE_STATE;
937
938 return _VOLATILE_MODE_INVALID;
939 }