2 This file is part of systemd.
4 Copyright 2015 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 #include <sys/mount.h>
21 #include <linux/magic.h>
23 #include "alloc-util.h"
24 #include "cgroup-util.h"
31 #include "mount-util.h"
32 #include "nspawn-mount.h"
33 #include "parse-util.h"
34 #include "path-util.h"
37 #include "stat-util.h"
38 #include "string-util.h"
40 #include "user-util.h"
43 CustomMount
* custom_mount_add(CustomMount
**l
, unsigned *n
, CustomMountType t
) {
49 assert(t
< _CUSTOM_MOUNT_TYPE_MAX
);
51 c
= realloc(*l
, (*n
+ 1) * sizeof(CustomMount
));
59 *ret
= (CustomMount
) { .type
= t
};
64 void custom_mount_free_all(CustomMount
*l
, unsigned n
) {
67 for (i
= 0; i
< n
; i
++) {
68 CustomMount
*m
= l
+ i
;
75 (void) rm_rf(m
->work_dir
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
85 int custom_mount_compare(const void *a
, const void *b
) {
86 const CustomMount
*x
= a
, *y
= b
;
89 r
= path_compare(x
->destination
, y
->destination
);
93 if (x
->type
< y
->type
)
95 if (x
->type
> y
->type
)
101 int bind_mount_parse(CustomMount
**l
, unsigned *n
, const char *s
, bool read_only
) {
102 _cleanup_free_
char *source
= NULL
, *destination
= NULL
, *opts
= NULL
;
110 r
= extract_many_words(&p
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
, &source
, &destination
, NULL
);
117 destination
= strdup(source
);
122 if (r
== 2 && !isempty(p
)) {
128 if (!path_is_absolute(source
))
131 if (!path_is_absolute(destination
))
134 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_BIND
);
139 m
->destination
= destination
;
140 m
->read_only
= read_only
;
143 source
= destination
= opts
= NULL
;
147 int tmpfs_mount_parse(CustomMount
**l
, unsigned *n
, const char *s
) {
148 _cleanup_free_
char *path
= NULL
, *opts
= NULL
;
157 r
= extract_first_word(&p
, &path
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
);
164 opts
= strdup("mode=0755");
170 if (!path_is_absolute(path
))
173 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_TMPFS
);
177 m
->destination
= path
;
184 static int tmpfs_patch_options(
187 uid_t uid_shift
, uid_t uid_range
,
189 const char *selinux_apifs_context
,
194 if ((userns
&& uid_shift
!= 0) || patch_ids
) {
195 assert(uid_shift
!= UID_INVALID
);
198 (void) asprintf(&buf
, "%s,uid=" UID_FMT
",gid=" UID_FMT
, options
, uid_shift
, uid_shift
);
200 (void) asprintf(&buf
, "uid=" UID_FMT
",gid=" UID_FMT
, uid_shift
, uid_shift
);
208 if (selinux_apifs_context
) {
212 t
= strjoin(options
, ",context=\"", selinux_apifs_context
, "\"", NULL
);
214 t
= strjoin("context=\"", selinux_apifs_context
, "\"", NULL
);
225 if (!buf
&& options
) {
226 buf
= strdup(options
);
235 int mount_sysfs(const char *dest
) {
236 const char *full
, *top
, *x
;
239 top
= prefix_roota(dest
, "/sys");
240 r
= path_check_fstype(top
, SYSFS_MAGIC
);
242 return log_error_errno(r
, "Failed to determine filesystem type of %s: %m", top
);
243 /* /sys might already be mounted as sysfs by the outer child in the
244 * !netns case. In this case, it's all good. Don't touch it because we
245 * don't have the right to do so, see https://github.com/systemd/systemd/issues/1555.
250 full
= prefix_roota(top
, "/full");
252 (void) mkdir(full
, 0755);
254 if (mount("sysfs", full
, "sysfs", MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, NULL
) < 0)
255 return log_error_errno(errno
, "Failed to mount sysfs to %s: %m", full
);
257 FOREACH_STRING(x
, "block", "bus", "class", "dev", "devices", "kernel") {
258 _cleanup_free_
char *from
= NULL
, *to
= NULL
;
260 from
= prefix_root(full
, x
);
264 to
= prefix_root(top
, x
);
268 (void) mkdir(to
, 0755);
270 if (mount(from
, to
, NULL
, MS_BIND
, NULL
) < 0)
271 return log_error_errno(errno
, "Failed to mount /sys/%s into place: %m", x
);
273 if (mount(NULL
, to
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
, NULL
) < 0)
274 return log_error_errno(errno
, "Failed to mount /sys/%s read-only: %m", x
);
277 if (umount(full
) < 0)
278 return log_error_errno(errno
, "Failed to unmount %s: %m", full
);
281 return log_error_errno(errno
, "Failed to remove %s: %m", full
);
283 x
= prefix_roota(top
, "/fs/kdbus");
284 (void) mkdir_p(x
, 0755);
286 /* Create mountpoint for cgroups. Otherwise we are not allowed since we
287 * remount /sys read-only.
289 if (cg_ns_supported()) {
290 x
= prefix_roota(top
, "/fs/cgroup");
291 (void) mkdir_p(x
, 0755);
294 if (mount(NULL
, top
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
, NULL
) < 0)
295 return log_error_errno(errno
, "Failed to make %s read-only: %m", top
);
300 int mount_all(const char *dest
,
301 bool use_userns
, bool in_userns
,
303 uid_t uid_shift
, uid_t uid_range
,
304 const char *selinux_apifs_context
) {
306 typedef struct MountPoint
{
317 static const MountPoint mount_table
[] = {
318 { "proc", "/proc", "proc", NULL
, MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, true, true, false },
319 { "/proc/sys", "/proc/sys", NULL
, NULL
, MS_BIND
, true, true, false }, /* Bind mount first ...*/
320 { "/proc/sys/net", "/proc/sys/net", NULL
, NULL
, MS_BIND
, true, true, true }, /* (except for this) */
321 { NULL
, "/proc/sys", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
, true, true, false }, /* ... then, make it r/o */
322 { "tmpfs", "/sys", "tmpfs", "mode=755", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, true, false, true },
323 { "sysfs", "/sys", "sysfs", NULL
, MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, true, false, false },
324 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID
|MS_STRICTATIME
, true, false, false },
325 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID
|MS_NODEV
|MS_STRICTATIME
, true, false, false },
326 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID
|MS_NODEV
|MS_STRICTATIME
, true, false, false },
327 { "tmpfs", "/tmp", "tmpfs", "mode=1777", MS_STRICTATIME
, true, false, false },
329 { "/sys/fs/selinux", "/sys/fs/selinux", NULL
, NULL
, MS_BIND
, false, false, false }, /* Bind mount first */
330 { NULL
, "/sys/fs/selinux", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
, false, false, false }, /* Then, make it r/o */
337 for (k
= 0; k
< ELEMENTSOF(mount_table
); k
++) {
338 _cleanup_free_
char *where
= NULL
, *options
= NULL
;
341 if (in_userns
!= mount_table
[k
].in_userns
)
344 if (!use_netns
&& mount_table
[k
].use_netns
)
347 where
= prefix_root(dest
, mount_table
[k
].where
);
351 r
= path_is_mount_point(where
, AT_SYMLINK_FOLLOW
);
352 if (r
< 0 && r
!= -ENOENT
)
353 return log_error_errno(r
, "Failed to detect whether %s is a mount point: %m", where
);
355 /* Skip this entry if it is not a remount. */
356 if (mount_table
[k
].what
&& r
> 0)
359 r
= mkdir_p(where
, 0755);
361 if (mount_table
[k
].fatal
)
362 return log_error_errno(r
, "Failed to create directory %s: %m", where
);
364 log_debug_errno(r
, "Failed to create directory %s: %m", where
);
368 o
= mount_table
[k
].options
;
369 if (streq_ptr(mount_table
[k
].type
, "tmpfs")) {
370 r
= tmpfs_patch_options(o
, use_userns
, uid_shift
, uid_range
, false, selinux_apifs_context
, &options
);
377 if (mount(mount_table
[k
].what
,
380 mount_table
[k
].flags
,
383 if (mount_table
[k
].fatal
)
384 return log_error_errno(errno
, "mount(%s) failed: %m", where
);
386 log_warning_errno(errno
, "mount(%s) failed, ignoring: %m", where
);
393 static int parse_mount_bind_options(const char *options
, unsigned long *mount_flags
, char **mount_opts
) {
394 const char *p
= options
;
395 unsigned long flags
= *mount_flags
;
401 _cleanup_free_
char *word
= NULL
;
402 int r
= extract_first_word(&p
, &word
, ",", 0);
404 return log_error_errno(r
, "Failed to extract mount option: %m");
408 if (streq(word
, "rbind"))
410 else if (streq(word
, "norbind"))
413 log_error("Invalid bind mount option: %s", word
);
418 *mount_flags
= flags
;
419 /* in the future mount_opts will hold string options for mount(2) */
425 static int mount_bind(const char *dest
, CustomMount
*m
) {
426 struct stat source_st
, dest_st
;
428 unsigned long mount_flags
= MS_BIND
| MS_REC
;
429 _cleanup_free_
char *mount_opts
= NULL
;
435 r
= parse_mount_bind_options(m
->options
, &mount_flags
, &mount_opts
);
440 if (stat(m
->source
, &source_st
) < 0)
441 return log_error_errno(errno
, "Failed to stat %s: %m", m
->source
);
443 where
= prefix_roota(dest
, m
->destination
);
445 if (stat(where
, &dest_st
) >= 0) {
446 if (S_ISDIR(source_st
.st_mode
) && !S_ISDIR(dest_st
.st_mode
)) {
447 log_error("Cannot bind mount directory %s on file %s.", m
->source
, where
);
451 if (!S_ISDIR(source_st
.st_mode
) && S_ISDIR(dest_st
.st_mode
)) {
452 log_error("Cannot bind mount file %s on directory %s.", m
->source
, where
);
456 } else if (errno
== ENOENT
) {
457 r
= mkdir_parents_label(where
, 0755);
459 return log_error_errno(r
, "Failed to make parents of %s: %m", where
);
461 /* Create the mount point. Any non-directory file can be
462 * mounted on any non-directory file (regular, fifo, socket,
465 if (S_ISDIR(source_st
.st_mode
))
466 r
= mkdir_label(where
, 0755);
470 return log_error_errno(r
, "Failed to create mount point %s: %m", where
);
473 return log_error_errno(errno
, "Failed to stat %s: %m", where
);
476 if (mount(m
->source
, where
, NULL
, mount_flags
, mount_opts
) < 0)
477 return log_error_errno(errno
, "mount(%s) failed: %m", where
);
480 r
= bind_remount_recursive(where
, true);
482 return log_error_errno(r
, "Read-only bind mount failed: %m");
488 static int mount_tmpfs(
491 bool userns
, uid_t uid_shift
, uid_t uid_range
,
492 const char *selinux_apifs_context
) {
494 const char *where
, *options
;
495 _cleanup_free_
char *buf
= NULL
;
501 where
= prefix_roota(dest
, m
->destination
);
503 r
= mkdir_p_label(where
, 0755);
504 if (r
< 0 && r
!= -EEXIST
)
505 return log_error_errno(r
, "Creating mount point for tmpfs %s failed: %m", where
);
507 r
= tmpfs_patch_options(m
->options
, userns
, uid_shift
, uid_range
, false, selinux_apifs_context
, &buf
);
510 options
= r
> 0 ? buf
: m
->options
;
512 if (mount("tmpfs", where
, "tmpfs", MS_NODEV
|MS_STRICTATIME
, options
) < 0)
513 return log_error_errno(errno
, "tmpfs mount to %s failed: %m", where
);
518 static char *joined_and_escaped_lower_dirs(char * const *lower
) {
519 _cleanup_strv_free_
char **sv
= NULL
;
521 sv
= strv_copy(lower
);
527 if (!strv_shell_escape(sv
, ",:"))
530 return strv_join(sv
, ":");
533 static int mount_overlay(const char *dest
, CustomMount
*m
) {
534 _cleanup_free_
char *lower
= NULL
;
535 const char *where
, *options
;
541 where
= prefix_roota(dest
, m
->destination
);
543 r
= mkdir_label(where
, 0755);
544 if (r
< 0 && r
!= -EEXIST
)
545 return log_error_errno(r
, "Creating mount point for overlay %s failed: %m", where
);
547 (void) mkdir_p_label(m
->source
, 0755);
549 lower
= joined_and_escaped_lower_dirs(m
->lower
);
554 _cleanup_free_
char *escaped_source
= NULL
;
556 escaped_source
= shell_escape(m
->source
, ",:");
560 options
= strjoina("lowerdir=", escaped_source
, ":", lower
);
562 _cleanup_free_
char *escaped_source
= NULL
, *escaped_work_dir
= NULL
;
565 (void) mkdir_label(m
->work_dir
, 0700);
567 escaped_source
= shell_escape(m
->source
, ",:");
570 escaped_work_dir
= shell_escape(m
->work_dir
, ",:");
571 if (!escaped_work_dir
)
574 options
= strjoina("lowerdir=", lower
, ",upperdir=", escaped_source
, ",workdir=", escaped_work_dir
);
577 if (mount("overlay", where
, "overlay", m
->read_only
? MS_RDONLY
: 0, options
) < 0)
578 return log_error_errno(errno
, "overlay mount to %s failed: %m", where
);
585 CustomMount
*mounts
, unsigned n
,
586 bool userns
, uid_t uid_shift
, uid_t uid_range
,
587 const char *selinux_apifs_context
) {
594 for (i
= 0; i
< n
; i
++) {
595 CustomMount
*m
= mounts
+ i
;
599 case CUSTOM_MOUNT_BIND
:
600 r
= mount_bind(dest
, m
);
603 case CUSTOM_MOUNT_TMPFS
:
604 r
= mount_tmpfs(dest
, m
, userns
, uid_shift
, uid_range
, selinux_apifs_context
);
607 case CUSTOM_MOUNT_OVERLAY
:
608 r
= mount_overlay(dest
, m
);
612 assert_not_reached("Unknown custom mount type");
622 /* Retrieve existing subsystems. This function is called in a new cgroup
625 static int get_controllers(Set
*subsystems
) {
626 _cleanup_fclose_
FILE *f
= NULL
;
631 f
= fopen("/proc/self/cgroup", "re");
633 return errno
== ENOENT
? -ESRCH
: -errno
;
635 FOREACH_LINE(line
, f
, return -errno
) {
641 l
= strchr(line
, ':');
652 if (streq(l
, "") || streq(l
, "name=systemd"))
656 r
= set_consume(subsystems
, p
);
664 static int mount_legacy_cgroup_hierarchy(const char *dest
, const char *controller
, const char *hierarchy
, bool read_only
) {
668 to
= strjoina(strempty(dest
), "/sys/fs/cgroup/", hierarchy
);
670 r
= path_is_mount_point(to
, 0);
671 if (r
< 0 && r
!= -ENOENT
)
672 return log_error_errno(r
, "Failed to determine if %s is mounted already: %m", to
);
678 /* The superblock mount options of the mount point need to be
679 * identical to the hosts', and hence writable... */
680 if (mount("cgroup", to
, "cgroup", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, controller
) < 0)
681 return log_error_errno(errno
, "Failed to mount to %s: %m", to
);
683 /* ... hence let's only make the bind mount read-only, not the
686 if (mount(NULL
, to
, NULL
, MS_BIND
|MS_REMOUNT
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_RDONLY
, NULL
) < 0)
687 return log_error_errno(errno
, "Failed to remount %s read-only: %m", to
);
692 /* Mount a legacy cgroup hierarchy when cgroup namespaces are supported. */
693 static int mount_legacy_cgns_supported(
694 bool userns
, uid_t uid_shift
, uid_t uid_range
,
695 const char *selinux_apifs_context
) {
696 _cleanup_set_free_free_ Set
*controllers
= NULL
;
697 const char *cgroup_root
= "/sys/fs/cgroup", *c
;
700 (void) mkdir_p(cgroup_root
, 0755);
702 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
703 r
= path_is_mount_point(cgroup_root
, AT_SYMLINK_FOLLOW
);
705 return log_error_errno(r
, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
707 _cleanup_free_
char *options
= NULL
;
709 /* When cgroup namespaces are enabled and user namespaces are
710 * used then the mount of the cgroupfs is done *inside* the new
711 * user namespace. We're root in the new user namespace and the
712 * kernel will happily translate our uid/gid to the correct
713 * uid/gid as seen from e.g. /proc/1/mountinfo. So we simply
714 * pass uid 0 and not uid_shift to tmpfs_patch_options().
716 r
= tmpfs_patch_options("mode=755", userns
, 0, uid_range
, true, selinux_apifs_context
, &options
);
720 if (mount("tmpfs", cgroup_root
, "tmpfs", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_STRICTATIME
, options
) < 0)
721 return log_error_errno(errno
, "Failed to mount /sys/fs/cgroup: %m");
724 if (cg_unified() > 0)
725 goto skip_controllers
;
727 controllers
= set_new(&string_hash_ops
);
731 r
= get_controllers(controllers
);
733 return log_error_errno(r
, "Failed to determine cgroup controllers: %m");
736 _cleanup_free_
const char *controller
= NULL
;
738 controller
= set_steal_first(controllers
);
742 r
= mount_legacy_cgroup_hierarchy("", controller
, controller
, !userns
);
746 /* When multiple hierarchies are co-mounted, make their
747 * constituting individual hierarchies a symlink to the
752 _cleanup_free_
char *target
= NULL
, *tok
= NULL
;
754 r
= extract_first_word(&c
, &tok
, ",", 0);
756 return log_error_errno(r
, "Failed to extract co-mounted cgroup controller: %m");
760 target
= prefix_root("/sys/fs/cgroup", tok
);
764 if (streq(controller
, tok
))
767 r
= symlink_idempotent(controller
, target
);
769 return log_error_errno(r
, "Invalid existing symlink for combined hierarchy: %m");
771 return log_error_errno(r
, "Failed to create symlink for combined hierarchy: %m");
776 r
= mount_legacy_cgroup_hierarchy("", "none,name=systemd,xattr", "systemd", false);
781 if (mount(NULL
, cgroup_root
, NULL
, MS_REMOUNT
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_STRICTATIME
|MS_RDONLY
, "mode=755") < 0)
782 return log_error_errno(errno
, "Failed to remount %s read-only: %m", cgroup_root
);
788 /* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */
789 static int mount_legacy_cgns_unsupported(
791 bool userns
, uid_t uid_shift
, uid_t uid_range
,
792 const char *selinux_apifs_context
) {
793 _cleanup_set_free_free_ Set
*controllers
= NULL
;
794 const char *cgroup_root
;
797 cgroup_root
= prefix_roota(dest
, "/sys/fs/cgroup");
799 (void) mkdir_p(cgroup_root
, 0755);
801 /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
802 r
= path_is_mount_point(cgroup_root
, AT_SYMLINK_FOLLOW
);
804 return log_error_errno(r
, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
806 _cleanup_free_
char *options
= NULL
;
808 r
= tmpfs_patch_options("mode=755", userns
, uid_shift
, uid_range
, false, selinux_apifs_context
, &options
);
812 if (mount("tmpfs", cgroup_root
, "tmpfs", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_STRICTATIME
, options
) < 0)
813 return log_error_errno(errno
, "Failed to mount /sys/fs/cgroup: %m");
816 if (cg_unified() > 0)
817 goto skip_controllers
;
819 controllers
= set_new(&string_hash_ops
);
823 r
= cg_kernel_controllers(controllers
);
825 return log_error_errno(r
, "Failed to determine cgroup controllers: %m");
828 _cleanup_free_
char *controller
= NULL
, *origin
= NULL
, *combined
= NULL
;
830 controller
= set_steal_first(controllers
);
834 origin
= prefix_root("/sys/fs/cgroup/", controller
);
838 r
= readlink_malloc(origin
, &combined
);
840 /* Not a symbolic link, but directly a single cgroup hierarchy */
842 r
= mount_legacy_cgroup_hierarchy(dest
, controller
, controller
, true);
847 return log_error_errno(r
, "Failed to read link %s: %m", origin
);
849 _cleanup_free_
char *target
= NULL
;
851 target
= prefix_root(dest
, origin
);
855 /* A symbolic link, a combination of controllers in one hierarchy */
857 if (!filename_is_valid(combined
)) {
858 log_warning("Ignoring invalid combined hierarchy %s.", combined
);
862 r
= mount_legacy_cgroup_hierarchy(dest
, combined
, combined
, true);
866 r
= symlink_idempotent(combined
, target
);
868 return log_error_errno(r
, "Invalid existing symlink for combined hierarchy: %m");
870 return log_error_errno(r
, "Failed to create symlink for combined hierarchy: %m");
875 r
= mount_legacy_cgroup_hierarchy(dest
, "none,name=systemd,xattr", "systemd", false);
879 if (mount(NULL
, cgroup_root
, NULL
, MS_REMOUNT
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_STRICTATIME
|MS_RDONLY
, "mode=755") < 0)
880 return log_error_errno(errno
, "Failed to remount %s read-only: %m", cgroup_root
);
885 static int mount_unified_cgroups(const char *dest
) {
891 p
= prefix_roota(dest
, "/sys/fs/cgroup");
893 (void) mkdir_p(p
, 0755);
895 r
= path_is_mount_point(p
, AT_SYMLINK_FOLLOW
);
897 return log_error_errno(r
, "Failed to determine if %s is mounted already: %m", p
);
899 p
= prefix_roota(dest
, "/sys/fs/cgroup/cgroup.procs");
900 if (access(p
, F_OK
) >= 0)
903 return log_error_errno(errno
, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p
);
905 log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p
);
909 if (mount("cgroup", p
, "cgroup2", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, NULL
) < 0)
910 return log_error_errno(errno
, "Failed to mount unified cgroup hierarchy to %s: %m", p
);
917 bool unified_requested
,
918 bool userns
, uid_t uid_shift
, uid_t uid_range
,
919 const char *selinux_apifs_context
) {
921 if (unified_requested
)
922 return mount_unified_cgroups(dest
);
923 else if (cg_ns_supported())
924 return mount_legacy_cgns_supported(userns
, uid_shift
, uid_range
, selinux_apifs_context
);
926 return mount_legacy_cgns_unsupported(dest
, userns
, uid_shift
, uid_range
, selinux_apifs_context
);
929 int mount_systemd_cgroup_writable(
931 bool unified_requested
) {
933 _cleanup_free_
char *own_cgroup_path
= NULL
;
934 const char *systemd_root
, *systemd_own
;
939 r
= cg_pid_get_path(NULL
, 0, &own_cgroup_path
);
941 return log_error_errno(r
, "Failed to determine our own cgroup path: %m");
943 /* If we are living in the top-level, then there's nothing to do... */
944 if (path_equal(own_cgroup_path
, "/"))
947 if (unified_requested
) {
948 systemd_own
= strjoina(dest
, "/sys/fs/cgroup", own_cgroup_path
);
949 systemd_root
= prefix_roota(dest
, "/sys/fs/cgroup");
951 systemd_own
= strjoina(dest
, "/sys/fs/cgroup/systemd", own_cgroup_path
);
952 systemd_root
= prefix_roota(dest
, "/sys/fs/cgroup/systemd");
955 /* Make our own cgroup a (writable) bind mount */
956 if (mount(systemd_own
, systemd_own
, NULL
, MS_BIND
, NULL
) < 0)
957 return log_error_errno(errno
, "Failed to turn %s into a bind mount: %m", own_cgroup_path
);
959 /* And then remount the systemd cgroup root read-only */
960 if (mount(NULL
, systemd_root
, NULL
, MS_BIND
|MS_REMOUNT
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_RDONLY
, NULL
) < 0)
961 return log_error_errno(errno
, "Failed to mount cgroup root read-only: %m");
966 int setup_volatile_state(
967 const char *directory
,
969 bool userns
, uid_t uid_shift
, uid_t uid_range
,
970 const char *selinux_apifs_context
) {
972 _cleanup_free_
char *buf
= NULL
;
973 const char *p
, *options
;
978 if (mode
!= VOLATILE_STATE
)
981 /* --volatile=state means we simply overmount /var
982 with a tmpfs, and the rest read-only. */
984 r
= bind_remount_recursive(directory
, true);
986 return log_error_errno(r
, "Failed to remount %s read-only: %m", directory
);
988 p
= prefix_roota(directory
, "/var");
990 if (r
< 0 && errno
!= EEXIST
)
991 return log_error_errno(errno
, "Failed to create %s: %m", directory
);
993 options
= "mode=755";
994 r
= tmpfs_patch_options(options
, userns
, uid_shift
, uid_range
, false, selinux_apifs_context
, &buf
);
1000 if (mount("tmpfs", p
, "tmpfs", MS_STRICTATIME
, options
) < 0)
1001 return log_error_errno(errno
, "Failed to mount tmpfs to /var: %m");
1007 const char *directory
,
1009 bool userns
, uid_t uid_shift
, uid_t uid_range
,
1010 const char *selinux_apifs_context
) {
1012 bool tmpfs_mounted
= false, bind_mounted
= false;
1013 char template[] = "/tmp/nspawn-volatile-XXXXXX";
1014 _cleanup_free_
char *buf
= NULL
;
1015 const char *f
, *t
, *options
;
1020 if (mode
!= VOLATILE_YES
)
1023 /* --volatile=yes means we mount a tmpfs to the root dir, and
1024 the original /usr to use inside it, and that read-only. */
1026 if (!mkdtemp(template))
1027 return log_error_errno(errno
, "Failed to create temporary directory: %m");
1029 options
= "mode=755";
1030 r
= tmpfs_patch_options(options
, userns
, uid_shift
, uid_range
, false, selinux_apifs_context
, &buf
);
1036 if (mount("tmpfs", template, "tmpfs", MS_STRICTATIME
, options
) < 0) {
1037 r
= log_error_errno(errno
, "Failed to mount tmpfs for root directory: %m");
1041 tmpfs_mounted
= true;
1043 f
= prefix_roota(directory
, "/usr");
1044 t
= prefix_roota(template, "/usr");
1047 if (r
< 0 && errno
!= EEXIST
) {
1048 r
= log_error_errno(errno
, "Failed to create %s: %m", t
);
1052 if (mount(f
, t
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0) {
1053 r
= log_error_errno(errno
, "Failed to create /usr bind mount: %m");
1057 bind_mounted
= true;
1059 r
= bind_remount_recursive(t
, true);
1061 log_error_errno(r
, "Failed to remount %s read-only: %m", t
);
1065 if (mount(template, directory
, NULL
, MS_MOVE
, NULL
) < 0) {
1066 r
= log_error_errno(errno
, "Failed to move root mount: %m");
1070 (void) rmdir(template);
1079 (void) umount(template);
1080 (void) rmdir(template);
1084 VolatileMode
volatile_mode_from_string(const char *s
) {
1088 return _VOLATILE_MODE_INVALID
;
1090 b
= parse_boolean(s
);
1092 return VOLATILE_YES
;
1096 if (streq(s
, "state"))
1097 return VOLATILE_STATE
;
1099 return _VOLATILE_MODE_INVALID
;