1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include <linux/magic.h>
7 #include "alloc-util.h"
10 #include "extract-word.h"
12 #include "format-util.h"
15 #include "mkdir-label.h"
16 #include "mount-util.h"
17 #include "mountpoint-util.h"
18 #include "namespace-util.h"
19 #include "nspawn-mount.h"
20 #include "path-util.h"
22 #include "sort-util.h"
23 #include "stat-util.h"
24 #include "string-util.h"
26 #include "tmpfile-util.h"
28 CustomMount
* custom_mount_add(CustomMount
**l
, size_t *n
, CustomMountType t
) {
34 assert(t
< _CUSTOM_MOUNT_TYPE_MAX
);
36 if (!GREEDY_REALLOC(*l
, *n
+ 1))
42 *ret
= (CustomMount
) {
49 void custom_mount_free_all(CustomMount
*l
, size_t n
) {
50 FOREACH_ARRAY(m
, l
, n
) {
56 (void) rm_rf(m
->work_dir
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
60 if (m
->rm_rf_tmpdir
) {
61 (void) rm_rf(m
->rm_rf_tmpdir
, REMOVE_ROOT
|REMOVE_PHYSICAL
);
62 free(m
->rm_rf_tmpdir
);
66 free(m
->type_argument
);
72 static int custom_mount_compare(const CustomMount
*a
, const CustomMount
*b
) {
75 r
= path_compare(a
->destination
, b
->destination
);
79 return CMP(a
->type
, b
->type
);
82 static int source_path_parse(const char *p
, char **ret
) {
90 if (!path_is_absolute(p
+ 1))
101 return path_make_absolute_cwd(p
, ret
);
104 static int source_path_parse_nullable(const char *p
, char **ret
) {
113 return source_path_parse(p
, ret
);
116 static char *resolve_source_path(const char *dest
, const char *source
) {
120 if (source
[0] == '+')
121 return path_join(dest
, source
+ 1);
123 return strdup(source
);
126 static int allocate_temporary_source(CustomMount
*m
) {
131 assert(!m
->rm_rf_tmpdir
);
133 r
= mkdtemp_malloc("/var/tmp/nspawn-temp-XXXXXX", &m
->rm_rf_tmpdir
);
135 return log_error_errno(r
, "Failed to acquire temporary directory: %m");
137 m
->source
= path_join(m
->rm_rf_tmpdir
, "src");
141 if (mkdir(m
->source
, 0755) < 0)
142 return log_error_errno(errno
, "Failed to create %s: %m", m
->source
);
147 int custom_mount_prepare_all(const char *dest
, CustomMount
*l
, size_t n
) {
150 /* Prepare all custom mounts. This will make sure we know all temporary directories. This is called in the
151 * parent process, so that we know the temporary directories to remove on exit before we fork off the
156 /* Order the custom mounts, and make sure we have a working directory */
157 typesafe_qsort(l
, n
, custom_mount_compare
);
159 FOREACH_ARRAY(m
, l
, n
) {
160 /* /proc we mount in the inner child, i.e. when we acquired CLONE_NEWPID. All other mounts we mount
161 * already in the outer child, so that the mounts are already established before CLONE_NEWPID and in
162 * particular CLONE_NEWUSER. This also means any custom mounts below /proc also need to be mounted in
163 * the inner child, not the outer one. Determine this here. */
164 m
->in_userns
= path_startswith(m
->destination
, "/proc");
166 if (m
->type
== CUSTOM_MOUNT_BIND
) {
170 s
= resolve_source_path(dest
, m
->source
);
174 free_and_replace(m
->source
, s
);
176 /* No source specified? In that case, use a throw-away temporary directory in /var/tmp */
178 r
= allocate_temporary_source(m
);
184 if (m
->type
== CUSTOM_MOUNT_OVERLAY
) {
185 STRV_FOREACH(j
, m
->lower
) {
188 s
= resolve_source_path(dest
, *j
);
192 free_and_replace(*j
, s
);
198 s
= resolve_source_path(dest
, m
->source
);
202 free_and_replace(m
->source
, s
);
204 r
= allocate_temporary_source(m
);
212 s
= resolve_source_path(dest
, m
->work_dir
);
216 free_and_replace(m
->work_dir
, s
);
218 r
= tempfn_random(m
->source
, NULL
, &m
->work_dir
);
220 return log_error_errno(r
, "Failed to acquire working directory: %m");
223 (void) mkdir_label(m
->work_dir
, 0700);
230 int bind_mount_parse(CustomMount
**l
, size_t *n
, const char *s
, bool read_only
) {
231 _cleanup_free_
char *source
= NULL
, *destination
= NULL
, *opts
= NULL
, *p
= NULL
;
238 r
= extract_many_words(&s
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
, &source
, &destination
);
244 destination
= strdup(source
[0] == '+' ? source
+1 : source
);
248 if (r
== 2 && !isempty(s
)) {
254 r
= source_path_parse_nullable(source
, &p
);
258 if (!path_is_absolute(destination
))
261 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_BIND
);
265 m
->source
= TAKE_PTR(p
);
266 m
->destination
= TAKE_PTR(destination
);
267 m
->read_only
= read_only
;
268 m
->options
= TAKE_PTR(opts
);
273 int tmpfs_mount_parse(CustomMount
**l
, size_t *n
, const char *s
) {
274 _cleanup_free_
char *path
= NULL
, *opts
= NULL
;
275 const char *p
= ASSERT_PTR(s
);
282 r
= extract_first_word(&p
, &path
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
);
289 opts
= strdup("mode=0755");
295 if (!path_is_absolute(path
))
298 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_TMPFS
);
302 m
->destination
= TAKE_PTR(path
);
303 m
->options
= TAKE_PTR(opts
);
308 int overlay_mount_parse(CustomMount
**l
, size_t *n
, const char *s
, bool read_only
) {
309 _cleanup_free_
char *upper
= NULL
, *destination
= NULL
;
310 _cleanup_strv_free_
char **lower
= NULL
;
314 k
= strv_split_full(&lower
, s
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
);
318 return -EADDRNOTAVAIL
;
320 _cleanup_free_
char *p
= NULL
;
322 /* If two parameters are specified, the first one is the lower, the second one the upper directory. And
323 * we'll also define the destination mount point the same as the upper. */
325 r
= source_path_parse(lower
[0], &p
);
329 free_and_replace(lower
[0], p
);
331 r
= source_path_parse(lower
[1], &p
);
335 free_and_replace(lower
[1], p
);
337 upper
= TAKE_PTR(lower
[1]);
339 destination
= strdup(upper
[0] == '+' ? upper
+1 : upper
); /* take the destination without "+" prefix */
343 _cleanup_free_
char *p
= NULL
;
345 /* If more than two parameters are specified, the last one is the destination, the second to last one
346 * the "upper", and all before that the "lower" directories. */
348 destination
= lower
[k
- 1];
349 upper
= TAKE_PTR(lower
[k
- 2]);
351 STRV_FOREACH(i
, lower
) {
352 r
= source_path_parse(*i
, &p
);
356 free_and_replace(*i
, p
);
359 /* If the upper directory is unspecified, then let's create it automatically as a throw-away directory
361 r
= source_path_parse_nullable(upper
, &p
);
365 free_and_replace(upper
, p
);
367 if (!path_is_absolute(destination
))
371 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_OVERLAY
);
375 m
->destination
= TAKE_PTR(destination
);
376 m
->source
= TAKE_PTR(upper
);
377 m
->lower
= TAKE_PTR(lower
);
378 m
->read_only
= read_only
;
383 int inaccessible_mount_parse(CustomMount
**l
, size_t *n
, const char *s
) {
384 _cleanup_free_
char *path
= NULL
;
391 if (!path_is_absolute(s
))
398 m
= custom_mount_add(l
, n
, CUSTOM_MOUNT_INACCESSIBLE
);
402 m
->destination
= TAKE_PTR(path
);
406 int tmpfs_patch_options(
409 const char *selinux_apifs_context
,
412 _cleanup_free_
char *buf
= NULL
;
417 buf
= strdup(options
);
422 if (uid_shift
!= UID_INVALID
)
423 if (strextendf_with_separator(&buf
, ",", "uid=" UID_FMT
",gid=" UID_FMT
, uid_shift
, uid_shift
) < 0)
427 if (selinux_apifs_context
)
428 if (strextendf_with_separator(&buf
, ",", "context=\"%s\"", selinux_apifs_context
) < 0)
432 *ret
= TAKE_PTR(buf
);
436 int mount_sysfs(const char *dest
, MountSettingsMask mount_settings
) {
437 _cleanup_free_
char *top
= NULL
, *full
= NULL
;;
438 unsigned long extra_flags
= 0;
441 top
= path_join(dest
, "/sys");
445 r
= path_is_mount_point(top
);
447 return log_error_errno(r
, "Failed to determine if '%s' is a mountpoint: %m", top
);
449 /* If this is not a mount point yet, then mount a tmpfs there */
450 r
= mount_nofollow_verbose(LOG_ERR
, "tmpfs", top
, "tmpfs", MS_NOSUID
|MS_NOEXEC
|MS_NODEV
, "mode=0555" TMPFS_LIMITS_SYS
);
454 r
= path_is_fs_type(top
, SYSFS_MAGIC
);
456 return log_error_errno(r
, "Failed to determine filesystem type of %s: %m", top
);
458 /* /sys/ might already be mounted as sysfs by the outer child in the !netns case. In this case, it's
459 * all good. Don't touch it because we don't have the right to do so, see
460 * https://github.com/systemd/systemd/issues/1555.
466 full
= path_join(top
, "/full");
470 if (mkdir(full
, 0755) < 0 && errno
!= EEXIST
)
471 return log_error_errno(errno
, "Failed to create directory '%s': %m", full
);
473 if (FLAGS_SET(mount_settings
, MOUNT_APPLY_APIVFS_RO
))
474 extra_flags
|= MS_RDONLY
;
476 r
= mount_nofollow_verbose(LOG_ERR
, "sysfs", full
, "sysfs",
477 MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|extra_flags
, NULL
);
481 FOREACH_STRING(x
, "block", "bus", "class", "dev", "devices", "kernel") {
482 _cleanup_free_
char *from
= NULL
, *to
= NULL
;
484 from
= path_join(full
, x
);
488 to
= path_join(top
, x
);
492 (void) mkdir(to
, 0755);
494 r
= mount_nofollow_verbose(LOG_ERR
, from
, to
, NULL
, MS_BIND
, NULL
);
498 r
= mount_nofollow_verbose(LOG_ERR
, NULL
, to
, NULL
,
499 MS_BIND
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
|extra_flags
, NULL
);
504 r
= umount_verbose(LOG_ERR
, full
, UMOUNT_NOFOLLOW
);
509 return log_error_errno(errno
, "Failed to remove %s: %m", full
);
511 /* Create mountpoint for cgroups. Otherwise we are not allowed since we remount /sys/ read-only. */
512 _cleanup_free_
char *x
= path_join(top
, "/fs/cgroup");
516 (void) mkdir_p(x
, 0755);
518 return mount_nofollow_verbose(LOG_ERR
, NULL
, top
, NULL
,
519 MS_BIND
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
|extra_flags
, NULL
);
522 #define PROC_DEFAULT_MOUNT_FLAGS (MS_NOSUID|MS_NOEXEC|MS_NODEV)
523 #define SYS_DEFAULT_MOUNT_FLAGS (MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV)
525 int mount_all(const char *dest
,
526 MountSettingsMask mount_settings
,
528 const char *selinux_apifs_context
) {
530 #define PROC_INACCESSIBLE_REG(path) \
531 { "/run/systemd/inaccessible/reg", (path), NULL, NULL, MS_BIND, \
532 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
533 { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
534 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
536 #define PROC_READ_ONLY(path) \
537 { (path), (path), NULL, NULL, MS_BIND, \
538 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO }, /* Bind mount first ... */ \
539 { NULL, (path), NULL, NULL, MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, \
540 MOUNT_IN_USERNS|MOUNT_APPLY_APIVFS_RO } /* Then, make it r/o */
542 typedef struct MountPoint
{
548 MountSettingsMask mount_settings
;
551 static const MountPoint mount_table
[] = {
552 /* First we list inner child mounts (i.e. mounts applied *after* entering user namespacing when we are privileged) */
553 { "proc", "/proc", "proc", NULL
, PROC_DEFAULT_MOUNT_FLAGS
,
554 MOUNT_FATAL
|MOUNT_IN_USERNS
|MOUNT_MKDIR
|MOUNT_FOLLOW_SYMLINKS
}, /* we follow symlinks here since not following them requires /proc/ already being mounted, which we don't have here. */
556 { "/proc/sys", "/proc/sys", NULL
, NULL
, MS_BIND
,
557 MOUNT_FATAL
|MOUNT_IN_USERNS
|MOUNT_APPLY_APIVFS_RO
}, /* Bind mount first ... */
559 { "/proc/sys/net", "/proc/sys/net", NULL
, NULL
, MS_BIND
,
560 MOUNT_FATAL
|MOUNT_IN_USERNS
|MOUNT_APPLY_APIVFS_RO
|MOUNT_APPLY_APIVFS_NETNS
}, /* (except for this) */
562 { NULL
, "/proc/sys", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
,
563 MOUNT_FATAL
|MOUNT_IN_USERNS
|MOUNT_APPLY_APIVFS_RO
}, /* ... then, make it r/o */
565 /* Make these files inaccessible to container payloads: they potentially leak information about kernel
566 * internals or the host's execution environment to the container */
567 PROC_INACCESSIBLE_REG("/proc/kallsyms"),
568 PROC_INACCESSIBLE_REG("/proc/kcore"),
569 PROC_INACCESSIBLE_REG("/proc/keys"),
570 PROC_INACCESSIBLE_REG("/proc/sysrq-trigger"),
571 PROC_INACCESSIBLE_REG("/proc/timer_list"),
573 /* Make these directories read-only to container payloads: they show hardware information, and in some
574 * cases contain tunables the container really shouldn't have access to. */
575 PROC_READ_ONLY("/proc/acpi"),
576 PROC_READ_ONLY("/proc/apm"),
577 PROC_READ_ONLY("/proc/asound"),
578 PROC_READ_ONLY("/proc/bus"),
579 PROC_READ_ONLY("/proc/fs"),
580 PROC_READ_ONLY("/proc/irq"),
581 PROC_READ_ONLY("/proc/scsi"),
583 { "mqueue", "/dev/mqueue", "mqueue", NULL
, MS_NOSUID
|MS_NOEXEC
|MS_NODEV
,
584 MOUNT_IN_USERNS
|MOUNT_MKDIR
},
586 /* Then we list outer child mounts (i.e. mounts applied *before* entering user namespacing when we are privileged) */
587 { "tmpfs", "/tmp", "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS
, MS_NOSUID
|MS_NODEV
|MS_STRICTATIME
,
588 MOUNT_FATAL
|MOUNT_APPLY_TMPFS_TMP
|MOUNT_MKDIR
|MOUNT_USRQUOTA_GRACEFUL
},
589 { "tmpfs", "/sys", "tmpfs", "mode=0555" TMPFS_LIMITS_SYS
, MS_NOSUID
|MS_NOEXEC
|MS_NODEV
,
590 MOUNT_FATAL
|MOUNT_APPLY_APIVFS_NETNS
|MOUNT_MKDIR
|MOUNT_UNMANAGED
},
591 { "sysfs", "/sys", "sysfs", NULL
, SYS_DEFAULT_MOUNT_FLAGS
,
592 MOUNT_FATAL
|MOUNT_APPLY_APIVFS_RO
|MOUNT_MKDIR
|MOUNT_UNMANAGED
}, /* skipped if above was mounted */
593 { "sysfs", "/sys", "sysfs", NULL
, MS_NOSUID
|MS_NOEXEC
|MS_NODEV
,
594 MOUNT_FATAL
|MOUNT_MKDIR
|MOUNT_UNMANAGED
}, /* skipped if above was mounted */
595 { "tmpfs", "/dev", "tmpfs", "mode=0755" TMPFS_LIMITS_PRIVATE_DEV
, MS_NOSUID
|MS_STRICTATIME
,
596 MOUNT_FATAL
|MOUNT_MKDIR
},
597 { "tmpfs", "/dev/shm", "tmpfs", "mode=01777" NESTED_TMPFS_LIMITS
, MS_NOSUID
|MS_NODEV
|MS_STRICTATIME
,
598 MOUNT_FATAL
|MOUNT_MKDIR
|MOUNT_USRQUOTA_GRACEFUL
},
599 { "tmpfs", "/run", "tmpfs", "mode=0755" TMPFS_LIMITS_RUN
, MS_NOSUID
|MS_NODEV
|MS_STRICTATIME
,
600 MOUNT_FATAL
|MOUNT_MKDIR
},
601 { "/run/host", "/run/host", NULL
, NULL
, MS_BIND
,
602 MOUNT_FATAL
|MOUNT_MKDIR
|MOUNT_PREFIX_ROOT
}, /* Prepare this so that we can make it read-only when we are done */
603 { "/etc/os-release", "/run/host/os-release", NULL
, NULL
, MS_BIND
,
604 MOUNT_TOUCH
}, /* As per kernel interface requirements, bind mount first (creating mount points) and make read-only later */
605 { "/usr/lib/os-release", "/run/host/os-release", NULL
, NULL
, MS_BIND
,
606 MOUNT_FATAL
}, /* If /etc/os-release doesn't exist use the version in /usr/lib as fallback */
607 { NULL
, "/run/host/os-release", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
,
609 { NULL
, "/run/host/os-release", NULL
, NULL
, MS_PRIVATE
,
610 MOUNT_FATAL
}, /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
611 { NULL
, "/run/host", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
,
612 MOUNT_FATAL
|MOUNT_IN_USERNS
},
614 { "/sys/fs/selinux", "/sys/fs/selinux", NULL
, NULL
, MS_BIND
,
615 MOUNT_MKDIR
|MOUNT_PRIVILEGED
}, /* Bind mount first (mkdir/chown the mount point in case /sys/ is mounted as minimal skeleton tmpfs) */
616 { NULL
, "/sys/fs/selinux", NULL
, NULL
, MS_BIND
|MS_RDONLY
|MS_NOSUID
|MS_NOEXEC
|MS_NODEV
|MS_REMOUNT
,
617 MOUNT_UNMANAGED
|MOUNT_PRIVILEGED
}, /* Then, make it r/o (don't mkdir/chown the mount point here, the previous entry already did that) */
618 { NULL
, "/sys/fs/selinux", NULL
, NULL
, MS_PRIVATE
,
619 MOUNT_UNMANAGED
|MOUNT_PRIVILEGED
}, /* Turn off propagation (we only want that for the mount propagation tunnel dir) */
623 bool use_userns
= FLAGS_SET(mount_settings
, MOUNT_USE_USERNS
);
624 bool netns
= FLAGS_SET(mount_settings
, MOUNT_APPLY_APIVFS_NETNS
);
625 bool ro
= FLAGS_SET(mount_settings
, MOUNT_APPLY_APIVFS_RO
);
626 bool in_userns
= FLAGS_SET(mount_settings
, MOUNT_IN_USERNS
);
627 bool tmpfs_tmp
= FLAGS_SET(mount_settings
, MOUNT_APPLY_TMPFS_TMP
);
628 bool unmanaged
= FLAGS_SET(mount_settings
, MOUNT_UNMANAGED
);
629 bool privileged
= FLAGS_SET(mount_settings
, MOUNT_PRIVILEGED
);
632 FOREACH_ELEMENT(m
, mount_table
) {
633 _cleanup_free_
char *where
= NULL
, *options
= NULL
, *prefixed
= NULL
;
634 bool fatal
= FLAGS_SET(m
->mount_settings
, MOUNT_FATAL
);
637 /* If we are in managed user namespace mode but the entry is marked for mount outside of
638 * managed user namespace mode, and to be mounted outside the user namespace, then skip it */
639 if (!unmanaged
&& FLAGS_SET(m
->mount_settings
, MOUNT_UNMANAGED
) && !FLAGS_SET(m
->mount_settings
, MOUNT_IN_USERNS
))
642 if (in_userns
!= FLAGS_SET(m
->mount_settings
, MOUNT_IN_USERNS
))
645 if (!netns
&& FLAGS_SET(m
->mount_settings
, MOUNT_APPLY_APIVFS_NETNS
))
648 if (!ro
&& FLAGS_SET(m
->mount_settings
, MOUNT_APPLY_APIVFS_RO
))
651 if (!tmpfs_tmp
&& FLAGS_SET(m
->mount_settings
, MOUNT_APPLY_TMPFS_TMP
))
654 if (!privileged
&& FLAGS_SET(m
->mount_settings
, MOUNT_PRIVILEGED
))
657 r
= chase(m
->where
, dest
, CHASE_NONEXISTENT
|CHASE_PREFIX_ROOT
, &where
, NULL
);
659 return log_error_errno(r
, "Failed to resolve %s%s: %m", strempty(dest
), m
->where
);
661 /* Skip this entry if it is not a remount. */
663 r
= path_is_mount_point(where
);
664 if (r
< 0 && r
!= -ENOENT
)
665 return log_error_errno(r
, "Failed to detect whether %s is a mount point: %m", where
);
670 if ((m
->mount_settings
& (MOUNT_MKDIR
|MOUNT_TOUCH
)) != 0) {
671 uid_t u
= (use_userns
&& !in_userns
) ? uid_shift
: UID_INVALID
;
673 if (FLAGS_SET(m
->mount_settings
, MOUNT_TOUCH
))
674 r
= mkdir_parents_safe(dest
, where
, 0755, u
, u
, 0);
676 r
= mkdir_p_safe(dest
, where
, 0755, u
, u
, 0);
677 if (r
< 0 && r
!= -EEXIST
) {
678 if (fatal
&& r
!= -EROFS
)
679 return log_error_errno(r
, "Failed to create directory %s: %m", where
);
681 log_debug_errno(r
, "Failed to create directory %s: %m", where
);
683 /* If we failed mkdir() or chown() due to the root directory being read only,
684 * attempt to mount this fs anyway and let mount_verbose log any errors */
690 if (FLAGS_SET(m
->mount_settings
, MOUNT_TOUCH
)) {
692 if (r
< 0 && r
!= -EEXIST
) {
693 if (fatal
&& r
!= -EROFS
)
694 return log_error_errno(r
, "Failed to create file %s: %m", where
);
696 log_debug_errno(r
, "Failed to create file %s: %m", where
);
703 if (streq_ptr(m
->type
, "tmpfs")) {
704 r
= tmpfs_patch_options(o
, in_userns
? 0 : uid_shift
, selinux_apifs_context
, &options
);
711 if (FLAGS_SET(m
->mount_settings
, MOUNT_USRQUOTA_GRACEFUL
)) {
712 r
= mount_option_supported(m
->type
, /* key= */ "usrquota", /* value= */ NULL
);
714 log_warning_errno(r
, "Failed to determine if '%s' supports 'usrquota', assuming it doesn't: %m", m
->type
);
716 log_debug("Kernel doesn't support 'usrquota' on '%s', not including in mount options for '%s'.", m
->type
, m
->where
);
718 _cleanup_free_
char *joined
= NULL
;
720 if (!strextend_with_separator(&joined
, ",", o
?: POINTER_MAX
, "usrquota"))
723 free_and_replace(options
, joined
);
728 if (FLAGS_SET(m
->mount_settings
, MOUNT_PREFIX_ROOT
)) {
729 /* Optionally prefix the mount source with the root dir. This is useful in bind
730 * mounts to be created within the container image before we transition into it. Note
731 * that MOUNT_IN_USERNS is run after we transitioned hence prefixing is not necessary
733 r
= chase(m
->what
, dest
, CHASE_PREFIX_ROOT
, &prefixed
, NULL
);
735 return log_error_errno(r
, "Failed to resolve %s%s: %m", strempty(dest
), m
->what
);
738 r
= mount_verbose_full(
739 fatal
? LOG_ERR
: LOG_DEBUG
,
745 FLAGS_SET(m
->mount_settings
, MOUNT_FOLLOW_SYMLINKS
));
753 static int parse_mount_bind_options(const char *options
, unsigned long *mount_flags
, char **mount_opts
, RemountIdmapping
*idmapping
) {
754 unsigned long flags
= *mount_flags
;
756 RemountIdmapping new_idmapping
= *idmapping
;
762 _cleanup_free_
char *word
= NULL
;
764 r
= extract_first_word(&options
, &word
, ",", 0);
766 return log_error_errno(r
, "Failed to extract mount option: %m");
770 if (streq(word
, "rbind"))
772 else if (streq(word
, "norbind"))
774 else if (streq(word
, "idmap"))
775 new_idmapping
= REMOUNT_IDMAPPING_HOST_ROOT
;
776 else if (streq(word
, "noidmap"))
777 new_idmapping
= REMOUNT_IDMAPPING_NONE
;
778 else if (streq(word
, "rootidmap"))
779 new_idmapping
= REMOUNT_IDMAPPING_HOST_OWNER
;
780 else if (streq(word
, "owneridmap"))
781 new_idmapping
= REMOUNT_IDMAPPING_HOST_OWNER_TO_TARGET_OWNER
;
783 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
784 "Invalid bind mount option: %s", word
);
787 *mount_flags
= flags
;
788 *idmapping
= new_idmapping
;
789 /* in the future mount_opts will hold string options for mount(2) */
795 static int mount_bind(const char *dest
, CustomMount
*m
, uid_t uid_shift
, uid_t uid_range
) {
796 _cleanup_free_
char *mount_opts
= NULL
, *where
= NULL
;
797 unsigned long mount_flags
= MS_BIND
| MS_REC
;
798 struct stat source_st
, dest_st
;
799 uid_t dest_uid
= UID_INVALID
;
801 RemountIdmapping idmapping
= REMOUNT_IDMAPPING_NONE
;
807 r
= parse_mount_bind_options(m
->options
, &mount_flags
, &mount_opts
, &idmapping
);
812 /* If this is a bind mount from a temporary sources change ownership of the source to the container's
813 * root UID. Otherwise it would always show up as "nobody" if user namespacing is used. */
814 if (m
->rm_rf_tmpdir
&& chown(m
->source
, uid_shift
, uid_shift
) < 0)
815 return log_error_errno(errno
, "Failed to chown %s: %m", m
->source
);
817 if (stat(m
->source
, &source_st
) < 0)
818 return log_error_errno(errno
, "Failed to stat %s: %m", m
->source
);
820 r
= chase(m
->destination
, dest
, CHASE_PREFIX_ROOT
|CHASE_NONEXISTENT
, &where
, NULL
);
822 return log_error_errno(r
, "Failed to resolve %s/%s: %m", dest
, m
->destination
);
823 if (r
> 0) { /* Path exists already? */
825 if (stat(where
, &dest_st
) < 0)
826 return log_error_errno(errno
, "Failed to stat %s: %m", where
);
828 dest_uid
= dest_st
.st_uid
;
830 if (S_ISDIR(source_st
.st_mode
) && !S_ISDIR(dest_st
.st_mode
))
831 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
832 "Cannot bind mount directory %s on file %s.",
835 if (!S_ISDIR(source_st
.st_mode
) && S_ISDIR(dest_st
.st_mode
))
836 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
837 "Cannot bind mount file %s on directory %s.",
840 } else { /* Path doesn't exist yet? */
841 r
= mkdir_parents_safe_label(dest
, where
, 0755, uid_shift
, uid_shift
, MKDIR_IGNORE_EXISTING
);
843 return log_error_errno(r
, "Failed to make parents of %s: %m", where
);
845 /* Create the mount point. Any non-directory file can be
846 * mounted on any non-directory file (regular, fifo, socket,
849 if (S_ISDIR(source_st
.st_mode
))
850 r
= mkdir_label(where
, 0755);
854 return log_error_errno(r
, "Failed to create mount point %s: %m", where
);
856 if (chown(where
, uid_shift
, uid_shift
) < 0)
857 return log_error_errno(errno
, "Failed to chown %s: %m", where
);
859 dest_uid
= uid_shift
;
862 r
= mount_nofollow_verbose(LOG_ERR
, m
->source
, where
, NULL
, mount_flags
, mount_opts
);
867 r
= bind_remount_recursive(where
, MS_RDONLY
, MS_RDONLY
, NULL
);
869 return log_error_errno(r
, "Read-only bind mount failed: %m");
872 if (idmapping
!= REMOUNT_IDMAPPING_NONE
) {
873 r
= remount_idmap(STRV_MAKE(where
), uid_shift
, uid_range
, source_st
.st_uid
, dest_uid
, idmapping
);
875 return log_error_errno(r
, "Failed to map ids for bind mount %s: %m", where
);
881 static int mount_tmpfs(const char *dest
, CustomMount
*m
, uid_t uid_shift
, const char *selinux_apifs_context
) {
883 _cleanup_free_
char *buf
= NULL
, *where
= NULL
;
889 r
= chase(m
->destination
, dest
, CHASE_PREFIX_ROOT
|CHASE_NONEXISTENT
, &where
, NULL
);
891 return log_error_errno(r
, "Failed to resolve %s/%s: %m", dest
, m
->destination
);
892 if (r
== 0) { /* Doesn't exist yet? */
893 r
= mkdir_p_label(where
, 0755);
895 return log_error_errno(r
, "Creating mount point for tmpfs %s failed: %m", where
);
898 r
= tmpfs_patch_options(m
->options
, uid_shift
== 0 ? UID_INVALID
: uid_shift
, selinux_apifs_context
, &buf
);
901 options
= r
> 0 ? buf
: m
->options
;
903 return mount_nofollow_verbose(LOG_ERR
, "tmpfs", where
, "tmpfs", MS_NODEV
|MS_STRICTATIME
, options
);
906 static char *joined_and_escaped_lower_dirs(char **lower
) {
907 _cleanup_strv_free_
char **sv
= NULL
;
909 sv
= strv_copy(lower
);
915 if (!strv_shell_escape(sv
, ",:"))
918 return strv_join(sv
, ":");
921 static int mount_overlay(const char *dest
, CustomMount
*m
) {
922 _cleanup_free_
char *lower
= NULL
, *where
= NULL
, *escaped_source
= NULL
;
929 r
= chase(m
->destination
, dest
, CHASE_PREFIX_ROOT
|CHASE_NONEXISTENT
, &where
, NULL
);
931 return log_error_errno(r
, "Failed to resolve %s/%s: %m", dest
, m
->destination
);
932 if (r
== 0) { /* Doesn't exist yet? */
933 r
= mkdir_label(where
, 0755);
935 return log_error_errno(r
, "Creating mount point for overlay %s failed: %m", where
);
938 (void) mkdir_p_label(m
->source
, 0755);
940 lower
= joined_and_escaped_lower_dirs(m
->lower
);
944 escaped_source
= shell_escape(m
->source
, ",:");
949 options
= strjoina("lowerdir=", escaped_source
, ":", lower
);
951 _cleanup_free_
char *escaped_work_dir
= NULL
;
953 escaped_work_dir
= shell_escape(m
->work_dir
, ",:");
954 if (!escaped_work_dir
)
957 options
= strjoina("lowerdir=", lower
, ",upperdir=", escaped_source
, ",workdir=", escaped_work_dir
);
960 return mount_nofollow_verbose(LOG_ERR
, "overlay", where
, "overlay", m
->read_only
? MS_RDONLY
: 0, options
);
963 static int mount_inaccessible(const char *dest
, CustomMount
*m
) {
964 _cleanup_free_
char *where
= NULL
, *source
= NULL
;
971 r
= chase_and_stat(m
->destination
, dest
, CHASE_PREFIX_ROOT
, &where
, &st
);
973 log_full_errno(m
->graceful
? LOG_DEBUG
: LOG_ERR
, r
, "Failed to resolve %s/%s: %m", dest
, m
->destination
);
974 return m
->graceful
? 0 : r
;
977 r
= mode_to_inaccessible_node(NULL
, st
.st_mode
, &source
);
979 return m
->graceful
? 0 : r
;
981 r
= mount_nofollow_verbose(m
->graceful
? LOG_DEBUG
: LOG_ERR
, source
, where
, NULL
, MS_BIND
, NULL
);
983 return m
->graceful
? 0 : r
;
985 r
= mount_nofollow_verbose(m
->graceful
? LOG_DEBUG
: LOG_ERR
, NULL
, where
, NULL
, MS_BIND
|MS_RDONLY
|MS_REMOUNT
, NULL
);
987 (void) umount_verbose(m
->graceful
? LOG_DEBUG
: LOG_ERR
, where
, UMOUNT_NOFOLLOW
);
988 return m
->graceful
? 0 : r
;
994 static int mount_arbitrary(const char *dest
, CustomMount
*m
) {
995 _cleanup_free_
char *where
= NULL
;
1001 r
= chase(m
->destination
, dest
, CHASE_PREFIX_ROOT
|CHASE_NONEXISTENT
, &where
, NULL
);
1003 return log_error_errno(r
, "Failed to resolve %s/%s: %m", dest
, m
->destination
);
1004 if (r
== 0) { /* Doesn't exist yet? */
1005 r
= mkdir_p_label(where
, 0755);
1007 return log_error_errno(r
, "Creating mount point for mount %s failed: %m", where
);
1010 return mount_nofollow_verbose(LOG_ERR
, m
->source
, where
, m
->type_argument
, 0, m
->options
);
1015 CustomMount
*mounts
, size_t n
,
1018 const char *selinux_apifs_context
,
1019 MountSettingsMask mount_settings
) {
1024 FOREACH_ARRAY(m
, mounts
, n
) {
1025 if (FLAGS_SET(mount_settings
, MOUNT_IN_USERNS
) != m
->in_userns
)
1028 if (FLAGS_SET(mount_settings
, MOUNT_ROOT_ONLY
) && !path_equal(m
->destination
, "/"))
1031 if (FLAGS_SET(mount_settings
, MOUNT_NON_ROOT_ONLY
) && path_equal(m
->destination
, "/"))
1036 case CUSTOM_MOUNT_BIND
:
1037 r
= mount_bind(dest
, m
, uid_shift
, uid_range
);
1040 case CUSTOM_MOUNT_TMPFS
:
1041 r
= mount_tmpfs(dest
, m
, uid_shift
, selinux_apifs_context
);
1044 case CUSTOM_MOUNT_OVERLAY
:
1045 r
= mount_overlay(dest
, m
);
1048 case CUSTOM_MOUNT_INACCESSIBLE
:
1049 r
= mount_inaccessible(dest
, m
);
1052 case CUSTOM_MOUNT_ARBITRARY
:
1053 r
= mount_arbitrary(dest
, m
);
1057 assert_not_reached();
1067 bool has_custom_root_mount(const CustomMount
*mounts
, size_t n
) {
1068 FOREACH_ARRAY(m
, mounts
, n
)
1069 if (path_equal(m
->destination
, "/"))
1075 static int setup_volatile_state(const char *directory
) {
1080 /* --volatile=state means we simply overmount /var with a tmpfs, and the rest read-only. */
1082 /* First, remount the root directory. */
1083 r
= bind_remount_recursive(directory
, MS_RDONLY
, MS_RDONLY
, NULL
);
1085 return log_error_errno(r
, "Failed to remount %s read-only: %m", directory
);
1090 static int setup_volatile_state_after_remount_idmap(const char *directory
, uid_t uid_shift
, const char *selinux_apifs_context
) {
1091 _cleanup_free_
char *buf
= NULL
;
1096 /* Then, after remount_idmap(), overmount /var/ with a tmpfs. */
1098 _cleanup_free_
char *p
= path_join(directory
, "/var");
1103 if (r
< 0 && errno
!= EEXIST
)
1104 return log_error_errno(errno
, "Failed to create %s: %m", directory
);
1106 const char *options
= "mode=0755" TMPFS_LIMITS_VOLATILE_STATE
;
1107 r
= tmpfs_patch_options(options
, uid_shift
== 0 ? UID_INVALID
: uid_shift
, selinux_apifs_context
, &buf
);
1113 return mount_nofollow_verbose(LOG_ERR
, "tmpfs", p
, "tmpfs", MS_STRICTATIME
, options
);
1116 static int setup_volatile_yes(const char *directory
, uid_t uid_shift
, const char *selinux_apifs_context
) {
1117 bool tmpfs_mounted
= false, bind_mounted
= false;
1118 _cleanup_(rmdir_and_freep
) char *template = NULL
;
1119 _cleanup_free_
char *buf
= NULL
, *bindir
= NULL
, *f
= NULL
, *t
= NULL
;
1125 /* --volatile=yes means we mount a tmpfs to the root dir, and the original /usr to use inside it, and
1126 * that read-only. Before we start setting this up let's validate if the image has the /usr merge
1127 * implemented, and let's output a friendly log message if it hasn't. */
1129 bindir
= path_join(directory
, "/bin");
1132 if (lstat(bindir
, &st
) < 0) {
1133 if (errno
!= ENOENT
)
1134 return log_error_errno(errno
, "Failed to stat /bin directory below image: %m");
1136 /* ENOENT is fine, just means the image is probably just a naked /usr and we can create the
1138 } else if (S_ISDIR(st
.st_mode
))
1139 return log_error_errno(SYNTHETIC_ERRNO(EISDIR
),
1140 "Sorry, --volatile=yes mode is not supported with OS images that have not merged /bin/, /sbin/, /lib/, /lib64/ into /usr/. "
1141 "Please work with your distribution and help them adopt the merged /usr scheme.");
1142 else if (!S_ISLNK(st
.st_mode
))
1143 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1144 "Error starting image: if --volatile=yes is used /bin must be a symlink (for merged /usr support) or non-existent (in which case a symlink is created automatically).");
1146 r
= mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
1148 return log_error_errno(r
, "Failed to create temporary directory: %m");
1150 const char *options
= "mode=0755" TMPFS_LIMITS_ROOTFS
;
1151 r
= tmpfs_patch_options(options
, uid_shift
== 0 ? UID_INVALID
: uid_shift
, selinux_apifs_context
, &buf
);
1157 r
= mount_nofollow_verbose(LOG_ERR
, "tmpfs", template, "tmpfs", MS_STRICTATIME
, options
);
1161 tmpfs_mounted
= true;
1163 f
= path_join(directory
, "/usr");
1169 t
= path_join(template, "/usr");
1176 if (r
< 0 && errno
!= EEXIST
) {
1177 r
= log_error_errno(errno
, "Failed to create %s: %m", t
);
1181 r
= mount_nofollow_verbose(LOG_ERR
, f
, t
, NULL
, MS_BIND
|MS_REC
, NULL
);
1185 bind_mounted
= true;
1187 r
= bind_remount_recursive(t
, MS_RDONLY
, MS_RDONLY
, NULL
);
1189 log_error_errno(r
, "Failed to remount %s read-only: %m", t
);
1193 r
= mount_nofollow_verbose(LOG_ERR
, template, directory
, NULL
, MS_MOVE
, NULL
);
1197 (void) rmdir(template);
1203 (void) umount_verbose(LOG_ERR
, t
, UMOUNT_NOFOLLOW
);
1206 (void) umount_verbose(LOG_ERR
, template, UMOUNT_NOFOLLOW
);
/* Implements --volatile=overlay for the container root at 'directory'.
 *
 * Visible steps: a temporary directory is created from the template "/tmp/nspawn-volatile-XXXXXX", a tmpfs is
 * mounted on it, "upper" and "work" sub-directories are created inside that tmpfs, and finally an overlayfs is
 * mounted on 'directory' with 'directory' itself as lowerdir and the two new directories as upperdir/workdir.
 *
 * The tmpfs options start out as "mode=0755" TMPFS_LIMITS_ROOTFS and are handed to tmpfs_patch_options()
 * together with uid_shift (UID_INVALID is passed instead when uid_shift is 0) and selinux_apifs_context; the
 * output buffer 'buf' is freed automatically (_cleanup_free_).
 *
 * NOTE(review): the error checks, jump targets and the final return are not visible in this excerpt, so the exact
 * cleanup conditions (in particular when the umount_verbose() at the end runs) should be confirmed against the
 * full source. */
1211 static int setup_volatile_overlay(const char *directory
, uid_t uid_shift
, const char *selinux_apifs_context
) {
1212 _cleanup_free_
char *buf
= NULL
, *escaped_directory
= NULL
, *escaped_upper
= NULL
, *escaped_work
= NULL
;
1213 _cleanup_(rmdir_and_freep
) char *template = NULL
;
1214 const char *upper
, *work
, *options
;
1215 bool tmpfs_mounted
= false;
1220 /* --volatile=overlay means we mount an overlayfs to the root dir. */
1222 r
= mkdtemp_malloc("/tmp/nspawn-volatile-XXXXXX", &template);
1224 return log_error_errno(r
, "Failed to create temporary directory: %m");
1226 options
= "mode=0755" TMPFS_LIMITS_ROOTFS
;
1227 r
= tmpfs_patch_options(options
, uid_shift
== 0 ? UID_INVALID
: uid_shift
, selinux_apifs_context
, &buf
);
1233 r
= mount_nofollow_verbose(LOG_ERR
, "tmpfs", template, "tmpfs", MS_STRICTATIME
, options
);
/* Record that the tmpfs now sits on the template directory; the umount_verbose() call at the end of the
 * function targets the same path. */
1237 tmpfs_mounted
= true;
1239 upper
= strjoina(template, "/upper");
1240 work
= strjoina(template, "/work");
1242 if (mkdir(upper
, 0755) < 0) {
1243 r
= log_error_errno(errno
, "Failed to create %s: %m", upper
);
1246 if (mkdir(work
, 0755) < 0) {
1247 r
= log_error_errno(errno
, "Failed to create %s: %m", work
);
1251 /* And now, let's overmount the root dir with an overlayfs that uses the root dir as lower dir. It's kinda nice
1252 * that the kernel allows us to do that without going through some mount point rearrangements. */
/* The three paths are escaped for "," and ":" before being spliced into the overlayfs option string below,
 * where those characters act as separators. */
1254 escaped_directory
= shell_escape(directory
, ",:");
1255 escaped_upper
= shell_escape(upper
, ",:");
1256 escaped_work
= shell_escape(work
, ",:");
1257 if (!escaped_directory
|| !escaped_upper
|| !escaped_work
) {
/* Assemble "lowerdir=…,upperdir=…,workdir=…" from the escaped paths and mount the overlay on 'directory'. */
1262 options
= strjoina("lowerdir=", escaped_directory
, ",upperdir=", escaped_upper
, ",workdir=", escaped_work
);
1263 r
= mount_nofollow_verbose(LOG_ERR
, "overlay", directory
, "overlay", 0, options
);
/* Best-effort unmount of the temporary tmpfs (result explicitly ignored). */
1267 (void) umount_verbose(LOG_ERR
, template, UMOUNT_NOFOLLOW
);
/* Dispatches to the handler that sets up the requested volatile mode for 'directory':
 *   - setup_volatile_yes(directory, uid_shift, selinux_apifs_context)
 *   - VOLATILE_STATE   → setup_volatile_state(directory)
 *   - VOLATILE_OVERLAY → setup_volatile_overlay(directory, uid_shift, selinux_apifs_context)
 * and returns the handler's result.
 *
 * NOTE(review): the parameters that carry the mode and uid_shift, the switch header, the case label belonging to
 * the setup_volatile_yes() call and any default branch are not visible in this excerpt — confirm against the
 * full source. */
1272 int setup_volatile_mode(
1273 const char *directory
,
1276 const char *selinux_apifs_context
) {
1281 return setup_volatile_yes(directory
, uid_shift
, selinux_apifs_context
);
1283 case VOLATILE_STATE
:
1284 return setup_volatile_state(directory
);
1286 case VOLATILE_OVERLAY
:
1287 return setup_volatile_overlay(directory
, uid_shift
, selinux_apifs_context
);
/* For VOLATILE_STATE, forwards directory, uid_shift and selinux_apifs_context to
 * setup_volatile_state_after_remount_idmap() and returns its result.
 *
 * NOTE(review): judging by the name this is the part of the volatile setup that has to run after the ID-mapped
 * remount; the mode parameter, the switch header and the handling of all other modes are not visible in this
 * excerpt — confirm against the full source. */
1294 int setup_volatile_mode_after_remount_idmap(
1295 const char *directory
,
1298 const char *selinux_apifs_context
) {
1302 case VOLATILE_STATE
:
1303 return setup_volatile_state_after_remount_idmap(directory
, uid_shift
, selinux_apifs_context
);
1310 /* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL.
 *
 * Parses a pivot-root specification of the form "NEW[:OLD]": the first ":"-separated word is extracted with
 * extract_first_word() (EXTRACT_DONT_COALESCE_SEPARATORS) into root_new, and what is left in 'p' is duplicated
 * with strdup() into root_old. root_new and, if set, root_old are checked with path_is_absolute(). Finally the
 * parsed values are moved into *pivot_root_new and *pivot_root_old via free_and_replace(). The temporaries are
 * _cleanup_free_, so they are released automatically on every exit path.
 *
 * NOTE(review): the declaration of 'p' (presumably initialised from 's'), the condition guarding the strdup(),
 * and the return values of the individual error paths are not visible in this excerpt. */
1311 int pivot_root_parse(char **pivot_root_new
, char **pivot_root_old
, const char *s
) {
1312 _cleanup_free_
char *root_new
= NULL
, *root_old
= NULL
;
1316 assert(pivot_root_new
);
1317 assert(pivot_root_old
);
1319 r
= extract_first_word(&p
, &root_new
, ":", EXTRACT_DONT_COALESCE_SEPARATORS
);
1328 root_old
= strdup(p
);
/* Both paths are validated with path_is_absolute(); root_old is only checked when it is set. */
1333 if (!path_is_absolute(root_new
))
1335 if (root_old
&& !path_is_absolute(root_old
))
/* Hand the parsed strings over to the caller, replacing whatever the output pointers held before. */
1338 free_and_replace(*pivot_root_new
, root_new
);
1339 free_and_replace(*pivot_root_old
, root_old
);
/* Rearranges the mounts at 'directory' so that the sub-tree directory/pivot_root_new ends up mounted on
 * 'directory' itself.
 *
 * Visible steps: pivot_root_new is tested for NULL first; the new root path is built with path_join() and
 * bind-mounted onto itself (MS_BIND). If pivot_root_old is set, a temporary directory is created from
 * "/tmp/nspawn-pivot-XXXXXX" (removed and freed automatically through rmdir_and_freep) and three MS_MOVE mounts
 * are issued in this order: new root → pivot_tmp, directory → pivot_tmp/pivot_root_old, pivot_tmp → directory.
 * A fourth call moves the new root directly onto 'directory'.
 *
 * NOTE(review): the error checks between the mount calls, the branch structure around the last MS_MOVE
 * (presumably the pivot_root_old == NULL case) and the return statements are not visible in this excerpt —
 * confirm against the full source. */
1344 int setup_pivot_root(const char *directory
, const char *pivot_root_new
, const char *pivot_root_old
) {
1345 _cleanup_free_
char *directory_pivot_root_new
= NULL
;
1346 _cleanup_free_
char *pivot_tmp_pivot_root_old
= NULL
;
1347 _cleanup_(rmdir_and_freep
) char *pivot_tmp
= NULL
;
1352 if (!pivot_root_new
)
1355 /* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1356 * If pivot_root_old is NULL, the existing / disappears.
1357 * This requires a temporary directory, pivot_tmp, which is
1358 * not a child of either.
1360 * This is typically used for OSTree-style containers, where the root partition contains several
1361 * sysroots which could be run. Normally, one would be chosen by the bootloader and pivoted to / by
1364 * For example, for an OSTree deployment, pivot_root_new
1365 * would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1366 * code doesn’t do the /var mount which OSTree expects: use
1367 * --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1369 * So in the OSTree case, we’ll end up with something like:
1370 * - directory = /tmp/nspawn-root-123456
1371 * - pivot_root_new = /ostree/deploy/os/deploy/123abc
1372 * - pivot_root_old = /sysroot
1373 * - directory_pivot_root_new =
1374 * /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1375 * - pivot_tmp = /tmp/nspawn-pivot-123456
1376 * - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1378 * Requires all file systems at directory and below to be mounted
1379 * MS_PRIVATE or MS_SLAVE so they can be moved.
1381 directory_pivot_root_new
= path_join(directory
, pivot_root_new
);
1382 if (!directory_pivot_root_new
)
1385 /* Remount directory_pivot_root_new to make it movable. */
1386 r
= mount_nofollow_verbose(LOG_ERR
, directory_pivot_root_new
, directory_pivot_root_new
, NULL
, MS_BIND
, NULL
);
/* With an "old root" requested, the swap goes through a temporary directory: the new root is parked on
 * pivot_tmp, the current root is moved below it at pivot_root_old, and the combined tree is moved back. */
1390 if (pivot_root_old
) {
1391 r
= mkdtemp_malloc("/tmp/nspawn-pivot-XXXXXX", &pivot_tmp
);
1393 return log_error_errno(r
, "Failed to create temporary directory: %m");
1395 pivot_tmp_pivot_root_old
= path_join(pivot_tmp
, pivot_root_old
);
1396 if (!pivot_tmp_pivot_root_old
)
1399 r
= mount_nofollow_verbose(LOG_ERR
, directory_pivot_root_new
, pivot_tmp
, NULL
, MS_MOVE
, NULL
);
1403 r
= mount_nofollow_verbose(LOG_ERR
, directory
, pivot_tmp_pivot_root_old
, NULL
, MS_MOVE
, NULL
);
1407 r
= mount_nofollow_verbose(LOG_ERR
, pivot_tmp
, directory
, NULL
, MS_MOVE
, NULL
);
/* NOTE(review): this direct move of the new root onto 'directory' is presumably the alternative taken when no
 * old root was requested; the line that selects between the two variants is not visible here. */
1409 r
= mount_nofollow_verbose(LOG_ERR
, directory_pivot_root_new
, directory
, NULL
, MS_MOVE
, NULL
);
/* Mount points on which pin_fully_visible_api_fs() mounts a "proc" and a "sysfs" instance, and which
 * do_wipe_fully_visible_api_fs() unmounts and removes again. */
1417 #define NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS "/run/host/proc"
1418 #define NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS "/run/host/sys"
/* Mounts a "proc" instance on NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS and a "sysfs" instance on
 * NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS, using PROC_DEFAULT_MOUNT_FLAGS and SYS_DEFAULT_MOUNT_FLAGS respectively.
 * Both mount points are created beforehand with mkdir_p() (mode 0755). Mount failures are logged at LOG_ERR by
 * mount_follow_verbose().
 *
 * NOTE(review): the error checks after the two mount calls and the return statements are not visible in this
 * excerpt. */
1420 int pin_fully_visible_api_fs(void) {
1423 log_debug("Pinning fully visible API FS");
/* The mkdir_p() results are deliberately ignored (cast to void); presumably a directory that could not be
 * created surfaces as a failure of the corresponding mount below. */
1425 (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS
, 0755);
1426 (void) mkdir_p(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS
, 0755);
1428 r
= mount_follow_verbose(LOG_ERR
, "proc", NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS
, "proc", PROC_DEFAULT_MOUNT_FLAGS
, NULL
);
1432 r
= mount_follow_verbose(LOG_ERR
, "sysfs", NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS
, "sysfs", SYS_DEFAULT_MOUNT_FLAGS
, NULL
);
/* Removes the two mounts at NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS and NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS: each
 * one is detached with umount2(MNT_DETACH) and its mount point directory is then removed with rmdir(), proc
 * first, sys second.
 *
 * The steps run strictly in sequence; the first one that fails logs an error and returns the result of
 * log_error_errno() immediately, so the remaining steps are not attempted.
 *
 * NOTE(review): the success return at the end is not visible in this excerpt. */
1439 static int do_wipe_fully_visible_api_fs(void) {
1440 if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS
, MNT_DETACH
) < 0)
1441 return log_error_errno(errno
, "Failed to unmount temporary proc: %m");
1443 if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_PROCFS
) < 0)
1444 return log_error_errno(errno
, "Failed to remove temporary proc mountpoint: %m");
1446 if (umount2(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS
, MNT_DETACH
) < 0)
1447 return log_error_errno(errno
, "Failed to unmount temporary sys: %m");
1449 if (rmdir(NSPAWN_PRIVATE_FULLY_VISIBLE_SYSFS
) < 0)
1450 return log_error_errno(errno
, "Failed to remove temporary sys mountpoint: %m");
/* Runs do_wipe_fully_visible_api_fs() bracketed by two namespace_enter() calls.
 *
 * The current mount namespace is pinned first with namespace_open_by_type(NAMESPACE_MOUNT); the resulting fd
 * is closed automatically (_cleanup_close_). After the first namespace_enter() the wipe is performed and its
 * result is kept in 'rr'; the second namespace_enter() is reported as "enter original mount namespace" on
 * failure.
 *
 * NOTE(review): the mount-namespace argument of both namespace_enter() calls (presumably mntns_fd for the first
 * and orig_mntns_fd for the second), the error checks guarding the two log_error_errno() returns, and the final
 * return (presumably of 'rr') are not visible in this excerpt — confirm against the full source. */
1455 int wipe_fully_visible_api_fs(int mntns_fd
) {
1456 _cleanup_close_
int orig_mntns_fd
= -EBADF
;
1459 log_debug("Wiping fully visible API FS");
1461 orig_mntns_fd
= namespace_open_by_type(NAMESPACE_MOUNT
);
1462 if (orig_mntns_fd
< 0)
1463 return log_error_errno(orig_mntns_fd
, "Failed to pin originating mount namespace: %m");
1465 r
= namespace_enter(/* pidns_fd = */ -EBADF
,
1467 /* netns_fd = */ -EBADF
,
1468 /* userns_fd = */ -EBADF
,
1469 /* root_fd = */ -EBADF
);
1471 return log_error_errno(r
, "Failed to enter mount namespace: %m");
/* The wipe result goes into a separate variable so that 'r' can be reused for the namespace switch below. */
1473 rr
= do_wipe_fully_visible_api_fs();
1475 r
= namespace_enter(/* pidns_fd = */ -EBADF
,
1477 /* netns_fd = */ -EBADF
,
1478 /* userns_fd = */ -EBADF
,
1479 /* root_fd = */ -EBADF
);
1481 return log_error_errno(r
, "Failed to enter original mount namespace: %m");