1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include "creds-util.h"
7 #include "exec-credential.h"
10 #include "glob-util.h"
12 #include "iovec-util.h"
13 #include "label-util.h"
14 #include "mkdir-label.h"
15 #include "mount-util.h"
16 #include "mountpoint-util.h"
17 #include "process-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
21 #include "tmpfile-util.h"
23 ExecSetCredential
*exec_set_credential_free(ExecSetCredential
*sc
) {
32 ExecLoadCredential
*exec_load_credential_free(ExecLoadCredential
*lc
) {
/* Defines exec_set_credential_hash_ops: keys are declared as char and are hashed/compared with
 * string_hash_func/string_compare_func; values are ExecSetCredential objects, with
 * exec_set_credential_free passed as the value destructor. */
41 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
42 exec_set_credential_hash_ops
,
43 char, string_hash_func
, string_compare_func
,
44 ExecSetCredential
, exec_set_credential_free
);
/* Defines exec_load_credential_hash_ops: keys are declared as char and are hashed/compared with
 * string_hash_func/string_compare_func; values are ExecLoadCredential objects, with
 * exec_load_credential_free passed as the value destructor. */
46 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
47 exec_load_credential_hash_ops
,
48 char, string_hash_func
, string_compare_func
,
49 ExecLoadCredential
, exec_load_credential_free
);
51 bool exec_params_need_credentials(const ExecParameters
*p
) {
54 return p
->flags
& (EXEC_SETUP_CREDENTIALS
|EXEC_SETUP_CREDENTIALS_FRESH
);
57 bool exec_context_has_credentials(const ExecContext
*c
) {
60 return !hashmap_isempty(c
->set_credentials
) ||
61 !hashmap_isempty(c
->load_credentials
) ||
62 !set_isempty(c
->import_credentials
);
65 bool exec_context_has_encrypted_credentials(const ExecContext
*c
) {
68 const ExecLoadCredential
*load_cred
;
69 HASHMAP_FOREACH(load_cred
, c
->load_credentials
)
70 if (load_cred
->encrypted
)
73 const ExecSetCredential
*set_cred
;
74 HASHMAP_FOREACH(set_cred
, c
->set_credentials
)
75 if (set_cred
->encrypted
)
81 static int get_credential_directory(
82 const char *runtime_prefix
,
90 if (!runtime_prefix
|| !unit
) {
95 p
= path_join(runtime_prefix
, "credentials", unit
);
103 int exec_context_get_credential_directory(
104 const ExecContext
*context
,
105 const ExecParameters
*params
,
114 if (!exec_params_need_credentials(params
) || !exec_context_has_credentials(context
)) {
119 return get_credential_directory(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], unit
, ret
);
122 int exec_context_destroy_credentials(const ExecContext
*c
, const char *runtime_prefix
, const char *unit
) {
123 _cleanup_free_
char *p
= NULL
;
128 r
= get_credential_directory(runtime_prefix
, unit
, &p
);
132 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
133 * unmount it, and afterwards remove the mount point */
134 (void) umount2(p
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
135 (void) rm_rf(p
, REMOVE_ROOT
|REMOVE_CHMOD
);
140 static int write_credential(
149 _cleanup_(unlink_and_freep
) char *tmp
= NULL
;
150 _cleanup_close_
int fd
= -EBADF
;
155 assert(data
|| size
== 0);
157 r
= tempfn_random_child("", "cred", &tmp
);
161 fd
= openat(dfd
, tmp
, O_CREAT
|O_RDWR
|O_CLOEXEC
|O_EXCL
|O_NOFOLLOW
|O_NOCTTY
, 0600);
167 r
= loop_write(fd
, data
, size
);
171 if (fchmod(fd
, 0400) < 0) /* Take away "w" bit */
174 if (uid_is_valid(uid
) && uid
!= getuid()) {
175 r
= fd_add_uid_acl_permission(fd
, uid
, ACL_READ
);
177 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
180 if (!ownership_ok
) /* Ideally we use ACLs, since we can neatly express what we want
181 * to express: that the user gets read access and nothing
182 * else. But if the backing fs can't support that (e.g. ramfs)
183 * then we can use file ownership instead. But that's only safe if
184 * we can then re-mount the whole thing read-only, so that the
185 * user can no longer chmod() the file to gain write access. */
188 if (fchown(fd
, uid
, gid
) < 0)
193 if (renameat(dfd
, tmp
, dfd
, id
) < 0)
/* Selects which directories credential_search_path() assembles into its search list:
 *
 *   CREDENTIAL_SEARCH_PATH_TRUSTED   - the received credentials directory plus the "credstore"
 *                                      configuration paths
 *   CREDENTIAL_SEARCH_PATH_ENCRYPTED - the received encrypted credentials directory plus the
 *                                      "credstore.encrypted" configuration paths
 *   CREDENTIAL_SEARCH_PATH_ALL       - both of the above
 *
 * _CREDENTIAL_SEARCH_PATH_MAX is the exclusive upper bound checked by credential_search_path();
 * _CREDENTIAL_SEARCH_PATH_INVALID is a negative (-EINVAL) value outside the valid range. */
200 typedef enum CredentialSearchPath
{
201 CREDENTIAL_SEARCH_PATH_TRUSTED
,
202 CREDENTIAL_SEARCH_PATH_ENCRYPTED
,
203 CREDENTIAL_SEARCH_PATH_ALL
,
204 _CREDENTIAL_SEARCH_PATH_MAX
,
205 _CREDENTIAL_SEARCH_PATH_INVALID
= -EINVAL
,
206 } CredentialSearchPath
;
208 static char **credential_search_path(const ExecParameters
*params
, CredentialSearchPath path
) {
209 _cleanup_strv_free_
char **l
= NULL
;
212 assert(path
>= 0 && path
< _CREDENTIAL_SEARCH_PATH_MAX
);
214 /* Assemble a search path to find credentials in. For non-encrypted credentials, we'll look in
215 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
216 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
218 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
219 if (strv_extend(&l
, params
->received_encrypted_credentials_directory
) < 0)
222 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
226 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_TRUSTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
227 if (strv_extend(&l
, params
->received_credentials_directory
) < 0)
230 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
235 _cleanup_free_
char *t
= strv_join(l
, ":");
237 log_debug("Credential search path is: %s", strempty(t
));
243 static int maybe_decrypt_and_write_credential(
254 _cleanup_(iovec_done_erase
) struct iovec plaintext
= {};
263 r
= decrypt_credential_and_warn(
266 /* tpm2_device= */ NULL
,
267 /* tpm2_signature_path= */ NULL
,
269 &IOVEC_MAKE(data
, size
),
270 CREDENTIAL_ANY_SCOPE
,
275 data
= plaintext
.iov_base
;
276 size
= plaintext
.iov_len
;
279 add
= strlen(id
) + size
;
283 r
= write_credential(dir_fd
, id
, data
, size
, uid
, gid
, ownership_ok
);
285 return log_debug_errno(r
, "Failed to write credential '%s': %m", id
);
291 static int load_credential_glob(
294 char * const *search_path
,
295 ReadFullFileFlags flags
,
306 assert(write_dfd
>= 0);
309 STRV_FOREACH(d
, search_path
) {
310 _cleanup_globfree_ glob_t pglob
= {};
311 _cleanup_free_
char *j
= NULL
;
313 j
= path_join(*d
, path
);
317 r
= safe_glob(j
, 0, &pglob
);
323 FOREACH_ARRAY(p
, pglob
.gl_pathv
, pglob
.gl_pathc
) {
324 _cleanup_free_
char *fn
= NULL
;
325 _cleanup_(erase_and_freep
) char *data
= NULL
;
328 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
329 r
= read_full_file_full(
333 encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
,
338 return log_debug_errno(r
, "Failed to read credential '%s': %m", *p
);
340 r
= path_extract_filename(*p
, &fn
);
342 return log_debug_errno(r
, "Failed to extract filename from '%s': %m", *p
);
344 r
= maybe_decrypt_and_write_credential(
363 static int load_credential(
364 const ExecContext
*context
,
365 const ExecParameters
*params
,
377 ReadFullFileFlags flags
= READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
;
378 _cleanup_strv_free_
char **search_path
= NULL
;
379 _cleanup_(erase_and_freep
) char *data
= NULL
;
380 _cleanup_free_
char *bindname
= NULL
;
381 const char *source
= NULL
;
382 bool missing_ok
= true;
391 assert(read_dfd
>= 0 || read_dfd
== AT_FDCWD
);
392 assert(write_dfd
>= 0);
396 /* If a directory fd is specified, then read the file directly from that dir. In this case we
397 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
398 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
401 if (!filename_is_valid(path
)) /* safety check */
407 } else if (path_is_absolute(path
)) {
408 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
411 if (!path_is_valid(path
)) /* safety check */
414 flags
|= READ_FULL_FILE_CONNECT_SOCKET
;
416 /* Pass some minimal info about the unit and the credential name we are looking to acquire
417 * via the source socket address in case we read off an AF_UNIX socket. */
418 if (asprintf(&bindname
, "@%" PRIx64
"/unit/%s/%s", random_u64(), unit
, id
) < 0)
424 } else if (credential_name_valid(path
)) {
425 /* If this is a relative path, take it as credential name relative to the credentials
426 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
427 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
429 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ALL
);
438 flags
|= READ_FULL_FILE_UNBASE64
;
440 maxsz
= encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
;
443 STRV_FOREACH(d
, search_path
) {
444 _cleanup_free_
char *j
= NULL
;
446 j
= path_join(*d
, path
);
450 r
= read_full_file_full(
451 AT_FDCWD
, j
, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
461 r
= read_full_file_full(
471 if (r
== -ENOENT
&& (missing_ok
|| hashmap_contains(context
->set_credentials
, id
))) {
472 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
473 * will get clear errors if we don't pass such a missing credential on as they
474 * themselves will get ENOENT when trying to read them, which should not be much
475 * worse than when we handle the error here and make it fatal.
477 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
478 * we are fine, too. */
479 log_full_errno(hashmap_contains(context
->set_credentials
, id
) ? LOG_DEBUG
: LOG_INFO
,
480 r
, "Couldn't read inherited credential '%s', skipping: %m", path
);
484 return log_debug_errno(r
, "Failed to read credential '%s': %m", path
);
486 return maybe_decrypt_and_write_credential(write_dfd
, id
, encrypted
, uid
, gid
, ownership_ok
, data
, size
, left
);
489 struct load_cred_args
{
490 const ExecContext
*context
;
491 const ExecParameters
*params
;
501 static int load_cred_recurse_dir_cb(
502 RecurseDirEvent event
,
506 const struct dirent
*de
,
507 const struct statx
*sx
,
510 struct load_cred_args
*args
= ASSERT_PTR(userdata
);
511 _cleanup_free_
char *sub_id
= NULL
;
517 if (event
!= RECURSE_DIR_ENTRY
)
518 return RECURSE_DIR_CONTINUE
;
520 if (!IN_SET(de
->d_type
, DT_REG
, DT_SOCK
))
521 return RECURSE_DIR_CONTINUE
;
523 sub_id
= strreplace(path
, "/", "_");
527 if (!credential_name_valid(sub_id
))
528 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Credential would get ID %s, which is not valid, refusing", sub_id
);
530 if (faccessat(args
->dfd
, sub_id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
531 log_debug("Skipping credential with duplicated ID %s at %s", sub_id
, path
);
532 return RECURSE_DIR_CONTINUE
;
535 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sub_id
);
553 return RECURSE_DIR_CONTINUE
;
556 static int acquire_credentials(
557 const ExecContext
*context
,
558 const ExecParameters
*params
,
565 uint64_t left
= CREDENTIALS_TOTAL_SIZE_MAX
;
566 _cleanup_close_
int dfd
= -EBADF
;
568 ExecLoadCredential
*lc
;
569 ExecSetCredential
*sc
;
577 dfd
= open(p
, O_DIRECTORY
|O_CLOEXEC
);
581 r
= fd_acl_make_writable(dfd
); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
585 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
586 HASHMAP_FOREACH(lc
, context
->load_credentials
) {
587 _cleanup_close_
int sub_fd
= -EBADF
;
589 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
590 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
591 * a regular file. Finally, if it's a relative path we will use it as a credential name to
592 * propagate a credential passed to us from further up. */
594 if (path_is_absolute(lc
->path
)) {
595 sub_fd
= open(lc
->path
, O_DIRECTORY
|O_CLOEXEC
|O_RDONLY
);
596 if (sub_fd
< 0 && !IN_SET(errno
,
597 ENOTDIR
, /* Not a directory */
598 ENOENT
)) /* Doesn't exist? */
599 return log_debug_errno(errno
, "Failed to open '%s': %m", lc
->path
);
603 /* Regular file (incl. a credential passed in from higher up) */
619 r
= recurse_dir(sub_fd
,
620 /* path= */ lc
->id
, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
622 /* n_depth_max= */ UINT_MAX
,
623 RECURSE_DIR_SORT
|RECURSE_DIR_IGNORE_DOT
|RECURSE_DIR_ENSURE_TYPE
,
624 load_cred_recurse_dir_cb
,
625 &(struct load_cred_args
) {
628 .encrypted
= lc
->encrypted
,
633 .ownership_ok
= ownership_ok
,
640 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
641 * override any credentials found earlier. */
642 SET_FOREACH(ic
, context
->import_credentials
) {
643 _cleanup_free_
char **search_path
= NULL
;
645 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_TRUSTED
);
649 r
= load_credential_glob(
651 /* encrypted = */ false,
653 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
,
662 search_path
= strv_free(search_path
);
663 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
);
667 r
= load_credential_glob(
669 /* encrypted = */ true,
671 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
|READ_FULL_FILE_UNBASE64
,
681 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
682 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
683 HASHMAP_FOREACH(sc
, context
->set_credentials
) {
684 _cleanup_(iovec_done_erase
) struct iovec plaintext
= {};
688 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
689 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
690 * slow and involved, hence it's nice to be able to skip that if the credential already
692 if (faccessat(dfd
, sc
->id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0)
695 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sc
->id
);
698 r
= decrypt_credential_and_warn(
701 /* tpm2_device= */ NULL
,
702 /* tpm2_signature_path= */ NULL
,
704 &IOVEC_MAKE(sc
->data
, sc
->size
),
705 CREDENTIAL_ANY_SCOPE
,
710 data
= plaintext
.iov_base
;
711 size
= plaintext
.iov_len
;
717 add
= strlen(sc
->id
) + size
;
721 r
= write_credential(dfd
, sc
->id
, data
, size
, uid
, gid
, ownership_ok
);
728 r
= fd_acl_make_read_only(dfd
); /* Now take away the "w" bit */
732 /* After we created all keys with the right perms, also make sure the credential store as a whole is
735 if (uid_is_valid(uid
) && uid
!= getuid()) {
736 r
= fd_add_uid_acl_permission(dfd
, uid
, ACL_READ
| ACL_EXECUTE
);
738 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
744 if (fchown(dfd
, uid
, gid
) < 0)
752 static int setup_credentials_internal(
753 const ExecContext
*context
,
754 const ExecParameters
*params
,
756 const char *final
, /* This is where the credential store shall eventually end up at */
757 const char *workspace
, /* This is where we can prepare it before moving it to the final place */
758 bool reuse_workspace
, /* Whether to reuse any existing workspace mount if it already is a mount */
759 bool must_mount
, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
764 int r
, workspace_mounted
; /* negative if we don't know yet whether we have/can mount something; true
765 * if we mounted something; false if we definitely can't mount anything */
773 r
= path_is_mount_point(final
);
776 final_mounted
= r
> 0;
779 if (FLAGS_SET(params
->flags
, EXEC_SETUP_CREDENTIALS_FRESH
)) {
780 r
= umount_verbose(LOG_DEBUG
, final
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
784 final_mounted
= false;
786 /* We can reuse the previous credential dir */
787 r
= dir_is_empty(final
, /* ignore_hidden_or_backup = */ false);
791 log_debug("Credential dir for unit '%s' already set up, skipping.", unit
);
797 if (reuse_workspace
) {
798 r
= path_is_mount_point(workspace
);
802 workspace_mounted
= true; /* If this is already a mount, and we are supposed to reuse
803 * it, let's keep this in mind */
805 workspace_mounted
= -1; /* We need to figure out if we can mount something to the workspace */
807 workspace_mounted
= -1; /* ditto */
809 /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
810 * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
811 * If the workspace is not mounted, we just bind the final place over and make it writable. */
812 must_mount
= must_mount
|| final_mounted
;
814 if (workspace_mounted
< 0) {
816 /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
817 * not using the final place. */
818 r
= mount_credentials_fs(workspace
, CREDENTIALS_TOTAL_SIZE_MAX
, /* ro= */ false);
819 if (final_mounted
|| r
< 0) {
820 /* If using final place or failed to mount new tmpfs, make a bind mount from
821 * the final to the workspace, so that we can make it writable there. */
822 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
824 if (!ERRNO_IS_PRIVILEGE(r
))
825 /* Propagate anything that isn't a permission problem. */
829 /* If it's not OK to use the plain directory fallback, propagate all
833 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
834 * for compat with container envs, and just use the final dir as is.
835 * Final place must not be mounted in this case (refused by must_mount
838 workspace_mounted
= false;
840 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
841 r
= mount_nofollow_verbose(LOG_DEBUG
,
845 MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false),
850 workspace_mounted
= true;
853 workspace_mounted
= true;
856 assert(workspace_mounted
>= 0);
857 assert(!must_mount
|| workspace_mounted
);
859 const char *where
= workspace_mounted
? workspace
: final
;
861 (void) label_fix_full(AT_FDCWD
, where
, final
, 0);
863 r
= acquire_credentials(context
, params
, unit
, where
, uid
, gid
, workspace_mounted
);
865 /* If we're using final place as workspace, and failed to acquire credentials, we might
866 * have left half-written creds there. Let's get rid of the whole mount, so future
867 * calls won't reuse it. */
869 (void) umount_verbose(LOG_DEBUG
, final
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
874 if (workspace_mounted
) {
875 if (!final_mounted
) {
876 /* Make workspace read-only now, so that any bind mount we make from it defaults to
878 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ true), NULL
);
882 /* And mount it to the final place, read-only */
883 r
= mount_nofollow_verbose(LOG_DEBUG
, workspace
, final
, NULL
, MS_MOVE
, NULL
);
885 /* Otherwise we just get rid of the bind mount of final place */
886 r
= umount_verbose(LOG_DEBUG
, workspace
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
890 _cleanup_free_
char *parent
= NULL
;
892 /* If we do not have our own mount but used the plain directory fallback, then we need to
893 * open access to the top-level credential directory and the per-service directory now */
895 r
= path_extract_directory(final
, &parent
);
898 if (chmod(parent
, 0755) < 0)
905 int exec_setup_credentials(
906 const ExecContext
*context
,
907 const ExecParameters
*params
,
912 _cleanup_free_
char *p
= NULL
, *q
= NULL
;
919 if (!exec_params_need_credentials(params
) || !exec_context_has_credentials(context
))
922 if (!params
->prefix
[EXEC_DIRECTORY_RUNTIME
])
925 /* This is where we'll place stuff when we are done; the main credentials directory is world-readable,
926 * and the subdir we mount over with a read-only file system readable by the service's user. */
927 q
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "credentials");
931 r
= mkdir_label(q
, 0755); /* top-level dir: world readable/searchable */
932 if (r
< 0 && r
!= -EEXIST
)
935 p
= path_join(q
, unit
);
939 r
= mkdir_label(p
, 0700); /* per-unit dir: private to user */
940 if (r
< 0 && r
!= -EEXIST
)
943 r
= safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM
|FORK_WAIT
|FORK_NEW_MOUNTNS
, NULL
);
945 _cleanup_(rmdir_and_freep
) char *u
= NULL
; /* remove the temporary workspace if we can */
946 _cleanup_free_
char *t
= NULL
;
948 /* If this is not a privilege or support issue then propagate the error */
949 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
952 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
953 * it into place, so that users can't access half-initialized credential stores. */
954 t
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "systemd/temporary-credentials");
958 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
959 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
960 * after it is fully set up */
961 u
= path_join(t
, unit
);
965 FOREACH_STRING(i
, t
, u
) {
966 r
= mkdir_label(i
, 0700);
967 if (r
< 0 && r
!= -EEXIST
)
971 r
= setup_credentials_internal(
975 p
, /* final mount point */
976 u
, /* temporary workspace to overmount */
977 true, /* reuse the workspace if it is already a mount */
978 false, /* it's OK to fall back to a plain directory if we can't mount anything */
986 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
987 * we can use the same directory for all cases, after turning off propagation. Question
988 * though is: where do we turn off propagation exactly, and where do we place the workspace
989 * directory? We need some place that is guaranteed to be a mount point in the host, and
990 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
991 * since we ultimately want to move the resulting file system there, i.e. we need propagation
992 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
993 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
994 * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
995 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
996 * propagation on the former, and then overmount the latter.
998 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since they do not exist
999 * for this purpose, but there are few other candidates that work equally well for us, and
1000 * given that we do this in a privately namespaced short-lived single-threaded process that
1001 * no one else sees this should be OK to do. */
1003 /* Turn off propagation from our namespace to host */
1004 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, "/dev", NULL
, MS_SLAVE
|MS_REC
, NULL
);
1008 r
= setup_credentials_internal(
1012 p
, /* final mount point */
1013 "/dev/shm", /* temporary workspace to overmount */
1014 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1015 true, /* insist that something is mounted, do not allow fallback to plain directory */
1021 _exit(EXIT_SUCCESS
);
1024 _exit(EXIT_FAILURE
);
1027 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1028 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1029 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
1030 * seen by users when trying to access this inode. */