1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include "creds-util.h"
7 #include "exec-credential.h"
10 #include "glob-util.h"
12 #include "label-util.h"
13 #include "mkdir-label.h"
14 #include "mount-util.h"
16 #include "mountpoint-util.h"
17 #include "process-util.h"
18 #include "random-util.h"
19 #include "recurse-dir.h"
21 #include "tmpfile-util.h"
23 ExecSetCredential
*exec_set_credential_free(ExecSetCredential
*sc
) {
32 ExecLoadCredential
*exec_load_credential_free(ExecLoadCredential
*lc
) {
41 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
42 exec_set_credential_hash_ops
,
43 char, string_hash_func
, string_compare_func
,
44 ExecSetCredential
, exec_set_credential_free
);
46 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
47 exec_load_credential_hash_ops
,
48 char, string_hash_func
, string_compare_func
,
49 ExecLoadCredential
, exec_load_credential_free
);
51 bool exec_context_has_credentials(const ExecContext
*c
) {
54 return !hashmap_isempty(c
->set_credentials
) ||
55 !hashmap_isempty(c
->load_credentials
) ||
56 !set_isempty(c
->import_credentials
);
59 bool exec_context_has_encrypted_credentials(ExecContext
*c
) {
60 ExecLoadCredential
*load_cred
;
61 ExecSetCredential
*set_cred
;
65 HASHMAP_FOREACH(load_cred
, c
->load_credentials
)
66 if (load_cred
->encrypted
)
69 HASHMAP_FOREACH(set_cred
, c
->set_credentials
)
70 if (set_cred
->encrypted
)
76 static int get_credential_directory(
77 const char *runtime_prefix
,
85 if (!runtime_prefix
|| !unit
) {
90 p
= path_join(runtime_prefix
, "credentials", unit
);
98 int exec_context_get_credential_directory(
99 const ExecContext
*context
,
100 const ExecParameters
*params
,
109 if (!exec_context_has_credentials(context
)) {
114 return get_credential_directory(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], unit
, ret
);
117 int unit_add_default_credential_dependencies(Unit
*u
, const ExecContext
*c
) {
118 _cleanup_free_
char *p
= NULL
, *m
= NULL
;
124 if (!exec_context_has_credentials(c
))
127 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
128 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
129 * credentials mount appears on the host. */
131 r
= get_credential_directory(u
->manager
->prefix
[EXEC_DIRECTORY_RUNTIME
], u
->id
, &p
);
135 r
= unit_name_from_path(p
, ".mount", &m
);
139 return unit_add_dependency_by_name(u
, UNIT_AFTER
, m
, /* add_reference= */ true, UNIT_DEPENDENCY_FILE
);
142 int exec_context_destroy_credentials(Unit
*u
) {
143 _cleanup_free_
char *p
= NULL
;
148 r
= get_credential_directory(u
->manager
->prefix
[EXEC_DIRECTORY_RUNTIME
], u
->id
, &p
);
152 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
153 * unmount it, and afterwards remove the mount point */
154 if (umount2(p
, MNT_DETACH
|UMOUNT_NOFOLLOW
) >= 0)
155 (void) mount_invalidate_state_by_path(u
->manager
, p
);
157 (void) rm_rf(p
, REMOVE_ROOT
|REMOVE_CHMOD
);
162 static int write_credential(
171 _cleanup_(unlink_and_freep
) char *tmp
= NULL
;
172 _cleanup_close_
int fd
= -EBADF
;
175 r
= tempfn_random_child("", "cred", &tmp
);
179 fd
= openat(dfd
, tmp
, O_CREAT
|O_RDWR
|O_CLOEXEC
|O_EXCL
|O_NOFOLLOW
|O_NOCTTY
, 0600);
185 r
= loop_write(fd
, data
, size
);
189 if (fchmod(fd
, 0400) < 0) /* Take away "w" bit */
192 if (uid_is_valid(uid
) && uid
!= getuid()) {
193 r
= fd_add_uid_acl_permission(fd
, uid
, ACL_READ
);
195 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
198 if (!ownership_ok
) /* Ideally we use ACLs, since we can neatly express what we want
199 * to express: that the user gets read access and nothing
200 * else. But if the backing fs can't support that (e.g. ramfs)
201 * then we can use file ownership instead. But that's only safe if
202 * we can then re-mount the whole thing read-only, so that the
203 * user can no longer chmod() the file to gain write access. */
206 if (fchown(fd
, uid
, gid
) < 0)
211 if (renameat(dfd
, tmp
, dfd
, id
) < 0)
218 typedef enum CredentialSearchPath
{
219 CREDENTIAL_SEARCH_PATH_TRUSTED
,
220 CREDENTIAL_SEARCH_PATH_ENCRYPTED
,
221 CREDENTIAL_SEARCH_PATH_ALL
,
222 _CREDENTIAL_SEARCH_PATH_MAX
,
223 _CREDENTIAL_SEARCH_PATH_INVALID
= -EINVAL
,
224 } CredentialSearchPath
;
226 static char **credential_search_path(const ExecParameters
*params
, CredentialSearchPath path
) {
228 _cleanup_strv_free_
char **l
= NULL
;
231 assert(path
>= 0 && path
< _CREDENTIAL_SEARCH_PATH_MAX
);
233 /* Assemble a search path to find credentials in. For non-encrypted credentials, we'll look in
234 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
235 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
237 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
238 if (strv_extend(&l
, params
->received_encrypted_credentials_directory
) < 0)
241 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
245 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_TRUSTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
246 if (params
->received_credentials_directory
)
247 if (strv_extend(&l
, params
->received_credentials_directory
) < 0)
250 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
255 _cleanup_free_
char *t
= strv_join(l
, ":");
257 log_debug("Credential search path is: %s", strempty(t
));
263 static int maybe_decrypt_and_write_credential(
274 _cleanup_free_
void *plaintext
= NULL
;
279 size_t plaintext_size
= 0;
281 r
= decrypt_credential_and_warn(id
, now(CLOCK_REALTIME
), NULL
, NULL
, data
, size
,
282 &plaintext
, &plaintext_size
);
287 size
= plaintext_size
;
290 add
= strlen(id
) + size
;
294 r
= write_credential(dir_fd
, id
, data
, size
, uid
, gid
, ownership_ok
);
296 return log_debug_errno(r
, "Failed to write credential '%s': %m", id
);
302 static int load_credential_glob(
306 ReadFullFileFlags flags
,
315 STRV_FOREACH(d
, search_path
) {
316 _cleanup_globfree_ glob_t pglob
= {};
317 _cleanup_free_
char *j
= NULL
;
319 j
= path_join(*d
, path
);
323 r
= safe_glob(j
, 0, &pglob
);
329 for (size_t n
= 0; n
< pglob
.gl_pathc
; n
++) {
330 _cleanup_free_
char *fn
= NULL
;
331 _cleanup_(erase_and_freep
) char *data
= NULL
;
334 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
335 r
= read_full_file_full(
339 encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
,
344 return log_debug_errno(r
, "Failed to read credential '%s': %m",
347 r
= path_extract_filename(pglob
.gl_pathv
[n
], &fn
);
349 return log_debug_errno(r
, "Failed to extract filename from '%s': %m",
352 r
= maybe_decrypt_and_write_credential(
371 static int load_credential(
372 const ExecContext
*context
,
373 const ExecParameters
*params
,
385 ReadFullFileFlags flags
= READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
;
386 _cleanup_strv_free_
char **search_path
= NULL
;
387 _cleanup_(erase_and_freep
) char *data
= NULL
;
388 _cleanup_free_
char *bindname
= NULL
;
389 const char *source
= NULL
;
390 bool missing_ok
= true;
399 assert(read_dfd
>= 0 || read_dfd
== AT_FDCWD
);
400 assert(write_dfd
>= 0);
404 /* If a directory fd is specified, then read the file directly from that dir. In this case we
405 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
406 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
409 if (!filename_is_valid(path
)) /* safety check */
415 } else if (path_is_absolute(path
)) {
416 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
419 if (!path_is_valid(path
)) /* safety check */
422 flags
|= READ_FULL_FILE_CONNECT_SOCKET
;
424 /* Pass some minimal info about the unit and the credential name we are looking to acquire
425 * via the source socket address in case we read off an AF_UNIX socket. */
426 if (asprintf(&bindname
, "@%" PRIx64
"/unit/%s/%s", random_u64(), unit
, id
) < 0)
432 } else if (credential_name_valid(path
)) {
433 /* If this is a relative path, take it as credential name relative to the credentials
434 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
435 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
437 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ALL
);
446 flags
|= READ_FULL_FILE_UNBASE64
;
448 maxsz
= encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
;
451 STRV_FOREACH(d
, search_path
) {
452 _cleanup_free_
char *j
= NULL
;
454 j
= path_join(*d
, path
);
458 r
= read_full_file_full(
459 AT_FDCWD
, j
, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
469 r
= read_full_file_full(
479 if (r
== -ENOENT
&& (missing_ok
|| hashmap_contains(context
->set_credentials
, id
))) {
480 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
481 * will get clear errors if we don't pass such a missing credential on as they
482 * themselves will get ENOENT when trying to read them, which should not be much
483 * worse than when we handle the error here and make it fatal.
485 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
486 * we are fine, too. */
487 log_debug_errno(r
, "Couldn't read inherited credential '%s', skipping: %m", path
);
491 return log_debug_errno(r
, "Failed to read credential '%s': %m", path
);
493 return maybe_decrypt_and_write_credential(write_dfd
, id
, encrypted
, uid
, gid
, ownership_ok
, data
, size
, left
);
496 struct load_cred_args
{
497 const ExecContext
*context
;
498 const ExecParameters
*params
;
508 static int load_cred_recurse_dir_cb(
509 RecurseDirEvent event
,
513 const struct dirent
*de
,
514 const struct statx
*sx
,
517 struct load_cred_args
*args
= ASSERT_PTR(userdata
);
518 _cleanup_free_
char *sub_id
= NULL
;
521 if (event
!= RECURSE_DIR_ENTRY
)
522 return RECURSE_DIR_CONTINUE
;
524 if (!IN_SET(de
->d_type
, DT_REG
, DT_SOCK
))
525 return RECURSE_DIR_CONTINUE
;
527 sub_id
= strreplace(path
, "/", "_");
531 if (!credential_name_valid(sub_id
))
532 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Credential would get ID %s, which is not valid, refusing", sub_id
);
534 if (faccessat(args
->dfd
, sub_id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
535 log_debug("Skipping credential with duplicated ID %s at %s", sub_id
, path
);
536 return RECURSE_DIR_CONTINUE
;
539 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sub_id
);
557 return RECURSE_DIR_CONTINUE
;
560 static int acquire_credentials(
561 const ExecContext
*context
,
562 const ExecParameters
*params
,
569 uint64_t left
= CREDENTIALS_TOTAL_SIZE_MAX
;
570 _cleanup_close_
int dfd
= -EBADF
;
572 ExecLoadCredential
*lc
;
573 ExecSetCredential
*sc
;
579 dfd
= open(p
, O_DIRECTORY
|O_CLOEXEC
);
583 r
= fd_acl_make_writable(dfd
); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
587 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
588 HASHMAP_FOREACH(lc
, context
->load_credentials
) {
589 _cleanup_close_
int sub_fd
= -EBADF
;
591 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
592 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
593 * a regular file. Finally, if it's a relative path we will use it as a credential name to
594 * propagate a credential passed to us from further up. */
596 if (path_is_absolute(lc
->path
)) {
597 sub_fd
= open(lc
->path
, O_DIRECTORY
|O_CLOEXEC
|O_RDONLY
);
598 if (sub_fd
< 0 && !IN_SET(errno
,
599 ENOTDIR
, /* Not a directory */
600 ENOENT
)) /* Doesn't exist? */
601 return log_debug_errno(errno
, "Failed to open '%s': %m", lc
->path
);
605 /* Regular file (incl. a credential passed in from higher up) */
623 /* path= */ lc
->id
, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
625 /* n_depth_max= */ UINT_MAX
,
626 RECURSE_DIR_SORT
|RECURSE_DIR_IGNORE_DOT
|RECURSE_DIR_ENSURE_TYPE
,
627 load_cred_recurse_dir_cb
,
628 &(struct load_cred_args
) {
631 .encrypted
= lc
->encrypted
,
636 .ownership_ok
= ownership_ok
,
643 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
644 * override any credentials found earlier. */
645 SET_FOREACH(ic
, context
->import_credentials
) {
646 _cleanup_free_
char **search_path
= NULL
;
648 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_TRUSTED
);
652 r
= load_credential_glob(
654 /* encrypted = */ false,
656 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
,
665 search_path
= strv_free(search_path
);
666 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
);
670 r
= load_credential_glob(
672 /* encrypted = */ true,
674 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
|READ_FULL_FILE_UNBASE64
,
684 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
685 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
686 HASHMAP_FOREACH(sc
, context
->set_credentials
) {
687 _cleanup_(erase_and_freep
) void *plaintext
= NULL
;
691 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
692 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
693 * slow and involved, hence it's nice to be able to skip that if the credential already
695 if (faccessat(dfd
, sc
->id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0)
698 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sc
->id
);
701 r
= decrypt_credential_and_warn(sc
->id
, now(CLOCK_REALTIME
), NULL
, NULL
, sc
->data
, sc
->size
, &plaintext
, &size
);
711 add
= strlen(sc
->id
) + size
;
715 r
= write_credential(dfd
, sc
->id
, data
, size
, uid
, gid
, ownership_ok
);
722 r
= fd_acl_make_read_only(dfd
); /* Now take away the "w" bit */
726 /* After we created all keys with the right perms, also make sure the credential store as a whole is
729 if (uid_is_valid(uid
) && uid
!= getuid()) {
730 r
= fd_add_uid_acl_permission(dfd
, uid
, ACL_READ
| ACL_EXECUTE
);
732 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
738 if (fchown(dfd
, uid
, gid
) < 0)
746 static int setup_credentials_internal(
747 const ExecContext
*context
,
748 const ExecParameters
*params
,
750 const char *final
, /* This is where the credential store shall eventually end up at */
751 const char *workspace
, /* This is where we can prepare it before moving it to the final place */
752 bool reuse_workspace
, /* Whether to reuse any existing workspace mount if it already is a mount */
753 bool must_mount
, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
757 int r
, workspace_mounted
; /* negative if we don't know yet whether we have/can mount something; true
758 * if we mounted something; false if we definitely can't mount anything */
766 if (reuse_workspace
) {
767 r
= path_is_mount_point(workspace
, NULL
, 0);
771 workspace_mounted
= true; /* If this is already a mount, and we are supposed to reuse
772 * it, let's keep this in mind */
774 workspace_mounted
= -1; /* We need to figure out if we can mount something to the workspace */
776 workspace_mounted
= -1; /* ditto */
778 r
= path_is_mount_point(final
, NULL
, 0);
782 /* If the final place already has something mounted, we use that. If the workspace also has
783 * something mounted we assume it's actually the same mount (but with MS_RDONLY
785 final_mounted
= true;
787 if (workspace_mounted
< 0) {
788 /* If the final place is mounted, but the workspace isn't, then let's bind mount
789 * the final version to the workspace, and make it writable, so that we can make
792 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
796 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false), NULL
);
800 workspace_mounted
= true;
803 final_mounted
= false;
805 if (workspace_mounted
< 0) {
806 /* Nothing is mounted on the workspace yet, let's try to mount something now */
808 r
= mount_credentials_fs(workspace
, CREDENTIALS_TOTAL_SIZE_MAX
, /* ro= */ false);
810 /* If that didn't work, try to make a bind mount from the final to the workspace, so
811 * that we can make it writable there. */
812 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
814 if (!ERRNO_IS_PRIVILEGE(r
))
815 /* Propagate anything that isn't a permission problem. */
819 /* If it's not OK to use the plain directory fallback, propagate all
823 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
824 * for compat with container envs, and just use the final dir as is. */
826 workspace_mounted
= false;
828 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
829 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false), NULL
);
833 workspace_mounted
= true;
836 workspace_mounted
= true;
839 assert(!must_mount
|| workspace_mounted
> 0);
840 where
= workspace_mounted
? workspace
: final
;
842 (void) label_fix_full(AT_FDCWD
, where
, final
, 0);
844 r
= acquire_credentials(context
, params
, unit
, where
, uid
, gid
, workspace_mounted
);
848 if (workspace_mounted
) {
851 /* Determine if we should actually install the prepared mount in the final location by bind
852 * mounting it there. We do so only if the mount is not established there already, and if the
853 * mount is actually non-empty (i.e. carries at least one credential). Note that in the best
854 * case we are doing all this in a mount namespace, thus no one else will see that we
855 * allocated a file system we are getting rid of again here. */
857 install
= false; /* already installed */
859 r
= dir_is_empty(where
, /* ignore_hidden_or_backup= */ false);
863 install
= r
== 0; /* install only if non-empty */
867 /* Make workspace read-only now, so that any bind mount we make from it defaults to
869 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ true), NULL
);
873 /* And mount it to the final place, read-only */
874 r
= mount_nofollow_verbose(LOG_DEBUG
, workspace
, final
, NULL
, MS_MOVE
, NULL
);
876 /* Otherwise get rid of it */
877 r
= umount_verbose(LOG_DEBUG
, workspace
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
881 _cleanup_free_
char *parent
= NULL
;
883 /* If we do not have our own mount but used the plain directory fallback, then we need to
884 * open access to the top-level credential directory and the per-service directory now */
886 r
= path_extract_directory(final
, &parent
);
889 if (chmod(parent
, 0755) < 0)
896 int exec_setup_credentials(
897 const ExecContext
*context
,
898 const ExecParameters
*params
,
903 _cleanup_free_
char *p
= NULL
, *q
= NULL
;
909 if (!exec_context_has_credentials(context
))
912 if (!params
->prefix
[EXEC_DIRECTORY_RUNTIME
])
915 /* This is where we'll place stuff when we are done; this main credentials directory is world-readable,
916 * and the subdir we mount over with a read-only file system readable by the service's user */
917 q
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "credentials");
921 r
= mkdir_label(q
, 0755); /* top-level dir: world readable/searchable */
922 if (r
< 0 && r
!= -EEXIST
)
925 p
= path_join(q
, unit
);
929 r
= mkdir_label(p
, 0700); /* per-unit dir: private to user */
930 if (r
< 0 && r
!= -EEXIST
)
933 r
= safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM
|FORK_WAIT
|FORK_NEW_MOUNTNS
, NULL
);
935 _cleanup_(rmdir_and_freep
) char *u
= NULL
; /* remove the temporary workspace if we can */
936 _cleanup_free_
char *t
= NULL
;
938 /* If this is not a privilege or support issue then propagate the error */
939 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
942 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
943 * it into place, so that users can't access half-initialized credential stores. */
944 t
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "systemd/temporary-credentials");
948 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
949 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
950 * after it is fully set up */
951 u
= path_join(t
, unit
);
955 FOREACH_STRING(i
, t
, u
) {
956 r
= mkdir_label(i
, 0700);
957 if (r
< 0 && r
!= -EEXIST
)
961 r
= setup_credentials_internal(
965 p
, /* final mount point */
966 u
, /* temporary workspace to overmount */
967 true, /* reuse the workspace if it is already a mount */
968 false, /* it's OK to fall back to a plain directory if we can't mount anything */
976 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
977 * we can use the same directory for all cases, after turning off propagation. Question
978 * though is: where do we turn off propagation exactly, and where do we place the workspace
979 * directory? We need some place that is guaranteed to be a mount point in the host, and
980 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
981 * since we ultimately want to move the resulting file system there, i.e. we need propagation
982 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
983 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
984 * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
985 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
986 * propagation on the former, and then overmount the latter.
988 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
989 * for this purpose, but there are few other candidates that work equally well for us, and
990 * given that we do this in a privately namespaced short-lived single-threaded process that
991 * no one else sees this should be OK to do. */
993 /* Turn off propagation from our namespace to host */
994 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, "/dev", NULL
, MS_SLAVE
|MS_REC
, NULL
);
998 r
= setup_credentials_internal(
1002 p
, /* final mount point */
1003 "/dev/shm", /* temporary workspace to overmount */
1004 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1005 true, /* insist that something is mounted, do not allow fallback to plain directory */
1011 _exit(EXIT_SUCCESS
);
1014 _exit(EXIT_FAILURE
);
1017 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1018 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1019 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
1020 * seen by users when trying to access this inode. */