1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
6 #include "credential.h"
7 #include "creds-util.h"
10 #include "glob-util.h"
12 #include "label-util.h"
13 #include "mkdir-label.h"
14 #include "mount-util.h"
15 #include "mountpoint-util.h"
16 #include "process-util.h"
17 #include "random-util.h"
18 #include "recurse-dir.h"
20 #include "tmpfile-util.h"
22 ExecSetCredential
*exec_set_credential_free(ExecSetCredential
*sc
) {
31 ExecLoadCredential
*exec_load_credential_free(ExecLoadCredential
*lc
) {
/* Hash operations named exec_set_credential_hash_ops: keys are of type char (i.e. strings) and are
 * hashed and compared with string_hash_func/string_compare_func; values are ExecSetCredential objects
 * with exec_set_credential_free registered as their value destructor.
 * NOTE(review): presumably these are the ops for the ExecContext set_credentials hashmap — confirm at
 * the place where that hashmap is allocated. */
40 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
41 exec_set_credential_hash_ops
,
42 char, string_hash_func
, string_compare_func
,
43 ExecSetCredential
, exec_set_credential_free
);
/* Hash operations named exec_load_credential_hash_ops: keys are of type char (i.e. strings) and are
 * hashed and compared with string_hash_func/string_compare_func; values are ExecLoadCredential objects
 * with exec_load_credential_free registered as their value destructor.
 * NOTE(review): presumably these are the ops for the ExecContext load_credentials hashmap — confirm at
 * the place where that hashmap is allocated. */
45 DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
46 exec_load_credential_hash_ops
,
47 char, string_hash_func
, string_compare_func
,
48 ExecLoadCredential
, exec_load_credential_free
);
50 bool exec_context_has_credentials(const ExecContext
*c
) {
53 return !hashmap_isempty(c
->set_credentials
) ||
54 !hashmap_isempty(c
->load_credentials
) ||
55 !set_isempty(c
->import_credentials
);
58 bool exec_context_has_encrypted_credentials(ExecContext
*c
) {
59 ExecLoadCredential
*load_cred
;
60 ExecSetCredential
*set_cred
;
64 HASHMAP_FOREACH(load_cred
, c
->load_credentials
)
65 if (load_cred
->encrypted
)
68 HASHMAP_FOREACH(set_cred
, c
->set_credentials
)
69 if (set_cred
->encrypted
)
75 static int get_credential_directory(
76 const char *runtime_prefix
,
84 if (!runtime_prefix
|| !unit
) {
89 p
= path_join(runtime_prefix
, "credentials", unit
);
97 int unit_add_default_credential_dependencies(Unit
*u
, const ExecContext
*c
) {
98 _cleanup_free_
char *p
= NULL
, *m
= NULL
;
104 if (!exec_context_has_credentials(c
))
107 /* Let's make sure the credentials directory of this service is unmounted *after* the service itself
108 * shuts down. This only matters if mount namespacing is not used for the service, and hence the
109 * credentials mount appears on the host. */
111 r
= get_credential_directory(u
->manager
->prefix
[EXEC_DIRECTORY_RUNTIME
], u
->id
, &p
);
115 r
= unit_name_from_path(p
, ".mount", &m
);
119 return unit_add_dependency_by_name(u
, UNIT_AFTER
, m
, /* add_reference= */ true, UNIT_DEPENDENCY_FILE
);
122 int exec_context_destroy_credentials(const ExecContext
*c
, const char *runtime_prefix
, const char *unit
) {
123 _cleanup_free_
char *p
= NULL
;
128 r
= get_credential_directory(runtime_prefix
, unit
, &p
);
132 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
133 * unmount it, and afterwards remove the mount point */
134 (void) umount2(p
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
135 (void) rm_rf(p
, REMOVE_ROOT
|REMOVE_CHMOD
);
140 static int write_credential(
149 _cleanup_(unlink_and_freep
) char *tmp
= NULL
;
150 _cleanup_close_
int fd
= -EBADF
;
153 r
= tempfn_random_child("", "cred", &tmp
);
157 fd
= openat(dfd
, tmp
, O_CREAT
|O_RDWR
|O_CLOEXEC
|O_EXCL
|O_NOFOLLOW
|O_NOCTTY
, 0600);
163 r
= loop_write(fd
, data
, size
, /* do_poll = */ false);
167 if (fchmod(fd
, 0400) < 0) /* Take away "w" bit */
170 if (uid_is_valid(uid
) && uid
!= getuid()) {
171 r
= fd_add_uid_acl_permission(fd
, uid
, ACL_READ
);
173 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
176 if (!ownership_ok
) /* Ideally we use ACLs, since we can neatly express what we want
177 * to express: that the user gets read access and nothing
178 * else. But if the backing fs can't support that (e.g. ramfs)
179 * then we can use file ownership instead. But that's only safe if
180 * we can then re-mount the whole thing read-only, so that the
181 * user can no longer chmod() the file to gain write access. */
184 if (fchown(fd
, uid
, gid
) < 0)
189 if (renameat(dfd
, tmp
, dfd
, id
) < 0)
/* Selects which set of directories credential_search_path() assembles. */
196 typedef enum CredentialSearchPath
{
/* Plaintext stores: the received credentials directory (if set) plus the "credstore" config paths. */
197 CREDENTIAL_SEARCH_PATH_TRUSTED
,
/* Encrypted stores: the received encrypted credentials directory plus the "credstore.encrypted"
 * config paths. */
198 CREDENTIAL_SEARCH_PATH_ENCRYPTED
,
/* Both of the above; credential_search_path() appends the encrypted directories first. */
199 CREDENTIAL_SEARCH_PATH_ALL
,
/* Number of valid values; credential_search_path() asserts its argument is below this. */
200 _CREDENTIAL_SEARCH_PATH_MAX
,
/* Sentinel for "no valid value", encoded as a negative errno. */
201 _CREDENTIAL_SEARCH_PATH_INVALID
= -EINVAL
,
202 } CredentialSearchPath
;
204 static char **credential_search_path(const ExecParameters
*params
, CredentialSearchPath path
) {
206 _cleanup_strv_free_
char **l
= NULL
;
209 assert(path
>= 0 && path
< _CREDENTIAL_SEARCH_PATH_MAX
);
211 /* Assemble a search path to find credentials in. For non-encrypted credentials, we'll look in
212 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
213 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
215 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
216 if (strv_extend(&l
, params
->received_encrypted_credentials_directory
) < 0)
219 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
223 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_TRUSTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
224 if (params
->received_credentials_directory
)
225 if (strv_extend(&l
, params
->received_credentials_directory
) < 0)
228 if (strv_extend_strv(&l
, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
233 _cleanup_free_
char *t
= strv_join(l
, ":");
235 log_debug("Credential search path is: %s", strempty(t
));
241 static int maybe_decrypt_and_write_credential(
252 _cleanup_free_
void *plaintext
= NULL
;
257 size_t plaintext_size
= 0;
259 r
= decrypt_credential_and_warn(id
, now(CLOCK_REALTIME
), NULL
, NULL
, data
, size
,
260 &plaintext
, &plaintext_size
);
265 size
= plaintext_size
;
268 add
= strlen(id
) + size
;
272 r
= write_credential(dir_fd
, id
, data
, size
, uid
, gid
, ownership_ok
);
274 return log_debug_errno(r
, "Failed to write credential '%s': %m", id
);
280 static int load_credential_glob(
284 ReadFullFileFlags flags
,
293 STRV_FOREACH(d
, search_path
) {
294 _cleanup_globfree_ glob_t pglob
= {};
295 _cleanup_free_
char *j
= NULL
;
297 j
= path_join(*d
, path
);
301 r
= safe_glob(j
, 0, &pglob
);
307 for (size_t n
= 0; n
< pglob
.gl_pathc
; n
++) {
308 _cleanup_free_
char *fn
= NULL
;
309 _cleanup_(erase_and_freep
) char *data
= NULL
;
312 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
313 r
= read_full_file_full(
317 encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
,
322 return log_debug_errno(r
, "Failed to read credential '%s': %m",
325 r
= path_extract_filename(pglob
.gl_pathv
[n
], &fn
);
327 return log_debug_errno(r
, "Failed to extract filename from '%s': %m",
330 r
= maybe_decrypt_and_write_credential(
349 static int load_credential(
350 const ExecContext
*context
,
351 const ExecParameters
*params
,
363 ReadFullFileFlags flags
= READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
;
364 _cleanup_strv_free_
char **search_path
= NULL
;
365 _cleanup_(erase_and_freep
) char *data
= NULL
;
366 _cleanup_free_
char *bindname
= NULL
;
367 const char *source
= NULL
;
368 bool missing_ok
= true;
377 assert(read_dfd
>= 0 || read_dfd
== AT_FDCWD
);
378 assert(write_dfd
>= 0);
382 /* If a directory fd is specified, then read the file directly from that dir. In this case we
383 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
384 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
387 if (!filename_is_valid(path
)) /* safety check */
393 } else if (path_is_absolute(path
)) {
394 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
397 if (!path_is_valid(path
)) /* safety check */
400 flags
|= READ_FULL_FILE_CONNECT_SOCKET
;
402 /* Pass some minimal info about the unit and the credential name we are looking to acquire
403 * via the source socket address in case we read off an AF_UNIX socket. */
404 if (asprintf(&bindname
, "@%" PRIx64
"/unit/%s/%s", random_u64(), unit
, id
) < 0)
410 } else if (credential_name_valid(path
)) {
411 /* If this is a relative path, take it as credential name relative to the credentials
412 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
413 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
415 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ALL
);
424 flags
|= READ_FULL_FILE_UNBASE64
;
426 maxsz
= encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
;
429 STRV_FOREACH(d
, search_path
) {
430 _cleanup_free_
char *j
= NULL
;
432 j
= path_join(*d
, path
);
436 r
= read_full_file_full(
437 AT_FDCWD
, j
, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
447 r
= read_full_file_full(
457 if (r
== -ENOENT
&& (missing_ok
|| hashmap_contains(context
->set_credentials
, id
))) {
458 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
459 * will get clear errors if we don't pass such a missing credential on as they
460 * themselves will get ENOENT when trying to read them, which should not be much
461 * worse than when we handle the error here and make it fatal.
463 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
464 * we are fine, too. */
465 log_debug_errno(r
, "Couldn't read inherited credential '%s', skipping: %m", path
);
469 return log_debug_errno(r
, "Failed to read credential '%s': %m", path
);
471 return maybe_decrypt_and_write_credential(write_dfd
, id
, encrypted
, uid
, gid
, ownership_ok
, data
, size
, left
);
474 struct load_cred_args
{
475 const ExecContext
*context
;
476 const ExecParameters
*params
;
486 static int load_cred_recurse_dir_cb(
487 RecurseDirEvent event
,
491 const struct dirent
*de
,
492 const struct statx
*sx
,
495 struct load_cred_args
*args
= ASSERT_PTR(userdata
);
496 _cleanup_free_
char *sub_id
= NULL
;
499 if (event
!= RECURSE_DIR_ENTRY
)
500 return RECURSE_DIR_CONTINUE
;
502 if (!IN_SET(de
->d_type
, DT_REG
, DT_SOCK
))
503 return RECURSE_DIR_CONTINUE
;
505 sub_id
= strreplace(path
, "/", "_");
509 if (!credential_name_valid(sub_id
))
510 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Credential would get ID %s, which is not valid, refusing", sub_id
);
512 if (faccessat(args
->dfd
, sub_id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
513 log_debug("Skipping credential with duplicated ID %s at %s", sub_id
, path
);
514 return RECURSE_DIR_CONTINUE
;
517 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sub_id
);
535 return RECURSE_DIR_CONTINUE
;
538 static int acquire_credentials(
539 const ExecContext
*context
,
540 const ExecParameters
*params
,
547 uint64_t left
= CREDENTIALS_TOTAL_SIZE_MAX
;
548 _cleanup_close_
int dfd
= -EBADF
;
550 ExecLoadCredential
*lc
;
551 ExecSetCredential
*sc
;
557 dfd
= open(p
, O_DIRECTORY
|O_CLOEXEC
);
561 r
= fd_acl_make_writable(dfd
); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
565 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
566 HASHMAP_FOREACH(lc
, context
->load_credentials
) {
567 _cleanup_close_
int sub_fd
= -EBADF
;
569 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
570 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
571 * a regular file. Finally, if it's a relative path we will use it as a credential name to
572 * propagate a credential passed to us from further up. */
574 if (path_is_absolute(lc
->path
)) {
575 sub_fd
= open(lc
->path
, O_DIRECTORY
|O_CLOEXEC
|O_RDONLY
);
576 if (sub_fd
< 0 && !IN_SET(errno
,
577 ENOTDIR
, /* Not a directory */
578 ENOENT
)) /* Doesn't exist? */
579 return log_debug_errno(errno
, "Failed to open '%s': %m", lc
->path
);
583 /* Regular file (incl. a credential passed in from higher up) */
601 /* path= */ lc
->id
, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
603 /* n_depth_max= */ UINT_MAX
,
604 RECURSE_DIR_SORT
|RECURSE_DIR_IGNORE_DOT
|RECURSE_DIR_ENSURE_TYPE
,
605 load_cred_recurse_dir_cb
,
606 &(struct load_cred_args
) {
609 .encrypted
= lc
->encrypted
,
614 .ownership_ok
= ownership_ok
,
621 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
622 * override any credentials found earlier. */
623 SET_FOREACH(ic
, context
->import_credentials
) {
624 _cleanup_free_
char **search_path
= NULL
;
626 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_TRUSTED
);
630 r
= load_credential_glob(
632 /* encrypted = */ false,
634 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
,
643 search_path
= strv_free(search_path
);
644 search_path
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
);
648 r
= load_credential_glob(
650 /* encrypted = */ true,
652 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
|READ_FULL_FILE_UNBASE64
,
662 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
663 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
664 HASHMAP_FOREACH(sc
, context
->set_credentials
) {
665 _cleanup_(erase_and_freep
) void *plaintext
= NULL
;
669 /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
670 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
671 * slow and involved, hence it's nice to be able to skip that if the credential already
673 if (faccessat(dfd
, sc
->id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0)
676 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sc
->id
);
679 r
= decrypt_credential_and_warn(sc
->id
, now(CLOCK_REALTIME
), NULL
, NULL
, sc
->data
, sc
->size
, &plaintext
, &size
);
689 add
= strlen(sc
->id
) + size
;
693 r
= write_credential(dfd
, sc
->id
, data
, size
, uid
, gid
, ownership_ok
);
700 r
= fd_acl_make_read_only(dfd
); /* Now take away the "w" bit */
704 /* After we created all keys with the right perms, also make sure the credential store as a whole is
707 if (uid_is_valid(uid
) && uid
!= getuid()) {
708 r
= fd_add_uid_acl_permission(dfd
, uid
, ACL_READ
| ACL_EXECUTE
);
710 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
716 if (fchown(dfd
, uid
, gid
) < 0)
724 static int setup_credentials_internal(
725 const ExecContext
*context
,
726 const ExecParameters
*params
,
728 const char *final
, /* This is where the credential store shall eventually end up at */
729 const char *workspace
, /* This is where we can prepare it before moving it to the final place */
730 bool reuse_workspace
, /* Whether to reuse any existing workspace mount if it already is a mount */
731 bool must_mount
, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
735 int r
, workspace_mounted
; /* negative if we don't know yet whether we have/can mount something; true
736 * if we mounted something; false if we definitely can't mount anything */
744 if (reuse_workspace
) {
745 r
= path_is_mount_point(workspace
, NULL
, 0);
749 workspace_mounted
= true; /* If this is already a mount, and we are supposed to reuse
750 * it, let's keep this in mind */
752 workspace_mounted
= -1; /* We need to figure out if we can mount something to the workspace */
754 workspace_mounted
= -1; /* ditto */
756 r
= path_is_mount_point(final
, NULL
, 0);
760 /* If the final place already has something mounted, we use that. If the workspace also has
761 * something mounted we assume it's actually the same mount (but with MS_RDONLY
763 final_mounted
= true;
765 if (workspace_mounted
< 0) {
766 /* If the final place is mounted, but the workspace isn't, then let's bind mount
767 * the final version to the workspace, and make it writable, so that we can make
770 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
774 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false), NULL
);
778 workspace_mounted
= true;
781 final_mounted
= false;
783 if (workspace_mounted
< 0) {
784 /* Nothing is mounted on the workspace yet, let's try to mount something now */
786 r
= mount_credentials_fs(workspace
, CREDENTIALS_TOTAL_SIZE_MAX
, /* ro= */ false);
788 /* If that didn't work, try to make a bind mount from the final to the workspace, so
789 * that we can make it writable there. */
790 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
792 if (!ERRNO_IS_PRIVILEGE(r
))
793 /* Propagate anything that isn't a permission problem. */
797 /* If it's not OK to use the plain directory fallback, propagate all
801 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
802 * for compat with container envs, and just use the final dir as is. */
804 workspace_mounted
= false;
806 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
807 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false), NULL
);
811 workspace_mounted
= true;
814 workspace_mounted
= true;
817 assert(!must_mount
|| workspace_mounted
> 0);
818 where
= workspace_mounted
? workspace
: final
;
820 (void) label_fix_full(AT_FDCWD
, where
, final
, 0);
822 r
= acquire_credentials(context
, params
, unit
, where
, uid
, gid
, workspace_mounted
);
826 if (workspace_mounted
) {
829 /* Determine if we should actually install the prepared mount in the final location by bind
830 * mounting it there. We do so only if the mount is not established there already, and if the
831 * mount is actually non-empty (i.e. carries at least one credential). Note that in the best
832 * case we are doing all this in a mount namespace, thus no one else will see that we
833 * allocated a file system we are getting rid of again here. */
835 install
= false; /* already installed */
837 r
= dir_is_empty(where
, /* ignore_hidden_or_backup= */ false);
841 install
= r
== 0; /* install only if non-empty */
845 /* Make workspace read-only now, so that any bind mount we make from it defaults to
847 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ true), NULL
);
851 /* And mount it to the final place, read-only */
852 r
= mount_nofollow_verbose(LOG_DEBUG
, workspace
, final
, NULL
, MS_MOVE
, NULL
);
854 /* Otherwise get rid of it */
855 r
= umount_verbose(LOG_DEBUG
, workspace
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
859 _cleanup_free_
char *parent
= NULL
;
861 /* If we do not have our own mount but used the plain directory fallback, then we need to
862 * open access to the top-level credential directory and the per-service directory now */
864 r
= path_extract_directory(final
, &parent
);
867 if (chmod(parent
, 0755) < 0)
874 int setup_credentials(
875 const ExecContext
*context
,
876 const ExecParameters
*params
,
881 _cleanup_free_
char *p
= NULL
, *q
= NULL
;
887 if (!exec_context_has_credentials(context
))
890 if (!params
->prefix
[EXEC_DIRECTORY_RUNTIME
])
893 /* This is where we'll place stuff when we are done; this main credentials directory is world-readable,
894 * and the subdir we mount over with a read-only file system readable by the service's user */
895 q
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "credentials");
899 r
= mkdir_label(q
, 0755); /* top-level dir: world readable/searchable */
900 if (r
< 0 && r
!= -EEXIST
)
903 p
= path_join(q
, unit
);
907 r
= mkdir_label(p
, 0700); /* per-unit dir: private to user */
908 if (r
< 0 && r
!= -EEXIST
)
911 r
= safe_fork("(sd-mkdcreds)", FORK_DEATHSIG
|FORK_WAIT
|FORK_NEW_MOUNTNS
, NULL
);
913 _cleanup_free_
char *t
= NULL
, *u
= NULL
;
915 /* If this is not a privilege or support issue then propagate the error */
916 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
919 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
920 * it into place, so that users can't access half-initialized credential stores. */
921 t
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "systemd/temporary-credentials");
925 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
926 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
927 * after it is fully set up */
928 u
= path_join(t
, unit
);
932 FOREACH_STRING(i
, t
, u
) {
933 r
= mkdir_label(i
, 0700);
934 if (r
< 0 && r
!= -EEXIST
)
938 r
= setup_credentials_internal(
942 p
, /* final mount point */
943 u
, /* temporary workspace to overmount */
944 true, /* reuse the workspace if it is already a mount */
945 false, /* it's OK to fall back to a plain directory if we can't mount anything */
949 (void) rmdir(u
); /* remove the workspace again if we can. */
956 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
957 * we can use the same directory for all cases, after turning off propagation. Question
958 * though is: where do we turn off propagation exactly, and where do we place the workspace
959 * directory? We need some place that is guaranteed to be a mount point in the host, and
960 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
961 * since we ultimately want to move the resulting file system there, i.e. we need propagation
962 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
963 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
964 * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
965 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
966 * propagation on the former, and then overmount the latter.
968 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
969 * for this purpose, but there are few other candidates that work equally well for us, and
970 * given that we do this in a privately namespaced short-lived single-threaded process that
971 * no one else sees this should be OK to do. */
973 /* Turn off propagation from our namespace to host */
974 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, "/dev", NULL
, MS_SLAVE
|MS_REC
, NULL
);
978 r
= setup_credentials_internal(
982 p
, /* final mount point */
983 "/dev/shm", /* temporary workspace to overmount */
984 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
985 true, /* insist that something is mounted, do not allow fallback to plain directory */
997 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
998 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
999 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
1000 * seen by users when trying to access this inode. */