1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
8 #include "creds-util.h"
9 #include "errno-util.h"
10 #include "exec-credential.h"
14 #include "glob-util.h"
16 #include "iovec-util.h"
17 #include "label-util.h"
19 #include "mkdir-label.h"
20 #include "mount-util.h"
21 #include "mountpoint-util.h"
22 #include "ordered-set.h"
23 #include "path-lookup.h"
24 #include "path-util.h"
25 #include "process-util.h"
26 #include "random-util.h"
27 #include "recurse-dir.h"
29 #include "siphash24.h"
30 #include "stat-util.h"
32 #include "tmpfile-util.h"
33 #include "user-util.h"
35 ExecSetCredential
* exec_set_credential_free(ExecSetCredential
*sc
) {
44 ExecLoadCredential
* exec_load_credential_free(ExecLoadCredential
*lc
) {
53 ExecImportCredential
* exec_import_credential_free(ExecImportCredential
*ic
) {
62 static void exec_import_credential_hash_func(const ExecImportCredential
*ic
, struct siphash
*state
) {
66 siphash24_compress_string(ic
->glob
, state
);
68 siphash24_compress_string(ic
->rename
, state
);
71 static int exec_import_credential_compare_func(const ExecImportCredential
*a
, const ExecImportCredential
*b
) {
77 r
= strcmp(a
->glob
, b
->glob
);
81 return strcmp_ptr(a
->rename
, b
->rename
);
84 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
85 exec_set_credential_hash_ops
,
86 char, string_hash_func
, string_compare_func
,
87 ExecSetCredential
, exec_set_credential_free
);
89 DEFINE_PRIVATE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
90 exec_load_credential_hash_ops
,
91 char, string_hash_func
, string_compare_func
,
92 ExecLoadCredential
, exec_load_credential_free
);
94 DEFINE_PRIVATE_HASH_OPS_WITH_KEY_DESTRUCTOR(
95 exec_import_credential_hash_ops
,
97 exec_import_credential_hash_func
,
98 exec_import_credential_compare_func
,
99 exec_import_credential_free
);
101 int exec_context_put_load_credential(ExecContext
*c
, const char *id
, const char *path
, bool encrypted
) {
102 ExecLoadCredential
*old
;
109 old
= hashmap_get(c
->load_credentials
, id
);
111 r
= free_and_strdup(&old
->path
, path
);
115 old
->encrypted
= encrypted
;
117 _cleanup_(exec_load_credential_freep
) ExecLoadCredential
*lc
= NULL
;
119 lc
= new(ExecLoadCredential
, 1);
123 *lc
= (ExecLoadCredential
) {
125 .path
= strdup(path
),
126 .encrypted
= encrypted
,
128 if (!lc
->id
|| !lc
->path
)
131 r
= hashmap_ensure_put(&c
->load_credentials
, &exec_load_credential_hash_ops
, lc
->id
, lc
);
132 assert(r
!= -EEXIST
);
142 int exec_context_put_set_credential(
149 _cleanup_free_
void *data
= data_consume
;
150 ExecSetCredential
*old
;
153 /* Takes the ownership of data both on success and failure */
157 assert(data
|| size
== 0);
159 old
= hashmap_get(c
->set_credentials
, id
);
161 free_and_replace(old
->data
, data
);
163 old
->encrypted
= encrypted
;
165 _cleanup_(exec_set_credential_freep
) ExecSetCredential
*sc
= NULL
;
167 sc
= new(ExecSetCredential
, 1);
171 *sc
= (ExecSetCredential
) {
173 .data
= TAKE_PTR(data
),
175 .encrypted
= encrypted
,
180 r
= hashmap_ensure_put(&c
->set_credentials
, &exec_set_credential_hash_ops
, sc
->id
, sc
);
181 assert(r
!= -EEXIST
);
191 int exec_context_put_import_credential(ExecContext
*c
, const char *glob
, const char *rename
) {
192 _cleanup_(exec_import_credential_freep
) ExecImportCredential
*ic
= NULL
;
198 rename
= empty_to_null(rename
);
200 ic
= new(ExecImportCredential
, 1);
204 *ic
= (ExecImportCredential
) {
205 .glob
= strdup(glob
),
210 ic
->rename
= strdup(rename
);
215 if (ordered_set_contains(c
->import_credentials
, ic
))
218 r
= ordered_set_ensure_put(&c
->import_credentials
, &exec_import_credential_hash_ops
, ic
);
219 assert(r
!= -EEXIST
);
228 bool exec_params_need_credentials(const ExecParameters
*p
) {
231 return p
->flags
& (EXEC_SETUP_CREDENTIALS
|EXEC_SETUP_CREDENTIALS_FRESH
);
234 bool exec_context_has_credentials(const ExecContext
*c
) {
237 return !hashmap_isempty(c
->set_credentials
) ||
238 !hashmap_isempty(c
->load_credentials
) ||
239 !ordered_set_isempty(c
->import_credentials
);
242 bool mount_point_is_credentials(const char *runtime_prefix
, const char *path
) {
245 assert(runtime_prefix
);
248 e
= path_startswith(path
, runtime_prefix
);
252 return path_startswith(e
, "credentials");
255 static int get_credential_directory(
256 const char *runtime_prefix
,
264 if (!runtime_prefix
|| !unit
) {
269 p
= path_join(runtime_prefix
, "credentials", unit
);
277 int exec_context_get_credential_directory(
278 const ExecContext
*context
,
279 const ExecParameters
*params
,
288 if (!exec_params_need_credentials(params
) || !exec_context_has_credentials(context
)) {
293 return get_credential_directory(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], unit
, ret
);
296 int exec_context_destroy_credentials(const ExecContext
*c
, const char *runtime_prefix
, const char *unit
) {
297 _cleanup_free_
char *p
= NULL
;
302 r
= get_credential_directory(runtime_prefix
, unit
, &p
);
306 /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
307 * unmount it, and afterwards remove the mount point */
308 (void) umount2(p
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
309 (void) rm_rf(p
, REMOVE_ROOT
|REMOVE_CHMOD
);
314 static int write_credential(
323 _cleanup_free_
char *tmp
= NULL
;
324 _cleanup_close_
int fd
= -EBADF
;
329 assert(data
|| size
== 0);
331 r
= tempfn_random_child("", "cred", &tmp
);
335 fd
= openat(dfd
, tmp
, O_CREAT
|O_RDWR
|O_CLOEXEC
|O_EXCL
|O_NOFOLLOW
|O_NOCTTY
, 0600);
339 r
= loop_write(fd
, data
, size
);
343 r
= RET_NERRNO(fchmod(fd
, 0400)); /* Take away "w" bit */
347 if (uid_is_valid(uid
) && uid
!= getuid()) {
348 r
= fd_add_uid_acl_permission(fd
, uid
, ACL_READ
);
350 /* Ideally we use ACLs, since we can neatly express what we want to express:
351 * the user gets read access and nothing else. But if the backing fs can't
352 * support that (e.g. ramfs), then we can use file ownership instead. But that's
353 * only safe if we can then re-mount the whole thing read-only, so that the user
354 * can no longer chmod() the file to gain write access. */
355 if (!ownership_ok
|| (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
)))
358 r
= RET_NERRNO(fchown(fd
, uid
, gid
));
364 r
= RET_NERRNO(renameat(dfd
, tmp
, dfd
, id
));
371 (void) unlinkat(dfd
, tmp
, /* flags = */ 0);
375 typedef enum CredentialSearchPath
{
376 CREDENTIAL_SEARCH_PATH_TRUSTED
,
377 CREDENTIAL_SEARCH_PATH_ENCRYPTED
,
378 CREDENTIAL_SEARCH_PATH_ALL
,
379 _CREDENTIAL_SEARCH_PATH_MAX
,
380 _CREDENTIAL_SEARCH_PATH_INVALID
= -EINVAL
,
381 } CredentialSearchPath
;
383 static int credential_search_path(const ExecParameters
*params
, CredentialSearchPath path
, char ***ret
) {
384 _cleanup_strv_free_
char **l
= NULL
;
388 assert(path
>= 0 && path
< _CREDENTIAL_SEARCH_PATH_MAX
);
391 /* Assemble a search path to find credentials in. For non-encrypted credentials, we'll look in
392 * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
393 * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
395 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
396 r
= strv_extend(&l
, params
->received_encrypted_credentials_directory
);
400 _cleanup_strv_free_
char **add
= NULL
;
401 r
= credential_store_path_encrypted(params
->runtime_scope
, &add
);
405 r
= strv_extend_strv_consume(&l
, TAKE_PTR(add
), /* filter_duplicates= */ false);
410 if (IN_SET(path
, CREDENTIAL_SEARCH_PATH_TRUSTED
, CREDENTIAL_SEARCH_PATH_ALL
)) {
411 r
= strv_extend(&l
, params
->received_credentials_directory
);
415 _cleanup_strv_free_
char **add
= NULL
;
416 r
= credential_store_path(params
->runtime_scope
, &add
);
420 r
= strv_extend_strv_consume(&l
, TAKE_PTR(add
), /* filter_duplicates= */ false);
426 _cleanup_free_
char *t
= strv_join(l
, ":");
427 log_debug("Credential search path is: %s", strempty(t
));
434 static bool device_nodes_restricted(
435 const ExecContext
*c
,
436 const CGroupContext
*cgroup_context
) {
439 assert(cgroup_context
);
441 /* Returns true if we have any reason to believe we might not be able to access the TPM device
442 * directly, even if we run as root/PID 1. This could be because /dev/ is replaced by a private
443 * version, or because a device node access list is configured. */
445 if (c
->private_devices
)
448 if (cgroup_context
->device_policy
!= CGROUP_DEVICE_POLICY_AUTO
||
449 cgroup_context
->device_allow
)
455 struct load_cred_args
{
456 const ExecContext
*context
;
457 const CGroupContext
*cgroup_context
;
458 const ExecParameters
*params
;
468 static int maybe_decrypt_and_write_credential(
469 struct load_cred_args
*args
,
474 _cleanup_(iovec_done_erase
) struct iovec plaintext
= {};
479 assert(args
->write_dfd
>= 0);
481 assert(data
|| size
== 0);
483 if (args
->encrypted
) {
484 CredentialFlags flags
= 0; /* only allow user creds in user scope */
486 switch (args
->params
->runtime_scope
) {
488 case RUNTIME_SCOPE_SYSTEM
:
489 /* In system mode talk directly to the TPM – unless we live in a device sandbox
490 * which might block TPM device access. */
492 flags
|= CREDENTIAL_ANY_SCOPE
;
494 if (!device_nodes_restricted(args
->context
, args
->cgroup_context
)) {
495 r
= decrypt_credential_and_warn(
498 /* tpm2_device= */ NULL
,
499 /* tpm2_signature_path= */ NULL
,
501 &IOVEC_MAKE(data
, size
),
509 case RUNTIME_SCOPE_USER
:
510 /* In per user mode we'll not have access to the machine secret, nor to the TPM (most
511 * likely), hence go via the IPC service instead. Do this if we are run in root's
512 * per-user invocation too, to minimize differences and because isolating this logic
513 * into a separate process is generally a good thing anyway. */
514 r
= ipc_decrypt_credential(
518 &IOVEC_MAKE(data
, size
),
524 assert_not_reached();
529 data
= plaintext
.iov_base
;
530 size
= plaintext
.iov_len
;
533 add
= strlen(id
) + size
;
534 if (add
> args
->left
)
537 r
= write_credential(args
->write_dfd
, id
, data
, size
, args
->uid
, args
->gid
, args
->ownership_ok
);
539 return log_debug_errno(r
, "Failed to write credential '%s': %m", id
);
546 static int load_credential_glob(
547 struct load_cred_args
*args
,
548 const ExecImportCredential
*ic
,
549 char * const *search_path
,
550 ReadFullFileFlags flags
) {
555 assert(args
->write_dfd
>= 0);
559 STRV_FOREACH(d
, search_path
) {
560 _cleanup_strv_free_
char **paths
= NULL
;
561 _cleanup_free_
char *j
= NULL
;
563 j
= path_join(*d
, ic
->glob
);
567 r
= safe_glob(j
, /* flags = */ 0, &paths
);
573 STRV_FOREACH(p
, paths
) {
574 _cleanup_free_
char *fn
= NULL
;
575 _cleanup_(erase_and_freep
) char *data
= NULL
;
578 r
= path_extract_filename(*p
, &fn
);
580 return log_debug_errno(r
, "Failed to extract filename from '%s': %m", *p
);
583 _cleanup_free_
char *renamed
= NULL
;
585 renamed
= strjoin(ic
->rename
, fn
+ strlen(ic
->glob
) - !!endswith(ic
->glob
, "*"));
587 return log_oom_debug();
589 free_and_replace(fn
, renamed
);
592 if (!credential_name_valid(fn
)) {
593 log_debug("Skipping credential with invalid name: %s", fn
);
597 if (faccessat(args
->write_dfd
, fn
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
598 log_debug("Skipping credential with duplicated ID %s at %s", fn
, *p
);
602 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", fn
);
604 /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
605 r
= read_full_file_full(
609 args
->encrypted
? CREDENTIAL_ENCRYPTED_SIZE_MAX
: CREDENTIAL_SIZE_MAX
,
614 return log_debug_errno(r
, "Failed to read credential '%s': %m", *p
);
616 r
= maybe_decrypt_and_write_credential(args
, fn
, data
, size
);
625 static int load_credential(
626 struct load_cred_args
*args
,
631 ReadFullFileFlags flags
= READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
;
632 _cleanup_strv_free_
char **search_path
= NULL
;
633 _cleanup_free_
char *bindname
= NULL
;
634 const char *source
= NULL
;
636 _cleanup_(erase_and_freep
) char *data
= NULL
;
641 assert(args
->context
);
642 assert(args
->params
);
644 assert(args
->write_dfd
>= 0);
646 assert(read_dfd
>= 0 || read_dfd
== AT_FDCWD
);
650 /* If a directory fd is specified, then read the file directly from that dir. In this case we
651 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
652 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
655 if (!filename_is_valid(path
)) /* safety check */
661 } else if (path_is_absolute(path
)) {
662 /* If this is an absolute path, read the data directly from it, and support AF_UNIX
665 if (!path_is_valid(path
)) /* safety check */
668 flags
|= READ_FULL_FILE_CONNECT_SOCKET
;
670 /* Pass some minimal info about the unit and the credential name we are looking to acquire
671 * via the source socket address in case we read off an AF_UNIX socket. */
672 if (asprintf(&bindname
, "@%" PRIx64
"/unit/%s/%s", random_u64(), args
->unit
, id
) < 0)
678 } else if (credential_name_valid(path
)) {
679 /* If this is a relative path, take it as credential name relative to the credentials
680 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
681 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
683 r
= credential_search_path(args
->params
, CREDENTIAL_SEARCH_PATH_ALL
, &search_path
);
691 if (args
->encrypted
) {
692 flags
|= READ_FULL_FILE_UNBASE64
;
693 maxsz
= CREDENTIAL_ENCRYPTED_SIZE_MAX
;
695 maxsz
= CREDENTIAL_SIZE_MAX
;
698 STRV_FOREACH(d
, search_path
) {
699 _cleanup_free_
char *j
= NULL
;
701 j
= path_join(*d
, path
);
705 r
= read_full_file_full(
706 AT_FDCWD
, j
, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
716 r
= read_full_file_full(
724 assert_not_reached();
726 if (r
== -ENOENT
&& (missing_ok
|| hashmap_contains(args
->context
->set_credentials
, id
))) {
727 /* Make a missing inherited credential non-fatal, let's just continue. After all apps
728 * will get clear errors if we don't pass such a missing credential on as they
729 * themselves will get ENOENT when trying to read them, which should not be much
730 * worse than when we handle the error here and make it fatal.
732 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
733 * we are fine, too. */
734 log_full_errno(hashmap_contains(args
->context
->set_credentials
, id
) ? LOG_DEBUG
: LOG_INFO
,
735 r
, "Couldn't read inherited credential '%s', skipping: %m", path
);
739 return log_debug_errno(r
, "Failed to read credential '%s': %m", path
);
741 return maybe_decrypt_and_write_credential(args
, id
, data
, size
);
744 static int load_cred_recurse_dir_cb(
745 RecurseDirEvent event
,
749 const struct dirent
*de
,
750 const struct statx
*sx
,
753 struct load_cred_args
*args
= ASSERT_PTR(userdata
);
754 _cleanup_free_
char *sub_id
= NULL
;
760 if (event
!= RECURSE_DIR_ENTRY
)
761 return RECURSE_DIR_CONTINUE
;
763 if (!IN_SET(de
->d_type
, DT_REG
, DT_SOCK
))
764 return RECURSE_DIR_CONTINUE
;
766 sub_id
= strreplace(path
, "/", "_");
770 if (!credential_name_valid(sub_id
))
771 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL
), "Credential would get ID '%s', which is not valid, refusing.", sub_id
);
773 if (faccessat(args
->write_dfd
, sub_id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
774 log_debug("Skipping credential with duplicated ID %s at %s", sub_id
, path
);
775 return RECURSE_DIR_CONTINUE
;
778 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sub_id
);
780 r
= load_credential(args
,
786 return RECURSE_DIR_CONTINUE
;
789 static int acquire_credentials(
790 const ExecContext
*context
,
791 const CGroupContext
*cgroup_context
,
792 const ExecParameters
*params
,
799 _cleanup_close_
int dfd
= -EBADF
;
803 assert(cgroup_context
);
808 dfd
= open(p
, O_DIRECTORY
|O_CLOEXEC
);
812 r
= fd_acl_make_writable(dfd
); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
816 struct load_cred_args args
= {
818 .cgroup_context
= cgroup_context
,
824 .ownership_ok
= ownership_ok
,
825 .left
= CREDENTIALS_TOTAL_SIZE_MAX
,
828 /* First, load credentials off disk (or acquire via AF_UNIX socket) */
829 ExecLoadCredential
*lc
;
830 HASHMAP_FOREACH(lc
, context
->load_credentials
) {
831 _cleanup_close_
int sub_fd
= -EBADF
;
833 args
.encrypted
= lc
->encrypted
;
835 /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
836 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
837 * a regular file. Finally, if it's a relative path we will use it as a credential name to
838 * propagate a credential passed to us from further up. */
840 if (path_is_absolute(lc
->path
)) {
841 sub_fd
= open(lc
->path
, O_DIRECTORY
|O_CLOEXEC
);
842 if (sub_fd
< 0 && !IN_SET(errno
,
843 ENOTDIR
, /* Not a directory */
844 ENOENT
)) /* Doesn't exist? */
845 return log_debug_errno(errno
, "Failed to open credential source '%s': %m", lc
->path
);
849 /* Regular file (incl. a credential passed in from higher up) */
850 r
= load_credential(&args
,
855 r
= recurse_dir(sub_fd
,
856 /* path= */ lc
->id
, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
858 /* n_depth_max= */ UINT_MAX
,
859 RECURSE_DIR_SORT
|RECURSE_DIR_IGNORE_DOT
|RECURSE_DIR_ENSURE_TYPE
,
860 load_cred_recurse_dir_cb
,
866 /* Next, look for system credentials and credentials in the credentials store. Note that these do not
867 * override any credentials found earlier. */
868 ExecImportCredential
*ic
;
869 ORDERED_SET_FOREACH(ic
, context
->import_credentials
) {
870 _cleanup_free_
char **search_path
= NULL
;
872 r
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_TRUSTED
, &search_path
);
876 args
.encrypted
= false;
878 r
= load_credential_glob(&args
,
881 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
);
885 search_path
= strv_free(search_path
);
887 r
= credential_search_path(params
, CREDENTIAL_SEARCH_PATH_ENCRYPTED
, &search_path
);
891 args
.encrypted
= true;
893 r
= load_credential_glob(&args
,
896 READ_FULL_FILE_SECURE
|READ_FULL_FILE_FAIL_WHEN_LARGER
|READ_FULL_FILE_UNBASE64
);
901 /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
902 * add them, so that they can act as a "default" if the same credential is specified multiple times. */
903 ExecSetCredential
*sc
;
904 HASHMAP_FOREACH(sc
, context
->set_credentials
) {
905 args
.encrypted
= sc
->encrypted
;
907 if (faccessat(dfd
, sc
->id
, F_OK
, AT_SYMLINK_NOFOLLOW
) >= 0) {
908 log_debug("Skipping credential with duplicated ID %s", sc
->id
);
912 return log_debug_errno(errno
, "Failed to test if credential %s exists: %m", sc
->id
);
914 r
= maybe_decrypt_and_write_credential(&args
, sc
->id
, sc
->data
, sc
->size
);
919 r
= fd_acl_make_read_only(dfd
); /* Now take away the "w" bit */
923 /* After we created all keys with the right perms, also make sure the credential store as a whole is
926 if (uid_is_valid(uid
) && uid
!= getuid()) {
927 r
= fd_add_uid_acl_permission(dfd
, uid
, ACL_READ
| ACL_EXECUTE
);
929 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
935 if (fchown(dfd
, uid
, gid
) < 0)
943 static int setup_credentials_internal(
944 const ExecContext
*context
,
945 const CGroupContext
*cgroup_context
,
946 const ExecParameters
*params
,
948 const char *final
, /* This is where the credential store shall eventually end up at */
949 const char *workspace
, /* This is where we can prepare it before moving it to the final place */
950 bool reuse_workspace
, /* Whether to reuse any existing workspace mount if it already is a mount */
951 bool must_mount
, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
956 int r
, workspace_mounted
; /* negative if we don't know yet whether we have/can mount something; true
957 * if we mounted something; false if we definitely can't mount anything */
965 r
= path_is_mount_point(final
);
967 return log_debug_errno(r
, "Failed to determine if '%s' is a mountpoint: %m", final
);
968 final_mounted
= r
> 0;
971 if (FLAGS_SET(params
->flags
, EXEC_SETUP_CREDENTIALS_FRESH
)) {
972 r
= umount_verbose(LOG_DEBUG
, final
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
976 final_mounted
= false;
978 /* We can reuse the previous credential dir */
979 r
= dir_is_empty(final
, /* ignore_hidden_or_backup = */ false);
983 log_debug("Credential dir for unit '%s' already set up, skipping.", unit
);
989 if (reuse_workspace
) {
990 r
= path_is_mount_point(workspace
);
994 workspace_mounted
= true; /* If this is already a mount, and we are supposed to reuse
995 * it, let's keep this in mind */
997 workspace_mounted
= -1; /* We need to figure out if we can mount something to the workspace */
999 workspace_mounted
= -1; /* ditto */
1001 /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
1002 * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
1003 * If the workspace is not mounted, we just bind the final place over and make it writable. */
1004 must_mount
= must_mount
|| final_mounted
;
1006 if (workspace_mounted
< 0) {
1008 /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
1009 * not using the final place. */
1010 r
= mount_credentials_fs(workspace
, CREDENTIALS_TOTAL_SIZE_MAX
, /* ro= */ false);
1011 if (final_mounted
|| r
< 0) {
1012 /* If using final place or failed to mount new tmpfs, make a bind mount from
1013 * the final to the workspace, so that we can make it writable there. */
1014 r
= mount_nofollow_verbose(LOG_DEBUG
, final
, workspace
, NULL
, MS_BIND
|MS_REC
, NULL
);
1016 if (!ERRNO_IS_PRIVILEGE(r
))
1017 /* Propagate anything that isn't a permission problem. */
1021 /* If it's not OK to use the plain directory fallback, propagate all
1025 /* If we lack privileges to bind mount stuff, then let's gracefully proceed
1026 * for compat with container envs, and just use the final dir as is.
1027 * Final place must not be mounted in this case (refused by must_mount
1030 workspace_mounted
= false;
1032 /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
1033 r
= mount_nofollow_verbose(LOG_DEBUG
,
1037 MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ false),
1042 workspace_mounted
= true;
1045 workspace_mounted
= true;
1048 assert(workspace_mounted
>= 0);
1049 assert(!must_mount
|| workspace_mounted
);
1051 const char *where
= workspace_mounted
? workspace
: final
;
1053 (void) label_fix_full(AT_FDCWD
, where
, final
, 0);
1055 r
= acquire_credentials(context
, cgroup_context
, params
, unit
, where
, uid
, gid
, workspace_mounted
);
1057 /* If we're using final place as workspace, and failed to acquire credentials, we might
1058 * have left half-written creds there. Let's get rid of the whole mount, so future
1059 * calls won't reuse it. */
1061 (void) umount_verbose(LOG_DEBUG
, final
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
1066 if (workspace_mounted
) {
1067 if (!final_mounted
) {
1068 /* Make workspace read-only now, so that any bind mount we make from it defaults to
1070 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, workspace
, NULL
, MS_BIND
|MS_REMOUNT
|credentials_fs_mount_flags(/* ro= */ true), NULL
);
1074 /* And mount it to the final place, read-only */
1075 r
= mount_nofollow_verbose(LOG_DEBUG
, workspace
, final
, NULL
, MS_MOVE
, NULL
);
1077 /* Otherwise we just get rid of the bind mount of final place */
1078 r
= umount_verbose(LOG_DEBUG
, workspace
, MNT_DETACH
|UMOUNT_NOFOLLOW
);
1082 _cleanup_free_
char *parent
= NULL
;
1084 /* If we do not have our own mount but used the plain directory fallback, then we need to
1085 * open access to the top-level credential directory and the per-service directory now */
1087 r
= path_extract_directory(final
, &parent
);
1090 if (chmod(parent
, 0755) < 0)
1097 int exec_setup_credentials(
1098 const ExecContext
*context
,
1099 const CGroupContext
*cgroup_context
,
1100 const ExecParameters
*params
,
1105 _cleanup_free_
char *p
= NULL
, *q
= NULL
;
1112 if (!exec_params_need_credentials(params
) || !exec_context_has_credentials(context
))
1115 if (!params
->prefix
[EXEC_DIRECTORY_RUNTIME
])
1118 /* This is where we'll place stuff when we are done; the main credentials directory is world-readable,
1119 * and the subdir we mount over with a read-only file system readable by the service's user. */
1120 q
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "credentials");
1124 r
= mkdir_label(q
, 0755); /* top-level dir: world readable/searchable */
1125 if (r
< 0 && r
!= -EEXIST
)
1128 p
= path_join(q
, unit
);
1132 r
= mkdir_label(p
, 0700); /* per-unit dir: private to user */
1133 if (r
< 0 && r
!= -EEXIST
)
1136 r
= safe_fork("(sd-mkdcreds)", FORK_DEATHSIG_SIGTERM
|FORK_WAIT
|FORK_NEW_MOUNTNS
, NULL
);
1138 _cleanup_(rmdir_and_freep
) char *u
= NULL
; /* remove the temporary workspace if we can */
1139 _cleanup_free_
char *t
= NULL
;
1141 /* If this is not a privilege or support issue then propagate the error */
1142 if (!ERRNO_IS_NOT_SUPPORTED(r
) && !ERRNO_IS_PRIVILEGE(r
))
1145 /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
1146 * it into place, so that users can't access half-initialized credential stores. */
1147 t
= path_join(params
->prefix
[EXEC_DIRECTORY_RUNTIME
], "systemd/temporary-credentials");
1151 /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
1152 * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
1153 * after it is fully set up */
1154 u
= path_join(t
, unit
);
1158 FOREACH_STRING(i
, t
, u
) {
1159 r
= mkdir_label(i
, 0700);
1160 if (r
< 0 && r
!= -EEXIST
)
1161 return log_debug_errno(r
, "Failed to make directory '%s': %m", i
);
1164 r
= setup_credentials_internal(
1169 p
, /* final mount point */
1170 u
, /* temporary workspace to overmount */
1171 true, /* reuse the workspace if it is already a mount */
1172 false, /* it's OK to fall back to a plain directory if we can't mount anything */
1178 } else if (r
== 0) {
1180 /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
1181 * we can use the same directory for all cases, after turning off propagation. Question
1182 * though is: where do we turn off propagation exactly, and where do we place the workspace
1183 * directory? We need some place that is guaranteed to be a mount point in the host, and
1184 * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
1185 * since we ultimately want to move the resulting file system there, i.e. we need propagation
1186 * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
1187 * would be visible in the host mount table all the time, which we want to avoid. Hence, what
1188 * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
1189 * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
1190 * propagation on the former, and then overmount the latter.
1192 * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
1193 * for this purpose, but there are few other candidates that work equally well for us, and
1194 * given that we do this in a privately namespaced short-lived single-threaded process that
1195 * no one else sees this should be OK to do. */
1197 /* Turn off propagation from our namespace to host */
1198 r
= mount_nofollow_verbose(LOG_DEBUG
, NULL
, "/dev", NULL
, MS_SLAVE
|MS_REC
, NULL
);
1202 r
= setup_credentials_internal(
1207 p
, /* final mount point */
1208 "/dev/shm", /* temporary workspace to overmount */
1209 false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
1210 true, /* insist that something is mounted, do not allow fallback to plain directory */
1216 _exit(EXIT_SUCCESS
);
1219 _exit(EXIT_FAILURE
);
1222 /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
1223 * try to remove it. This matters in particular if we created the dir as mount point but then didn't
1224 * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
1225 * seen by users when trying to access this inode. */