--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <sys/mount.h>
+
+#include "acl-util.h"
+#include "credential.h"
+#include "creds-util.h"
+#include "execute.h"
+#include "fileio.h"
+#include "glob-util.h"
+#include "io-util.h"
+#include "label-util.h"
+#include "mkdir-label.h"
+#include "mount-util.h"
+#include "mountpoint-util.h"
+#include "process-util.h"
+#include "random-util.h"
+#include "recurse-dir.h"
+#include "rm-rf.h"
+#include "tmpfile-util.h"
+
+/* Destroys an ExecSetCredential: releases the id and data buffers it owns, then hands the object itself to
+ * mfree() and returns the result. A NULL argument is accepted and yields NULL. */
+ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
+        if (sc) {
+                free(sc->id);
+                free(sc->data);
+                return mfree(sc);
+        }
+
+        return NULL;
+}
+
+/* Destroys an ExecLoadCredential: releases the id and path strings it owns, then hands the object itself
+ * to mfree() and returns the result. A NULL argument is accepted and yields NULL. */
+ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
+        if (lc) {
+                free(lc->id);
+                free(lc->path);
+                return mfree(lc);
+        }
+
+        return NULL;
+}
+
+/* Hash operations for containers keyed by string whose values are ExecSetCredential objects;
+ * exec_set_credential_free() is registered as the value destructor. */
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ exec_set_credential_hash_ops,
+ char, string_hash_func, string_compare_func,
+ ExecSetCredential, exec_set_credential_free);
+
+/* Same as above, but for ExecLoadCredential values, destroyed via exec_load_credential_free(). */
+DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
+ exec_load_credential_hash_ops,
+ char, string_hash_func, string_compare_func,
+ ExecLoadCredential, exec_load_credential_free);
+
+/* Returns true if the context carries at least one credential, i.e. if any of the set_credentials,
+ * load_credentials or import_credentials collections is non-empty. */
+bool exec_context_has_credentials(const ExecContext *c) {
+        assert(c);
+
+        if (!hashmap_isempty(c->set_credentials))
+                return true;
+
+        if (!hashmap_isempty(c->load_credentials))
+                return true;
+
+        return !set_isempty(c->import_credentials);
+}
+
+/* Returns true if at least one entry of the context's load_credentials or set_credentials collections is
+ * flagged as encrypted. */
+bool exec_context_has_encrypted_credentials(ExecContext *c) {
+        ExecLoadCredential *lc;
+        ExecSetCredential *sc;
+
+        assert(c);
+
+        HASHMAP_FOREACH(lc, c->load_credentials) {
+                if (!lc->encrypted)
+                        continue;
+
+                return true;
+        }
+
+        HASHMAP_FOREACH(sc, c->set_credentials) {
+                if (!sc->encrypted)
+                        continue;
+
+                return true;
+        }
+
+        return false;
+}
+
+/* Removes the credentials directory "<runtime_prefix>/credentials/<unit>". Does nothing (and returns 0) if
+ * either the prefix or the unit name is missing. Failures to unmount or remove are ignored on purpose;
+ * only an allocation failure is reported, as -ENOMEM. */
+int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
+        _cleanup_free_ char *cred_dir = NULL;
+
+        assert(c);
+
+        if (!(runtime_prefix && unit))
+                return 0;
+
+        cred_dir = path_join(runtime_prefix, "credentials", unit);
+        if (!cred_dir)
+                return -ENOMEM;
+
+        /* The directory is either covered by a tmpfs/ramfs of its own, or it is a plain directory. We
+         * cannot tell which from here, hence try to detach a mount first and then delete the mount point. */
+        (void) umount2(cred_dir, MNT_DETACH|UMOUNT_NOFOLLOW);
+        (void) rm_rf(cred_dir, REMOVE_ROOT|REMOVE_CHMOD);
+
+        return 0;
+}
+
+/* Installs a single credential file named 'id' in the directory referenced by 'dfd'. The data is first
+ * written to a temporary file (name generated by tempfn_random_child()), the file mode is reduced to 0400,
+ * read access is granted to 'uid' if that is a valid UID different from our own, and only then the file is
+ * renamed to its final name. Returns 0 on success, a negative errno-style value on failure. */
+static int write_credential(
+ int dfd,
+ const char *id,
+ const void *data,
+ size_t size,
+ uid_t uid,
+ gid_t gid,
+ bool ownership_ok) {
+
+ _cleanup_(unlink_and_freep) char *tmp = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ int r;
+
+ r = tempfn_random_child("", "cred", &tmp);
+ if (r < 0)
+ return r;
+
+ fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
+ if (fd < 0) {
+ /* We did not create the file, hence drop the name so that the cleanup handler has nothing to act on */
+ tmp = mfree(tmp);
+ return -errno;
+ }
+
+ r = loop_write(fd, data, size, /* do_poll = */ false);
+ if (r < 0)
+ return r;
+
+ if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
+ return -errno;
+
+ if (uid_is_valid(uid) && uid != getuid()) {
+ r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
+ if (r < 0) {
+ /* Only "not supported" and privilege errors qualify for the ownership fallback below */
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
+ * to express: that the user gets read access and nothing
+ * else. But if the backing fs can't support that (e.g. ramfs)
+ * then we can use file ownership instead. But that's only safe if
+ * we can then re-mount the whole thing read-only, so that the
+ * user can no longer chmod() the file to gain write access. */
+ return r;
+
+ if (fchown(fd, uid, gid) < 0)
+ return -errno;
+ }
+ }
+
+ if (renameat(dfd, tmp, dfd, id) < 0)
+ return -errno;
+
+ /* The temporary name is gone after the rename, hence nothing is left for the cleanup handler */
+ tmp = mfree(tmp);
+ return 0;
+}
+
+/* Selects which set of directories credential_search_path() assembles. */
+typedef enum CredentialSearchPath {
+ CREDENTIAL_SEARCH_PATH_TRUSTED, /* only the directories for non-encrypted credentials ("credstore") */
+ CREDENTIAL_SEARCH_PATH_ENCRYPTED, /* only the directories for encrypted credentials ("credstore.encrypted") */
+ CREDENTIAL_SEARCH_PATH_ALL, /* both of the above, encrypted directories first */
+ _CREDENTIAL_SEARCH_PATH_MAX,
+ _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
+} CredentialSearchPath;
+
+/* Builds the list of directories to look for credentials in, according to 'path'. Within each category
+ * the directory we received ourselves (if any) is listed first, followed by the matching configuration
+ * directories. Returns a newly allocated string vector, or NULL if extending the vector fails. */
+static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
+        _cleanup_strv_free_ char **dirs = NULL;
+        bool want_encrypted, want_trusted;
+
+        assert(params);
+        assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
+
+        /* Assemble a search path to find credentials in. For non-encrypted credentials, we'll look in
+         * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
+         * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
+
+        want_encrypted = IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL);
+        want_trusted = IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL);
+
+        if (want_encrypted) {
+                if (strv_extend(&dirs, params->received_encrypted_credentials_directory) < 0)
+                        return NULL;
+
+                if (strv_extend_strv(&dirs, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
+                        return NULL;
+        }
+
+        if (want_trusted) {
+                if (params->received_credentials_directory &&
+                    strv_extend(&dirs, params->received_credentials_directory) < 0)
+                        return NULL;
+
+                if (strv_extend_strv(&dirs, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
+                        return NULL;
+        }
+
+        if (DEBUG_LOGGING) {
+                _cleanup_free_ char *joined = strv_join(dirs, ":");
+
+                log_debug("Credential search path is: %s", strempty(joined));
+        }
+
+        return TAKE_PTR(dirs);
+}
+
+/* Writes the credential 'id' into the directory 'dir_fd', decrypting 'data' first if 'encrypted' is set.
+ * The credential is charged against the remaining budget '*left' with the length of its name plus the
+ * size of its (decrypted) payload. Returns -E2BIG if it does not fit into the budget, 0 on success, or
+ * another negative errno-style value if decrypting or writing fails. '*left' is only reduced on success. */
+static int maybe_decrypt_and_write_credential(
+                int dir_fd,
+                const char *id,
+                bool encrypted,
+                uid_t uid,
+                gid_t gid,
+                bool ownership_ok,
+                const char *data,
+                size_t size,
+                uint64_t *left) {
+
+        /* The decrypted payload is secret material, hence erase it before the memory is released, the same
+         * way the other credential buffers in this file are handled. */
+        _cleanup_(erase_and_freep) void *plaintext = NULL;
+        size_t add;
+        int r;
+
+        if (encrypted) {
+                size_t plaintext_size = 0;
+
+                r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
+                                                &plaintext, &plaintext_size);
+                if (r < 0)
+                        return r;
+
+                /* From here on operate on the decrypted copy */
+                data = plaintext;
+                size = plaintext_size;
+        }
+
+        add = strlen(id) + size;
+        if (add > *left)
+                return -E2BIG;
+
+        r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
+        if (r < 0)
+                return log_debug_errno(r, "Failed to write credential '%s': %m", id);
+
+        *left -= add;
+        return 0;
+}
+
+/* Expands the glob pattern 'path' relative to every directory listed in 'search_path', reads each matching
+ * file and writes it (decrypted if 'encrypted' is set) into the directory 'write_dfd', using the file name
+ * of the match as credential name. Directories in which the glob reports -ENOENT are skipped. Returns 0 on
+ * success, a negative errno-style value on the first failure. */
+static int load_credential_glob(
+ const char *path,
+ bool encrypted,
+ char **search_path,
+ ReadFullFileFlags flags,
+ int write_dfd,
+ uid_t uid,
+ gid_t gid,
+ bool ownership_ok,
+ uint64_t *left) {
+
+ int r;
+
+ STRV_FOREACH(d, search_path) {
+ _cleanup_globfree_ glob_t pglob = {};
+ _cleanup_free_ char *j = NULL;
+
+ j = path_join(*d, path);
+ if (!j)
+ return -ENOMEM;
+
+ r = safe_glob(j, 0, &pglob);
+ if (r == -ENOENT)
+ continue;
+ if (r < 0)
+ return r;
+
+ for (size_t n = 0; n < pglob.gl_pathc; n++) {
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_(erase_and_freep) char *data = NULL;
+ size_t size;
+
+ /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
+ r = read_full_file_full(
+ AT_FDCWD,
+ pglob.gl_pathv[n],
+ UINT64_MAX,
+ encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
+ flags,
+ NULL,
+ &data, &size);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to read credential '%s': %m",
+ pglob.gl_pathv[n]);
+
+ r = path_extract_filename(pglob.gl_pathv[n], &fn);
+ if (r < 0)
+ return log_debug_errno(r, "Failed to extract filename from '%s': %m",
+ pglob.gl_pathv[n]);
+
+ r = maybe_decrypt_and_write_credential(
+ write_dfd,
+ fn,
+ encrypted,
+ uid,
+ gid,
+ ownership_ok,
+ data, size,
+ left);
+ /* A credential reported as already existing is not an error, just move on to the next match */
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/* Acquires a single credential and writes it into the directory 'write_dfd' under the name 'id'. Where the
+ * data comes from depends on the arguments:
+ *
+ *   - 'read_dfd' is a directory fd: 'path' must be a plain file name and is read relative to that fd
+ *   - 'path' is absolute: it is read directly, connecting to it if it is an AF_UNIX socket
+ *   - 'path' is a valid credential name: it is looked up in the credential search path
+ *
+ * A source that does not exist is silently skipped (return value 0) unless 'path' was absolute and no
+ * SetCredential= fallback exists for 'id'. Returns 0 on success or skip, a negative errno-style value
+ * otherwise. '*left' is the remaining size budget and is reduced by what gets written. */
+static int load_credential(
+                const ExecContext *context,
+                const ExecParameters *params,
+                const char *id,
+                const char *path,
+                bool encrypted,
+                const char *unit,
+                int read_dfd,
+                int write_dfd,
+                uid_t uid,
+                gid_t gid,
+                bool ownership_ok,
+                uint64_t *left) {
+
+        ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
+        _cleanup_strv_free_ char **search_path = NULL;
+        _cleanup_(erase_and_freep) char *data = NULL;
+        _cleanup_free_ char *bindname = NULL;
+        const char *source = NULL;
+        bool missing_ok = true;
+        size_t size, maxsz;
+        int r;
+
+        assert(context);
+        assert(params);
+        assert(id);
+        assert(path);
+        assert(unit);
+        assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
+        assert(write_dfd >= 0);
+        assert(left);
+
+        if (read_dfd >= 0) {
+                /* If a directory fd is specified, then read the file directly from that dir. In this case we
+                 * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
+                 * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
+                 * open it. */
+
+                if (!filename_is_valid(path)) /* safety check */
+                        return -EINVAL;
+
+                missing_ok = true;
+                source = path;
+
+        } else if (path_is_absolute(path)) {
+                /* If this is an absolute path, read the data directly from it, and support AF_UNIX
+                 * sockets */
+
+                if (!path_is_valid(path)) /* safety check */
+                        return -EINVAL;
+
+                flags |= READ_FULL_FILE_CONNECT_SOCKET;
+
+                /* Pass some minimal info about the unit and the credential name we are looking to acquire
+                 * via the source socket address in case we read off an AF_UNIX socket. */
+                if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
+                        return -ENOMEM;
+
+                missing_ok = false;
+                source = path;
+
+        } else if (credential_name_valid(path)) {
+                /* If this is a relative path, take it as credential name relative to the credentials
+                 * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
+                 * are operating on a credential store, i.e. this is guaranteed to be regular files. */
+
+                search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
+                if (!search_path)
+                        return -ENOMEM;
+
+                missing_ok = true;
+        } else
+                source = NULL;
+
+        if (encrypted)
+                flags |= READ_FULL_FILE_UNBASE64;
+
+        maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
+
+        if (search_path) {
+                /* Should the search path turn out to be empty the loop body never runs; make sure we then
+                 * report "not found" instead of evaluating an uninitialized result below. */
+                r = -ENOENT;
+
+                STRV_FOREACH(d, search_path) {
+                        _cleanup_free_ char *j = NULL;
+
+                        j = path_join(*d, path);
+                        if (!j)
+                                return -ENOMEM;
+
+                        r = read_full_file_full(
+                                        AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
+                                        UINT64_MAX,
+                                        maxsz,
+                                        flags,
+                                        NULL,
+                                        &data, &size);
+                        /* Stop at the first directory that has the credential, or on any real error */
+                        if (r != -ENOENT)
+                                break;
+                }
+        } else if (source)
+                r = read_full_file_full(
+                                read_dfd, source,
+                                UINT64_MAX,
+                                maxsz,
+                                flags,
+                                bindname,
+                                &data, &size);
+        else
+                r = -ENOENT;
+
+        if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
+                /* Make a missing inherited credential non-fatal, let's just continue. After all apps
+                 * will get clear errors if we don't pass such a missing credential on as they
+                 * themselves will get ENOENT when trying to read them, which should not be much
+                 * worse than when we handle the error here and make it fatal.
+                 *
+                 * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
+                 * we are fine, too. */
+                log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
+                return 0;
+        }
+        if (r < 0)
+                return log_debug_errno(r, "Failed to read credential '%s': %m", path);
+
+        return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
+}
+
+/* Arguments handed to load_cred_recurse_dir_cb() via the recurse_dir() userdata pointer. The fields mirror
+ * the parameters of load_credential(), to which they are passed through. */
+struct load_cred_args {
+ const ExecContext *context;
+ const ExecParameters *params;
+ bool encrypted;
+ const char *unit;
+ int dfd; /* directory fd the credentials are written to */
+ uid_t uid;
+ gid_t gid;
+ bool ownership_ok;
+ uint64_t *left; /* remaining size budget, shared across all entries */
+};
+
+/* recurse_dir() callback used when LoadCredential= points to a directory. For every regular file or socket
+ * entry it derives a credential ID from the entry's path (with "/" replaced by "_") and loads the entry
+ * via load_credential(), unless a credential with that ID already exists in the target directory.
+ * 'userdata' must point to a struct load_cred_args. Returns RECURSE_DIR_CONTINUE to go on, or a negative
+ * errno-style value on failure. */
+static int load_cred_recurse_dir_cb(
+ RecurseDirEvent event,
+ const char *path,
+ int dir_fd,
+ int inode_fd,
+ const struct dirent *de,
+ const struct statx *sx,
+ void *userdata) {
+
+ struct load_cred_args *args = ASSERT_PTR(userdata);
+ _cleanup_free_ char *sub_id = NULL;
+ int r;
+
+ if (event != RECURSE_DIR_ENTRY)
+ return RECURSE_DIR_CONTINUE;
+
+ if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
+ return RECURSE_DIR_CONTINUE;
+
+ sub_id = strreplace(path, "/", "_");
+ if (!sub_id)
+ return -ENOMEM;
+
+ if (!credential_name_valid(sub_id))
+ return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
+
+ /* Do not replace a credential that was already written under the same ID */
+ if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
+ log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
+ return RECURSE_DIR_CONTINUE;
+ }
+ if (errno != ENOENT)
+ return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
+
+ /* Read the entry by its file name relative to the directory fd currently being enumerated */
+ r = load_credential(
+ args->context,
+ args->params,
+ sub_id,
+ de->d_name,
+ args->encrypted,
+ args->unit,
+ dir_fd,
+ args->dfd,
+ args->uid,
+ args->gid,
+ args->ownership_ok,
+ args->left);
+ if (r < 0)
+ return r;
+
+ return RECURSE_DIR_CONTINUE;
+}
+
+/* Populates the credentials directory 'p' of a unit. This happens in three steps: first the entries of
+ * context->load_credentials are loaded (from files, directories, AF_UNIX sockets or the credential search
+ * path), then the glob patterns in context->import_credentials are resolved against the trusted and the
+ * encrypted search paths, and finally the literal entries of context->set_credentials are written, unless
+ * a credential of the same name already exists. The sum of all credentials is limited to
+ * CREDENTIALS_TOTAL_SIZE_MAX. Afterwards the directory is made read-only and access is granted to 'uid'
+ * (via ACL, or via ownership if that fails and 'ownership_ok' is set). Returns 0 on success, a negative
+ * errno-style value on failure. */
+static int acquire_credentials(
+                const ExecContext *context,
+                const ExecParameters *params,
+                const char *unit,
+                const char *p,
+                uid_t uid,
+                gid_t gid,
+                bool ownership_ok) {
+
+        uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
+        _cleanup_close_ int dfd = -EBADF;
+        const char *ic;
+        ExecLoadCredential *lc;
+        ExecSetCredential *sc;
+        int r;
+
+        assert(context);
+        assert(p);
+
+        dfd = open(p, O_DIRECTORY|O_CLOEXEC);
+        if (dfd < 0)
+                return -errno;
+
+        r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
+        if (r < 0)
+                return r;
+
+        /* First, load credentials off disk (or acquire via AF_UNIX socket) */
+        HASHMAP_FOREACH(lc, context->load_credentials) {
+                _cleanup_close_ int sub_fd = -EBADF;
+
+                /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
+                 * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
+                 * a regular file. Finally, if it's a relative path we will use it as a credential name to
+                 * propagate a credential passed to us from further up. */
+
+                if (path_is_absolute(lc->path)) {
+                        sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
+                        if (sub_fd < 0 && !IN_SET(errno,
+                                                  ENOTDIR, /* Not a directory */
+                                                  ENOENT)) /* Doesn't exist? */
+                                return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
+                }
+
+                if (sub_fd < 0)
+                        /* Regular file (incl. a credential passed in from higher up) */
+                        r = load_credential(
+                                        context,
+                                        params,
+                                        lc->id,
+                                        lc->path,
+                                        lc->encrypted,
+                                        unit,
+                                        AT_FDCWD,
+                                        dfd,
+                                        uid,
+                                        gid,
+                                        ownership_ok,
+                                        &left);
+                else
+                        /* Directory */
+                        r = recurse_dir(
+                                        sub_fd,
+                                        /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
+                                        /* statx_mask= */ 0,
+                                        /* n_depth_max= */ UINT_MAX,
+                                        RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
+                                        load_cred_recurse_dir_cb,
+                                        &(struct load_cred_args) {
+                                                .context = context,
+                                                .params = params,
+                                                .encrypted = lc->encrypted,
+                                                .unit = unit,
+                                                .dfd = dfd,
+                                                .uid = uid,
+                                                .gid = gid,
+                                                .ownership_ok = ownership_ok,
+                                                .left = &left,
+                                        });
+                if (r < 0)
+                        return r;
+        }
+
+        /* Next, look for system credentials and credentials in the credentials store. Note that these do not
+         * override any credentials found earlier. */
+        SET_FOREACH(ic, context->import_credentials) {
+                /* This is a string vector, hence the strings have to be released along with the array, both
+                 * at the end of each iteration and on the early returns below. */
+                _cleanup_strv_free_ char **search_path = NULL;
+
+                search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
+                if (!search_path)
+                        return -ENOMEM;
+
+                r = load_credential_glob(
+                                ic,
+                                /* encrypted = */ false,
+                                search_path,
+                                READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
+                                dfd,
+                                uid,
+                                gid,
+                                ownership_ok,
+                                &left);
+                if (r < 0)
+                        return r;
+
+                /* Now do the same for the encrypted search path */
+                search_path = strv_free(search_path);
+                search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
+                if (!search_path)
+                        return -ENOMEM;
+
+                r = load_credential_glob(
+                                ic,
+                                /* encrypted = */ true,
+                                search_path,
+                                READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
+                                dfd,
+                                uid,
+                                gid,
+                                ownership_ok,
+                                &left);
+                if (r < 0)
+                        return r;
+        }
+
+        /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
+         * add them, so that they can act as a "default" if the same credential is specified multiple times. */
+        HASHMAP_FOREACH(sc, context->set_credentials) {
+                _cleanup_(erase_and_freep) void *plaintext = NULL;
+                const char *data;
+                size_t size, add;
+
+                /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
+                 * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
+                 * slow and involved, hence it's nice to be able to skip that if the credential already
+                 * exists anyway. */
+                if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
+                        continue;
+                if (errno != ENOENT)
+                        return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
+
+                if (sc->encrypted) {
+                        r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
+                        if (r < 0)
+                                return r;
+
+                        data = plaintext;
+                } else {
+                        data = sc->data;
+                        size = sc->size;
+                }
+
+                /* Charge the name and the payload against the remaining budget */
+                add = strlen(sc->id) + size;
+                if (add > left)
+                        return -E2BIG;
+
+                r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
+                if (r < 0)
+                        return r;
+
+                left -= add;
+        }
+
+        r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
+        if (r < 0)
+                return r;
+
+        /* After we created all keys with the right perms, also make sure the credential store as a whole is
+         * accessible */
+
+        if (uid_is_valid(uid) && uid != getuid()) {
+                r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
+                if (r < 0) {
+                        /* Only "not supported" and privilege errors qualify for the ownership fallback */
+                        if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+                                return r;
+
+                        if (!ownership_ok)
+                                return r;
+
+                        if (fchown(dfd, uid, gid) < 0)
+                                return -errno;
+                }
+        }
+
+        return 0;
+}
+
+/* Prepares the credential store of a unit and puts it in place at 'final'. If possible the credentials are
+ * written into a file system mounted on 'workspace' (a fresh credentials file system, or a writable bind
+ * mount of 'final'), which is then remounted read-only and moved to 'final'. If nothing can be mounted for
+ * lack of privileges and 'must_mount' is false, the credentials are written into the plain directory
+ * 'final' instead. Returns 0 on success, a negative errno-style value on failure. */
+static int setup_credentials_internal(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ const char *final, /* This is where the credential store shall eventually end up at */
+ const char *workspace, /* This is where we can prepare it before moving it to the final place */
+ bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
+ bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
+ uid_t uid,
+ gid_t gid) {
+
+ int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
+ * if we mounted something; false if we definitely can't mount anything */
+ bool final_mounted;
+ const char *where;
+
+ assert(context);
+ assert(final);
+ assert(workspace);
+
+ if (reuse_workspace) {
+ r = path_is_mount_point(workspace, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse
+ * it, let's keep this in mind */
+ else
+ workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
+ } else
+ workspace_mounted = -1; /* ditto */
+
+ r = path_is_mount_point(final, NULL, 0);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ /* If the final place already has something mounted, we use that. If the workspace also has
+ * something mounted we assume it's actually the same mount (but with MS_RDONLY
+ * different). */
+ final_mounted = true;
+
+ if (workspace_mounted < 0) {
+ /* If the final place is mounted, but the workspace isn't, then let's bind mount
+ * the final version to the workspace, and make it writable, so that we can make
+ * changes */
+
+ r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
+ if (r < 0)
+ return r;
+
+ workspace_mounted = true;
+ }
+ } else
+ final_mounted = false;
+
+ if (workspace_mounted < 0) {
+ /* Nothing is mounted on the workspace yet, let's try to mount something now */
+
+ r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
+ if (r < 0) {
+ /* If that didn't work, try to make a bind mount from the final to the workspace, so
+ * that we can make it writable there. */
+ r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
+ if (r < 0) {
+ if (!ERRNO_IS_PRIVILEGE(r))
+ /* Propagate anything that isn't a permission problem. */
+ return r;
+
+ if (must_mount)
+ /* If it's not OK to use the plain directory fallback, propagate all
+ * errors too. */
+ return r;
+
+ /* If we lack privileges to bind mount stuff, then let's gracefully proceed
+ * for compat with container envs, and just use the final dir as is. */
+
+ workspace_mounted = false;
+ } else {
+ /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
+ if (r < 0)
+ return r;
+
+ workspace_mounted = true;
+ }
+ } else
+ workspace_mounted = true;
+ }
+
+ /* At this point workspace_mounted is either true or false, never negative anymore */
+ assert(!must_mount || workspace_mounted > 0);
+ where = workspace_mounted ? workspace : final;
+
+ (void) label_fix_full(AT_FDCWD, where, final, 0);
+
+ /* Falling back to file ownership is only permitted if we operate on a mount of our own */
+ r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
+ if (r < 0)
+ return r;
+
+ if (workspace_mounted) {
+ bool install;
+
+ /* Determine if we should actually install the prepared mount in the final location by bind
+ * mounting it there. We do so only if the mount is not established there already, and if the
+ * mount is actually non-empty (i.e. carries at least one credential). Note that in the best
+ * case we are doing all this in a mount namespace, thus no one else will see that we
+ * allocated a file system we are getting rid of again here. */
+ if (final_mounted)
+ install = false; /* already installed */
+ else {
+ r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
+ if (r < 0)
+ return r;
+
+ install = r == 0; /* install only if non-empty */
+ }
+
+ if (install) {
+ /* Make workspace read-only now, so that any bind mount we make from it defaults to
+ * read-only too */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
+ if (r < 0)
+ return r;
+
+ /* And mount it to the final place, read-only */
+ r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
+ } else
+ /* Otherwise get rid of it */
+ r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
+ if (r < 0)
+ return r;
+ } else {
+ _cleanup_free_ char *parent = NULL;
+
+ /* If we do not have our own mount but used the plain directory fallback, then we need to
+ * open access to the top-level credential directory and the per-service directory now */
+
+ r = path_extract_directory(final, &parent);
+ if (r < 0)
+ return r;
+ if (chmod(parent, 0755) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+/* Sets up the credentials directory "<runtime prefix>/credentials/<unit>" for a unit. Does nothing and
+ * returns 0 if the context has no credentials configured. The actual work is preferably done in a forked
+ * child that runs in a mount namespace of its own; if such a child cannot be forked because of a privilege
+ * or support issue, the work is done in this process, using a workspace directory below
+ * "<runtime prefix>/systemd/temporary-credentials". Returns 0 on success, a negative errno-style value on
+ * failure. */
+int setup_credentials(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ uid_t uid,
+ gid_t gid) {
+
+ _cleanup_free_ char *p = NULL, *q = NULL;
+ int r;
+
+ assert(context);
+ assert(params);
+
+ if (!exec_context_has_credentials(context))
+ return 0;
+
+ if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
+ return -EINVAL;
+
+ /* This is where we'll place stuff when we are done; this main credentials directory is world-readable,
+ * and the subdir we mount over with a read-only file system readable by the service's user */
+ q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
+ if (!q)
+ return -ENOMEM;
+
+ r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ p = path_join(q, unit);
+ if (!p)
+ return -ENOMEM;
+
+ r = mkdir_label(p, 0700); /* per-unit dir: private to user */
+ if (r < 0 && r != -EEXIST)
+ return r;
+
+ r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
+ if (r < 0) {
+ _cleanup_free_ char *t = NULL, *u = NULL;
+
+ /* If this is not a privilege or support issue then propagate the error */
+ if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ return r;
+
+ /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
+ * it into place, so that users can't access half-initialized credential stores. */
+ t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
+ if (!t)
+ return -ENOMEM;
+
+ /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
+ * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
+ * after it is fully set up */
+ u = path_join(t, unit);
+ if (!u)
+ return -ENOMEM;
+
+ FOREACH_STRING(i, t, u) {
+ r = mkdir_label(i, 0700);
+ if (r < 0 && r != -EEXIST)
+ return r;
+ }
+
+ r = setup_credentials_internal(
+ context,
+ params,
+ unit,
+ p, /* final mount point */
+ u, /* temporary workspace to overmount */
+ true, /* reuse the workspace if it is already a mount */
+ false, /* it's OK to fall back to a plain directory if we can't mount anything */
+ uid,
+ gid);
+
+ (void) rmdir(u); /* remove the workspace again if we can. */
+
+ if (r < 0)
+ return r;
+
+ } else if (r == 0) {
+
+ /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
+ * we can use the same directory for all cases, after turning off propagation. Question
+ * though is: where do we turn off propagation exactly, and where do we place the workspace
+ * directory? We need some place that is guaranteed to be a mount point in the host, and
+ * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
+ * since we ultimately want to move the resulting file system there, i.e. we need propagation
+ * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
+ * would be visible in the host mount table all the time, which we want to avoid. Hence, what
+ * we do here instead is use /dev/ and /dev/shm/ for our purposes. We know for sure that
+ * /dev/ is a mount point and we know for sure that /dev/shm/ exists. Hence we can turn off
+ * propagation on the former, and then overmount the latter.
+ *
+ * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
+ * for this purpose, but there are few other candidates that work equally well for us, and
+ * given that we do this in a privately namespaced short-lived single-threaded process that
+ * no one else sees this should be OK to do. */
+
+ /* Turn off propagation from our namespace to host */
+ r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL);
+ if (r < 0)
+ goto child_fail;
+
+ r = setup_credentials_internal(
+ context,
+ params,
+ unit,
+ p, /* final mount point */
+ "/dev/shm", /* temporary workspace to overmount */
+ false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
+ true, /* insist that something is mounted, do not allow fallback to plain directory */
+ uid,
+ gid);
+ if (r < 0)
+ goto child_fail;
+
+ /* We are in the child here, hence report the outcome via the exit status instead of returning */
+ _exit(EXIT_SUCCESS);
+
+ child_fail:
+ _exit(EXIT_FAILURE);
+ }
+
+ /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
+ * try to remove it. This matters in particular if we created the dir as mount point but then didn't
+ * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCES being
+ * seen by users when trying to access this inode. */
+ (void) rmdir(p);
+ return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "hash-funcs.h"
+
+typedef struct ExecContext ExecContext;
+typedef struct ExecParameters ExecParameters;
+
+/* A credential configured with LoadCredential= */
+typedef struct ExecLoadCredential {
+ char *id, *path; /* credential name, and the source to load it from; both strings are owned by this object */
+ bool encrypted; /* if true, the loaded data is decrypted before it is written out */
+} ExecLoadCredential;
+
+/* A credential configured with SetCredential= */
+typedef struct ExecSetCredential {
+ char *id; /* credential name; owned by this object */
+ bool encrypted; /* if true, 'data' is decrypted before it is written out */
+ void *data; /* literal credential payload of 'size' bytes; owned by this object */
+ size_t size;
+} ExecSetCredential;
+
+/* Destructors for the two credential objects, plus matching cleanup helpers. Both destructors accept NULL
+ * and return NULL. */
+ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc);
+DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSetCredential*, exec_set_credential_free);
+
+ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc);
+DEFINE_TRIVIAL_CLEANUP_FUNC(ExecLoadCredential*, exec_load_credential_free);
+
+/* Hash operations for string-keyed containers that own their ExecSetCredential/ExecLoadCredential values */
+extern const struct hash_ops exec_set_credential_hash_ops;
+extern const struct hash_ops exec_load_credential_hash_ops;
+
+/* Queries whether a context has any encrypted credential, or any credential at all, configured */
+bool exec_context_has_encrypted_credentials(ExecContext *c);
+bool exec_context_has_credentials(const ExecContext *c);
+
+/* Removes "<runtime_prefix>/credentials/<unit>"; a no-op if either the prefix or the unit name is NULL.
+ * The parameter is named like in the definition. */
+int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit);
+
+/* Creates and populates the credentials directory of a unit, accessible to 'uid'. Returns 0 on success
+ * (including when the context has no credentials), a negative errno-style value on failure. */
+int setup_credentials(
+ const ExecContext *context,
+ const ExecParameters *params,
+ const char *unit,
+ uid_t uid,
+ gid_t gid);
#include "cap-list.h"
#include "capability-util.h"
#include "cpu-set-util.h"
+#include "credential.h"
#include "creds-util.h"
#include "dbus-execute.h"
#include "dbus-util.h"
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <sys/mount.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/shm.h>
#include "sd-messages.h"
-#include "acl-util.h"
#include "af-list.h"
#include "alloc-util.h"
#if HAVE_APPARMOR
#include "chown-recursive.h"
#include "constants.h"
#include "cpu-set-util.h"
-#include "creds-util.h"
+#include "credential.h"
#include "data-fd-util.h"
#include "env-file.h"
#include "env-util.h"
#include "execute.h"
#include "exit-status.h"
#include "fd-util.h"
-#include "fileio.h"
#include "format-util.h"
#include "glob-util.h"
#include "hexdecoct.h"
#include "io-util.h"
#include "ioprio-util.h"
-#include "label-util.h"
#include "lock-util.h"
#include "log.h"
#include "macro.h"
#include "missing_ioprio.h"
#include "missing_prctl.h"
#include "mkdir-label.h"
-#include "mount-util.h"
-#include "mountpoint-util.h"
#include "namespace.h"
#include "parse-util.h"
#include "path-util.h"
#include "proc-cmdline.h"
#include "process-util.h"
#include "psi-util.h"
-#include "random-util.h"
-#include "recurse-dir.h"
#include "rlimit-util.h"
#include "rm-rf.h"
#include "seccomp-util.h"
context_has_syscall_logs(c);
}
-bool exec_context_has_credentials(const ExecContext *context) {
-
- assert(context);
-
- return !hashmap_isempty(context->set_credentials) ||
- !hashmap_isempty(context->load_credentials) ||
- !set_isempty(context->import_credentials);
-}
-
#if HAVE_SECCOMP
static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
return r;
}
-static int write_credential(
- int dfd,
- const char *id,
- const void *data,
- size_t size,
- uid_t uid,
- gid_t gid,
- bool ownership_ok) {
-
- _cleanup_(unlink_and_freep) char *tmp = NULL;
- _cleanup_close_ int fd = -EBADF;
- int r;
-
- r = tempfn_random_child("", "cred", &tmp);
- if (r < 0)
- return r;
-
- fd = openat(dfd, tmp, O_CREAT|O_RDWR|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_NOCTTY, 0600);
- if (fd < 0) {
- tmp = mfree(tmp);
- return -errno;
- }
-
- r = loop_write(fd, data, size, /* do_poll = */ false);
- if (r < 0)
- return r;
-
- if (fchmod(fd, 0400) < 0) /* Take away "w" bit */
- return -errno;
-
- if (uid_is_valid(uid) && uid != getuid()) {
- r = fd_add_uid_acl_permission(fd, uid, ACL_READ);
- if (r < 0) {
- if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
- return r;
-
- if (!ownership_ok) /* Ideally we use ACLs, since we can neatly express what we want
- * to express: that the user gets read access and nothing
- * else. But if the backing fs can't support that (e.g. ramfs)
- * then we can use file ownership instead. But that's only safe if
- * we can then re-mount the whole thing read-only, so that the
- * user can no longer chmod() the file to gain write access. */
- return r;
-
- if (fchown(fd, uid, gid) < 0)
- return -errno;
- }
- }
-
- if (renameat(dfd, tmp, dfd, id) < 0)
- return -errno;
-
- tmp = mfree(tmp);
- return 0;
-}
-
-typedef enum CredentialSearchPath {
- CREDENTIAL_SEARCH_PATH_TRUSTED,
- CREDENTIAL_SEARCH_PATH_ENCRYPTED,
- CREDENTIAL_SEARCH_PATH_ALL,
- _CREDENTIAL_SEARCH_PATH_MAX,
- _CREDENTIAL_SEARCH_PATH_INVALID = -EINVAL,
-} CredentialSearchPath;
-
-static char **credential_search_path(const ExecParameters *params, CredentialSearchPath path) {
-
- _cleanup_strv_free_ char **l = NULL;
-
- assert(params);
- assert(path >= 0 && path < _CREDENTIAL_SEARCH_PATH_MAX);
-
- /* Assemble a search path to find credentials in. For non-encrypted credentials, We'll look in
- * /etc/credstore/ (and similar directories in /usr/lib/ + /run/). If we're looking for encrypted
- * credentials, we'll look in /etc/credstore.encrypted/ (and similar dirs). */
-
- if (IN_SET(path, CREDENTIAL_SEARCH_PATH_ENCRYPTED, CREDENTIAL_SEARCH_PATH_ALL)) {
- if (strv_extend(&l, params->received_encrypted_credentials_directory) < 0)
- return NULL;
-
- if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore.encrypted"), /* filter_duplicates= */ true) < 0)
- return NULL;
- }
-
- if (IN_SET(path, CREDENTIAL_SEARCH_PATH_TRUSTED, CREDENTIAL_SEARCH_PATH_ALL)) {
- if (params->received_credentials_directory)
- if (strv_extend(&l, params->received_credentials_directory) < 0)
- return NULL;
-
- if (strv_extend_strv(&l, CONF_PATHS_STRV("credstore"), /* filter_duplicates= */ true) < 0)
- return NULL;
- }
-
- if (DEBUG_LOGGING) {
- _cleanup_free_ char *t = strv_join(l, ":");
-
- log_debug("Credential search path is: %s", strempty(t));
- }
-
- return TAKE_PTR(l);
-}
-
-static int maybe_decrypt_and_write_credential(
- int dir_fd,
- const char *id,
- bool encrypted,
- uid_t uid,
- gid_t gid,
- bool ownership_ok,
- const char *data,
- size_t size,
- uint64_t *left) {
-
- _cleanup_free_ void *plaintext = NULL;
- size_t add;
- int r;
-
- if (encrypted) {
- size_t plaintext_size = 0;
-
- r = decrypt_credential_and_warn(id, now(CLOCK_REALTIME), NULL, NULL, data, size,
- &plaintext, &plaintext_size);
- if (r < 0)
- return r;
-
- data = plaintext;
- size = plaintext_size;
- }
-
- add = strlen(id) + size;
- if (add > *left)
- return -E2BIG;
-
- r = write_credential(dir_fd, id, data, size, uid, gid, ownership_ok);
- if (r < 0)
- return log_debug_errno(r, "Failed to write credential '%s': %m", id);
-
- *left -= add;
- return 0;
-}
-
-static int load_credential_glob(
- const char *path,
- bool encrypted,
- char **search_path,
- ReadFullFileFlags flags,
- int write_dfd,
- uid_t uid,
- gid_t gid,
- bool ownership_ok,
- uint64_t *left) {
-
- int r;
-
- STRV_FOREACH(d, search_path) {
- _cleanup_globfree_ glob_t pglob = {};
- _cleanup_free_ char *j = NULL;
-
- j = path_join(*d, path);
- if (!j)
- return -ENOMEM;
-
- r = safe_glob(j, 0, &pglob);
- if (r == -ENOENT)
- continue;
- if (r < 0)
- return r;
-
- for (size_t n = 0; n < pglob.gl_pathc; n++) {
- _cleanup_free_ char *fn = NULL;
- _cleanup_(erase_and_freep) char *data = NULL;
- size_t size;
-
- /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
- r = read_full_file_full(
- AT_FDCWD,
- pglob.gl_pathv[n],
- UINT64_MAX,
- encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX,
- flags,
- NULL,
- &data, &size);
- if (r < 0)
- return log_debug_errno(r, "Failed to read credential '%s': %m",
- pglob.gl_pathv[n]);
-
- r = path_extract_filename(pglob.gl_pathv[n], &fn);
- if (r < 0)
- return log_debug_errno(r, "Failed to extract filename from '%s': %m",
- pglob.gl_pathv[n]);
-
- r = maybe_decrypt_and_write_credential(
- write_dfd,
- fn,
- encrypted,
- uid,
- gid,
- ownership_ok,
- data, size,
- left);
- if (r == -EEXIST)
- continue;
- if (r < 0)
- return r;
- }
- }
-
- return 0;
-}
-
-static int load_credential(
- const ExecContext *context,
- const ExecParameters *params,
- const char *id,
- const char *path,
- bool encrypted,
- const char *unit,
- int read_dfd,
- int write_dfd,
- uid_t uid,
- gid_t gid,
- bool ownership_ok,
- uint64_t *left) {
-
- ReadFullFileFlags flags = READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER;
- _cleanup_strv_free_ char **search_path = NULL;
- _cleanup_(erase_and_freep) char *data = NULL;
- _cleanup_free_ char *bindname = NULL;
- const char *source = NULL;
- bool missing_ok = true;
- size_t size, maxsz;
- int r;
-
- assert(context);
- assert(params);
- assert(id);
- assert(path);
- assert(unit);
- assert(read_dfd >= 0 || read_dfd == AT_FDCWD);
- assert(write_dfd >= 0);
- assert(left);
-
- if (read_dfd >= 0) {
- /* If a directory fd is specified, then read the file directly from that dir. In this case we
- * won't do AF_UNIX stuff (we simply don't want to recursively iterate down a tree of AF_UNIX
- * IPC sockets). It's OK if a file vanishes here in the time we enumerate it and intend to
- * open it. */
-
- if (!filename_is_valid(path)) /* safety check */
- return -EINVAL;
-
- missing_ok = true;
- source = path;
-
- } else if (path_is_absolute(path)) {
- /* If this is an absolute path, read the data directly from it, and support AF_UNIX
- * sockets */
-
- if (!path_is_valid(path)) /* safety check */
- return -EINVAL;
-
- flags |= READ_FULL_FILE_CONNECT_SOCKET;
-
- /* Pass some minimal info about the unit and the credential name we are looking to acquire
- * via the source socket address in case we read off an AF_UNIX socket. */
- if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, id) < 0)
- return -ENOMEM;
-
- missing_ok = false;
- source = path;
-
- } else if (credential_name_valid(path)) {
- /* If this is a relative path, take it as credential name relative to the credentials
- * directory we received ourselves. We don't support the AF_UNIX stuff in this mode, since we
- * are operating on a credential store, i.e. this is guaranteed to be regular files. */
-
- search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ALL);
- if (!search_path)
- return -ENOMEM;
-
- missing_ok = true;
- } else
- source = NULL;
-
- if (encrypted)
- flags |= READ_FULL_FILE_UNBASE64;
-
- maxsz = encrypted ? CREDENTIAL_ENCRYPTED_SIZE_MAX : CREDENTIAL_SIZE_MAX;
-
- if (search_path) {
- STRV_FOREACH(d, search_path) {
- _cleanup_free_ char *j = NULL;
-
- j = path_join(*d, path);
- if (!j)
- return -ENOMEM;
-
- r = read_full_file_full(
- AT_FDCWD, j, /* path is absolute, hence pass AT_FDCWD as nop dir fd here */
- UINT64_MAX,
- maxsz,
- flags,
- NULL,
- &data, &size);
- if (r != -ENOENT)
- break;
- }
- } else if (source)
- r = read_full_file_full(
- read_dfd, source,
- UINT64_MAX,
- maxsz,
- flags,
- bindname,
- &data, &size);
- else
- r = -ENOENT;
-
- if (r == -ENOENT && (missing_ok || hashmap_contains(context->set_credentials, id))) {
- /* Make a missing inherited credential non-fatal, let's just continue. After all apps
- * will get clear errors if we don't pass such a missing credential on as they
- * themselves will get ENOENT when trying to read them, which should not be much
- * worse than when we handle the error here and make it fatal.
- *
- * Also, if the source file doesn't exist, but a fallback is set via SetCredentials=
- * we are fine, too. */
- log_debug_errno(r, "Couldn't read inherited credential '%s', skipping: %m", path);
- return 0;
- }
- if (r < 0)
- return log_debug_errno(r, "Failed to read credential '%s': %m", path);
-
- return maybe_decrypt_and_write_credential(write_dfd, id, encrypted, uid, gid, ownership_ok, data, size, left);
-}
-
-struct load_cred_args {
- const ExecContext *context;
- const ExecParameters *params;
- bool encrypted;
- const char *unit;
- int dfd;
- uid_t uid;
- gid_t gid;
- bool ownership_ok;
- uint64_t *left;
-};
-
-static int load_cred_recurse_dir_cb(
- RecurseDirEvent event,
- const char *path,
- int dir_fd,
- int inode_fd,
- const struct dirent *de,
- const struct statx *sx,
- void *userdata) {
-
- struct load_cred_args *args = ASSERT_PTR(userdata);
- _cleanup_free_ char *sub_id = NULL;
- int r;
-
- if (event != RECURSE_DIR_ENTRY)
- return RECURSE_DIR_CONTINUE;
-
- if (!IN_SET(de->d_type, DT_REG, DT_SOCK))
- return RECURSE_DIR_CONTINUE;
-
- sub_id = strreplace(path, "/", "_");
- if (!sub_id)
- return -ENOMEM;
-
- if (!credential_name_valid(sub_id))
- return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Credential would get ID %s, which is not valid, refusing", sub_id);
-
- if (faccessat(args->dfd, sub_id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) {
- log_debug("Skipping credential with duplicated ID %s at %s", sub_id, path);
- return RECURSE_DIR_CONTINUE;
- }
- if (errno != ENOENT)
- return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sub_id);
-
- r = load_credential(
- args->context,
- args->params,
- sub_id,
- de->d_name,
- args->encrypted,
- args->unit,
- dir_fd,
- args->dfd,
- args->uid,
- args->gid,
- args->ownership_ok,
- args->left);
- if (r < 0)
- return r;
-
- return RECURSE_DIR_CONTINUE;
-}
-
-static int acquire_credentials(
- const ExecContext *context,
- const ExecParameters *params,
- const char *unit,
- const char *p,
- uid_t uid,
- gid_t gid,
- bool ownership_ok) {
-
- uint64_t left = CREDENTIALS_TOTAL_SIZE_MAX;
- _cleanup_close_ int dfd = -EBADF;
- const char *ic;
- ExecLoadCredential *lc;
- ExecSetCredential *sc;
- int r;
-
- assert(context);
- assert(p);
-
- dfd = open(p, O_DIRECTORY|O_CLOEXEC);
- if (dfd < 0)
- return -errno;
-
- r = fd_acl_make_writable(dfd); /* Add the "w" bit, if we are reusing an already set up credentials dir where it was unset */
- if (r < 0)
- return r;
-
- /* First, load credentials off disk (or acquire via AF_UNIX socket) */
- HASHMAP_FOREACH(lc, context->load_credentials) {
- _cleanup_close_ int sub_fd = -EBADF;
-
- /* If this is an absolute path, then try to open it as a directory. If that works, then we'll
- * recurse into it. If it is an absolute path but it isn't a directory, then we'll open it as
- * a regular file. Finally, if it's a relative path we will use it as a credential name to
- * propagate a credential passed to us from further up. */
-
- if (path_is_absolute(lc->path)) {
- sub_fd = open(lc->path, O_DIRECTORY|O_CLOEXEC|O_RDONLY);
- if (sub_fd < 0 && !IN_SET(errno,
- ENOTDIR, /* Not a directory */
- ENOENT)) /* Doesn't exist? */
- return log_debug_errno(errno, "Failed to open '%s': %m", lc->path);
- }
-
- if (sub_fd < 0)
- /* Regular file (incl. a credential passed in from higher up) */
- r = load_credential(
- context,
- params,
- lc->id,
- lc->path,
- lc->encrypted,
- unit,
- AT_FDCWD,
- dfd,
- uid,
- gid,
- ownership_ok,
- &left);
- else
- /* Directory */
- r = recurse_dir(
- sub_fd,
- /* path= */ lc->id, /* recurse_dir() will suffix the subdir paths from here to the top-level id */
- /* statx_mask= */ 0,
- /* n_depth_max= */ UINT_MAX,
- RECURSE_DIR_SORT|RECURSE_DIR_IGNORE_DOT|RECURSE_DIR_ENSURE_TYPE,
- load_cred_recurse_dir_cb,
- &(struct load_cred_args) {
- .context = context,
- .params = params,
- .encrypted = lc->encrypted,
- .unit = unit,
- .dfd = dfd,
- .uid = uid,
- .gid = gid,
- .ownership_ok = ownership_ok,
- .left = &left,
- });
- if (r < 0)
- return r;
- }
-
- /* Next, look for system credentials and credentials in the credentials store. Note that these do not
- * override any credentials found earlier. */
- SET_FOREACH(ic, context->import_credentials) {
- _cleanup_free_ char **search_path = NULL;
-
- search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_TRUSTED);
- if (!search_path)
- return -ENOMEM;
-
- r = load_credential_glob(
- ic,
- /* encrypted = */ false,
- search_path,
- READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER,
- dfd,
- uid,
- gid,
- ownership_ok,
- &left);
- if (r < 0)
- return r;
-
- search_path = strv_free(search_path);
- search_path = credential_search_path(params, CREDENTIAL_SEARCH_PATH_ENCRYPTED);
- if (!search_path)
- return -ENOMEM;
-
- r = load_credential_glob(
- ic,
- /* encrypted = */ true,
- search_path,
- READ_FULL_FILE_SECURE|READ_FULL_FILE_FAIL_WHEN_LARGER|READ_FULL_FILE_UNBASE64,
- dfd,
- uid,
- gid,
- ownership_ok,
- &left);
- if (r < 0)
- return r;
- }
-
- /* Finally, we add in literally specified credentials. If the credentials already exist, we'll not
- * add them, so that they can act as a "default" if the same credential is specified multiple times. */
- HASHMAP_FOREACH(sc, context->set_credentials) {
- _cleanup_(erase_and_freep) void *plaintext = NULL;
- const char *data;
- size_t size, add;
-
- /* Note that we check ahead of time here instead of relying on O_EXCL|O_CREAT later to return
- * EEXIST if the credential already exists. That's because the TPM2-based decryption is kinda
- * slow and involved, hence it's nice to be able to skip that if the credential already
- * exists anyway. */
- if (faccessat(dfd, sc->id, F_OK, AT_SYMLINK_NOFOLLOW) >= 0)
- continue;
- if (errno != ENOENT)
- return log_debug_errno(errno, "Failed to test if credential %s exists: %m", sc->id);
-
- if (sc->encrypted) {
- r = decrypt_credential_and_warn(sc->id, now(CLOCK_REALTIME), NULL, NULL, sc->data, sc->size, &plaintext, &size);
- if (r < 0)
- return r;
-
- data = plaintext;
- } else {
- data = sc->data;
- size = sc->size;
- }
-
- add = strlen(sc->id) + size;
- if (add > left)
- return -E2BIG;
-
- r = write_credential(dfd, sc->id, data, size, uid, gid, ownership_ok);
- if (r < 0)
- return r;
-
- left -= add;
- }
-
- r = fd_acl_make_read_only(dfd); /* Now take away the "w" bit */
- if (r < 0)
- return r;
-
- /* After we created all keys with the right perms, also make sure the credential store as a whole is
- * accessible */
-
- if (uid_is_valid(uid) && uid != getuid()) {
- r = fd_add_uid_acl_permission(dfd, uid, ACL_READ | ACL_EXECUTE);
- if (r < 0) {
- if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
- return r;
-
- if (!ownership_ok)
- return r;
-
- if (fchown(dfd, uid, gid) < 0)
- return -errno;
- }
- }
-
- return 0;
-}
-
-static int setup_credentials_internal(
- const ExecContext *context,
- const ExecParameters *params,
- const char *unit,
- const char *final, /* This is where the credential store shall eventually end up at */
- const char *workspace, /* This is where we can prepare it before moving it to the final place */
- bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
- bool must_mount, /* Whether to require that we mount something, it's not OK to use the plain directory fall back */
- uid_t uid,
- gid_t gid) {
-
- int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
- * if we mounted something; false if we definitely can't mount anything */
- bool final_mounted;
- const char *where;
-
- assert(context);
- assert(final);
- assert(workspace);
-
- if (reuse_workspace) {
- r = path_is_mount_point(workspace, NULL, 0);
- if (r < 0)
- return r;
- if (r > 0)
- workspace_mounted = true; /* If this is already a mount, and we are supposed to reuse it, let's keep this in mind */
- else
- workspace_mounted = -1; /* We need to figure out if we can mount something to the workspace */
- } else
- workspace_mounted = -1; /* ditto */
-
- r = path_is_mount_point(final, NULL, 0);
- if (r < 0)
- return r;
- if (r > 0) {
- /* If the final place already has something mounted, we use that. If the workspace also has
- * something mounted we assume it's actually the same mount (but with MS_RDONLY
- * different). */
- final_mounted = true;
-
- if (workspace_mounted < 0) {
- /* If the final place is mounted, but the workspace isn't, then let's bind mount
- * the final version to the workspace, and make it writable, so that we can make
- * changes */
-
- r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
- if (r < 0)
- return r;
-
- r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
- if (r < 0)
- return r;
-
- workspace_mounted = true;
- }
- } else
- final_mounted = false;
-
- if (workspace_mounted < 0) {
- /* Nothing is mounted on the workspace yet, let's try to mount something now */
-
- r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
- if (r < 0) {
- /* If that didn't work, try to make a bind mount from the final to the workspace, so that we can make it writable there. */
- r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
- if (r < 0) {
- if (!ERRNO_IS_PRIVILEGE(r)) /* Propagate anything that isn't a permission problem */
- return r;
-
- if (must_mount) /* If we it's not OK to use the plain directory
- * fallback, propagate all errors too */
- return r;
-
- /* If we lack privileges to bind mount stuff, then let's gracefully
- * proceed for compat with container envs, and just use the final dir
- * as is. */
-
- workspace_mounted = false;
- } else {
- /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
- r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
- if (r < 0)
- return r;
-
- workspace_mounted = true;
- }
- } else
- workspace_mounted = true;
- }
-
- assert(!must_mount || workspace_mounted > 0);
- where = workspace_mounted ? workspace : final;
-
- (void) label_fix_full(AT_FDCWD, where, final, 0);
-
- r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
- if (r < 0)
- return r;
-
- if (workspace_mounted) {
- bool install;
-
- /* Determine if we should actually install the prepared mount in the final location by bind
- * mounting it there. We do so only if the mount is not established there already, and if the
- * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
- * case we are doing all this in a mount namespace, thus no one else will see that we
- * allocated a file system we are getting rid of again here. */
- if (final_mounted)
- install = false; /* already installed */
- else {
- r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
- if (r < 0)
- return r;
-
- install = r == 0; /* install only if non-empty */
- }
-
- if (install) {
- /* Make workspace read-only now, so that any bind mount we make from it defaults to read-only too */
- r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
- if (r < 0)
- return r;
-
- /* And mount it to the final place, read-only */
- r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
- } else
- /* Otherwise get rid of it */
- r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
- if (r < 0)
- return r;
- } else {
- _cleanup_free_ char *parent = NULL;
-
- /* If we do not have our own mount put used the plain directory fallback, then we need to
- * open access to the top-level credential directory and the per-service directory now */
-
- r = path_extract_directory(final, &parent);
- if (r < 0)
- return r;
- if (chmod(parent, 0755) < 0)
- return -errno;
- }
-
- return 0;
-}
-
-static int setup_credentials(
- const ExecContext *context,
- const ExecParameters *params,
- const char *unit,
- uid_t uid,
- gid_t gid) {
-
- _cleanup_free_ char *p = NULL, *q = NULL;
- int r;
-
- assert(context);
- assert(params);
-
- if (!exec_context_has_credentials(context))
- return 0;
-
- if (!params->prefix[EXEC_DIRECTORY_RUNTIME])
- return -EINVAL;
-
- /* This where we'll place stuff when we are done; this main credentials directory is world-readable,
- * and the subdir we mount over with a read-only file system readable by the service's user */
- q = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials");
- if (!q)
- return -ENOMEM;
-
- r = mkdir_label(q, 0755); /* top-level dir: world readable/searchable */
- if (r < 0 && r != -EEXIST)
- return r;
-
- p = path_join(q, unit);
- if (!p)
- return -ENOMEM;
-
- r = mkdir_label(p, 0700); /* per-unit dir: private to user */
- if (r < 0 && r != -EEXIST)
- return r;
-
- r = safe_fork("(sd-mkdcreds)", FORK_DEATHSIG|FORK_WAIT|FORK_NEW_MOUNTNS, NULL);
- if (r < 0) {
- _cleanup_free_ char *t = NULL, *u = NULL;
-
- /* If this is not a privilege or support issue then propagate the error */
- if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
- return r;
-
- /* Temporary workspace, that remains inaccessible all the time. We prepare stuff there before moving
- * it into place, so that users can't access half-initialized credential stores. */
- t = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "systemd/temporary-credentials");
- if (!t)
- return -ENOMEM;
-
- /* We can't set up a mount namespace. In that case operate on a fixed, inaccessible per-unit
- * directory outside of /run/credentials/ first, and then move it over to /run/credentials/
- * after it is fully set up */
- u = path_join(t, unit);
- if (!u)
- return -ENOMEM;
-
- FOREACH_STRING(i, t, u) {
- r = mkdir_label(i, 0700);
- if (r < 0 && r != -EEXIST)
- return r;
- }
-
- r = setup_credentials_internal(
- context,
- params,
- unit,
- p, /* final mount point */
- u, /* temporary workspace to overmount */
- true, /* reuse the workspace if it is already a mount */
- false, /* it's OK to fall back to a plain directory if we can't mount anything */
- uid,
- gid);
-
- (void) rmdir(u); /* remove the workspace again if we can. */
-
- if (r < 0)
- return r;
-
- } else if (r == 0) {
-
- /* We managed to set up a mount namespace, and are now in a child. That's great. In this case
- * we can use the same directory for all cases, after turning off propagation. Question
- * though is: where do we turn off propagation exactly, and where do we place the workspace
- * directory? We need some place that is guaranteed to be a mount point in the host, and
- * which is guaranteed to have a subdir we can mount over. /run/ is not suitable for this,
- * since we ultimately want to move the resulting file system there, i.e. we need propagation
- * for /run/ eventually. We could use our own /run/systemd/bind mount on itself, but that
- * would be visible in the host mount table all the time, which we want to avoid. Hence, what
- * we do here instead we use /dev/ and /dev/shm/ for our purposes. We know for sure that
- * /dev/ is a mount point and we now for sure that /dev/shm/ exists. Hence we can turn off
- * propagation on the former, and then overmount the latter.
- *
- * Yes it's nasty playing games with /dev/ and /dev/shm/ like this, since it does not exist
- * for this purpose, but there are few other candidates that work equally well for us, and
- * given that the we do this in a privately namespaced short-lived single-threaded process
- * that no one else sees this should be OK to do. */
-
- r = mount_nofollow_verbose(LOG_DEBUG, NULL, "/dev", NULL, MS_SLAVE|MS_REC, NULL); /* Turn off propagation from our namespace to host */
- if (r < 0)
- goto child_fail;
-
- r = setup_credentials_internal(
- context,
- params,
- unit,
- p, /* final mount point */
- "/dev/shm", /* temporary workspace to overmount */
- false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
- true, /* insist that something is mounted, do not allow fallback to plain directory */
- uid,
- gid);
- if (r < 0)
- goto child_fail;
-
- _exit(EXIT_SUCCESS);
-
- child_fail:
- _exit(EXIT_FAILURE);
- }
-
- /* If the credentials dir is empty and not a mount point, then there's no point in having it. Let's
- * try to remove it. This matters in particular if we created the dir as mount point but then didn't
- * actually end up mounting anything on it. In that case we'd rather have ENOENT than EACCESS being
- * seen by users when trying access this inode. */
- (void) rmdir(p);
- return 0;
-}
-
#if ENABLE_SMACK
static int setup_smack(
const Manager *manager,
return 0;
}
-int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_prefix, const char *unit) {
- _cleanup_free_ char *p = NULL;
-
- assert(c);
-
- if (!runtime_prefix || !unit)
- return 0;
-
- p = path_join(runtime_prefix, "credentials", unit);
- if (!p)
- return -ENOMEM;
-
- /* This is either a tmpfs/ramfs of its own, or a plain directory. Either way, let's first try to
- * unmount it, and afterwards remove the mount point */
- (void) umount2(p, MNT_DETACH|UMOUNT_NOFOLLOW);
- (void) rm_rf(p, REMOVE_ROOT|REMOVE_CHMOD);
-
- return 0;
-}
-
int exec_context_destroy_mount_ns_dir(Unit *u) {
_cleanup_free_ char *p = NULL;
return 0;
}
-bool exec_context_has_encrypted_credentials(ExecContext *c) {
- ExecLoadCredential *load_cred;
- ExecSetCredential *set_cred;
-
- assert(c);
-
- HASHMAP_FOREACH(load_cred, c->load_credentials)
- if (load_cred->encrypted)
- return true;
-
- HASHMAP_FOREACH(set_cred, c->set_credentials)
- if (set_cred->encrypted)
- return true;
-
- return false;
-}
-
void exec_status_start(ExecStatus *s, pid_t pid) {
assert(s);
p->exec_fd = safe_close(p->exec_fd);
}
-ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc) {
- if (!sc)
- return NULL;
-
- free(sc->id);
- free(sc->data);
- return mfree(sc);
-}
-
-ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc) {
- if (!lc)
- return NULL;
-
- free(lc->id);
- free(lc->path);
- return mfree(lc);
-}
-
void exec_directory_done(ExecDirectory *d) {
if (!d)
return;
return 1U << t;
}
-DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(exec_set_credential_hash_ops, char, string_hash_func, string_compare_func, ExecSetCredential, exec_set_credential_free);
-DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(exec_load_credential_hash_ops, char, string_hash_func, string_compare_func, ExecLoadCredential, exec_load_credential_free);
-
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
[EXEC_INPUT_NULL] = "null",
[EXEC_INPUT_TTY] = "tty",
_EXEC_CLEAN_MASK_INVALID = -EINVAL,
} ExecCleanMask;
-/* A credential configured with LoadCredential= */
-typedef struct ExecLoadCredential {
- char *id, *path;
- bool encrypted;
-} ExecLoadCredential;
-
-/* A credential configured with SetCredential= */
-typedef struct ExecSetCredential {
- char *id;
- bool encrypted;
- void *data;
- size_t size;
-} ExecSetCredential;
-
/* Encodes configuration parameters applied to invoked commands. Does not carry runtime data, but only configuration
* changes sourced from unit files and suchlike. ExecContext objects are usually embedded into Unit objects, and do not
* change after being loaded. */
void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix);
int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_root);
-int exec_context_destroy_credentials(const ExecContext *c, const char *runtime_root, const char *unit);
int exec_context_destroy_mount_ns_dir(Unit *u);
const char* exec_context_fdname(const ExecContext *c, int fd_index);
bool exec_context_may_touch_console(const ExecContext *c);
bool exec_context_maintains_privileges(const ExecContext *c);
-bool exec_context_has_encrypted_credentials(ExecContext *c);
-bool exec_context_has_credentials(const ExecContext *context);
int exec_context_get_effective_ioprio(const ExecContext *c);
bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c);
-ExecSetCredential *exec_set_credential_free(ExecSetCredential *sc);
-DEFINE_TRIVIAL_CLEANUP_FUNC(ExecSetCredential*, exec_set_credential_free);
-
-ExecLoadCredential *exec_load_credential_free(ExecLoadCredential *lc);
-DEFINE_TRIVIAL_CLEANUP_FUNC(ExecLoadCredential*, exec_load_credential_free);
-
void exec_directory_done(ExecDirectory *d);
int exec_directory_add(ExecDirectory *d, const char *path, const char *symlink);
void exec_directory_sort(ExecDirectory *d);
ExecCleanMask exec_clean_mask_from_string(const char *s);
-extern const struct hash_ops exec_set_credential_hash_ops;
-extern const struct hash_ops exec_load_credential_hash_ops;
-
const char* exec_output_to_string(ExecOutput i) _const_;
ExecOutput exec_output_from_string(const char *s) _pure_;
#include "conf-parser.h"
#include "core-varlink.h"
#include "cpu-set-util.h"
+#include "credential.h"
#include "creds-util.h"
#include "env-util.h"
#include "errno-list.h"
'bpf-socket-bind.c',
'cgroup.c',
'core-varlink.c',
+ 'credential.c',
'dbus-automount.c',
'dbus-cgroup.c',
'dbus-device.c',
#include "cgroup-util.h"
#include "chase.h"
#include "core-varlink.h"
+#include "credential.h"
#include "dbus-unit.h"
#include "dbus.h"
#include "dropin.h"