From: Mike Yuan Date: Sun, 14 Dec 2025 13:31:30 +0000 (+0100) Subject: core/exec-credential: introduce unit_refresh_credentials() X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=65ea71ead72a456d7ede35e38f812c44f6d6b3f0;p=thirdparty%2Fsystemd.git core/exec-credential: introduce unit_refresh_credentials() --- diff --git a/src/core/exec-credential.c b/src/core/exec-credential.c index 3766096a416..07d83ac7a6a 100644 --- a/src/core/exec-credential.c +++ b/src/core/exec-credential.c @@ -16,19 +16,25 @@ #include "iovec-util.h" #include "label-util.h" #include "log.h" +#include "manager.h" #include "mkdir-label.h" #include "mount-util.h" #include "mountpoint-util.h" +#include "namespace-util.h" #include "ordered-set.h" #include "path-lookup.h" #include "path-util.h" +#include "pidref.h" +#include "process-util.h" #include "random-util.h" #include "recurse-dir.h" #include "rm-rf.h" #include "siphash24.h" +#include "socket-util.h" #include "stat-util.h" #include "string-util.h" #include "strv.h" +#include "unit.h" #include "user-util.h" ExecSetCredential* exec_set_credential_free(ExecSetCredential *sc) { @@ -1158,3 +1164,187 @@ int exec_setup_credentials( return r; } + +static int refresh_credentials_in_namespace_child(int cfd, const char *cred_dir) { + int r; + + assert(cfd >= 0); + assert(cred_dir); + + /* Paranoia: before doing anything, check if the credentials tree inside the mountns is available. + * + * Note that setup_namespace() always installs a mount for cred dir, hence path_is_mount_point() + * is the appropriate check here. */ + r = path_is_mount_point(cred_dir); + if (IN_SET(r, 0, -ENOENT)) { + log_full_errno_zerook(LOG_WARNING, r, + "Credentials tree in the unit mount namespace is masked, skipping refresh."); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to check whether '%s' is a mountpoint in unit mount namespace: %m", + cred_dir); + + /* Inform the parent that we're good to go */ + ssize_t n = write(cfd, &r, sizeof(r)); + if (n < 0) + return log_error_errno(errno, "Failed to write to socket: %m"); + + _cleanup_close_ int mfd = receive_one_fd(cfd, /* flags = */ 0); + if (mfd < 0) + return log_error_errno(mfd, "Failed to receive credentials tree fd from socket: %m"); + + r = mount_exchange_graceful(mfd, cred_dir, /* mount_beneath = */ true); + if (r < 0) + return log_error_errno(r, "Failed to update credentials mount in namespace: %m"); + + return 1; +} + +int unit_refresh_credentials(Unit *u) { + _cleanup_free_ char *cred_dir = NULL; + int r; + + /* Refresh the credentials for a unit, potentially forking off a second process to join the mountns + * if needed. Returns > 0 on successful refresh, == 0 if the credentials tree is masked and the operation + * is skipped. */ + + assert(u); + assert(u->manager); + + r = get_credential_directory(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], u->id, &cred_dir); + if (r < 0) + return log_oom(); + assert(r > 0); + + if (access(cred_dir, F_OK) < 0) { + if (errno == ENOENT) { + log_warning_errno(errno, "Requested to refresh credentials, but credentials aren't populated, skipping."); + return 0; + } + + return log_error_errno(errno, "Failed to check if credentials dir '%s' exists: %m", cred_dir); + } + + _cleanup_close_pair_ int tunnel_fds[2] = EBADF_PAIR; + _cleanup_(pidref_done) PidRef child = PIDREF_NULL; + _cleanup_close_ int userns_fd = -EBADF; + + PidRef *main_pid = unit_main_pid(u); + if (pidref_is_set(main_pid)) { + _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF, pidns_fd = -EBADF; + + r = pidref_namespace_open(main_pid, + &pidns_fd, + &mntns_fd, + /* ret_netns_fd = */ NULL, + MANAGER_IS_USER(u->manager) ? &userns_fd : NULL, + &root_fd); + if (r < 0) + return log_error_errno(r, "Failed to open namespace of unit main process '" PID_FMT "': %m", + main_pid->pid); + + r = is_our_namespace(mntns_fd, NAMESPACE_MOUNT); + if (r < 0) + return log_error_errno(r, "Failed to check if main process resides in a separate mount namespace: %m"); + if (r == 0) { + if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, tunnel_fds) < 0) + return log_error_errno(errno, "Failed to allocate socket pair: %m"); + + r = namespace_fork_full("(sd-creds-ns)", "(sd-creds-ns-inner)", + (int[]) { tunnel_fds[1] }, 1, + FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG, + pidns_fd, mntns_fd, /* netns_fd = */ -EBADF, userns_fd, root_fd, + /* delegated = */ MANAGER_IS_USER(u->manager), + &child); + if (r < 0) + return log_full_errno(ERRNO_IS_NEG_PRIVILEGE(r) ? LOG_WARNING : LOG_ERR, r, + "Failed to fork off process into unit namespace to refresh credentials: %m"); + if (r == 0) { + r = refresh_credentials_in_namespace_child(tunnel_fds[1], cred_dir); + report_errno_and_exit(tunnel_fds[1], r); + } + + tunnel_fds[1] = safe_close(tunnel_fds[1]); + + /* Wait for the child to validate the creds tree in the unit namespace is populated. */ + ssize_t n = read(tunnel_fds[0], &r, sizeof(r)); + if (n < 0) + return log_error_errno(errno, "Failed to read from socket: %m"); + if (!IN_SET(n, 0, sizeof(r))) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Received unexpected amount of bytes (%zi) while reading errno.", n); + if (n == 0 || r == 0) { + /* The child exited without sending anything or 0 is received signifying + * the credentials are masked? Check the exit status to be sure. */ + r = pidref_wait_for_terminate_and_check("(sd-creds-ns)", &child, WAIT_LOG); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) + return -EPROTO; + + return 0; /* skipped */ + } + if (r < 0) + return r; + + /* Yay! Got > 0 from child indicating all good, proceed with refreshing. */ + } + } + + SetupCredentialsContext ctx = { + .scope = u->manager->runtime_scope, + .exec_context = ASSERT_PTR(unit_get_exec_context(u)), + .unit = u->id, + + .runtime_prefix = u->manager->prefix[EXEC_DIRECTORY_RUNTIME], + .received_credentials_directory = u->manager->received_credentials_directory, + .received_encrypted_credentials_directory = u->manager->received_encrypted_credentials_directory, + + .always_ipc = false, /* we don't migrate to unit cgroup, hence cannot be restricted by cgroup bpf */ + + .uid = u->ref_uid, + .gid = u->ref_gid, + }; + + r = setup_credentials_internal(&ctx, /* may_reuse = */ false, cred_dir); + if (r < 0) + return log_error_errno(r, "Failed to set up credentials: %m"); + + /* The main process doesn't run in a mountns hence nothing got forked off? Then we're all set. */ + if (!pidref_is_set(&child)) + return 1; + + if (userns_fd >= 0) { + assert(MANAGER_IS_USER(u->manager)); + + /* Enter the unit userns now and unshare mountns, so that we have permissions to clone + * the mount tree using open_tree() */ + + if (setns(userns_fd, CLONE_NEWUSER) < 0) + return log_error_errno(errno, "Failed to enter user namespace: %m"); + + if (unshare(CLONE_NEWNS) < 0) + return log_error_errno(errno, "Failed to unshare mount namespace: %m"); + } + + _cleanup_close_ int tfd = open_tree(AT_FDCWD, cred_dir, OPEN_TREE_CLONE|OPEN_TREE_CLOEXEC|AT_SYMLINK_NOFOLLOW); + if (tfd < 0) + return log_error_errno(errno, "Failed to clone mount tree at '%s': %m", cred_dir); + + r = send_one_fd(tunnel_fds[0], tfd, /* flags = */ 0); + if (r < 0) + return log_error_errno(r, "Failed to send mount fd to child: %m"); + + r = pidref_wait_for_terminate_and_check("(sd-creds-ns)", &child, WAIT_LOG_ABNORMAL); + if (r < 0) + return r; + if (r != EXIT_SUCCESS) { + r = read_errno(tunnel_fds[0]); + if (r < 0) + return r; + + return -EPROTO; + } + + return 1; +} diff --git a/src/core/exec-credential.h b/src/core/exec-credential.h index e7e60a9e024..89fd12e5bd5 100644 --- a/src/core/exec-credential.h +++ b/src/core/exec-credential.h @@ -60,4 +60,6 @@ int exec_setup_credentials( uid_t uid, gid_t gid); +int unit_refresh_credentials(Unit *u); + bool mount_point_is_credentials(const char *runtime_prefix, const char *path);