-/* SPDX-License-Identifier: LGPL-2.1+ */
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <errno.h>
#include <fcntl.h>
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "random-util.h"
#include "rlimit-util.h"
#include "rm-rf.h"
#if HAVE_SECCOMP
#include "terminal-util.h"
#include "tmpfile-util.h"
#include "umask-util.h"
-#include "unit.h"
+#include "unit-serialize.h"
#include "user-util.h"
#include "utmp-wtmp.h"
if (e == EXEC_OUTPUT_NAMED_FD)
return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
- if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
+ if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND, EXEC_OUTPUT_FILE_TRUNCATE))
return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
return true;
return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
case EXEC_OUTPUT_FILE:
- case EXEC_OUTPUT_FILE_APPEND: {
+ case EXEC_OUTPUT_FILE_APPEND:
+ case EXEC_OUTPUT_FILE_TRUNCATE: {
bool rw;
int fd, flags;
flags = O_WRONLY;
if (o == EXEC_OUTPUT_FILE_APPEND)
flags |= O_APPEND;
+ else if (o == EXEC_OUTPUT_FILE_TRUNCATE)
+ flags |= O_TRUNC;
fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
if (fd < 0)
* termination */
barrier_set_role(&barrier, BARRIER_CHILD);
- /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds
- * are open here that have been opened by PAM. */
+ /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only
+ * those fds are open here that have been opened by PAM. */
(void) close_many(fds, n_fds);
/* Drop privileges - we don't need any to pam_close_session
assert(p);
assert(ret);
-#define N_ENV_VARS 16
+#define N_ENV_VARS 17
our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
if (!our_env)
return -ENOMEM;
our_env[n_env++] = x;
}
+ if (asprintf(&x, "SYSTEMD_EXEC_PID=" PID_FMT, getpid_cached()) < 0)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+
our_env[n_env++] = NULL;
assert(n_env <= N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
#undef N_ENV_VARS
return 0;
}
-static bool exec_needs_mount_namespace(
+bool exec_needs_mount_namespace(
const ExecContext *context,
const ExecParameters *params,
const ExecRuntime *runtime) {
assert(context);
- assert(params);
if (context->root_image)
return true;
if (!strv_isempty(context->read_write_paths) ||
!strv_isempty(context->read_only_paths) ||
- !strv_isempty(context->inaccessible_paths))
+ !strv_isempty(context->inaccessible_paths) ||
+ !strv_isempty(context->exec_paths) ||
+ !strv_isempty(context->no_exec_paths))
return true;
if (context->n_bind_mounts > 0)
return true;
for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
- if (!params->prefix[t])
+ if (params && !params->prefix[t])
continue;
if (!strv_isempty(context->directories[t].paths))
static int acquire_credentials(
const ExecContext *context,
const ExecParameters *params,
+ const char *unit,
const char *p,
uid_t uid,
bool ownership_ok) {
STRV_FOREACH_PAIR(id, fn, context->load_credentials) {
ReadFullFileFlags flags = READ_FULL_FILE_SECURE;
_cleanup_(erase_and_freep) char *data = NULL;
- _cleanup_free_ char *j = NULL;
+ _cleanup_free_ char *j = NULL, *bindname = NULL;
const char *source;
size_t size, add;
/* If this is an absolute path, read the data directly from it, and support AF_UNIX sockets */
source = *fn;
flags |= READ_FULL_FILE_CONNECT_SOCKET;
+
+ /* Pass some minimal info about the unit and the credential name we are looking to acquire
+ * via the source socket address in case we read off an AF_UNIX socket. */
+ if (asprintf(&bindname, "@%" PRIx64"/unit/%s/%s", random_u64(), unit, *id) < 0)
+ return -ENOMEM;
+
} else if (params->received_credentials) {
/* If this is a relative path, take it relative to the credentials we received
* ourselves. We don't support the AF_UNIX stuff in this mode, since we are operating
} else
source = NULL;
+
if (source)
- r = read_full_file_full(AT_FDCWD, source, flags, &data, &size);
+ r = read_full_file_full(AT_FDCWD, source, UINT64_MAX, SIZE_MAX, flags, bindname, &data, &size);
else
r = -ENOENT;
if (r == -ENOENT &&
static int setup_credentials_internal(
const ExecContext *context,
const ExecParameters *params,
+ const char *unit,
const char *final, /* This is where the credential store shall eventually end up at */
const char *workspace, /* This is where we can prepare it before moving it to the final place */
bool reuse_workspace, /* Whether to reuse any existing workspace mount if it already is a mount */
assert(!must_mount || workspace_mounted > 0);
where = workspace_mounted ? workspace : final;
- r = acquire_credentials(context, params, where, uid, workspace_mounted);
+ r = acquire_credentials(context, params, unit, where, uid, workspace_mounted);
if (r < 0)
return r;
r = setup_credentials_internal(
context,
params,
+ unit,
p, /* final mount point */
u, /* temporary workspace to overmount */
true, /* reuse the workspace if it is already a mount */
r = setup_credentials_internal(
context,
params,
+ unit,
p, /* final mount point */
"/dev/shm", /* temporary workspace to overmount */
false, /* do not reuse /dev/shm if it is already a mount, under no circumstances */
#if ENABLE_SMACK
static int setup_smack(
const ExecContext *context,
- const char *executable) {
+ int executable_fd) {
int r;
assert(context);
- assert(executable);
+ assert(executable_fd >= 0);
if (context->smack_process_label) {
r = mac_smack_apply_pid(0, context->smack_process_label);
else {
_cleanup_free_ char *exec_label = NULL;
- r = mac_smack_read(executable, SMACK_ATTR_EXEC, &exec_label);
+ r = mac_smack_read_fd(executable_fd, SMACK_ATTR_EXEC, &exec_label);
if (r < 0 && !IN_SET(r, -ENODATA, -EOPNOTSUPP))
return r;
_cleanup_strv_free_ char **empty_directories = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
const char *root_dir = NULL, *root_image = NULL;
- _cleanup_free_ char *creds_path = NULL;
+ _cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL;
NamespaceInfo ns_info;
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
if (context->mount_flags == MS_SHARED)
log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
- if (exec_context_has_credentials(context) && params->prefix[EXEC_DIRECTORY_RUNTIME]) {
+ if (exec_context_has_credentials(context) &&
+ params->prefix[EXEC_DIRECTORY_RUNTIME] &&
+ FLAGS_SET(params->flags, EXEC_WRITE_CREDENTIALS)) {
creds_path = path_join(params->prefix[EXEC_DIRECTORY_RUNTIME], "credentials", u->id);
if (!creds_path) {
r = -ENOMEM;
}
}
+ if (MANAGER_IS_SYSTEM(u->manager)) {
+ propagate_dir = path_join("/run/systemd/propagate/", u->id);
+ if (!propagate_dir)
+ return -ENOMEM;
+ incoming_dir = strdup("/run/systemd/incoming");
+ if (!incoming_dir)
+ return -ENOMEM;
+ }
+
r = setup_namespace(root_dir, root_image, context->root_image_options,
&ns_info, context->read_write_paths,
needs_sandboxing ? context->read_only_paths : NULL,
needs_sandboxing ? context->inaccessible_paths : NULL,
+ needs_sandboxing ? context->exec_paths : NULL,
+ needs_sandboxing ? context->no_exec_paths : NULL,
empty_directories,
bind_mounts,
n_bind_mounts,
context->root_hash, context->root_hash_size, context->root_hash_path,
context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
context->root_verity,
+ propagate_dir,
+ incoming_dir,
+ root_dir || root_image ? params->notify_socket : NULL,
DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
error_path);
const DynamicCreds *dcreds,
int user_lookup_fd,
int socket_fd,
- int exec_fd,
const int *fds, size_t n_fds) {
size_t n_dont_close = 0;
if (socket_fd >= 0)
dont_close[n_dont_close++] = socket_fd;
- if (exec_fd >= 0)
- dont_close[n_dont_close++] = exec_fd;
if (n_fds > 0) {
memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
n_dont_close += n_fds;
return c->cpu_affinity_from_numa;
}
+/* Appends fd to the fds[] array, which has room for fds_size entries of which *n_fds are in use.
+ * If fd is numerically below 3 + *n_fds it is first duplicated (via F_DUPFD_CLOEXEC) to a number at
+ * or above that threshold, and the duplicate is what gets stored.
+ *
+ * Returns 0 with *ret_fd set to -1 when fd is negative (nothing is appended), 1 after storing the
+ * fd in both fds[] and *ret_fd and incrementing *n_fds, or -errno if the duplication fails. */
+static int add_shifted_fd(int *fds, size_t fds_size, size_t *n_fds, int fd, int *ret_fd) {
+        assert(fds);
+        assert(n_fds);
+        assert(*n_fds < fds_size);
+        assert(ret_fd);
+
+        if (fd < 0) {
+                /* No fd was supplied: report that and leave the array untouched. */
+                *ret_fd = -1;
+                return 0;
+        }
+
+        int min_fd = 3 + (int) *n_fds;
+
+        if (fd < min_fd) {
+                /* Let's move the fd up, so that it's outside of the fd range we will use to store
+                 * the fds we pass to the process (or which are closed only during execve). */
+                int moved = fcntl(fd, F_DUPFD_CLOEXEC, min_fd);
+                if (moved < 0)
+                        return -errno;
+
+                CLOSE_AND_REPLACE(fd, moved);
+        }
+
+        fds[*n_fds] = fd;
+        *ret_fd = fd;
+        (*n_fds)++;
+
+        return 1;
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
int *exit_status) {
_cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
- int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
+ int r, ngids = 0, exec_fd;
_cleanup_free_ gid_t *supplementary_gids = NULL;
const char *username = NULL, *groupname = NULL;
_cleanup_free_ char *home_buffer = NULL;
gid_t saved_gid = getgid();
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
- size_t n_fds;
+ size_t n_fds = n_socket_fds + n_storage_fds, /* fds to pass to the child */
+ n_keep_fds; /* total number of fds not to close */
int secure_bits;
_cleanup_free_ gid_t *gids_after_pam = NULL;
int ngids_after_pam = 0;
/* In case anything used libc syslog(), close this here, too */
closelog();
- n_fds = n_socket_fds + n_storage_fds;
- r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
+ int keep_fds[n_fds + 2];
+ memcpy_safe(keep_fds, fds, n_fds * sizeof(int));
+ n_keep_fds = n_fds;
+
+ r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, params->exec_fd, &exec_fd);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
+ }
+
+ r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, keep_fds, n_keep_fds);
if (r < 0) {
*exit_status = EXIT_FDS;
return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
if (r < 0) {
*exit_status = EXIT_USER;
- if (r == -EILSEQ) {
- log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
- return -EOPNOTSUPP;
- }
+ if (r == -EILSEQ)
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP),
+ "Failed to update dynamic user credentials: User or group with specified name already exists.");
return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
}
if (!uid_is_valid(uid)) {
*exit_status = EXIT_USER;
- log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
- return -ESRCH;
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(ESRCH), "UID validation failed for \""UID_FMT"\"", uid);
}
if (!gid_is_valid(gid)) {
*exit_status = EXIT_USER;
- log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
- return -ESRCH;
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(ESRCH), "GID validation failed for \""GID_FMT"\"", gid);
}
if (dcreds->user)
/* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
* wins here. (See above.) */
+ /* All fds passed in the fds array will be closed in the pam child process. */
r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
if (r < 0) {
*exit_status = EXIT_PAM;
* shall execute. */
_cleanup_free_ char *executable = NULL;
- r = find_executable_full(command->path, false, &executable);
+ _cleanup_close_ int executable_fd = -1;
+ r = find_executable_full(command->path, false, &executable, &executable_fd);
if (r < 0) {
if (r != -ENOMEM && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
log_struct_errno(LOG_INFO, r,
"EXECUTABLE=%s", command->path);
}
+ r = add_shifted_fd(keep_fds, ELEMENTSOF(keep_fds), &n_keep_fds, executable_fd, &executable_fd);
+ if (r < 0) {
+ *exit_status = EXIT_FDS;
+ return log_unit_error_errno(unit, r, "Failed to shift fd and set FD_CLOEXEC: %m");
+ }
+
#if HAVE_SELINUX
if (needs_sandboxing && use_selinux && params->selinux_context_net && socket_fd >= 0) {
r = mac_selinux_get_child_mls_label(socket_fd, executable, context->selinux_context, &mac_selinux_context_net);
* more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
* however if we have it as we want to keep it open until the final execve(). */
- if (params->exec_fd >= 0) {
- exec_fd = params->exec_fd;
-
- if (exec_fd < 3 + (int) n_fds) {
- int moved_fd;
-
- /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
- * process we are about to execute. */
-
- moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
- if (moved_fd < 0) {
- *exit_status = EXIT_FDS;
- return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
- }
-
- CLOSE_AND_REPLACE(exec_fd, moved_fd);
- } else {
- /* This fd should be FD_CLOEXEC already, but let's make sure. */
- r = fd_cloexec(exec_fd, true);
- if (r < 0) {
- *exit_status = EXIT_FDS;
- return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
- }
- }
-
- fds_with_exec_fd = newa(int, n_fds + 1);
- memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
- fds_with_exec_fd[n_fds] = exec_fd;
- n_fds_with_exec_fd = n_fds + 1;
- } else {
- fds_with_exec_fd = fds;
- n_fds_with_exec_fd = n_fds;
- }
-
- r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
+ r = close_all_fds(keep_fds, n_keep_fds);
if (r >= 0)
r = shift_fds(fds, n_fds);
if (r >= 0)
/* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
* process. This is the latest place before dropping capabilities. Other MAC context are set later. */
if (use_smack) {
- r = setup_smack(context, executable);
+ r = setup_smack(context, executable_fd);
if (r < 0) {
*exit_status = EXIT_SMACK_PROCESS_LABEL;
return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
}
}
- execve(executable, final_argv, accum_env);
- r = -errno;
+ r = fexecve_or_execve(executable_fd, executable, final_argv, accum_env);
if (exec_fd >= 0) {
uint8_t hot = 0;
context->std_output == EXEC_OUTPUT_SOCKET ||
context->std_error == EXEC_OUTPUT_SOCKET) {
- if (params->n_socket_fds > 1) {
- log_unit_error(unit, "Got more than one socket.");
- return -EINVAL;
- }
+ if (params->n_socket_fds > 1)
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EINVAL), "Got more than one socket.");
- if (params->n_socket_fds == 0) {
- log_unit_error(unit, "Got no socket.");
- return -EINVAL;
- }
+ if (params->n_socket_fds == 0)
+ return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EINVAL), "Got no socket.");
socket_fd = params->fds[0];
} else {
r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
if (r < 0)
return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+
+ /* Normally we would not propagate the oomd xattrs to children but since we created this
+ * sub-cgroup internally we should do it. */
+ cgroup_oomd_xattr_apply(unit, subcgroup_path);
}
}
c->read_only_paths = strv_free(c->read_only_paths);
c->read_write_paths = strv_free(c->read_write_paths);
c->inaccessible_paths = strv_free(c->inaccessible_paths);
+ c->exec_paths = strv_free(c->exec_paths);
+ c->no_exec_paths = strv_free(c->no_exec_paths);
bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
c->bind_mounts = NULL;
}
void exec_command_done_array(ExecCommand *c, size_t n) {
- size_t i;
-
- for (i = 0; i < n; i++)
+ for (size_t i = 0; i < n; i++) /* call exec_command_done() on each of the n entries of c, in index order */
exec_command_done(c+i);
}
fprintf(f, " %s", *g);
}
+/* Writes a single "<prefix><name>:" line to f, followed by the entries of strv as formatted by
+ * strv_fprintf() and a newline. Nothing is written when strv is empty.
+ *
+ * The prefix must come before the name: every other line emitted by exec_context_dump() starts
+ * with the prefix, and so did the open-coded blocks this helper replaces. */
+static void strv_dump(FILE* f, const char *prefix, const char *name, char **strv) {
+        assert(f);
+        assert(prefix);
+        assert(name);
+
+        if (strv_isempty(strv))
+                return;
+
+        fprintf(f, "%s%s:", prefix, name);
+        strv_fprintf(f, strv);
+        fputs("\n", f);
+}
+
void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
int r;
fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
+ if (c->std_output == EXEC_OUTPUT_FILE_TRUNCATE)
+ fprintf(f, "%sStandardOutputFileToTruncate: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
if (c->std_error == EXEC_OUTPUT_FILE)
fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
+ if (c->std_error == EXEC_OUTPUT_FILE_TRUNCATE)
+ fprintf(f, "%sStandardErrorFileToTruncate: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
if (c->tty_path)
fprintf(f,
fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
- if (!strv_isempty(c->supplementary_groups)) {
- fprintf(f, "%sSupplementaryGroups:", prefix);
- strv_fprintf(f, c->supplementary_groups);
- fputs("\n", f);
- }
+ strv_dump(f, prefix, "SupplementaryGroups", c->supplementary_groups);
if (c->pam_name)
fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
- if (!strv_isempty(c->read_write_paths)) {
- fprintf(f, "%sReadWritePaths:", prefix);
- strv_fprintf(f, c->read_write_paths);
- fputs("\n", f);
- }
-
- if (!strv_isempty(c->read_only_paths)) {
- fprintf(f, "%sReadOnlyPaths:", prefix);
- strv_fprintf(f, c->read_only_paths);
- fputs("\n", f);
- }
-
- if (!strv_isempty(c->inaccessible_paths)) {
- fprintf(f, "%sInaccessiblePaths:", prefix);
- strv_fprintf(f, c->inaccessible_paths);
- fputs("\n", f);
- }
+ strv_dump(f, prefix, "ReadWritePaths", c->read_write_paths);
+ strv_dump(f, prefix, "ReadOnlyPaths", c->read_only_paths);
+ strv_dump(f, prefix, "InaccessiblePaths", c->inaccessible_paths);
+ strv_dump(f, prefix, "ExecPaths", c->exec_paths);
+ strv_dump(f, prefix, "NoExecPaths", c->no_exec_paths);
for (size_t i = 0; i < c->n_bind_mounts; i++)
fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
/* tmp_dir, var_tmp_dir, netns_storage_socket fds are donated on success */
- r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
- if (r < 0)
- return r;
-
r = exec_runtime_allocate(&rt, id);
if (r < 0)
return r;
- r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
+ r = hashmap_ensure_put(&m->exec_runtime_by_id, &string_hash_ops, rt->id, rt);
if (r < 0)
return r;
[EXEC_OUTPUT_NAMED_FD] = "fd",
[EXEC_OUTPUT_FILE] = "file",
[EXEC_OUTPUT_FILE_APPEND] = "append",
+ [EXEC_OUTPUT_FILE_TRUNCATE] = "truncate",
};
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);