#include "cap-list.h"
#include "capability-util.h"
#include "cgroup-setup.h"
-#include "chase-symlinks.h"
+#include "chase.h"
#include "chown-recursive.h"
#include "constants.h"
#include "cpu-set-util.h"
#include "memory-util.h"
#include "missing_fs.h"
#include "missing_ioprio.h"
+#include "missing_prctl.h"
#include "mkdir-label.h"
#include "mount-util.h"
#include "mountpoint-util.h"
#include "namespace.h"
#include "parse-util.h"
#include "path-util.h"
+#include "proc-cmdline.h"
#include "process-util.h"
+#include "psi-util.h"
#include "random-util.h"
#include "recurse-dir.h"
#include "rlimit-util.h"
return "/dev/console";
}
+static int exec_context_tty_size(const ExecContext *context, unsigned *ret_rows, unsigned *ret_cols) {
+ _cleanup_free_ char *rowskey = NULL, *rowsvalue = NULL, *colskey = NULL, *colsvalue = NULL;
+ unsigned rows, cols;
+ const char *tty;
+ int r;
+ /* Works out the terminal size to use for this context. The configured context->tty_rows and
+  * context->tty_cols are the starting point; for a dimension that is still UINT_MAX the kernel command
+  * line keys "systemd.tty.rows.<tty>" and "systemd.tty.columns.<tty>" are consulted. Failures to read or
+  * parse the command line are logged at debug level and not propagated. Returns 0 with *ret_rows and
+  * *ret_cols filled in, or -ENOMEM (outputs not written) if a key string cannot be allocated. */
+ assert(context);
+ assert(ret_rows);
+ assert(ret_cols);
+
+ rows = context->tty_rows;
+ cols = context->tty_cols;
+
+ tty = exec_context_tty_path(context);
+ if (!tty || (rows != UINT_MAX && cols != UINT_MAX)) { /* no TTY path, or both dimensions already set: nothing to look up */
+ *ret_rows = rows;
+ *ret_cols = cols;
+ return 0;
+ }
+ /* The TTY name (after skip_dev_prefix()) becomes the suffix of the command line keys built below, so
+  * only names consisting purely of ALPHANUMERICAL characters are accepted; anything else is logged and
+  * the configured values are returned unchanged. */
+ tty = skip_dev_prefix(tty);
+ if (!in_charset(tty, ALPHANUMERICAL)) {
+ log_debug("%s contains non-alphanumeric characters, ignoring", tty);
+ *ret_rows = rows;
+ *ret_cols = cols;
+ return 0;
+ }
+
+ rowskey = strjoin("systemd.tty.rows.", tty);
+ if (!rowskey)
+ return -ENOMEM;
+
+ colskey = strjoin("systemd.tty.columns.", tty);
+ if (!colskey)
+ return -ENOMEM;
+
+ r = proc_cmdline_get_key_many(/* flags = */ 0,
+ rowskey, &rowsvalue,
+ colskey, &colsvalue);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read TTY size of %s from kernel cmdline, ignoring: %m", tty);
+ /* A failed lookup is deliberately not fatal; only values that were actually returned are parsed below,
+  * and only for the dimension that is still UINT_MAX. */
+ if (rows == UINT_MAX && rowsvalue) {
+ r = safe_atou(rowsvalue, &rows);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse %s=%s, ignoring: %m", rowskey, rowsvalue);
+ }
+
+ if (cols == UINT_MAX && colsvalue) {
+ r = safe_atou(colsvalue, &cols);
+ if (r < 0)
+ log_debug_errno(r, "Failed to parse %s=%s, ignoring: %m", colskey, colsvalue);
+ }
+
+ *ret_rows = rows;
+ *ret_cols = cols;
+
+ return 0;
+}
+
static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
const char *path;
(void) reset_terminal(path);
}
- if (p && p->stdin_fd >= 0)
- (void) terminal_set_size_fd(p->stdin_fd, path, context->tty_rows, context->tty_cols);
+ if (p && p->stdin_fd >= 0) {
+ unsigned rows = context->tty_rows, cols = context->tty_cols;
+
+ (void) exec_context_tty_size(context, &rows, &cols);
+ (void) terminal_set_size_fd(p->stdin_fd, path, rows, cols);
+ }
if (context->tty_vt_disallocate && path)
(void) vt_disallocate(path);
/* Try to make this the controlling tty, if it is a tty, and reset it */
if (isatty(STDIN_FILENO)) {
+ unsigned rows = context->tty_rows, cols = context->tty_cols;
+
+ (void) exec_context_tty_size(context, &rows, &cols);
(void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
(void) reset_terminal_fd(STDIN_FILENO, true);
- (void) terminal_set_size_fd(STDIN_FILENO, NULL, context->tty_rows, context->tty_cols);
+ (void) terminal_set_size_fd(STDIN_FILENO, NULL, rows, cols);
}
return STDIN_FILENO;
case EXEC_INPUT_TTY:
case EXEC_INPUT_TTY_FORCE:
case EXEC_INPUT_TTY_FAIL: {
+ unsigned rows, cols;
int fd;
fd = acquire_terminal(exec_context_tty_path(context),
if (fd < 0)
return fd;
- r = terminal_set_size_fd(fd, exec_context_tty_path(context), context->tty_rows, context->tty_cols);
+ r = exec_context_tty_size(context, &rows, &cols);
+ if (r < 0)
+ return r;
+
+ r = terminal_set_size_fd(fd, exec_context_tty_path(context), rows, cols);
if (r < 0)
return r;
int *ret_saved_stdout) {
_cleanup_close_ int fd = -EBADF, saved_stdin = -EBADF, saved_stdout = -EBADF;
+ unsigned rows, cols;
int r;
assert(ret_saved_stdin);
if (r < 0)
return r;
- r = terminal_set_size_fd(fd, vc, context->tty_rows, context->tty_cols);
+ r = exec_context_tty_size(context, &rows, &cols);
+ if (r < 0)
+ return r;
+
+ r = terminal_set_size_fd(fd, vc, rows, cols);
if (r < 0)
return r;
return 1;
}
-static int enforce_user(const ExecContext *context, uid_t uid) {
+static int enforce_user(
+ const ExecContext *context,
+ uid_t uid,
+ uint64_t capability_ambient_set) {
assert(context);
int r;
* setting secure bits the capability CAP_SETPCAP is required, so we also need keep-caps in this
* case. */
- if ((context->capability_ambient_set != 0 || context->secure_bits != 0) && uid != 0) {
+ if ((capability_ambient_set != 0 || context->secure_bits != 0) && uid != 0) {
/* First step: If we need to keep capabilities but drop privileges we need to make sure we
* keep our caps, while we drop privileges. Add KEEP_CAPS to the securebits */
}
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
+ int r;
+
assert(u);
assert(c);
if (!c->memory_deny_write_execute)
return 0;
+ /* use prctl() if kernel supports it (6.3) */
+ r = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0, 0, 0);
+ if (r == 0) {
+ log_unit_debug(u, "Enabled MemoryDenyWriteExecute= with PR_SET_MDWE");
+ return 0;
+ }
+ if (r < 0 && errno != EINVAL)
+ return log_unit_debug_errno(u, errno, "Failed to enable MemoryDenyWriteExecute= with PR_SET_MDWE: %m");
+ /* else use seccomp */
+ log_unit_debug(u, "Kernel doesn't support PR_SET_MDWE: falling back to seccomp");
+
if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
return 0;
const Unit *u,
const ExecContext *c,
const ExecParameters *p,
+ const CGroupContext *cgroup_context,
size_t n_fds,
char **fdnames,
const char *home,
const char *shell,
dev_t journal_stream_dev,
ino_t journal_stream_ino,
+ const char *memory_pressure_path,
char ***ret) {
_cleanup_strv_free_ char **our_env = NULL;
size_t n_env = 0;
char *x;
+ int r;
assert(u);
assert(c);
assert(p);
assert(ret);
-#define N_ENV_VARS 17
+#define N_ENV_VARS 19
our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
if (!our_env)
return -ENOMEM;
}
if (exec_context_needs_term(c)) {
+ _cleanup_free_ char *cmdline = NULL;
const char *tty_path, *term = NULL;
tty_path = exec_context_tty_path(c);
if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
term = getenv("TERM");
+ else if (tty_path && in_charset(skip_dev_prefix(tty_path), ALPHANUMERICAL)) {
+ _cleanup_free_ char *key = NULL;
+
+ key = strjoin("systemd.tty.term.", skip_dev_prefix(tty_path));
+ if (!key)
+ return -ENOMEM;
+
+ r = proc_cmdline_get_key(key, 0, &cmdline);
+ if (r < 0)
+ log_debug_errno(r, "Failed to read %s from kernel cmdline, ignoring: %m", key);
+ else if (r > 0)
+ term = cmdline;
+ }
if (!term)
term = default_term_for_tty(tty_path);
our_env[n_env++] = x;
- our_env[n_env++] = NULL;
- assert(n_env <= N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
+ if (memory_pressure_path) {
+ x = strjoin("MEMORY_PRESSURE_WATCH=", memory_pressure_path);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+
+ if (cgroup_context && !path_equal(memory_pressure_path, "/dev/null")) {
+ _cleanup_free_ char *b = NULL, *e = NULL;
+
+ if (asprintf(&b, "%s " USEC_FMT " " USEC_FMT,
+ MEMORY_PRESSURE_DEFAULT_TYPE,
+ cgroup_context->memory_pressure_threshold_usec == USEC_INFINITY ? MEMORY_PRESSURE_DEFAULT_THRESHOLD_USEC :
+ CLAMP(cgroup_context->memory_pressure_threshold_usec, 1U, MEMORY_PRESSURE_DEFAULT_WINDOW_USEC),
+ MEMORY_PRESSURE_DEFAULT_WINDOW_USEC) < 0)
+ return -ENOMEM;
+
+ if (base64mem(b, strlen(b) + 1, &e) < 0)
+ return -ENOMEM;
+
+ x = strjoin("MEMORY_PRESSURE_WRITE=", e);
+ if (!x)
+ return -ENOMEM;
+
+ our_env[n_env++] = x;
+ }
+ }
+
+ assert(n_env < N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
#undef N_ENV_VARS
*ret = TAKE_PTR(our_env);
bool exec_needs_mount_namespace(
const ExecContext *context,
const ExecParameters *params,
- const ExecRuntime *runtime) {
+ const ExecSharedRuntime *runtime) {
assert(context);
if (!strv_isempty(context->extension_directories))
return true;
- if (!IN_SET(context->mount_flags, 0, MS_SHARED))
+ if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
return true;
if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
return true;
if (context->private_devices ||
- context->private_mounts ||
+ context->private_mounts > 0 ||
+ (context->private_mounts < 0 && exec_needs_network_namespace(context)) ||
context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables ||
* since they all support the private/ symlink logic at least in some
* configurations, see above. */
- r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
+ r = chase(target, NULL, 0, &target_resolved, NULL);
if (r < 0)
goto fail;
}
/* /var/lib or friends may be symlinks. So, let's chase them also. */
- r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
+ r = chase(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
if (r < 0)
goto fail;
if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r))
return r;
- r = mac_smack_apply_pid(0, exec_label ? : manager->default_smack_process_label);
+ r = mac_smack_apply_pid(0, exec_label ?: manager->default_smack_process_label);
if (r < 0)
return r;
}
ExecCommandFlags command_flags,
const ExecContext *context,
const ExecParameters *params,
- const ExecRuntime *runtime,
+ const ExecSharedRuntime *runtime,
+ const char *memory_pressure_path,
char **error_path) {
- _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL;
+ _cleanup_strv_free_ char **empty_directories = NULL, **symlinks = NULL,
+ **read_write_paths_cleanup = NULL;
const char *tmp_dir = NULL, *var_tmp_dir = NULL;
const char *root_dir = NULL, *root_image = NULL;
_cleanup_free_ char *creds_path = NULL, *incoming_dir = NULL, *propagate_dir = NULL,
*extension_dir = NULL;
+ char **read_write_paths;
NamespaceInfo ns_info;
bool needs_sandboxing;
BindMount *bind_mounts = NULL;
if (r < 0)
goto finalize;
+ /* We need to make the pressure path writable even if /sys/fs/cgroups is made read-only, as the
+ * service will need to write to it in order to start the notifications. */
+ if (context->protect_control_groups && memory_pressure_path && !streq(memory_pressure_path, "/dev/null")) {
+ read_write_paths_cleanup = strv_copy(context->read_write_paths);
+ if (!read_write_paths_cleanup) {
+ r = -ENOMEM;
+ goto finalize;
+ }
+
+ r = strv_extend(&read_write_paths_cleanup, memory_pressure_path);
+ if (r < 0)
+ goto finalize;
+
+ read_write_paths = read_write_paths_cleanup;
+ } else
+ read_write_paths = context->read_write_paths;
+
needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command_flags & EXEC_COMMAND_FULLY_PRIVILEGED);
if (needs_sandboxing) {
/* The runtime struct only contains the parent of the private /tmp,
.protect_system = context->protect_system,
.protect_proc = context->protect_proc,
.proc_subset = context->proc_subset,
+ .private_network = exec_needs_network_namespace(context),
.private_ipc = exec_needs_ipc_namespace(context),
/* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */
.mount_nosuid = context->no_new_privileges && !mac_selinux_use(),
else
ns_info = (NamespaceInfo) {};
- if (context->mount_flags == MS_SHARED)
+ if (context->mount_propagation_flag == MS_SHARED)
log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
if (exec_context_has_credentials(context) &&
}
r = setup_namespace(root_dir, root_image, context->root_image_options,
- &ns_info, context->read_write_paths,
+ &ns_info, read_write_paths,
needs_sandboxing ? context->read_only_paths : NULL,
needs_sandboxing ? context->inaccessible_paths : NULL,
needs_sandboxing ? context->exec_paths : NULL,
var_tmp_dir,
creds_path,
context->log_namespace,
- context->mount_flags,
+ context->mount_propagation_flag,
context->root_hash, context->root_hash_size, context->root_hash_path,
context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
context->root_verity,
static int close_remaining_fds(
const ExecParameters *params,
- const ExecRuntime *runtime,
+ const ExecSharedRuntime *runtime,
const DynamicCreds *dcreds,
int user_lookup_fd,
int socket_fd,
if (writev(user_lookup_fd,
(struct iovec[]) {
- IOVEC_INIT(&uid, sizeof(uid)),
- IOVEC_INIT(&gid, sizeof(gid)),
- IOVEC_INIT_STRING(unit->id) }, 3) < 0)
+ IOVEC_MAKE(&uid, sizeof(uid)),
+ IOVEC_MAKE(&gid, sizeof(gid)),
+ IOVEC_MAKE_STRING(unit->id) }, 3) < 0)
return -errno;
return 0;
ofd = open(of->path, O_PATH | O_CLOEXEC);
if (ofd < 0)
- return log_error_errno(errno, "Could not open \"%s\": %m", of->path);
+ return log_unit_error_errno(u, errno, "Could not open \"%s\": %m", of->path);
+
if (fstat(ofd, &st) < 0)
- return log_error_errno(errno, "Failed to stat %s: %m", of->path);
+ return log_unit_error_errno(u, errno, "Failed to stat %s: %m", of->path);
if (S_ISSOCK(st.st_mode)) {
fd = connect_unix_harder(u, of, ofd);
return fd;
if (FLAGS_SET(of->flags, OPENFILE_READ_ONLY) && shutdown(fd, SHUT_WR) < 0)
- return log_error_errno(errno, "Failed to shutdown send for socket %s: %m", of->path);
+ return log_unit_error_errno(u, errno, "Failed to shutdown send for socket %s: %m",
+ of->path);
log_unit_debug(u, "socket %s opened (fd=%d)", of->path, fd);
} else {
const ExecCommand *command,
const ExecContext *context,
const ExecParameters *params,
- ExecRuntime *runtime,
+ ExecSharedRuntime *runtime,
DynamicCreds *dcreds,
+ const CGroupContext *cgroup_context,
int socket_fd,
const int named_iofds[static 3],
int *params_fds,
int r, ngids = 0, exec_fd;
_cleanup_free_ gid_t *supplementary_gids = NULL;
const char *username = NULL, *groupname = NULL;
- _cleanup_free_ char *home_buffer = NULL;
+ _cleanup_free_ char *home_buffer = NULL, *memory_pressure_path = NULL;
const char *home = NULL, *shell = NULL;
char **final_argv = NULL;
dev_t journal_stream_dev = 0;
log_forget_fds();
log_set_open_when_needed(true);
+ log_settle_target();
/* In case anything used libc syslog(), close this here, too */
closelog();
* invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
* might internally call into other NSS modules that are involved in hostname resolution, we never know. */
if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
- setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
+ setenv("SYSTEMD_ACTIVATION_SCOPE", runtime_scope_to_string(unit->manager->runtime_scope), true) != 0) {
*exit_status = EXIT_MEMORY;
return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
}
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
r = apply_numa_policy(&context->numa_policy);
- if (r == -EOPNOTSUPP)
- log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
- else if (r < 0) {
- *exit_status = EXIT_NUMA_POLICY;
- return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
+ if (r < 0) {
+ if (ERRNO_IS_NOT_SUPPORTED(r))
+ log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
+ else {
+ *exit_status = EXIT_NUMA_POLICY;
+ return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
+ }
}
}
}
}
- /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
- * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
- * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
- * touch a single hierarchy too. */
- if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
- r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
- if (r < 0) {
- *exit_status = EXIT_CGROUP;
- return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ if (params->cgroup_path) {
+ /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
+ * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
+ * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
+ * touch a single hierarchy too. */
+
+ if (params->flags & EXEC_CGROUP_DELEGATE) {
+ r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
+ if (r < 0) {
+ *exit_status = EXIT_CGROUP;
+ return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
+ }
+ }
+
+ if (cgroup_context && cg_unified() > 0 && is_pressure_supported() > 0) {
+ if (cgroup_context_want_memory_pressure(cgroup_context)) {
+ r = cg_get_path("memory", params->cgroup_path, "memory.pressure", &memory_pressure_path);
+ if (r < 0) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+
+ r = chmod_and_chown(memory_pressure_path, 0644, uid, gid);
+ if (r < 0) {
+ log_unit_full_errno(unit, r == -ENOENT || ERRNO_IS_PRIVILEGE(r) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to adjust ownership of '%s', ignoring: %m", memory_pressure_path);
+ memory_pressure_path = mfree(memory_pressure_path);
+ }
+ } else if (cgroup_context->memory_pressure_watch == CGROUP_PRESSURE_WATCH_OFF) {
+ memory_pressure_path = strdup("/dev/null"); /* /dev/null is the explicit indicator for turning off memory pressure watch */
+ if (!memory_pressure_path) {
+ *exit_status = EXIT_MEMORY;
+ return log_oom();
+ }
+ }
}
}
unit,
context,
params,
+ cgroup_context,
n_fds,
fdnames,
home,
shell,
journal_stream_dev,
journal_stream_ino,
+ memory_pressure_path,
&our_env);
if (r < 0) {
*exit_status = EXIT_MEMORY;
else
needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
+ uint64_t capability_ambient_set = context->capability_ambient_set;
+
if (needs_sandboxing) {
/* MAC enablement checks need to be done before a new mount ns is created, as they rely on
* /sys being present. The actual MAC context application will happen later, as late as
return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
}
+ if (ambient_capabilities_supported()) {
+ uint64_t ambient_after_pam;
+
+ /* PAM modules might have set some ambient caps. Query them here and merge them into
+ * the caps we want to set in the end, so that we don't end up unsetting them. */
+ r = capability_get_ambient(&ambient_after_pam);
+ if (r < 0) {
+ *exit_status = EXIT_CAPABILITIES;
+ return log_unit_error_errno(unit, r, "Failed to query ambient caps: %m");
+ }
+
+ capability_ambient_set |= ambient_after_pam;
+ }
+
ngids_after_pam = getgroups_alloc(&gids_after_pam);
if (ngids_after_pam < 0) {
*exit_status = EXIT_MEMORY;
if (ns_type_supported(NAMESPACE_NET)) {
r = setup_shareable_ns(runtime->netns_storage_socket, CLONE_NEWNET);
- if (r == -EPERM)
- log_unit_warning_errno(unit, r,
- "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
- else if (r < 0) {
- *exit_status = EXIT_NETWORK;
- return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+ if (r < 0) {
+ if (ERRNO_IS_PRIVILEGE(r))
+ log_unit_warning_errno(unit, r,
+ "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
+ else {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+ }
}
} else if (context->network_namespace_path) {
*exit_status = EXIT_NETWORK;
if (needs_mount_namespace) {
_cleanup_free_ char *error_path = NULL;
- r = apply_mount_namespace(unit, command->flags, context, params, runtime, &error_path);
+ r = apply_mount_namespace(unit, command->flags, context, params, runtime, memory_pressure_path, &error_path);
if (r < 0) {
*exit_status = EXIT_NAMESPACE;
return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
(UINT64_C(1) << CAP_SETGID);
if (!cap_test_all(bset)) {
- r = capability_bounding_set_drop(bset, false);
+ r = capability_bounding_set_drop(bset, /* right_now= */ false);
if (r < 0) {
*exit_status = EXIT_CAPABILITIES;
return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
* The requested ambient capabilities are raised in the inheritable set if the second
* argument is true. */
if (!needs_ambient_hack) {
- r = capability_ambient_set_apply(context->capability_ambient_set, true);
+ r = capability_ambient_set_apply(capability_ambient_set, /* also_inherit= */ true);
if (r < 0) {
*exit_status = EXIT_CAPABILITIES;
return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
if (needs_setuid) {
if (uid_is_valid(uid)) {
- r = enforce_user(context, uid);
+ r = enforce_user(context, uid, capability_ambient_set);
if (r < 0) {
*exit_status = EXIT_USER;
return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
}
- if (!needs_ambient_hack &&
- context->capability_ambient_set != 0) {
+ if (!needs_ambient_hack && capability_ambient_set != 0) {
/* Raise the ambient capabilities after user change. */
- r = capability_ambient_set_apply(context->capability_ambient_set, false);
+ r = capability_ambient_set_apply(capability_ambient_set, /* also_inherit= */ false);
if (r < 0) {
*exit_status = EXIT_CAPABILITIES;
return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
ExecCommand *command,
const ExecContext *context,
const ExecParameters *params,
- ExecRuntime *runtime,
+ ExecSharedRuntime *runtime,
DynamicCreds *dcreds,
+ const CGroupContext *cgroup_context,
pid_t *ret) {
int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
assert(params);
assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
+ LOG_CONTEXT_PUSH_UNIT(unit);
+
if (context->std_input == EXEC_INPUT_SOCKET ||
context->std_output == EXEC_OUTPUT_SOCKET ||
context->std_error == EXEC_OUTPUT_SOCKET) {
params,
runtime,
dcreds,
+ cgroup_context,
socket_fd,
named_iofds,
fds,
c->tty_rows = UINT_MAX;
c->tty_cols = UINT_MAX;
numa_policy_reset(&c->numa_policy);
+ c->private_mounts = -1;
}
void exec_context_done(ExecContext *c) {
return 0;
}
+bool exec_context_has_encrypted_credentials(ExecContext *c) {
+ ExecLoadCredential *load_cred;
+ ExecSetCredential *set_cred;
+ /* Returns true if at least one entry of c->load_credentials or c->set_credentials has its
+  * 'encrypted' field set, and false if neither hashmap contains such an entry. */
+ assert(c);
+
+ HASHMAP_FOREACH(load_cred, c->load_credentials)
+ if (load_cred->encrypted)
+ return true; /* one encrypted entry is enough, stop scanning */
+
+ HASHMAP_FOREACH(set_cred, c->set_credentials)
+ if (set_cred->encrypted)
+ return true;
+
+ return false;
+}
+
void exec_status_start(ExecStatus *s, pid_t pid) {
assert(s);
return NULL;
}
-static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
+static ExecSharedRuntime* exec_shared_runtime_free(ExecSharedRuntime *rt, bool destroy) {
int r;
if (!rt)
return NULL;
if (rt->manager)
- (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
+ (void) hashmap_remove(rt->manager->exec_shared_runtime_by_id, rt->id);
/* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
return mfree(rt);
}
-static void exec_runtime_freep(ExecRuntime **rt) {
- (void) exec_runtime_free(*rt, false);
+static void exec_shared_runtime_freep(ExecSharedRuntime **rt) {
+ (void) exec_shared_runtime_free(*rt, false); /* pointer-to-pointer wrapper for use with _cleanup_(); always frees with destroy=false */
}
-static int exec_runtime_allocate(ExecRuntime **ret, const char *id) {
+static int exec_shared_runtime_allocate(ExecSharedRuntime **ret, const char *id) {
_cleanup_free_ char *id_copy = NULL;
- ExecRuntime *n;
+ ExecSharedRuntime *n;
assert(ret);
if (!id_copy)
return -ENOMEM;
- n = new(ExecRuntime, 1);
+ n = new(ExecSharedRuntime, 1);
if (!n)
return -ENOMEM;
- *n = (ExecRuntime) {
+ *n = (ExecSharedRuntime) {
.id = TAKE_PTR(id_copy),
.netns_storage_socket = PIPE_EBADF,
.ipcns_storage_socket = PIPE_EBADF,
return 0;
}
-static int exec_runtime_add(
+static int exec_shared_runtime_add(
Manager *m,
const char *id,
char **tmp_dir,
char **var_tmp_dir,
int netns_storage_socket[2],
int ipcns_storage_socket[2],
- ExecRuntime **ret) {
+ ExecSharedRuntime **ret) {
- _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
+ _cleanup_(exec_shared_runtime_freep) ExecSharedRuntime *rt = NULL;
int r;
assert(m);
/* tmp_dir, var_tmp_dir, {net,ipc}ns_storage_socket fds are donated on success */
- r = exec_runtime_allocate(&rt, id);
+ r = exec_shared_runtime_allocate(&rt, id);
if (r < 0)
return r;
- r = hashmap_ensure_put(&m->exec_runtime_by_id, &string_hash_ops, rt->id, rt);
+ r = hashmap_ensure_put(&m->exec_shared_runtime_by_id, &string_hash_ops, rt->id, rt);
if (r < 0)
return r;
if (ret)
*ret = rt;
- /* do not remove created ExecRuntime object when the operation succeeds. */
+ /* do not remove created ExecSharedRuntime object when the operation succeeds. */
TAKE_PTR(rt);
return 0;
}
-static int exec_runtime_make(
+static int exec_shared_runtime_make(
Manager *m,
const ExecContext *c,
const char *id,
- ExecRuntime **ret) {
+ ExecSharedRuntime **ret) {
_cleanup_(namespace_cleanup_tmpdirp) char *tmp_dir = NULL, *var_tmp_dir = NULL;
_cleanup_close_pair_ int netns_storage_socket[2] = PIPE_EBADF, ipcns_storage_socket[2] = PIPE_EBADF;
assert(c);
assert(id);
- /* It is not necessary to create ExecRuntime object. */
+ /* It is not necessary to create ExecSharedRuntime object. */
if (!exec_needs_network_namespace(c) && !exec_needs_ipc_namespace(c) && !c->private_tmp) {
*ret = NULL;
return 0;
return -errno;
}
- r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ipcns_storage_socket, ret);
+ r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_storage_socket, ipcns_storage_socket, ret);
if (r < 0)
return r;
return 1;
}
-int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
- ExecRuntime *rt;
+int exec_shared_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecSharedRuntime **ret) {
+ ExecSharedRuntime *rt;
int r;
assert(m);
assert(id);
assert(ret);
- rt = hashmap_get(m->exec_runtime_by_id, id);
+ rt = hashmap_get(m->exec_shared_runtime_by_id, id);
if (rt)
- /* We already have an ExecRuntime object, let's increase the ref count and reuse it */
+ /* We already have an ExecSharedRuntime object, let's increase the ref count and reuse it */
goto ref;
if (!create) {
}
/* If not found, then create a new object. */
- r = exec_runtime_make(m, c, id, &rt);
+ r = exec_shared_runtime_make(m, c, id, &rt);
if (r < 0)
return r;
if (r == 0) {
- /* When r == 0, it is not necessary to create ExecRuntime object. */
+ /* When r == 0, it is not necessary to create ExecSharedRuntime object. */
*ret = NULL;
return 0;
}
return 1;
}
-ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
+ExecSharedRuntime *exec_shared_runtime_unref(ExecSharedRuntime *rt, bool destroy) {
if (!rt)
return NULL;
if (rt->n_ref > 0)
return NULL;
- return exec_runtime_free(rt, destroy);
+ return exec_shared_runtime_free(rt, destroy);
}
-int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
- ExecRuntime *rt;
+int exec_shared_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
+ ExecSharedRuntime *rt;
assert(m);
assert(f);
assert(fds);
- HASHMAP_FOREACH(rt, m->exec_runtime_by_id) {
+ HASHMAP_FOREACH(rt, m->exec_shared_runtime_by_id) {
fprintf(f, "exec-runtime=%s", rt->id);
if (rt->tmp_dir)
return 0;
}
-int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
- _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
- ExecRuntime *rt;
+int exec_shared_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
+ _cleanup_(exec_shared_runtime_freep) ExecSharedRuntime *rt_create = NULL;
+ ExecSharedRuntime *rt;
int r;
/* This is for the migration from old (v237 or earlier) deserialization text.
* Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
- * Even if the ExecRuntime object originally created by the other unit, we cannot judge
+ * Even if the ExecSharedRuntime object originally created by the other unit, we cannot judge
* so or not from the serialized text, then we always creates a new object owned by this. */
assert(u);
assert(key);
assert(value);
- /* Manager manages ExecRuntime objects by the unit id.
+ /* Manager manages ExecSharedRuntime objects by the unit id.
* So, we omit the serialized text when the unit does not have id (yet?)... */
if (isempty(u->id)) {
log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
return 0;
}
- if (hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops) < 0)
+ if (hashmap_ensure_allocated(&u->manager->exec_shared_runtime_by_id, &string_hash_ops) < 0)
return log_oom();
- rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
+ rt = hashmap_get(u->manager->exec_shared_runtime_by_id, u->id);
if (!rt) {
- if (exec_runtime_allocate(&rt_create, u->id) < 0)
+ if (exec_shared_runtime_allocate(&rt_create, u->id) < 0)
return log_oom();
rt = rt_create;
} else
return 0;
- /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
+ /* If the object is newly created, then put it to the hashmap which manages ExecSharedRuntime objects. */
if (rt_create) {
- r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
+ r = hashmap_put(u->manager->exec_shared_runtime_by_id, rt_create->id, rt_create);
if (r < 0) {
log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
return 0;
return 1;
}
-int exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
+int exec_shared_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
char *id = NULL;
int r, netns_fdpair[] = {-1, -1}, ipcns_fdpair[] = {-1, -1};
}
finalize:
- r = exec_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_fdpair, ipcns_fdpair, NULL);
+ r = exec_shared_runtime_add(m, id, &tmp_dir, &var_tmp_dir, netns_fdpair, ipcns_fdpair, NULL);
if (r < 0)
return log_debug_errno(r, "Failed to add exec-runtime: %m");
return 0;
}
-void exec_runtime_vacuum(Manager *m) {
- ExecRuntime *rt;
+void exec_shared_runtime_vacuum(Manager *m) {
+ ExecSharedRuntime *rt;
assert(m);
- /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
+ /* Free unreferenced ExecSharedRuntime objects. This is used after the manager deserialization process.
+  * Every object in m->exec_shared_runtime_by_id whose n_ref is 0 is released; objects that are still
+  * referenced (n_ref > 0) are left in place. */
- HASHMAP_FOREACH(rt, m->exec_runtime_by_id) {
+ HASHMAP_FOREACH(rt, m->exec_shared_runtime_by_id) {
if (rt->n_ref > 0)
continue;
- (void) exec_runtime_free(rt, false);
+ (void) exec_shared_runtime_free(rt, false); /* destroy=false: free the object only, without the rm_rf of tmp_dir/var_tmp_dir done for destroy=true */
}
}