#include "format-util.h"
#include "fs-util.h"
#include "glob-util.h"
+#include "hexdecoct.h"
#include "io-util.h"
#include "ioprio.h"
#include "label.h"
static bool is_terminal_output(ExecOutput o) {
return IN_SET(o,
EXEC_OUTPUT_TTY,
- EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
EXEC_OUTPUT_KMSG_AND_CONSOLE,
EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
}
-static bool is_syslog_output(ExecOutput o) {
- return IN_SET(o,
- EXEC_OUTPUT_SYSLOG,
- EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
-}
-
static bool is_kmsg_output(ExecOutput o) {
return IN_SET(o,
EXEC_OUTPUT_KMSG,
uid_t uid,
gid_t gid) {
- union sockaddr_union sa = {
- .un.sun_family = AF_UNIX,
- };
+ union sockaddr_union sa;
+ socklen_t sa_len;
uid_t olduid = UID_INVALID;
gid_t oldgid = GID_INVALID;
const char *j;
r = sockaddr_un_set_path(&sa.un, j);
if (r < 0)
return r;
+ sa_len = r;
if (gid_is_valid(gid)) {
oldgid = getgid();
}
}
- r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un)) < 0 ? -errno : 0;
+ r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
/* If we fail to restore the uid or gid, things will likely
fail later on. This should only happen if an LSM interferes. */
params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
context->syslog_priority,
!!context->syslog_level_prefix,
- is_syslog_output(output),
+ false,
is_kmsg_output(output),
is_terminal_output(output)) < 0)
return -errno;
}
static int acquire_path(const char *path, int flags, mode_t mode) {
- union sockaddr_union sa = {};
+ union sockaddr_union sa;
+ socklen_t sa_len;
_cleanup_close_ int fd = -1;
- int r, salen;
+ int r;
assert(path);
if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
return -errno;
- if (strlen(path) >= sizeof(sa.un.sun_path)) /* Too long, can't be a UNIX socket */
- return -ENXIO;
/* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
+ r = sockaddr_un_set_path(&sa.un, path);
+ if (r < 0)
+ return r == -EINVAL ? -ENXIO : r;
+ sa_len = r;
+
fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (fd < 0)
return -errno;
- salen = sockaddr_un_set_path(&sa.un, path);
- if (salen < 0)
- return salen;
-
- if (connect(fd, &sa.sa, salen) < 0)
+ if (connect(fd, &sa.sa, sa_len) < 0)
return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
* indication that his wasn't an AF_UNIX socket after all */
else if ((flags & O_ACCMODE) == O_WRONLY)
r = shutdown(fd, SHUT_RD);
else
- return TAKE_FD(fd);
+ r = 0;
if (r < 0)
return -errno;
/* We don't reset the terminal if this is just about output */
return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
- case EXEC_OUTPUT_SYSLOG:
- case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
case EXEC_OUTPUT_KMSG:
case EXEC_OUTPUT_KMSG_AND_CONSOLE:
case EXEC_OUTPUT_JOURNAL:
static bool context_has_address_families(const ExecContext *c) {
assert(c);
- return c->address_families_whitelist ||
+ return c->address_families_allow_list ||
!set_isempty(c->address_families);
}
static bool context_has_syscall_filters(const ExecContext *c) {
assert(c);
- return c->syscall_whitelist ||
+ return c->syscall_allow_list ||
!hashmap_isempty(c->syscall_filter);
}
negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
- if (c->syscall_whitelist) {
+ if (c->syscall_allow_list) {
default_action = negative_action;
action = SCMP_ACT_ALLOW;
} else {
}
if (needs_ambient_hack) {
- r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
+ r = seccomp_filter_set_add(c->syscall_filter, c->syscall_allow_list, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
if (r < 0)
return r;
}
if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
return 0;
- return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
+ return seccomp_restrict_address_families(c->address_families, c->address_families_allow_list);
}
static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
return seccomp_protect_syslog();
}
-static int apply_protect_clock(const Unit *u, const ExecContext *c) {
+static int apply_protect_clock(const Unit *u, const ExecContext *c) {
assert(u);
assert(c);
#endif
+static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
+ assert(u);
+ assert(c);
+
+ if (!c->protect_hostname)
+ return 0;
+
+ if (ns_type_supported(NAMESPACE_UTS)) {
+ if (unshare(CLONE_NEWUTS) < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
+ *ret_exit_status = EXIT_NAMESPACE;
+ return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
+ }
+
+ log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
+ }
+ } else
+ log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
+
+#if HAVE_SECCOMP
+ int r;
+
+ if (skip_seccomp_unavailable(u, "ProtectHostname="))
+ return 0;
+
+ r = seccomp_protect_hostname();
+ if (r < 0) {
+ *ret_exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
+ }
+#endif
+
+ return 0;
+}
+
static void do_idle_pipe_dance(int idle_pipe[static 4]) {
assert(idle_pipe);
tty_path = exec_context_tty_path(c);
- /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
- * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
- * passes to PID 1 ends up all the way in the console login shown. */
+ /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
+ * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
+ * container manager passes to PID 1 ends up all the way in the console login shown. */
- if (path_equal(tty_path, "/dev/console") && getppid() == 1)
+ if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
term = getenv("TERM");
+
if (!term)
term = default_term_for_tty(tty_path);
if (type != EXEC_DIRECTORY_CONFIGURATION &&
readlink_and_make_absolute(p, &target) >= 0) {
- _cleanup_free_ char *q = NULL;
+ _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
/* This already exists and is a symlink? Interesting. Maybe it's one created
* by DynamicUser=1 (see above)?
* since they all support the private/ symlink logic at least in some
* configurations, see above. */
+ r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
+ if (r < 0)
+ goto fail;
+
q = path_join(params->prefix[type], "private", *rt);
if (!q) {
r = -ENOMEM;
goto fail;
}
- if (path_equal(q, target)) {
+ /* /var/lib or friends may be symlinks. So, let's chase them also. */
+ r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
+ if (r < 0)
+ goto fail;
+
+ if (path_equal(q_resolved, target_resolved)) {
/* Hmm, apparently DynamicUser= was once turned on for this service,
* but is no longer. Let's move the directory back up. */
assert(n_bind_mounts == 0 || bind_mounts);
/* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
- * would alter the view on the file system beyond making things read-only or invisble, i.e. would
+ * would alter the view on the file system beyond making things read-only or invisible, i.e. would
* rearrange stuff in a way we cannot ignore gracefully. */
if (context->n_temporary_filesystems > 0)
needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
context->mount_flags,
+ context->root_hash, context->root_hash_size, context->root_hash_path,
+ context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path,
+ context->root_verity,
DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
error_path);
return using_subcgroup;
}
+static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
+ _cleanup_(cpu_set_reset) CPUSet s = {};
+ int r;
+
+ assert(c);
+ assert(ret);
+
+ if (!c->numa_policy.nodes.set) {
+ log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
+ return 0;
+ }
+
+ r = numa_to_cpu_set(&c->numa_policy, &s);
+ if (r < 0)
+ return r;
+
+ cpu_set_reset(ret);
+
+ return cpu_set_add_all(ret, &s);
+}
+
+bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
+ assert(c);
+
+ return c->cpu_affinity_from_numa;
+}
+
static int exec_child(
Unit *unit,
const ExecCommand *command,
}
}
+ if (context->coredump_filter_set) {
+ r = set_coredump_filter(context->coredump_filter);
+ if (ERRNO_IS_PRIVILEGE(r))
+ log_unit_debug_errno(unit, r, "Failed to adjust coredump_filter, ignoring: %m");
+ else if (r < 0)
+ return log_unit_error_errno(unit, r, "Failed to adjust coredump_filter: %m");
+ }
+
if (context->nice_set) {
r = setpriority_closest(context->nice);
if (r < 0)
}
}
- if (context->cpu_set.set)
- if (sched_setaffinity(0, context->cpu_set.allocated, context->cpu_set.set) < 0) {
+ if (context->cpu_affinity_from_numa || context->cpu_set.set) {
+ _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
+ const CPUSet *cpu_set;
+
+ if (context->cpu_affinity_from_numa) {
+ r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
+ if (r < 0) {
+ *exit_status = EXIT_CPUAFFINITY;
+ return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
+ }
+
+ cpu_set = &converted_cpu_set;
+ } else
+ cpu_set = &context->cpu_set;
+
+ if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
*exit_status = EXIT_CPUAFFINITY;
return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
}
+ }
if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
r = apply_numa_policy(&context->numa_policy);
our_env,
pass_env,
context->environment,
- files_env,
- NULL);
+ files_env);
if (!accum_env) {
*exit_status = EXIT_MEMORY;
return log_oom();
}
}
- if (context->protect_hostname) {
- if (ns_type_supported(NAMESPACE_UTS)) {
- if (unshare(CLONE_NEWUTS) < 0) {
- if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
- *exit_status = EXIT_NAMESPACE;
- return log_unit_error_errno(unit, errno, "Failed to set up UTS namespacing: %m");
- }
-
- log_unit_warning(unit, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
- }
- } else
- log_unit_warning(unit, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
-#if HAVE_SECCOMP
- r = seccomp_protect_hostname();
- if (r < 0) {
- *exit_status = EXIT_SECCOMP;
- return log_unit_error_errno(unit, r, "Failed to apply hostname restrictions: %m");
- }
-#endif
+ if (needs_sandboxing) {
+ r = apply_protect_hostname(unit, context, exit_status);
+ if (r < 0)
+ return r;
}
/* Drop groups as early as possible.
c->working_directory = mfree(c->working_directory);
c->root_directory = mfree(c->root_directory);
c->root_image = mfree(c->root_image);
+ c->root_hash = mfree(c->root_hash);
+ c->root_hash_size = 0;
+ c->root_hash_path = mfree(c->root_hash_path);
+ c->root_hash_sig = mfree(c->root_hash_sig);
+ c->root_hash_sig_size = 0;
+ c->root_hash_sig_path = mfree(c->root_hash_sig_path);
+ c->root_verity = mfree(c->root_verity);
c->tty_path = mfree(c->tty_path);
c->syslog_identifier = mfree(c->syslog_identifier);
c->user = mfree(c->user);
if (c->root_image)
fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
+ if (c->root_hash) {
+ _cleanup_free_ char *encoded = NULL;
+ encoded = hexmem(c->root_hash, c->root_hash_size);
+ if (encoded)
+ fprintf(f, "%sRootHash: %s\n", prefix, encoded);
+ }
+
+ if (c->root_hash_path)
+ fprintf(f, "%sRootHash: %s\n", prefix, c->root_hash_path);
+
+ if (c->root_hash_sig) {
+ _cleanup_free_ char *encoded = NULL;
+ ssize_t len;
+ len = base64mem(c->root_hash_sig, c->root_hash_sig_size, &encoded);
+ if (len)
+ fprintf(f, "%sRootHashSignature: base64:%s\n", prefix, encoded);
+ }
+
+ if (c->root_hash_sig_path)
+ fprintf(f, "%sRootHashSignature: %s\n", prefix, c->root_hash_sig_path);
+
+ if (c->root_verity)
+ fprintf(f, "%sRootVerity: %s\n", prefix, c->root_verity);
+
STRV_FOREACH(e, c->environment)
fprintf(f, "%sEnvironment: %s\n", prefix, *e);
"%sOOMScoreAdjust: %i\n",
prefix, c->oom_score_adjust);
+ if (c->coredump_filter_set)
+ fprintf(f,
+ "%sCoredumpFilter: 0x%"PRIx64"\n",
+ prefix, c->coredump_filter);
+
for (i = 0; i < RLIM_NLIMITS; i++)
if (c->rlimit[i]) {
fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
prefix, yes_no(c->tty_vt_disallocate));
if (IN_SET(c->std_output,
- EXEC_OUTPUT_SYSLOG,
EXEC_OUTPUT_KMSG,
EXEC_OUTPUT_JOURNAL,
- EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
EXEC_OUTPUT_KMSG_AND_CONSOLE,
EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
IN_SET(c->std_error,
- EXEC_OUTPUT_SYSLOG,
EXEC_OUTPUT_KMSG,
EXEC_OUTPUT_JOURNAL,
- EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
EXEC_OUTPUT_KMSG_AND_CONSOLE,
EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
"%sSystemCallFilter: ",
prefix);
- if (!c->syscall_whitelist)
+ if (!c->syscall_allow_list)
fputc('~', f);
#if HAVE_SECCOMP
r = namespace_flags_to_string(c->restrict_namespaces, &s);
if (r >= 0)
fprintf(f, "%sRestrictNamespaces: %s\n",
- prefix, s);
+ prefix, strna(s));
}
if (c->network_namespace_path)
if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
return 0;
- if (c->private_tmp) {
+ if (c->private_tmp &&
+ !(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
+ (prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
+ prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
if (r < 0)
return r;
[EXEC_OUTPUT_INHERIT] = "inherit",
[EXEC_OUTPUT_NULL] = "null",
[EXEC_OUTPUT_TTY] = "tty",
- [EXEC_OUTPUT_SYSLOG] = "syslog",
- [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
[EXEC_OUTPUT_KMSG] = "kmsg",
[EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
[EXEC_OUTPUT_JOURNAL] = "journal",