+/* SPDX-License-Identifier: LGPL-2.1+ */
/***
This file is part of systemd.
return 0;
}
-static int enforce_groups(const ExecContext *context, gid_t gid,
- gid_t *supplementary_gids, int ngids) {
+static int enforce_groups(gid_t gid, gid_t *supplementary_gids, int ngids) {
int r;
- assert(context);
-
- /* Handle SupplementaryGroups= even if it is empty */
- if (!strv_isempty(context->supplementary_groups)) {
+ /* Handle SupplementaryGroups= if it is not empty */
+ if (ngids > 0) {
r = maybe_setgroups(ngids, supplementary_gids);
if (r < 0)
return r;
assert(c);
return c->syscall_whitelist ||
- !set_isempty(c->syscall_filter);
+ !hashmap_isempty(c->syscall_filter);
}
static bool context_has_no_new_privileges(const ExecContext *c) {
!strv_isempty(context->inaccessible_paths))
return true;
- if (context->n_bind_mounts > 0)
+ if (context->n_bind_mounts > 0 ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths) ||
+ !strv_isempty(context->directories[EXEC_DIRECTORY_CONFIGURATION].paths))
return true;
if (context->mount_flags != 0)
if (context->mount_apivfs && (context->root_image || context->root_directory))
return true;
- if (context->dynamic_user &&
- (!strv_isempty(context->directories[EXEC_DIRECTORY_RUNTIME].paths) ||
- !strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
- !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
- !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
- return true;
-
return false;
}
if (r < 0)
goto fail;
- if (context->dynamic_user && type != EXEC_DIRECTORY_CONFIGURATION) {
+ if (context->dynamic_user &&
+ !IN_SET(type, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
_cleanup_free_ char *private_root = NULL, *relative = NULL, *parent = NULL;
/* So, here's one extra complication when dealing with DynamicUser=1 units. In that case we
* dirs it needs but no others. Tricky? Yes, absolutely, but it works!
*
* Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not to be
- * owned by the service itself. */
+ * owned by the service itself.
+ * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used for sharing
+ * files or sockets with other services. */
private_root = strjoin(params->prefix[type], "/private");
if (!private_root) {
}
/* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
- r = mkdir_safe_label(private_root, 0700, 0, 0);
+ r = mkdir_safe_label(private_root, 0700, 0, 0, false);
if (r < 0)
goto fail;
return 0;
}
-static int compile_read_write_paths(
- const ExecContext *context,
- const ExecParameters *params,
- char ***ret) {
-
- _cleanup_strv_free_ char **l = NULL;
- char **rt;
- ExecDirectoryType i;
-
- /* Compile the list of writable paths. This is the combination of
- * the explicitly configured paths, plus all runtime directories. */
-
- if (strv_isempty(context->read_write_paths)) {
- for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
- if (!strv_isempty(context->directories[i].paths))
- break;
-
- if (i == _EXEC_DIRECTORY_TYPE_MAX) {
- *ret = NULL; /* NOP if neither is set */
- return 0;
- }
- }
-
- l = strv_copy(context->read_write_paths);
- if (!l)
- return -ENOMEM;
-
- for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++) {
- if (!params->prefix[i])
- continue;
-
- STRV_FOREACH(rt, context->directories[i].paths) {
- char *s;
-
- s = strjoin(params->prefix[i], "/", *rt);
- if (!s)
- return -ENOMEM;
-
- if (strv_consume(&l, s) < 0)
- return -ENOMEM;
- }
- }
-
- *ret = l;
- l = NULL;
-
- return 0;
-}
-
static int compile_bind_mounts(
const ExecContext *context,
const ExecParameters *params,
if (!bind_mounts)
return -ENOMEM;
- for (i = 0; context->n_bind_mounts; i++) {
+ for (i = 0; i < context->n_bind_mounts; i++) {
BindMount *item = context->bind_mounts + i;
char *s, *d;
if (strv_isempty(context->directories[t].paths))
continue;
- if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION) {
+ if (context->dynamic_user &&
+ !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION)) {
char *private_root;
/* So this is for a dynamic user, and we need to make sure the process can access its own
STRV_FOREACH(suffix, context->directories[t].paths) {
char *s, *d;
- if (context->dynamic_user && t != EXEC_DIRECTORY_CONFIGURATION)
+ if (context->dynamic_user &&
+ !IN_SET(t, EXEC_DIRECTORY_RUNTIME, EXEC_DIRECTORY_CONFIGURATION))
s = strjoin(params->prefix[t], "/private/", *suffix);
else
s = strjoin(params->prefix[t], "/", *suffix);
const ExecParameters *params,
ExecRuntime *runtime) {
- _cleanup_strv_free_ char **rw = NULL, **empty_directories = NULL;
+ _cleanup_strv_free_ char **empty_directories = NULL;
char *tmp = NULL, *var = NULL;
const char *root_dir = NULL, *root_image = NULL;
- NameSpaceInfo ns_info = {
+ NamespaceInfo ns_info = {
.ignore_protect_paths = false,
.private_dev = context->private_devices,
.protect_control_groups = context->protect_control_groups,
var = strjoina(runtime->var_tmp_dir, "/tmp");
}
- r = compile_read_write_paths(context, params, &rw);
- if (r < 0)
- return r;
-
if (params->flags & EXEC_APPLY_CHROOT) {
root_image = context->root_image;
needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
r = setup_namespace(root_dir, root_image,
- &ns_info, rw,
+ &ns_info, context->read_write_paths,
needs_sandboxing ? context->read_only_paths : NULL,
needs_sandboxing ? context->inaccessible_paths : NULL,
empty_directories,
STRV_FOREACH(i, c->directories[t].paths) {
char *e;
- e = strjoin(p->prefix[t], "/private/", *i);
+ if (t == EXEC_DIRECTORY_RUNTIME)
+ e = strjoin(p->prefix[t], "/", *i);
+ else
+ e = strjoin(p->prefix[t], "/private/", *i);
if (!e)
return -ENOMEM;
r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
if (r < 0) {
*exit_status = EXIT_USER;
+ if (r == -EILSEQ) {
+ log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
+ return -EOPNOTSUPP;
+ }
return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
}
}
if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
- r = setup_netns(runtime->netns_storage_socket);
- if (r < 0) {
- *exit_status = EXIT_NETWORK;
- return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
- }
+ if (ns_type_supported(NAMESPACE_NET)) {
+ r = setup_netns(runtime->netns_storage_socket);
+ if (r < 0) {
+ *exit_status = EXIT_NETWORK;
+ return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
+ }
+ } else
+ log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
}
needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
/* Drop groups as early as possible */
if (needs_setuid) {
- r = enforce_groups(context, gid, supplementary_gids, ngids);
+ r = enforce_groups(gid, supplementary_gids, ngids);
if (r < 0) {
*exit_status = EXIT_GROUP;
return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
c->directories[i].mode = 0755;
c->capability_bounding_set = CAP_ALL;
c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
+ c->log_level_max = -1;
}
void exec_context_done(ExecContext *c) {
- unsigned l;
ExecDirectoryType i;
+ size_t l;
assert(c);
c->apparmor_profile = mfree(c->apparmor_profile);
c->smack_process_label = mfree(c->smack_process_label);
- c->syscall_filter = set_free(c->syscall_filter);
+ c->syscall_filter = hashmap_free(c->syscall_filter);
c->syscall_archs = set_free(c->syscall_archs);
c->address_families = set_free(c->address_families);
for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
c->directories[i].paths = strv_free(c->directories[i].paths);
+
+ c->log_level_max = -1;
+
+ exec_context_free_log_extra_fields(c);
}
int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
/* We execute this synchronously, since we need to be sure this is gone when we start the service
* next. */
(void) rm_rf(p, REMOVE_ROOT);
-
- /* Also destroy any matching subdirectory below /private/. This is done to support DynamicUser=1
- * setups. Note that we don't conditionalize here on that though, as the namespace is same way, and it
- * makes us a bit more robust towards changing unit settings. Or to say this differently: in the worst
- * case this is a NOP. */
-
- free(p);
- p = strjoin(runtime_prefix, "/private/", *i);
- if (!p)
- return -ENOMEM;
-
- (void) rm_rf(p, REMOVE_ROOT);
}
return 0;
}
void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
+ ExecDirectoryType dt;
char **e, **d;
unsigned i;
- ExecDirectoryType dt;
int r;
assert(c);
fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
}
+ if (c->log_level_max >= 0) {
+ _cleanup_free_ char *t = NULL;
+
+ (void) log_level_to_string_alloc(c->log_level_max, &t);
+
+ fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
+ }
+
+ if (c->n_log_extra_fields > 0) {
+ size_t j;
+
+ for (j = 0; j < c->n_log_extra_fields; j++) {
+ fprintf(f, "%sLogExtraFields: ", prefix);
+ fwrite(c->log_extra_fields[j].iov_base,
+ 1, c->log_extra_fields[j].iov_len,
+ f);
+ fputc('\n', f);
+ }
+ }
+
if (c->secure_bits) {
_cleanup_free_ char *str = NULL;
if (c->syscall_filter) {
#if HAVE_SECCOMP
Iterator j;
- void *id;
+ void *id, *val;
bool first = true;
#endif
fputc('~', f);
#if HAVE_SECCOMP
- SET_FOREACH(id, c->syscall_filter, j) {
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
_cleanup_free_ char *name = NULL;
+ const char *errno_name = NULL;
+ int num = PTR_TO_INT(val);
if (first)
first = false;
name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
fputs(strna(name), f);
+
+ if (num >= 0) {
+ errno_name = errno_to_name(num);
+ if (errno_name)
+ fprintf(f, ":%s", errno_name);
+ else
+ fprintf(f, ":%d", num);
+ }
}
#endif
prefix, s);
}
- if (c->syscall_errno > 0)
- fprintf(f,
- "%sSystemCallErrorNumber: %s\n",
- prefix, strna(errno_to_name(c->syscall_errno)));
+ if (c->syscall_errno > 0) {
+ const char *errno_name;
+
+ fprintf(f, "%sSystemCallErrorNumber: ", prefix);
+
+ errno_name = errno_to_name(c->syscall_errno);
+ if (errno_name)
+ fprintf(f, "%s\n", errno_name);
+ else
+ fprintf(f, "%d\n", c->syscall_errno);
+ }
if (c->apparmor_profile)
fprintf(f,
return p;
}
+void exec_context_free_log_extra_fields(ExecContext *c) { /* Releases all log extra fields held by c; c must not be NULL (asserted below). */
+ size_t l;
+
+ assert(c);
+
+ for (l = 0; l < c->n_log_extra_fields; l++) /* free each entry's iov_base buffer before the array that holds the entries */
+ free(c->log_extra_fields[l].iov_base);
+ c->log_extra_fields = mfree(c->log_extra_fields); /* the array pointer is overwritten with mfree()'s return value */
+ c->n_log_extra_fields = 0; /* reset the count so it matches the released array */
+}
+
void exec_status_start(ExecStatus *s, pid_t pid) {
assert(s);