While at it, remove the TODO about assuming availability of cgroup namespaces (cgns).
We generally still want to keep cgroup namespace support optional.
/* We need to make the pressure path writable even if /sys/fs/cgroup/ is made read-only, as the
* service will need to write to it in order to start the notifications. */
- if (exec_is_cgroup_mount_read_only(context, params) && memory_pressure_path && !streq(memory_pressure_path, "/dev/null")) {
+ if (exec_is_cgroup_mount_read_only(context) && memory_pressure_path && !streq(memory_pressure_path, "/dev/null")) {
read_write_paths_cleanup = strv_copy(context->read_write_paths);
if (!read_write_paths_cleanup)
return -ENOMEM;
* sandbox inside the mount namespace. */
.ignore_protect_paths = !needs_sandboxing && !context->dynamic_user && root_dir,
- .protect_control_groups = needs_sandboxing ? exec_get_protect_control_groups(context, params) : PROTECT_CONTROL_GROUPS_NO,
+ .protect_control_groups = needs_sandboxing ? exec_get_protect_control_groups(context) : PROTECT_CONTROL_GROUPS_NO,
.protect_kernel_tunables = needs_sandboxing && context->protect_kernel_tunables,
.protect_kernel_modules = needs_sandboxing && context->protect_kernel_modules,
.protect_kernel_logs = needs_sandboxing && context->protect_kernel_logs,
LOG_EXEC_INVOCATION_ID(params));
}
-static bool exec_context_needs_cap_sys_admin(const ExecContext *context, const ExecParameters *params) {
+static bool exec_context_needs_cap_sys_admin(const ExecContext *context) {
assert(context);
- assert(params);
return context->private_users != PRIVATE_USERS_NO ||
context->private_tmp != PRIVATE_TMP_NO ||
context->protect_kernel_tunables ||
context->protect_kernel_modules ||
context->protect_kernel_logs ||
- exec_needs_cgroup_mount(context, params) ||
+ exec_needs_cgroup_mount(context) ||
context->protect_clock ||
context->protect_hostname != PROTECT_HOSTNAME_NO ||
!strv_isempty(context->read_write_paths) ||
/* If we need unprivileged private users, we've already unshared a user namespace by the time we call
* setup_delegated_namespaces() for the first time so let's make sure we do all other namespace
* unsharing in the first call to setup_delegated_namespaces() by returning false here. */
- if (!have_cap_sys_admin && exec_context_needs_cap_sys_admin(context, params))
+ if (!have_cap_sys_admin && exec_context_needs_cap_sys_admin(context))
return false;
if (context->delegate_namespaces == NAMESPACE_FLAGS_INITIAL)
log_exec_warning(context, params, "PrivateIPC=yes is configured, but the kernel does not support IPC namespaces, ignoring.");
}
- if (needs_sandboxing && exec_needs_cgroup_namespace(context, params) &&
+ if (needs_sandboxing && exec_needs_cgroup_namespace(context) &&
exec_namespace_is_delegated(context, params, have_cap_sys_admin, CLONE_NEWCGROUP) == delegate) {
if (unshare(CLONE_NEWCGROUP) < 0) {
*reterr_exit_status = EXIT_NAMESPACE;
* to the cgroup namespace to environment variables and mounts. If chown/chmod fails, we should not pass memory
* pressure path environment variable or read-write mount to the unit. This is why we check if
* memory_pressure_path != NULL in the conditional below. */
- if (memory_pressure_path && needs_sandboxing && exec_needs_cgroup_namespace(context, params)) {
+ if (memory_pressure_path && needs_sandboxing && exec_needs_cgroup_namespace(context)) {
memory_pressure_path = mfree(memory_pressure_path);
r = cg_get_path("memory", "", "memory.pressure", &memory_pressure_path);
if (r < 0) {
}
}
- if (needs_sandboxing && !have_cap_sys_admin && exec_context_needs_cap_sys_admin(context, params)) {
+ if (needs_sandboxing && !have_cap_sys_admin && exec_context_needs_cap_sys_admin(context)) {
/* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
* Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
* set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
return context->private_ipc || context->ipc_namespace_path;
}
-static bool can_apply_cgroup_namespace(const ExecContext *context, const ExecParameters *params) {
- return cg_all_unified() > 0 && ns_type_supported(NAMESPACE_CGROUP);
-}
-
/* Returns true if the given ProtectControlGroups= value is PROTECT_CONTROL_GROUPS_PRIVATE or
 * PROTECT_CONTROL_GROUPS_STRICT, i.e. one of the two modes that ask for a cgroup namespace. This only
 * inspects the value passed in; it does not check whether cgroup namespaces are actually supported. */
static bool needs_cgroup_namespace(ProtectControlGroups i) {
return IN_SET(i, PROTECT_CONTROL_GROUPS_PRIVATE, PROTECT_CONTROL_GROUPS_STRICT);
}
-ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context, const ExecParameters *params) {
+ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context) {
assert(context);
/* If cgroup namespace is configured via ProtectControlGroups=private or strict but we can't actually
- * use cgroup namespace, either from not having unified hierarchy or kernel support, we ignore the
- * setting and do not unshare the namespace. ProtectControlGroups=private and strict get downgraded
- * to no and yes respectively. This ensures that strict always gets a read-only mount of /sys/fs/cgroup.
- *
- * TODO: Remove fallback once cgroupv1 support is removed in v258. */
- if (needs_cgroup_namespace(context->protect_control_groups) && !can_apply_cgroup_namespace(context, params)) {
+ * use cgroup namespace, we ignore the setting and do not unshare the namespace.
+ * ProtectControlGroups=private and strict get downgraded to no and yes respectively. This ensures
+ * that strict always gets a read-only mount of /sys/fs/cgroup/. */
+ if (needs_cgroup_namespace(context->protect_control_groups) && !ns_type_supported(NAMESPACE_CGROUP)) {
if (context->protect_control_groups == PROTECT_CONTROL_GROUPS_PRIVATE)
return PROTECT_CONTROL_GROUPS_NO;
if (context->protect_control_groups == PROTECT_CONTROL_GROUPS_STRICT)
return context->protect_control_groups;
}
-bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params) {
+bool exec_needs_cgroup_namespace(const ExecContext *context) {
assert(context);
- return needs_cgroup_namespace(exec_get_protect_control_groups(context, params));
+ return needs_cgroup_namespace(exec_get_protect_control_groups(context));
}
-bool exec_needs_cgroup_mount(const ExecContext *context, const ExecParameters *params) {
+bool exec_needs_cgroup_mount(const ExecContext *context) {
assert(context);
- return exec_get_protect_control_groups(context, params) != PROTECT_CONTROL_GROUPS_NO;
+ return exec_get_protect_control_groups(context) != PROTECT_CONTROL_GROUPS_NO;
}
-bool exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParameters *params) {
+bool exec_is_cgroup_mount_read_only(const ExecContext *context) {
assert(context);
- return IN_SET(exec_get_protect_control_groups(context, params), PROTECT_CONTROL_GROUPS_YES, PROTECT_CONTROL_GROUPS_STRICT);
+ return IN_SET(exec_get_protect_control_groups(context), PROTECT_CONTROL_GROUPS_YES, PROTECT_CONTROL_GROUPS_STRICT);
}
bool exec_needs_pid_namespace(const ExecContext *context) {
context->protect_kernel_tunables ||
context->protect_kernel_modules ||
context->protect_kernel_logs ||
- exec_needs_cgroup_mount(context, params) ||
+ exec_needs_cgroup_mount(context) ||
context->protect_proc != PROTECT_PROC_DEFAULT ||
context->proc_subset != PROC_SUBSET_ALL ||
exec_needs_ipc_namespace(context) ||
bool exec_needs_ipc_namespace(const ExecContext *context);
bool exec_needs_pid_namespace(const ExecContext *context);
-ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context, const ExecParameters *params);
-bool exec_needs_cgroup_namespace(const ExecContext *context, const ExecParameters *params);
-bool exec_needs_cgroup_mount(const ExecContext *context, const ExecParameters *params);
-bool exec_is_cgroup_mount_read_only(const ExecContext *context, const ExecParameters *params);
+ProtectControlGroups exec_get_protect_control_groups(const ExecContext *context);
+bool exec_needs_cgroup_namespace(const ExecContext *context);
+bool exec_needs_cgroup_mount(const ExecContext *context);
+bool exec_is_cgroup_mount_read_only(const ExecContext *context);
+
const char* exec_get_private_notify_socket_path(const ExecContext *context, const ExecParameters *params, bool needs_sandboxing);
/* These logging macros do the same logging as those in unit.h, but using ExecContext and ExecParameters