From: Lennart Poettering Date: Tue, 6 Feb 2018 10:57:35 +0000 (+0100) Subject: cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and... X-Git-Tag: v238~101^2~20 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1d9cc8768f173b25757c01aa0d4c7be7cd7116bc;p=thirdparty%2Fsystemd.git cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only Currently we allowed delegation for alluntis with cgroup backing except for slices. Let's make this a bit more strict for now, and only allow this in service and scope units. Let's also add a generic accessor unit_cgroup_delegate() for checking whether a unit has delegation turned on that checks the new bool first. Also, when doing transient units, let's explcitly refuse turning on delegation for unit types that don#t support it. This is mostly cosmetical as we wouldn't act on the delegation request anyway, but certainly helpful for debugging. --- diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c index f3f40fb0e87..e11550f11c5 100644 --- a/src/core/bpf-firewall.c +++ b/src/core/bpf-firewall.c @@ -569,7 +569,7 @@ int bpf_firewall_install(Unit *u) { if (r < 0) return log_error_errno(r, "Kernel upload of egress BPF program failed: %m"); - r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0); + r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0); if (r < 0) return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path); } else { @@ -584,7 +584,7 @@ int bpf_firewall_install(Unit *u) { if (r < 0) return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m"); - r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0); + r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0); if (r < 0) return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path); } else { diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 97b3756567f..d05e338d110 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1124,14 +1124,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { * * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */ - if (u->type == UNIT_SLICE) - return 0; - - c = unit_get_cgroup_context(u); - if (!c) - return 0; - - if (!c->delegate) + if (!unit_cgroup_delegate(u)) return 0; if (cg_all_unified() <= 0) { @@ -1142,6 +1135,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) { return 0; } + assert_se(c = unit_get_cgroup_context(u)); return c->delegate_controllers; } @@ -1496,7 +1490,7 @@ static int unit_create_cgroup( u->cgroup_enabled_mask = enable_mask; u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF; - if (u->type != UNIT_SLICE && !c->delegate) { + if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) { /* Then, possibly move things over, but not if * subgroups may contain processes, which is the case @@ -2563,6 +2557,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) { } } +bool unit_cgroup_delegate(Unit *u) { + CGroupContext *c; + + assert(u); + + if (!UNIT_VTABLE(u)->can_delegate) + return false; + + c = unit_get_cgroup_context(u); + if (!c) + return false; + + return c->delegate; +} + void manager_invalidate_startup_units(Manager *m) { Iterator i; Unit *u; diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 1f504414126..51133134054 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -219,3 +219,5 @@ void manager_invalidate_startup_units(Manager *m); const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_; CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_; + +bool unit_cgroup_delegate(Unit *u); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index f8d90d4b3a9..30456fa9f5d 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -351,6 +351,9 @@ static int bus_cgroup_set_transient_property( if (streq(name, "Delegate")) { int b; + if (!UNIT_VTABLE(u)->can_delegate) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type"); + r = sd_bus_message_read(message, "b", &b); if (r < 0) return r; @@ -367,6 +370,9 @@ static int bus_cgroup_set_transient_property( } else if (streq(name, "DelegateControllers")) { CGroupMask mask = 0; + if (!UNIT_VTABLE(u)->can_delegate) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type"); + r = sd_bus_message_enter_container(message, 'a', "s"); if (r < 0) return r; diff --git a/src/core/scope.c b/src/core/scope.c index 2831e057a1e..47c47bc7083 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -599,6 +599,7 @@ const UnitVTable scope_vtable = { .private_section = "Scope", .can_transient = true, + .can_delegate = true, .init = scope_init, .load = scope_load, diff --git a/src/core/service.c b/src/core/service.c index 37904874b5c..3c1dbdfe0f8 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -3909,6 +3909,9 @@ const UnitVTable service_vtable = { "Install\0", .private_section = "Service", + .can_transient = true, + .can_delegate = true, + .init = service_init, .done = service_done, .load = service_load, @@ -3954,7 +3957,6 @@ const UnitVTable service_vtable = { .get_timeout = service_get_timeout, .needs_console = service_needs_console, - .can_transient = true, .status_message_formats = { .starting_stopping = { diff --git a/src/core/unit.c b/src/core/unit.c index 9a57bcfb4b3..68f29137b8e 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4541,22 +4541,15 @@ int unit_kill_context( } else if (r > 0) { - /* FIXME: For now, on the legacy hierarchy, we - * will not wait for the cgroup members to die - * if we are running in a container or if this - * is a delegation unit, simply because cgroup - * notification is unreliable in these - * cases. It doesn't work at all in - * containers, and outside of containers it - * can be confused easily by left-over - * directories in the cgroup — which however - * should not exist in non-delegated units. On - * the unified hierarchy that's different, - * there we get proper events. Hence rely on - * them. */ + /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if + * we are running in a container or if this is a delegation unit, simply because cgroup + * notification is unreliable in these cases. It doesn't work at all in containers, and outside + * of containers it can be confused easily by left-over directories in the cgroup — which + * however should not exist in non-delegated units. On the unified hierarchy that's different, + * there we get proper events. Hence rely on them. */ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 || - (detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate))) + (detect_container() == 0 && !unit_cgroup_delegate(u))) wait_for_exit = true; if (send_sighup) { @@ -5007,7 +5000,7 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) { assert(p); p->cgroup_path = u->cgroup_path; - SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate)); + SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u)); } int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) { diff --git a/src/core/unit.h b/src/core/unit.h index 32105830509..6d49c7d5f22 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -568,6 +568,9 @@ struct UnitVTable { /* True if transient units of this type are OK */ bool can_transient:1; + /* True if cgroup delegation is permissible */ + bool can_delegate:1; + /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */ bool gc_jobs:1; };