From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 6 Feb 2018 10:57:35 +0000 (+0100)
Subject: cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and... 
X-Git-Tag: v238~101^2~20
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1d9cc8768f173b25757c01aa0d4c7be7cd7116bc;p=thirdparty%2Fsystemd.git

cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only

Currently we allowed delegation for alluntis with cgroup backing
except for slices. Let's make this a bit more strict for now, and only
allow this in service and scope units.

Let's also add a generic accessor unit_cgroup_delegate() for checking
whether a unit has delegation turned on that checks the new bool first.

Also, when doing transient units, let's explcitly refuse turning on
delegation for unit types that don#t support it. This is mostly
cosmetical as we wouldn't act on the delegation request anyway, but
certainly helpful for debugging.
---

diff --git a/src/core/bpf-firewall.c b/src/core/bpf-firewall.c
index f3f40fb0e87..e11550f11c5 100644
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@@ -569,7 +569,7 @@ int bpf_firewall_install(Unit *u) {
                 if (r < 0)
                         return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");
 
-                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
                 if (r < 0)
                         return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
         } else {
@@ -584,7 +584,7 @@ int bpf_firewall_install(Unit *u) {
                 if (r < 0)
                         return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");
 
-                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
                 if (r < 0)
                         return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
         } else {
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 97b3756567f..d05e338d110 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1124,14 +1124,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
          *
          * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
 
-        if (u->type == UNIT_SLICE)
-                return 0;
-
-        c = unit_get_cgroup_context(u);
-        if (!c)
-                return 0;
-
-        if (!c->delegate)
+        if (!unit_cgroup_delegate(u))
                 return 0;
 
         if (cg_all_unified() <= 0) {
@@ -1142,6 +1135,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
                         return 0;
         }
 
+        assert_se(c = unit_get_cgroup_context(u));
         return c->delegate_controllers;
 }
 
@@ -1496,7 +1490,7 @@ static int unit_create_cgroup(
         u->cgroup_enabled_mask = enable_mask;
         u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
 
-        if (u->type != UNIT_SLICE && !c->delegate) {
+        if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
 
                 /* Then, possibly move things over, but not if
                  * subgroups may contain processes, which is the case
@@ -2563,6 +2557,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
         }
 }
 
+bool unit_cgroup_delegate(Unit *u) {
+        CGroupContext *c;
+
+        assert(u);
+
+        if (!UNIT_VTABLE(u)->can_delegate)
+                return false;
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return false;
+
+        return c->delegate;
+}
+
 void manager_invalidate_startup_units(Manager *m) {
         Iterator i;
         Unit *u;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 1f504414126..51133134054 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -219,3 +219,5 @@ void manager_invalidate_startup_units(Manager *m);
 
 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
 CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
+
+bool unit_cgroup_delegate(Unit *u);
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index f8d90d4b3a9..30456fa9f5d 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -351,6 +351,9 @@ static int bus_cgroup_set_transient_property(
         if (streq(name, "Delegate")) {
                 int b;
 
+                if (!UNIT_VTABLE(u)->can_delegate)
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
                 r = sd_bus_message_read(message, "b", &b);
                 if (r < 0)
                         return r;
@@ -367,6 +370,9 @@ static int bus_cgroup_set_transient_property(
         } else if (streq(name, "DelegateControllers")) {
                 CGroupMask mask = 0;
 
+                if (!UNIT_VTABLE(u)->can_delegate)
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
                 r = sd_bus_message_enter_container(message, 'a', "s");
                 if (r < 0)
                         return r;
diff --git a/src/core/scope.c b/src/core/scope.c
index 2831e057a1e..47c47bc7083 100644
--- a/src/core/scope.c
+++ b/src/core/scope.c
@@ -599,6 +599,7 @@ const UnitVTable scope_vtable = {
         .private_section = "Scope",
 
         .can_transient = true,
+        .can_delegate = true,
 
         .init = scope_init,
         .load = scope_load,
diff --git a/src/core/service.c b/src/core/service.c
index 37904874b5c..3c1dbdfe0f8 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -3909,6 +3909,9 @@ const UnitVTable service_vtable = {
                 "Install\0",
         .private_section = "Service",
 
+        .can_transient = true,
+        .can_delegate = true,
+
         .init = service_init,
         .done = service_done,
         .load = service_load,
@@ -3954,7 +3957,6 @@ const UnitVTable service_vtable = {
 
         .get_timeout = service_get_timeout,
         .needs_console = service_needs_console,
-        .can_transient = true,
 
         .status_message_formats = {
                 .starting_stopping = {
diff --git a/src/core/unit.c b/src/core/unit.c
index 9a57bcfb4b3..68f29137b8e 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -4541,22 +4541,15 @@ int unit_kill_context(
 
                 } else if (r > 0) {
 
-                        /* FIXME: For now, on the legacy hierarchy, we
-                         * will not wait for the cgroup members to die
-                         * if we are running in a container or if this
-                         * is a delegation unit, simply because cgroup
-                         * notification is unreliable in these
-                         * cases. It doesn't work at all in
-                         * containers, and outside of containers it
-                         * can be confused easily by left-over
-                         * directories in the cgroup â which however
-                         * should not exist in non-delegated units. On
-                         * the unified hierarchy that's different,
-                         * there we get proper events. Hence rely on
-                         * them. */
+                        /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if
+                         * we are running in a container or if this is a delegation unit, simply because cgroup
+                         * notification is unreliable in these cases. It doesn't work at all in containers, and outside
+                         * of containers it can be confused easily by left-over directories in the cgroup â which
+                         * however should not exist in non-delegated units. On the unified hierarchy that's different,
+                         * there we get proper events. Hence rely on them. */
 
                         if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
-                            (detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate)))
+                            (detect_container() == 0 && !unit_cgroup_delegate(u)))
                                 wait_for_exit = true;
 
                         if (send_sighup) {
@@ -5007,7 +5000,7 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
         assert(p);
 
         p->cgroup_path = u->cgroup_path;
-        SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate));
+        SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 }
 
 int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
diff --git a/src/core/unit.h b/src/core/unit.h
index 32105830509..6d49c7d5f22 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -568,6 +568,9 @@ struct UnitVTable {
         /* True if transient units of this type are OK */
         bool can_transient:1;
 
+        /* True if cgroup delegation is permissible */
+        bool can_delegate:1;
+
         /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */
         bool gc_jobs:1;
 };