]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
cgroup: be more careful with which controllers we can enable/disable on a cgroup
authorLennart Poettering <lennart@poettering.net>
Thu, 22 Nov 2018 20:45:33 +0000 (21:45 +0100)
committerLennart Poettering <lennart@poettering.net>
Fri, 23 Nov 2018 12:41:37 +0000 (13:41 +0100)
This changes cg_enable_everywhere() to return which controllers are
enabled for the specified cgroup. This information is then used to
correctly track the enablement mask currently in effect for a unit.
Moreover, when we try to turn off a controller, and this works, then
this is indicates that the parent unit might succesfully turn it off
now, too as our unit might have kept it busy.

So far, when realizing cgroups, i.e. when syncing up the kernel
representation of relevant cgroups with our own idea we would strictly
work from the root to the leaves. This is generally a good approach, as
when controllers are enabled this has to happen in root-to-leaves order.
However, when controllers are disabled this has to happen in the
opposite order: in leaves-to-root order (this is because controllers can
only be enabled in a child if it is already enabled in the parent, and
if it shall be disabled in the parent then it has to be disabled in the
child first, otherwise it is considered busy when it is attempted to
remove it in the parent).

To make things complicated when invalidating a unit's cgroup membershup
systemd can actually turn off some controllers previously turned on at
the very same time as it turns on other controllers previously turned
off. In such a case we have to work up leaves-to-root *and*
root-to-leaves right after each other. With this patch this is
implemented: we still generally operate root-to-leaves, but as soon as
we noticed we successfully turned off a controller previously turned on
for a cgroup we'll re-enqueue the cgroup realization for all parents of
a unit, thus implementing leaves-to-root where necessary.

src/basic/cgroup-util.c
src/basic/cgroup-util.h
src/core/cgroup.c
src/core/cgroup.h
src/nspawn/nspawn-cgroup.c

index 2d3039148939da219e15e91f3188b95e1ecf7ed2..d38452cfcbb8ed33798fedfb8e04fddcb262bcdc 100644 (file)
@@ -2595,22 +2595,45 @@ int cg_unified_flush(void) {
         return cg_unified_update();
 }
 
-int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
+int cg_enable_everywhere(
+                CGroupMask supported,
+                CGroupMask mask,
+                const char *p,
+                CGroupMask *ret_result_mask) {
+
         _cleanup_fclose_ FILE *f = NULL;
         _cleanup_free_ char *fs = NULL;
         CGroupController c;
+        CGroupMask ret = 0;
         int r;
 
         assert(p);
 
-        if (supported == 0)
+        if (supported == 0) {
+                if (ret_result_mask)
+                        *ret_result_mask = 0;
                 return 0;
+        }
 
         r = cg_all_unified();
         if (r < 0)
                 return r;
-        if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
+        if (r == 0) {
+                /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+                 * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+                 * caller tends to use the returned mask later on to compare if all controllers where properly joined,
+                 * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+                 * minimize surprises here and reduce triggers for re-realization by always saying we fully
+                 * succeeded.) */
+                if (ret_result_mask)
+                        *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+                                                                               * CGROUP_MASK_V2: The 'supported' mask
+                                                                               * might contain pure-V1 or BPF
+                                                                               * controllers, and we never want to
+                                                                               * claim that we could enable those with
+                                                                               * cgroup.subtree_control */
                 return 0;
+        }
 
         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
         if (r < 0)
@@ -2644,10 +2667,39 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
                                 log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
                                                 FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
                                 clearerr(f);
+
+                                /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+                                 * happens for example when we attempt to turn off a controller up in the tree that is
+                                 * used down in the tree. */
+                                if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+                                                                           * only here, and not follow the same logic
+                                                                           * for other errors such as EINVAL or
+                                                                           * EOPNOTSUPP or anything else. That's
+                                                                           * because EBUSY indicates that the
+                                                                           * controllers is currently enabled and
+                                                                           * cannot be disabled because something down
+                                                                           * the hierarchy is still using it. Any other
+                                                                           * error most likely means something like "I
+                                                                           * never heard of this controller" or
+                                                                           * similar. In the former case it's hence
+                                                                           * safe to assume the controller is still on
+                                                                           * after the failed operation, while in the
+                                                                           * latter case it's safer to assume the
+                                                                           * controller is unknown and hence certainly
+                                                                           * not enabled. */
+                                        ret |= bit;
+                        } else {
+                                /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+                                if (FLAGS_SET(mask, bit))
+                                        ret |= bit;
                         }
                 }
         }
 
+        /* Let's return the precise set of controllers now enabled for the cgroup. */
+        if (ret_result_mask)
+                *ret_result_mask = ret;
+
         return 0;
 }
 
index 7919864df9198f6919ecff351ce67e2922d7fc37..ea9a3332904e99e129bdd2f4aa9f5f3a59d58724 100644 (file)
@@ -245,7 +245,7 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m
 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata);
 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata);
 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root);
-int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p);
+int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask);
 
 int cg_mask_supported(CGroupMask *ret);
 int cg_mask_from_string(const char *s, CGroupMask *ret);
index 7e5ee1b25b935d06b0554ec9410345d5c5be325c..ec58d7820b48b63ac1a34cdf56a533e55cab3cc8 100644 (file)
@@ -1593,8 +1593,8 @@ static int unit_create_cgroup(
                 CGroupMask enable_mask) {
 
         CGroupContext *c;
-        int r;
         bool created;
+        int r;
 
         assert(u);
 
@@ -1618,18 +1618,37 @@ static int unit_create_cgroup(
 
         /* Preserve enabled controllers in delegated units, adjust others. */
         if (created || !unit_cgroup_delegate(u)) {
+                CGroupMask result_mask = 0;
 
                 /* Enable all controllers we need */
-                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path);
+                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m",
-                                               u->cgroup_path);
+                        log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path);
+
+                /* If we just turned off a controller, this might release the controller for our parent too, let's
+                 * enqueue the parent for re-realization in that case again. */
+                if (UNIT_ISSET(u->slice)) {
+                        CGroupMask turned_off;
+
+                        turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0);
+                        if (turned_off != 0) {
+                                Unit *parent;
+
+                                /* Force the parent to propagate the enable mask to the kernel again, by invalidating
+                                 * the controller we just turned off. */
+
+                                for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice))
+                                        unit_invalidate_cgroup(parent, turned_off);
+                        }
+                }
+
+                /* Remember what's actually enabled now */
+                u->cgroup_enabled_mask = result_mask;
         }
 
         /* Keep track that this is now realized */
         u->cgroup_realized = true;
         u->cgroup_realized_mask = target_mask;
-        u->cgroup_enabled_mask = enable_mask;
 
         if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
 
@@ -1815,7 +1834,7 @@ static bool unit_has_mask_realized(
                 u->cgroup_invalidated_mask == 0;
 }
 
-static void unit_add_to_cgroup_realize_queue(Unit *u) {
+void unit_add_to_cgroup_realize_queue(Unit *u) {
         assert(u);
 
         if (u->in_cgroup_realize_queue)
index 2c7287841a1ec9e9901c5bf6483dca38b470d3cc..64dec408ccdc1413f529923a23964ee269f86293 100644 (file)
@@ -151,12 +151,13 @@ CGroupMask unit_get_delegate_mask(Unit *u);
 CGroupMask unit_get_members_mask(Unit *u);
 CGroupMask unit_get_siblings_mask(Unit *u);
 CGroupMask unit_get_subtree_mask(Unit *u);
-
 CGroupMask unit_get_target_mask(Unit *u);
 CGroupMask unit_get_enable_mask(Unit *u);
 
 void unit_update_cgroup_members_masks(Unit *u);
 
+void unit_add_to_cgroup_realize_queue(Unit *u);
+
 const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask);
 char *unit_default_cgroup_path(Unit *u);
 int unit_set_cgroup_path(Unit *u, const char *path);
index 0197474dbc6f2ceb242808142b8f254130f74d92..53c42f0ee478e08a955ee53172dd41c376193c13 100644 (file)
@@ -189,7 +189,7 @@ int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested)
         }
 
         /* Try to enable as many controllers as possible for the new payload. */
-        (void) cg_enable_everywhere(supported, supported, cgroup);
+        (void) cg_enable_everywhere(supported, supported, cgroup, NULL);
         return 0;
 }