]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
cgroup v2: Don't require CPU controller for CPU accounting in 4.15+
authorChris Down <chris@chrisdown.name>
Sat, 17 Nov 2018 11:19:07 +0000 (11:19 +0000)
committerChris Down <chris@chrisdown.name>
Sun, 18 Nov 2018 12:21:41 +0000 (12:21 +0000)
systemd only uses functions that are as of Linux 4.15+ provided
externally to the CPU controller (currently usage_usec), so if we have a
new enough kernel, we don't need to set CGROUP_MASK_CPU for
CPUAccounting=true as the CPU controller does not need to necessarily be
enabled in this case.

Part of this patch is modelled on an earlier patch by Ryutaroh Matsumoto
(see PR #9665).

src/basic/cgroup-util.c
src/basic/cgroup-util.h
src/core/cgroup.c
src/core/dbus-cgroup.c
src/test/test-cgroup-mask.c
test/daughter.service

index 04508508324bbb94e9aa181e4fba33ad69d03898..613801f2c5539c2ced9ec0623acb639ea27437e2 100644 (file)
@@ -12,6 +12,7 @@
 #include <sys/stat.h>
 #include <sys/statfs.h>
 #include <sys/types.h>
+#include <sys/utsname.h>
 #include <sys/xattr.h>
 #include <unistd.h>
 
@@ -2822,3 +2823,54 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
 };
 
 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
+
+CGroupMask get_cpu_accounting_mask(void) {
+        static CGroupMask needed_mask = (CGroupMask) -1;
+
+        /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is
+         * provided externally from the CPU controller, which means we don't
+         * need to enable the CPU controller just to get metrics. This is good,
+         * because enabling the CPU controller comes at a minor performance
+         * hit, especially when it's propagated deep into large hierarchies.
+         * There's also no separate CPU accounting controller available within
+         * a unified hierarchy.
+         *
+         * This combination of factors results in the desired cgroup mask to
+         * enable for CPU accounting varying as follows:
+         *
+         *                   ╔═════════════════════╤═════════════════════╗
+         *                   ║     Linux ≥4.15     │     Linux <4.15     ║
+         *   ╔═══════════════╬═════════════════════╪═════════════════════╣
+         *   ║ Unified       ║ nothing             │ CGROUP_MASK_CPU     ║
+         *   ╟───────────────╫─────────────────────┼─────────────────────╢
+         *   ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║
+         *   ╚═══════════════╩═════════════════════╧═════════════════════╝
+         *
+         * We check kernel version here instead of manually checking whether
+         * cpu.stat is present for every cgroup, as that check in itself would
+         * already be fairly expensive.
+         *
+         * Kernels where this patch has been backported will therefore have the
+         * CPU controller enabled unnecessarily. This is more expensive than
+         * necessary, but harmless. ☺️
+         */
+
+        if (needed_mask == (CGroupMask) -1) {
+                if (cg_all_unified()) {
+                        struct utsname u;
+                        assert_se(uname(&u) >= 0);
+
+                        if (str_verscmp(u.release, "4.15") < 0)
+                                needed_mask = CGROUP_MASK_CPU;
+                        else
+                                needed_mask = 0;
+                } else
+                        needed_mask = CGROUP_MASK_CPUACCT;
+        }
+
+        return needed_mask;
+}
+
+bool cpu_accounting_is_cheap(void) {
+        return get_cpu_accounting_mask() == 0;
+}
index 463a8dc84ad6b34b1419ec680acf2e58a295444f..7919864df9198f6919ecff351ce67e2922d7fc37 100644 (file)
@@ -69,6 +69,9 @@ static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) {
         return mask;
 }
 
+CGroupMask get_cpu_accounting_mask(void);
+bool cpu_accounting_is_cheap(void);
+
 /* Special values for all weight knobs on unified hierarchy */
 #define CGROUP_WEIGHT_INVALID ((uint64_t) -1)
 #define CGROUP_WEIGHT_MIN UINT64_C(1)
index 45a7581d45c77d6ed13a89233a06e6d9814ce668..0fb09935e1fb0a2c0f68b213247ec7c5e28a0916 100644 (file)
@@ -1178,7 +1178,7 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
         /* Figure out which controllers we need, based on the cgroup context object */
 
         if (c->cpu_accounting)
-                mask |= CGROUP_MASK_CPUACCT;
+                mask |= get_cpu_accounting_mask();
 
         if (cgroup_context_has_cpu_weight(c) ||
             cgroup_context_has_cpu_shares(c) ||
@@ -2617,13 +2617,15 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         r = cg_all_unified();
         if (r < 0)
                 return r;
+
+        /* Requisite controllers for CPU accounting are not enabled */
+        if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+                return -ENODATA;
+
         if (r > 0) {
                 _cleanup_free_ char *val = NULL;
                 uint64_t us;
 
-                if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
-                        return -ENODATA;
-
                 r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
                 if (r < 0)
                         return r;
@@ -2636,9 +2638,6 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
 
                 ns = us * NSEC_PER_USEC;
         } else {
-                if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0)
-                        return -ENODATA;
-
                 r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v);
                 if (r == -ENOENT)
                         return -ENODATA;
index 856d9d91b222f174a4cb73b5f39f2963d68315e5..a2f532076dcdf0798c24b10f98d154c3ff471e76 100644 (file)
@@ -603,7 +603,7 @@ int bus_cgroup_set_property(
         flags |= UNIT_PRIVATE;
 
         if (streq(name, "CPUAccounting"))
-                return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU, message, flags, error);
+                return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error);
 
         if (streq(name, "CPUWeight"))
                 return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error);
index d9223f5f61f252d6812ab0392f329e79bd5643df..143e7aa42477edb1f7c72f6a70111369d3511754 100644 (file)
@@ -2,6 +2,8 @@
 
 #include <stdio.h>
 
+#include "cgroup.h"
+#include "cgroup-util.h"
 #include "macro.h"
 #include "manager.h"
 #include "rm-rf.h"
 #include "tests.h"
 #include "unit.h"
 
+#define ASSERT_CGROUP_MASK(got, expected) \
+        log_cgroup_mask(got, expected); \
+        assert_se(got == expected)
+
+#define ASSERT_CGROUP_MASK_JOINED(got, expected) ASSERT_CGROUP_MASK(got, CGROUP_MASK_EXTEND_JOINED(expected))
+
+static void log_cgroup_mask(CGroupMask got, CGroupMask expected) {
+        _cleanup_free_ char *e_store = NULL, *g_store = NULL;
+
+        assert_se(cg_mask_to_string(expected, &e_store) >= 0);
+        log_info("Expected mask: %s\n", e_store);
+        assert_se(cg_mask_to_string(got, &g_store) >= 0);
+        log_info("Got mask: %s\n", g_store);
+}
+
 static int test_cgroup_mask(void) {
         _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
         _cleanup_(manager_freep) Manager *m = NULL;
         Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep;
         int r;
+        CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
 
         r = enter_cgroup_subroot();
         if (r == -ENOMEDIUM)
@@ -57,36 +75,36 @@ static int test_cgroup_mask(void) {
         root = UNIT_DEREF(parent->slice);
 
         /* Verify per-unit cgroups settings. */
-        assert_se(unit_get_own_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT));
-        assert_se(unit_get_own_mask(daughter) == 0);
-        assert_se(unit_get_own_mask(grandchild) == 0);
-        assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY);
-        assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
-        assert_se(unit_get_own_mask(root) == 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(daughter), cpu_accounting_mask);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0);
 
         /* Verify aggregation of member masks */
-        assert_se(unit_get_members_mask(son) == 0);
-        assert_se(unit_get_members_mask(daughter) == 0);
-        assert_se(unit_get_members_mask(grandchild) == 0);
-        assert_se(unit_get_members_mask(parent_deep) == 0);
-        assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
-        assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(son), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(daughter), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
 
         /* Verify aggregation of sibling masks. */
-        assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
-        assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
-        assert_se(unit_get_siblings_mask(grandchild) == 0);
-        assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY));
-        assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
-        assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(son), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(daughter), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0);
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+        ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
 
         /* Verify aggregation of target masks. */
-        assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
-        assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
-        assert_se(unit_get_target_mask(grandchild) == 0);
-        assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported));
-        assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
-        assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+        ASSERT_CGROUP_MASK(unit_get_target_mask(son), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+        ASSERT_CGROUP_MASK(unit_get_target_mask(daughter), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+        ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0);
+        ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+        ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+        ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
 
         return 0;
 }
index aebedca348491e19b95e3ddf36e5470547816818..c790b9d04bb7cb6a59494871a346f435b37fcfcd 100644 (file)
@@ -5,3 +5,4 @@ Description=Daughter Service
 Slice=parent.slice
 Type=oneshot
 ExecStart=/bin/true
+CPUAccounting=true