From f98c25850fa5eaad815d7ea2cf51f5795ab43d42 Mon Sep 17 00:00:00 2001 From: Chris Down Date: Sat, 17 Nov 2018 11:19:07 +0000 Subject: [PATCH] cgroup v2: Don't require CPU controller for CPU accounting in 4.15+ systemd only uses functions that are as of Linux 4.15+ provided externally to the CPU controller (currently usage_usec), so if we have a new enough kernel, we don't need to set CGROUP_MASK_CPU for CPUAccounting=true as the CPU controller does not need to necessarily be enabled in this case. Part of this patch is modelled on an earlier patch by Ryutaroh Matsumoto (see PR #9665). --- src/basic/cgroup-util.c | 52 +++++++++++++++++++++++++++++ src/basic/cgroup-util.h | 3 ++ src/core/cgroup.c | 13 ++++---- src/core/dbus-cgroup.c | 2 +- src/test/test-cgroup-mask.c | 66 +++++++++++++++++++++++-------------- test/daughter.service | 1 + 6 files changed, 105 insertions(+), 32 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 04508508324..613801f2c55 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -2822,3 +2823,54 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); + +CGroupMask get_cpu_accounting_mask(void) { + static CGroupMask needed_mask = (CGroupMask) -1; + + /* On kernel ≥4.15 with unified hierarchy, cpu.stat's usage_usec is + * provided externally from the CPU controller, which means we don't + * need to enable the CPU controller just to get metrics. This is good, + * because enabling the CPU controller comes at a minor performance + * hit, especially when it's propagated deep into large hierarchies. + * There's also no separate CPU accounting controller available within + * a unified hierarchy. + * + * This combination of factors results in the desired cgroup mask to + * enable for CPU accounting varying as follows: + * + * ╔═════════════════════╤═════════════════════╗ + * ║ Linux ≥4.15 │ Linux <4.15 ║ + * ╔═══════════════╬═════════════════════╪═════════════════════╣ + * ║ Unified ║ nothing │ CGROUP_MASK_CPU ║ + * ╟───────────────╫─────────────────────┼─────────────────────╢ + * ║ Hybrid/Legacy ║ CGROUP_MASK_CPUACCT │ CGROUP_MASK_CPUACCT ║ + * ╚═══════════════╩═════════════════════╧═════════════════════╝ + * + * We check kernel version here instead of manually checking whether + * cpu.stat is present for every cgroup, as that check in itself would + * already be fairly expensive. + * + * Kernels where this patch has been backported will therefore have the + * CPU controller enabled unnecessarily. This is more expensive than + * necessary, but harmless. ☺️ + */ + + if (needed_mask == (CGroupMask) -1) { + if (cg_all_unified()) { + struct utsname u; + assert_se(uname(&u) >= 0); + + if (str_verscmp(u.release, "4.15") < 0) + needed_mask = CGROUP_MASK_CPU; + else + needed_mask = 0; + } else + needed_mask = CGROUP_MASK_CPUACCT; + } + + return needed_mask; +} + +bool cpu_accounting_is_cheap(void) { + return get_cpu_accounting_mask() == 0; +} diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 463a8dc84ad..7919864df91 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -69,6 +69,9 @@ static inline CGroupMask CGROUP_MASK_EXTEND_JOINED(CGroupMask mask) { return mask; } +CGroupMask get_cpu_accounting_mask(void); +bool cpu_accounting_is_cheap(void); + /* Special values for all weight knobs on unified hierarchy */ #define CGROUP_WEIGHT_INVALID ((uint64_t) -1) #define CGROUP_WEIGHT_MIN UINT64_C(1) diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 45a7581d45c..0fb09935e1f 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1178,7 +1178,7 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { /* Figure out which controllers we need, based on the cgroup context object */ if (c->cpu_accounting) - mask |= CGROUP_MASK_CPUACCT; + mask |= get_cpu_accounting_mask(); if (cgroup_context_has_cpu_weight(c) || cgroup_context_has_cpu_shares(c) || @@ -2617,13 +2617,15 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { r = cg_all_unified(); if (r < 0) return r; + + /* Requisite controllers for CPU accounting are not enabled */ + if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0) + return -ENODATA; + if (r > 0) { _cleanup_free_ char *val = NULL; uint64_t us; - if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0) - return -ENODATA; - r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val); if (r < 0) return r; @@ -2636,9 +2638,6 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { ns = us * NSEC_PER_USEC; } else { - if ((u->cgroup_realized_mask & CGROUP_MASK_CPUACCT) == 0) - return -ENODATA; - r = cg_get_attribute("cpuacct", u->cgroup_path, "cpuacct.usage", &v); if (r == -ENOENT) return -ENODATA; diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 856d9d91b22..a2f532076dc 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -603,7 +603,7 @@ int bus_cgroup_set_property( flags |= UNIT_PRIVATE; if (streq(name, "CPUAccounting")) - return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, CGROUP_MASK_CPUACCT|CGROUP_MASK_CPU, message, flags, error); + return bus_cgroup_set_boolean(u, name, &c->cpu_accounting, get_cpu_accounting_mask(), message, flags, error); if (streq(name, "CPUWeight")) return bus_cgroup_set_cpu_weight(u, name, &c->cpu_weight, message, flags, error); diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index d9223f5f61f..143e7aa4247 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -2,6 +2,8 @@ #include +#include "cgroup.h" +#include "cgroup-util.h" #include "macro.h" #include "manager.h" #include "rm-rf.h" @@ -10,11 +12,27 @@ #include "tests.h" #include "unit.h" +#define ASSERT_CGROUP_MASK(got, expected) \ + log_cgroup_mask(got, expected); \ + assert_se(got == expected) + +#define ASSERT_CGROUP_MASK_JOINED(got, expected) ASSERT_CGROUP_MASK(got, CGROUP_MASK_EXTEND_JOINED(expected)) + +static void log_cgroup_mask(CGroupMask got, CGroupMask expected) { + _cleanup_free_ char *e_store = NULL, *g_store = NULL; + + assert_se(cg_mask_to_string(expected, &e_store) >= 0); + log_info("Expected mask: %s\n", e_store); + assert_se(cg_mask_to_string(got, &g_store) >= 0); + log_info("Got mask: %s\n", g_store); +} + static int test_cgroup_mask(void) { _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL; _cleanup_(manager_freep) Manager *m = NULL; Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep; int r; + CGroupMask cpu_accounting_mask = get_cpu_accounting_mask(); r = enter_cgroup_subroot(); if (r == -ENOMEDIUM) @@ -57,36 +75,36 @@ static int test_cgroup_mask(void) { root = UNIT_DEREF(parent->slice); /* Verify per-unit cgroups settings. */ - assert_se(unit_get_own_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT)); - assert_se(unit_get_own_mask(daughter) == 0); - assert_se(unit_get_own_mask(grandchild) == 0); - assert_se(unit_get_own_mask(parent_deep) == CGROUP_MASK_MEMORY); - assert_se(unit_get_own_mask(parent) == (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); - assert_se(unit_get_own_mask(root) == 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(daughter), cpu_accounting_mask); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO)); + ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0); /* Verify aggregation of member masks */ - assert_se(unit_get_members_mask(son) == 0); - assert_se(unit_get_members_mask(daughter) == 0); - assert_se(unit_get_members_mask(grandchild) == 0); - assert_se(unit_get_members_mask(parent_deep) == 0); - assert_se(unit_get_members_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_members_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(son), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(daughter), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of sibling masks. */ - assert_se(unit_get_siblings_mask(son) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(daughter) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(grandchild) == 0); - assert_se(unit_get_siblings_mask(parent_deep) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(parent) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); - assert_se(unit_get_siblings_mask(root) == (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(son), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(daughter), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); + ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY)); /* Verify aggregation of target masks. */ - assert_se(unit_get_target_mask(son) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(daughter) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(grandchild) == 0); - assert_se(unit_get_target_mask(parent_deep) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(parent) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); - assert_se(unit_get_target_mask(root) == ((CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_target_mask(son), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_target_mask(daughter), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0); + ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); + ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported)); return 0; } diff --git a/test/daughter.service b/test/daughter.service index aebedca3484..c790b9d04bb 100644 --- a/test/daughter.service +++ b/test/daughter.service @@ -5,3 +5,4 @@ Description=Daughter Service Slice=parent.slice Type=oneshot ExecStart=/bin/true +CPUAccounting=true -- 2.39.2