]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: add CPUQuotaPeriodSec=
authorFilipe Brandenburger <filbranden@google.com>
Fri, 2 Nov 2018 16:21:57 +0000 (09:21 -0700)
committerFilipe Brandenburger <filbranden@google.com>
Thu, 14 Feb 2019 19:04:42 +0000 (11:04 -0800)
This new setting allows configuration of CFS period on the CPU cgroup, instead
of using a hardcoded default of 100ms.

Tested:
- Legacy cgroup + Unified cgroup
- systemctl set-property
- systemctl show
- Confirmed that the cgroup settings (such as cpu.cfs_period_ns) were set
  appropriately, including updating the CPU quota (cpu.cfs_quota_ns) when
  CPUQuotaPeriodSec= is updated.
- Checked that clamping works properly when either period or (quota * period)
  are below the resolution of 1ms, or if period is above the max of 1s.

docs/TRANSIENT-SETTINGS.md
man/systemd.resource-control.xml
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/shared/bus-unit-util.c
src/test/meson.build
src/test/test-cgroup-cpu.c [new file with mode: 0644]

index 0ac77f04976a849c4812e00798cf48f71790fc6a..ac89fb174ff96c5e4d1b4f402d3412801d0b60cf 100644 (file)
@@ -224,6 +224,7 @@ All cgroup/resource control settings are available for transient units
 ✓ CPUShares=
 ✓ StartupCPUShares=
 ✓ CPUQuota=
+✓ CPUQuotaPeriodSec=
 ✓ MemoryAccounting=
 ✓ MemoryMin=
 ✓ MemoryLow=
index a4d793c32ee9c3ea7cb4cfea1cb4b1da2c52eab3..744bfa938f7cbfd5ec9dd26ee82b7382f807ab58 100644 (file)
         </listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>CPUQuotaPeriodSec=</varname></term>
+
+        <listitem>
+          <para>Assign the duration over which the CPU time quota specified by <varname>CPUQuota=</varname> is measured.
+          Takes a time duration value in seconds, with an optional suffix such as "ms" for milliseconds (or "s" for seconds.)
+          The default setting is 100ms. The period is clamped to the range supported by the kernel, which is [1ms, 1000ms].
+          Additionally, the period is adjusted up so that the quota interval is also at least 1ms.
+          Setting <varname>CPUQuotaPeriodSec=</varname> to an empty value resets it to the default.</para>
+
+          <para>This controls the second field of <literal>cpu.max</literal> attribute on the unified control group hierarchy
+          and <literal>cpu.cfs_period_us</literal> on legacy. For details about these control group attributes, see
+          <ulink url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink> and
+          <ulink url="https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para>
+
+          <para>Example: <varname>CPUQuotaPeriodSec=10ms</varname> to request that the CPU quota is measured in periods of 10ms.</para>
+        </listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>MemoryAccounting=</varname></term>
 
index 18d470b6d675eead53b8a0281ec597d282cf8280..9fe3ba990711332a4a0b68da2b2dfcf011f6df6f 100644 (file)
@@ -25,7 +25,7 @@
 #include "string-util.h"
 #include "virt.h"
 
-#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
+#define CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
 /* Returns the log level to use when cgroup attribute writes fail. When an attribute is missing or we have access
  * problems we downgrade to LOG_DEBUG. This is supposed to be nice to container managers and kernels which want to mask
@@ -98,6 +98,7 @@ void cgroup_context_init(CGroupContext *c) {
                 .cpu_weight = CGROUP_WEIGHT_INVALID,
                 .startup_cpu_weight = CGROUP_WEIGHT_INVALID,
                 .cpu_quota_per_sec_usec = USEC_INFINITY,
+                .cpu_quota_period_usec = USEC_INFINITY,
 
                 .cpu_shares = CGROUP_CPU_SHARES_INVALID,
                 .startup_cpu_shares = CGROUP_CPU_SHARES_INVALID,
@@ -206,6 +207,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
         CGroupDeviceAllow *a;
         IPAddressAccessItem *iaai;
         char u[FORMAT_TIMESPAN_MAX];
+        char v[FORMAT_TIMESPAN_MAX];
 
         assert(c);
         assert(f);
@@ -224,6 +226,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sCPUShares=%" PRIu64 "\n"
                 "%sStartupCPUShares=%" PRIu64 "\n"
                 "%sCPUQuotaPerSecSec=%s\n"
+                "%sCPUQuotaPeriodSec=%s\n"
                 "%sIOWeight=%" PRIu64 "\n"
                 "%sStartupIOWeight=%" PRIu64 "\n"
                 "%sBlockIOWeight=%" PRIu64 "\n"
@@ -248,6 +251,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, c->cpu_shares,
                 prefix, c->startup_cpu_shares,
                 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
+                prefix, format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1),
                 prefix, c->io_weight,
                 prefix, c->startup_io_weight,
                 prefix, c->blockio_weight,
@@ -656,6 +660,39 @@ static uint64_t cgroup_context_cpu_shares(CGroupContext *c, ManagerState state)
                 return CGROUP_CPU_SHARES_DEFAULT;
 }
 
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period) {
+        /* kernel uses a minimum resolution of 1ms, so both period and (quota * period)
+         * need to be higher than that boundary. quota is specified in USecPerSec.
+         * Additionally, period must be at most max_period. */
+        assert(quota > 0);
+
+        return MIN(MAX3(period, resolution, resolution * USEC_PER_SEC / quota), max_period);
+}
+
+static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
+        usec_t new_period;
+
+        if (quota == USEC_INFINITY)
+                /* Always use default period for infinity quota. */
+                return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+        if (period == USEC_INFINITY)
+                /* Default period was requested. */
+                period = CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
+
+        /* Clamp to interval [1ms, 1s] */
+        new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
+
+        if (new_period != period) {
+                char v[FORMAT_TIMESPAN_MAX];
+                log_unit_full(u, LOG_WARNING, 0,
+                              "Clamping CPU interval for cpu.max: period is now %s",
+                              format_timespan(v, sizeof(v), new_period, 1));
+        }
+
+        return new_period;
+}
+
 static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
         char buf[DECIMAL_STR_MAX(uint64_t) + 2];
 
@@ -663,14 +700,15 @@ static void cgroup_apply_unified_cpu_weight(Unit *u, uint64_t weight) {
         (void) set_attribute_and_warn(u, "cpu", "cpu.weight", buf);
 }
 
-static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota) {
+static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
         char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
 
+        period = cgroup_cpu_adjust_period_and_log(u, period, quota);
         if (quota != USEC_INFINITY)
                 xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
-                         quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC, CGROUP_CPU_QUOTA_PERIOD_USEC);
+                         MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC), period);
         else
-                xsprintf(buf, "max " USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+                xsprintf(buf, "max " USEC_FMT "\n", period);
         (void) set_attribute_and_warn(u, "cpu", "cpu.max", buf);
 }
 
@@ -681,14 +719,16 @@ static void cgroup_apply_legacy_cpu_shares(Unit *u, uint64_t shares) {
         (void) set_attribute_and_warn(u, "cpu", "cpu.shares", buf);
 }
 
-static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota) {
+static void cgroup_apply_legacy_cpu_quota(Unit *u, usec_t quota, usec_t period) {
         char buf[DECIMAL_STR_MAX(usec_t) + 2];
 
-        xsprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
+        period = cgroup_cpu_adjust_period_and_log(u, period, quota);
+
+        xsprintf(buf, USEC_FMT "\n", period);
         (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_period_us", buf);
 
         if (quota != USEC_INFINITY) {
-                xsprintf(buf, USEC_FMT "\n", quota * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
+                xsprintf(buf, USEC_FMT "\n", MAX(quota * period / USEC_PER_SEC, USEC_PER_MSEC));
                 (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", buf);
         } else
                 (void) set_attribute_and_warn(u, "cpu", "cpu.cfs_quota_us", "-1\n");
@@ -911,7 +951,7 @@ static void cgroup_context_apply(
                                 weight = CGROUP_WEIGHT_DEFAULT;
 
                         cgroup_apply_unified_cpu_weight(u, weight);
-                        cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec);
+                        cgroup_apply_unified_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
 
                 } else {
                         uint64_t shares;
@@ -930,7 +970,7 @@ static void cgroup_context_apply(
                                 shares = CGROUP_CPU_SHARES_DEFAULT;
 
                         cgroup_apply_legacy_cpu_shares(u, shares);
-                        cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec);
+                        cgroup_apply_legacy_cpu_quota(u, c->cpu_quota_per_sec_usec, c->cpu_quota_period_usec);
                 }
         }
 
index 266daa20a5b6ef597ef60ae06b087da77e941b58..42da777c298df45f58e9ca061ba639209827c80e 100644 (file)
@@ -83,6 +83,7 @@ struct CGroupContext {
         uint64_t cpu_weight;
         uint64_t startup_cpu_weight;
         usec_t cpu_quota_per_sec_usec;
+        usec_t cpu_quota_period_usec;
 
         uint64_t io_weight;
         uint64_t startup_io_weight;
@@ -135,6 +136,8 @@ typedef enum CGroupIPAccountingMetric {
 typedef struct Unit Unit;
 typedef struct Manager Manager;
 
+usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
+
 void cgroup_context_init(CGroupContext *c);
 void cgroup_context_done(CGroupContext *c);
 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
index 53890bcafbfe49317c1cd0191b1ba7c078d9342f..d1c34fed1b721e87a2580975302f90f2d99dcbae 100644 (file)
@@ -330,6 +330,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("CPUShares", "t", NULL, offsetof(CGroupContext, cpu_shares), 0),
         SD_BUS_PROPERTY("StartupCPUShares", "t", NULL, offsetof(CGroupContext, startup_cpu_shares), 0),
         SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_per_sec_usec), 0),
+        SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0),
         SD_BUS_PROPERTY("IOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, io_accounting), 0),
         SD_BUS_PROPERTY("IOWeight", "t", NULL, offsetof(CGroupContext, io_weight), 0),
         SD_BUS_PROPERTY("StartupIOWeight", "t", NULL, offsetof(CGroupContext, startup_io_weight), 0),
@@ -727,6 +728,28 @@ int bus_cgroup_set_property(
 
                 return 1;
 
+        } else if (streq(name, "CPUQuotaPeriodUSec")) {
+                uint64_t u64;
+
+                r = sd_bus_message_read(message, "t", &u64);
+                if (r < 0)
+                        return r;
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        c->cpu_quota_period_usec = u64;
+                        unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
+                        if (c->cpu_quota_period_usec == USEC_INFINITY)
+                                unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
+                        else {
+                                char v[FORMAT_TIMESPAN_MAX];
+                                unit_write_settingf(u, flags, "CPUQuotaPeriodSec",
+                                                    "CPUQuotaPeriodSec=%s",
+                                                    format_timespan(v, sizeof(v), c->cpu_quota_period_usec, 1));
+                        }
+                }
+
+                return 1;
+
         } else if ((iol_type = cgroup_io_limit_type_from_string(name)) >= 0) {
                 const char *path;
                 unsigned n = 0;
index cdbc67f885d2f4c689d290d3bcfbb6b309de618a..ca3923bbb0842bf81d0bddffed32be50493b33be 100644 (file)
@@ -165,6 +165,7 @@ $1.StartupCPUWeight,             config_parse_cg_weight,             0,
 $1.CPUShares,                    config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.cpu_shares)
 $1.StartupCPUShares,             config_parse_cpu_shares,            0,                             offsetof($1, cgroup_context.startup_cpu_shares)
 $1.CPUQuota,                     config_parse_cpu_quota,             0,                             offsetof($1, cgroup_context)
+$1.CPUQuotaPeriodSec,            config_parse_sec_def_infinity,      0,                             offsetof($1, cgroup_context.cpu_quota_period_usec)
 $1.MemoryAccounting,             config_parse_bool,                  0,                             offsetof($1, cgroup_context.memory_accounting)
 $1.MemoryMin,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
 $1.MemoryLow,                    config_parse_memory_limit,          0,                             offsetof($1, cgroup_context)
index 44f00a30460b10ae166969c7448fa9157be42248..1d4a1bd34350ee2356a6acde57cb7c8fe840bf72 100644 (file)
@@ -54,6 +54,7 @@
 #include "unit-name.h"
 #include "unit-printf.h"
 #include "user-util.h"
+#include "time-util.h"
 #include "web-util.h"
 
 static int parse_socket_protocol(const char *s) {
index 9a8051d0635abfc878ec4649be8d0029226e5419..a74b213155b568a5ea69c481bfd74d76219fcde9 100644 (file)
@@ -464,6 +464,20 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
                 return 1;
         }
 
+        if (streq(field, "CPUQuotaPeriodSec")) {
+                usec_t u = USEC_INFINITY;
+
+                r = parse_sec_def_infinity(eq, &u);
+                if (r < 0)
+                        return log_error_errno(r, "CPU quota period '%s' invalid.", eq);
+
+                r = sd_bus_message_append(m, "(sv)", "CPUQuotaPeriodUSec", "t", u);
+                if (r < 0)
+                        return bus_log_create_error(r);
+
+                return 1;
+        }
+
         if (streq(field, "DeviceAllow")) {
 
                 if (isempty(eq))
index 08026ea8c491c2334d4b90963a2d15ed9c1c366f..7537bdae34dd84098e8c5cc4b54e53db70b2704c 100644 (file)
@@ -560,6 +560,11 @@ tests += [
          [],
          '', 'manual'],
 
+        [['src/test/test-cgroup-cpu.c'],
+         [libcore,
+          libshared],
+         []],
+
         [['src/test/test-cgroup-mask.c',
           'src/test/test-helper.c'],
          [libcore,
diff --git a/src/test/test-cgroup-cpu.c b/src/test/test-cgroup-cpu.c
new file mode 100644 (file)
index 0000000..a445acc
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "cgroup.h"
+#include "log.h"
+
+static void test_cgroup_cpu_adjust_period(void) {
+        log_info("/* %s */", __func__);
+
+        /* Period 1ms, quota 40% -> Period 2.5ms */
+        assert_se(2500 == cgroup_cpu_adjust_period(USEC_PER_MSEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 10ms, quota 10% -> keep. */
+        assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 100 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 1ms, quota 1000% -> keep. */
+        assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(USEC_PER_MSEC, 10000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 100ms, quota 30% -> keep. */
+        assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(100 * USEC_PER_MSEC, 300 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 5s, quota 40% -> adjust to 1s. */
+        assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(5 * USEC_PER_SEC, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 2s, quota 250% -> adjust to 1s. */
+        assert_se(USEC_PER_SEC == cgroup_cpu_adjust_period(2 * USEC_PER_SEC, 2500 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 10us, quota 5,000,000% -> adjust to 1ms. */
+        assert_se(USEC_PER_MSEC == cgroup_cpu_adjust_period(10, 50000000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 10ms, quota 50,000% -> keep. */
+        assert_se(10 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 500000 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 10ms, quota 1% -> adjust to 100ms. */
+        assert_se(100 * USEC_PER_MSEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 10ms, quota .001% -> adjust to 1s. */
+        assert_se(1 * USEC_PER_SEC == cgroup_cpu_adjust_period(10 * USEC_PER_MSEC, 10, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 0ms, quota 200% -> adjust to 1ms. */
+        assert_se(1 * USEC_PER_MSEC == cgroup_cpu_adjust_period(0, 2 * USEC_PER_SEC, USEC_PER_MSEC, USEC_PER_SEC));
+        /* Period 0ms, quota 40% -> adjust to 2.5ms. */
+        assert_se(2500 == cgroup_cpu_adjust_period(0, 400 * USEC_PER_MSEC, USEC_PER_MSEC, USEC_PER_SEC));
+}
+
+int main(int argc, char *argv[]) {
+        test_cgroup_cpu_adjust_period();
+        return 0;
+}