]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: make TasksMax a partially dynamic property
authorZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Tue, 5 Nov 2019 12:50:28 +0000 (13:50 +0100)
committerZbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Thu, 14 Nov 2019 17:41:54 +0000 (18:41 +0100)
TasksMax= and DefaultTasksMax= can be specified as percentages. We don't
actually document of what the percentage is relative to, but the implementation
uses the smallest of /proc/sys/kernel/pid_max, /proc/sys/kernel/threads-max,
and /sys/fs/cgroup/pids.max (when present). When the value is a percentage,
we immediately convert it to an absolute value. If the limit later changes
(which can happen e.g. when systemd-sysctl runs), the absolute value becomes
outdated.

So let's store either the percentage or absolute value, whatever was specified,
and only convert to an absolute value when the value is used. For example, when
starting a unit, the absolute value will be calculated when the cgroup for
the unit is created.

Fixes #13419.

12 files changed:
src/basic/cgroup-util.h
src/basic/limits-util.c
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/dbus-cgroup.h
src/core/dbus-manager.c
src/core/load-fragment.c
src/core/main.c
src/core/manager.c
src/core/manager.h
src/test/test-cgroup-mask.c

index 0a64715af5e22066d745b1aa3282d6b00bdce739..6b4d6e1006fbcf9980961c88dabdc00b68ae5c8a 100644 (file)
@@ -130,7 +130,6 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) {
 }
 
 /* Default resource limits */
-#define DEFAULT_TASKS_MAX_PERCENTAGE            15U /* 15% of PIDs, 4915 on default settings */
 #define DEFAULT_USER_TASKS_MAX_PERCENTAGE       33U /* 33% of PIDs, 10813 on default settings */
 
 typedef enum CGroupUnified {
index fbf52e55f1a55d40dc31b8cf3f427df85606df49..a74d8197ea7afcaaecfe5c80cbe5a2282928f788 100644 (file)
@@ -99,7 +99,6 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
 }
 
 uint64_t system_tasks_max(void) {
-
         uint64_t a = TASKS_MAX, b = TASKS_MAX;
         _cleanup_free_ char *root = NULL;
         int r;
index ca86cdc3c4b3928ac080fc3f497c5a8867ecee75..b7718e19667c0b4a5b98f3527ec0a82d08b8ffb9 100644 (file)
@@ -16,6 +16,7 @@
 #include "fd-util.h"
 #include "fileio.h"
 #include "fs-util.h"
+#include "limits-util.h"
 #include "parse-util.h"
 #include "path-util.h"
 #include "process-util.h"
  * out specific attributes from us. */
 #define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING)
 
+uint64_t tasks_max_resolve(const TasksMax *tasks_max) {
+        if (tasks_max->scale == 0)
+                return tasks_max->value;
+
+        return system_tasks_max_scale(tasks_max->value, tasks_max->scale);
+}
+
 bool manager_owns_host_root_cgroup(Manager *m) {
         assert(m);
 
@@ -117,7 +125,7 @@ void cgroup_context_init(CGroupContext *c) {
                 .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
                 .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID,
 
-                .tasks_max = CGROUP_LIMIT_MAX,
+                .tasks_max = TASKS_MAX_UNSET,
         };
 }
 
@@ -447,7 +455,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryMax"),
                 prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "MemorySwapMax"),
                 prefix, c->memory_limit,
-                prefix, c->tasks_max,
+                prefix, tasks_max_resolve(&c->tasks_max),
                 prefix, cgroup_device_policy_to_string(c->device_policy),
                 prefix, strempty(disable_controllers_str),
                 prefix, yes_no(c->delegate));
@@ -1339,9 +1347,9 @@ static void cgroup_context_apply(
                          * which is desirable so that there's an official way to release control of the sysctl from
                          * systemd: set the limit to unbounded and reload. */
 
-                        if (c->tasks_max != CGROUP_LIMIT_MAX) {
+                        if (tasks_max_isset(&c->tasks_max)) {
                                 u->manager->sysctl_pid_max_changed = true;
-                                r = procfs_tasks_set_limit(c->tasks_max);
+                                r = procfs_tasks_set_limit(tasks_max_resolve(&c->tasks_max));
                         } else if (u->manager->sysctl_pid_max_changed)
                                 r = procfs_tasks_set_limit(TASKS_MAX);
                         else
@@ -1354,10 +1362,10 @@ static void cgroup_context_apply(
                 /* The attribute itself is not available on the host root cgroup, and in the container case we want to
                  * leave it for the container manager. */
                 if (!is_local_root) {
-                        if (c->tasks_max != CGROUP_LIMIT_MAX) {
-                                char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+                        if (tasks_max_isset(&c->tasks_max)) {
+                                char buf[DECIMAL_STR_MAX(uint64_t) + 1];
 
-                                sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
+                                xsprintf(buf, "%" PRIu64 "\n", tasks_max_resolve(&c->tasks_max));
                                 (void) set_attribute_and_warn(u, "pids", "pids.max", buf);
                         } else
                                 (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n");
@@ -1434,7 +1442,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) {
                 mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES;
 
         if (c->tasks_accounting ||
-            c->tasks_max != CGROUP_LIMIT_MAX)
+            tasks_max_isset(&c->tasks_max))
                 mask |= CGROUP_MASK_PIDS;
 
         return CGROUP_MASK_EXTEND_JOINED(mask);
index 3d4bb4142df6dd3018d71c80ccb13f6b2eff03c4..b6bd4e0de5312d067204d8305d5ded5c0f75da93 100644 (file)
@@ -9,6 +9,21 @@
 #include "list.h"
 #include "time-util.h"
 
+typedef struct TasksMax {
+        /* If scale == 0, just use value; otherwise, value / scale.
+         * See tasks_max_resolve(). */
+        uint64_t value;
+        uint64_t scale;
+} TasksMax;
+
+#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 })
+
+static inline bool tasks_max_isset(const TasksMax *tasks_max) {
+        return tasks_max->value != UINT64_MAX || tasks_max->scale != 0;
+}
+
+uint64_t tasks_max_resolve(const TasksMax *tasks_max);
+
 typedef struct CGroupContext CGroupContext;
 typedef struct CGroupDeviceAllow CGroupDeviceAllow;
 typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
@@ -135,7 +150,7 @@ struct CGroupContext {
         LIST_HEAD(CGroupDeviceAllow, device_allow);
 
         /* Common */
-        uint64_t tasks_max;
+        TasksMax tasks_max;
 };
 
 /* Used when querying IP accounting data */
index 647c3a512c8a9213e9d861cc39b1ae9a1c7427ca..1c203e2f74408cb8743fad5bb1a2ceef66ea6a6c 100644 (file)
@@ -16,6 +16,8 @@
 #include "limits-util.h"
 #include "path-util.h"
 
+BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_resolve);
+
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
 
 static int property_get_cgroup_mask(
@@ -382,7 +384,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0),
         SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
         SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
-        SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+        SD_BUS_PROPERTY("TasksMax", "t", bus_property_get_tasks_max, offsetof(CGroupContext, tasks_max), 0),
         SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
         SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
         SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
@@ -715,9 +717,76 @@ BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEI
 BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1);
 BUS_DEFINE_SET_CGROUP_LIMIT(memory_protection, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
 BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0);
-BUS_DEFINE_SET_CGROUP_LIMIT(tasks_max, CGROUP_MASK_PIDS, system_tasks_max_scale, 1);
 #pragma GCC diagnostic pop
 
+static int bus_cgroup_set_tasks_max(
+                Unit *u,
+                const char *name,
+                TasksMax *p,
+                sd_bus_message *message,
+                UnitWriteFlags flags,
+                sd_bus_error *error) {
+
+        uint64_t v;
+        int r;
+
+        assert(p);
+
+        r = sd_bus_message_read(message, "t", &v);
+        if (r < 0)
+                return r;
+
+        if (v < 1)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Value specified in %s is out of range", name);
+
+        if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                *p = (TasksMax) { .value = v, .scale = 0 }; /* When .scale==0, .value is the absolute value */
+                unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+                if (v == CGROUP_LIMIT_MAX)
+                        unit_write_settingf(u, flags, name,
+                                            "%s=infinity", name);
+                else
+                        unit_write_settingf(u, flags, name,
+                                            "%s=%" PRIu64, name, v);
+        }
+
+        return 1;
+}
+
+static int bus_cgroup_set_tasks_max_scale(
+                Unit *u,
+                const char *name,
+                TasksMax *p,
+                sd_bus_message *message,
+                UnitWriteFlags flags,
+                sd_bus_error *error) {
+
+        uint32_t v;
+        int r;
+
+        assert(p);
+
+        r = sd_bus_message_read(message, "u", &v);
+        if (r < 0)
+                return r;
+
+        if (v < 1 || v >= UINT32_MAX)
+                return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS,
+                                         "Value specified in %s is out of range", name);
+
+        if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                *p = (TasksMax) { v, UINT32_MAX }; /* .scale is not 0, so this is interpreted as v/UINT32_MAX. */
+                unit_invalidate_cgroup(u, CGROUP_MASK_PIDS);
+
+                unit_write_settingf(u, flags, name, "%s=%" PRIu32 "%%", "TasksMax",
+                                    (uint32_t) (DIV_ROUND_UP((uint64_t) v * 100U, (uint64_t) UINT32_MAX)));
+        }
+
+        return 1;
+}
+
 int bus_cgroup_set_property(
                 Unit *u,
                 CGroupContext *c,
index 188baa99e944dd35996ee9613e4f95f9e3cc9757..5ca68a63bf387cb1b7a5fd25a9c835ea639c9f02 100644 (file)
@@ -9,4 +9,6 @@
 
 extern const sd_bus_vtable bus_cgroup_vtable[];
 
+int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error);
+
 int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error);
index 76d8a0370232fece88ac14c18964513d44db98a7..64a612a7f0f110ee8d9b61d5e31071ff986d2acf 100644 (file)
@@ -9,6 +9,7 @@
 #include "architecture.h"
 #include "build.h"
 #include "bus-common-errors.h"
+#include "dbus-cgroup.h"
 #include "dbus-execute.h"
 #include "dbus-job.h"
 #include "dbus-manager.h"
@@ -2465,7 +2466,7 @@ const sd_bus_vtable bus_manager_vtable[] = {
         SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST),
-        SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0),
         SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST),
 
index 288de616b65a1d8929447eb561e04c6aba7e35bd..0631ea5ce203d639b15126e0d34b2484004654a1 100644 (file)
@@ -3263,36 +3263,39 @@ int config_parse_tasks_max(
                 void *data,
                 void *userdata) {
 
-        uint64_t *tasks_max = data, v;
         const Unit *u = userdata;
+        TasksMax *tasks_max = data;
+        uint64_t v;
         int r;
 
         if (isempty(rvalue)) {
-                *tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX;
+                *tasks_max = u ? u->manager->default_tasks_max : TASKS_MAX_UNSET;
                 return 0;
         }
 
         if (streq(rvalue, "infinity")) {
-                *tasks_max = CGROUP_LIMIT_MAX;
+                *tasks_max = TASKS_MAX_UNSET;
                 return 0;
         }
 
         r = parse_permille(rvalue);
-        if (r < 0) {
+        if (r >= 0)
+                *tasks_max = (TasksMax) { r, 1000U }; /* r‰ */
+        else {
                 r = safe_atou64(rvalue, &v);
                 if (r < 0) {
                         log_syntax(unit, LOG_ERR, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue);
                         return 0;
                 }
-        } else
-                v = system_tasks_max_scale(r, 1000U);
 
-        if (v <= 0 || v >= UINT64_MAX) {
-                log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
-                return 0;
+                if (v <= 0 || v >= UINT64_MAX) {
+                        log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue);
+                        return 0;
+                }
+
+                *tasks_max = (TasksMax) { v };
         }
 
-        *tasks_max = v;
         return 0;
 }
 
index bfbb9331f32f18013e0610c2d2898d4d34f2bd95..6e101f02785cd366636e93612718a81cf58a362c 100644 (file)
@@ -84,6 +84,8 @@
 #include <sanitizer/lsan_interface.h>
 #endif
 
+#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */
+
 static enum {
         ACTION_RUN,
         ACTION_HELP,
@@ -135,7 +137,7 @@ static bool arg_default_ip_accounting;
 static bool arg_default_blockio_accounting;
 static bool arg_default_memory_accounting;
 static bool arg_default_tasks_accounting;
-static uint64_t arg_default_tasks_max;
+static TasksMax arg_default_tasks_max;
 static sd_id128_t arg_machine_id;
 static EmergencyAction arg_cad_burst_action;
 static OOMPolicy arg_default_oom_policy;
@@ -2131,7 +2133,7 @@ static void reset_arguments(void) {
         arg_default_blockio_accounting = false;
         arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT;
         arg_default_tasks_accounting = true;
-        arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U);
+        arg_default_tasks_max = DEFAULT_TASKS_MAX;
         arg_machine_id = (sd_id128_t) {};
         arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE;
         arg_default_oom_policy = OOM_STOP;
index aaf894c9ecf450f5b745308c71497f3149245c7e..13a6b49a8f252e25f82fd3c62469c66882733fb1 100644 (file)
@@ -762,7 +762,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager
                 .default_timer_accuracy_usec = USEC_PER_MINUTE,
                 .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT,
                 .default_tasks_accounting = true,
-                .default_tasks_max = UINT64_MAX,
+                .default_tasks_max = TASKS_MAX_UNSET,
                 .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC,
                 .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC,
                 .default_restart_usec = DEFAULT_RESTART_USEC,
index 424bf71704143bd497256eb8e03d6bb43d06b593..51df7f8cd9d067de94fdac7932bba50f05f20147 100644 (file)
@@ -9,6 +9,7 @@
 #include "sd-event.h"
 
 #include "cgroup-util.h"
+#include "cgroup.h"
 #include "fdset.h"
 #include "hashmap.h"
 #include "ip-address-access.h"
@@ -349,7 +350,7 @@ struct Manager {
         bool default_tasks_accounting;
         bool default_ip_accounting;
 
-        uint64_t default_tasks_max;
+        TasksMax default_tasks_max;
         usec_t default_timer_accuracy_usec;
 
         OOMPolicy default_oom_policy;
index 7fad58f690e3f6498088ba4e173ca1cb6668d207..02c8549b4b653bf9878efb56e57689fc64e64f67 100644 (file)
@@ -56,7 +56,7 @@ static int test_cgroup_mask(void) {
                 m->default_blockio_accounting =
                 m->default_io_accounting =
                 m->default_tasks_accounting = false;
-        m->default_tasks_max = (uint64_t) -1;
+        m->default_tasks_max = TASKS_MAX_UNSET;
 
         assert_se(manager_startup(m, NULL, NULL) >= 0);