From: Zbigniew Jędrzejewski-Szmek Date: Tue, 5 Nov 2019 12:50:28 +0000 (+0100) Subject: core: make TasksMax a partially dynamic property X-Git-Tag: v244-rc1~27^2~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3a0f06c41a29b760fe6c3da7529cf595e583aa06;p=thirdparty%2Fsystemd.git core: make TasksMax a partially dynamic property TasksMax= and DefaultTasksMax= can be specified as percentages. We don't actually document of what the percentage is relative to, but the implementation uses the smallest of /proc/sys/kernel/pid_max, /proc/sys/kernel/threads-max, and /sys/fs/cgroup/pids.max (when present). When the value is a percentage, we immediately convert it to an absolute value. If the limit later changes (which can happen e.g. when systemd-sysctl runs), the absolute value becomes outdated. So let's store either the percentage or absolute value, whatever was specified, and only convert to an absolute value when the value is used. For example, when starting a unit, the absolute value will be calculated when the cgroup for the unit is created. Fixes #13419. --- diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 0a64715af5e..6b4d6e1006f 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -130,7 +130,6 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { } /* Default resource limits */ -#define DEFAULT_TASKS_MAX_PERCENTAGE 15U /* 15% of PIDs, 4915 on default settings */ #define DEFAULT_USER_TASKS_MAX_PERCENTAGE 33U /* 33% of PIDs, 10813 on default settings */ typedef enum CGroupUnified { diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c index fbf52e55f1a..a74d8197ea7 100644 --- a/src/basic/limits-util.c +++ b/src/basic/limits-util.c @@ -99,7 +99,6 @@ uint64_t physical_memory_scale(uint64_t v, uint64_t max) { } uint64_t system_tasks_max(void) { - uint64_t a = TASKS_MAX, b = TASKS_MAX; _cleanup_free_ char *root = NULL; int r; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index ca86cdc3c4b..b7718e19667 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -16,6 +16,7 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "limits-util.h" #include "parse-util.h" #include "path-util.h" #include "process-util.h" @@ -34,6 +35,13 @@ * out specific attributes from us. */ #define LOG_LEVEL_CGROUP_WRITE(r) (IN_SET(abs(r), ENOENT, EROFS, EACCES, EPERM) ? LOG_DEBUG : LOG_WARNING) +uint64_t tasks_max_resolve(const TasksMax *tasks_max) { + if (tasks_max->scale == 0) + return tasks_max->value; + + return system_tasks_max_scale(tasks_max->value, tasks_max->scale); +} + bool manager_owns_host_root_cgroup(Manager *m) { assert(m); @@ -117,7 +125,7 @@ void cgroup_context_init(CGroupContext *c) { .blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID, .startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID, - .tasks_max = CGROUP_LIMIT_MAX, + .tasks_max = TASKS_MAX_UNSET, }; } @@ -447,7 +455,7 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) { prefix, c->memory_max, format_cgroup_memory_limit_comparison(cdd, sizeof(cdd), u, "MemoryMax"), prefix, c->memory_swap_max, format_cgroup_memory_limit_comparison(cde, sizeof(cde), u, "MemorySwapMax"), prefix, c->memory_limit, - prefix, c->tasks_max, + prefix, tasks_max_resolve(&c->tasks_max), prefix, cgroup_device_policy_to_string(c->device_policy), prefix, strempty(disable_controllers_str), prefix, yes_no(c->delegate)); @@ -1339,9 +1347,9 @@ static void cgroup_context_apply( * which is desirable so that there's an official way to release control of the sysctl from * systemd: set the limit to unbounded and reload. */ - if (c->tasks_max != CGROUP_LIMIT_MAX) { + if (tasks_max_isset(&c->tasks_max)) { u->manager->sysctl_pid_max_changed = true; - r = procfs_tasks_set_limit(c->tasks_max); + r = procfs_tasks_set_limit(tasks_max_resolve(&c->tasks_max)); } else if (u->manager->sysctl_pid_max_changed) r = procfs_tasks_set_limit(TASKS_MAX); else @@ -1354,10 +1362,10 @@ static void cgroup_context_apply( /* The attribute itself is not available on the host root cgroup, and in the container case we want to * leave it for the container manager. */ if (!is_local_root) { - if (c->tasks_max != CGROUP_LIMIT_MAX) { - char buf[DECIMAL_STR_MAX(uint64_t) + 2]; + if (tasks_max_isset(&c->tasks_max)) { + char buf[DECIMAL_STR_MAX(uint64_t) + 1]; - sprintf(buf, "%" PRIu64 "\n", c->tasks_max); + xsprintf(buf, "%" PRIu64 "\n", tasks_max_resolve(&c->tasks_max)); (void) set_attribute_and_warn(u, "pids", "pids.max", buf); } else (void) set_attribute_and_warn(u, "pids", "pids.max", "max\n"); @@ -1434,7 +1442,7 @@ static CGroupMask unit_get_cgroup_mask(Unit *u) { mask |= CGROUP_MASK_DEVICES | CGROUP_MASK_BPF_DEVICES; if (c->tasks_accounting || - c->tasks_max != CGROUP_LIMIT_MAX) + tasks_max_isset(&c->tasks_max)) mask |= CGROUP_MASK_PIDS; return CGROUP_MASK_EXTEND_JOINED(mask); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 3d4bb4142df..b6bd4e0de53 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -9,6 +9,21 @@ #include "list.h" #include "time-util.h" +typedef struct TasksMax { + /* If scale == 0, just use value; otherwise, value / scale. + * See tasks_max_resolve(). */ + uint64_t value; + uint64_t scale; +} TasksMax; + +#define TASKS_MAX_UNSET ((TasksMax) { .value = UINT64_MAX, .scale = 0 }) + +static inline bool tasks_max_isset(const TasksMax *tasks_max) { + return tasks_max->value != UINT64_MAX || tasks_max->scale != 0; +} + +uint64_t tasks_max_resolve(const TasksMax *tasks_max); + typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; typedef struct CGroupIODeviceWeight CGroupIODeviceWeight; @@ -135,7 +150,7 @@ struct CGroupContext { LIST_HEAD(CGroupDeviceAllow, device_allow); /* Common */ - uint64_t tasks_max; + TasksMax tasks_max; }; /* Used when querying IP accounting data */ diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 647c3a512c8..1c203e2f744 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -16,6 +16,8 @@ #include "limits-util.h" #include "path-util.h" +BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_resolve); + static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy); static int property_get_cgroup_mask( @@ -382,7 +384,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = { SD_BUS_PROPERTY("DevicePolicy", "s", property_get_cgroup_device_policy, offsetof(CGroupContext, device_policy), 0), SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0), SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0), - SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0), + SD_BUS_PROPERTY("TasksMax", "t", bus_property_get_tasks_max, offsetof(CGroupContext, tasks_max), 0), SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0), SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0), SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0), @@ -715,9 +717,76 @@ BUS_DEFINE_SET_CGROUP_WEIGHT(blockio_weight, CGROUP_MASK_BLKIO, CGROUP_BLKIO_WEI BUS_DEFINE_SET_CGROUP_LIMIT(memory, CGROUP_MASK_MEMORY, physical_memory_scale, 1); BUS_DEFINE_SET_CGROUP_LIMIT(memory_protection, CGROUP_MASK_MEMORY, physical_memory_scale, 0); BUS_DEFINE_SET_CGROUP_LIMIT(swap, CGROUP_MASK_MEMORY, physical_memory_scale, 0); -BUS_DEFINE_SET_CGROUP_LIMIT(tasks_max, CGROUP_MASK_PIDS, system_tasks_max_scale, 1); #pragma GCC diagnostic pop +static int bus_cgroup_set_tasks_max( + Unit *u, + const char *name, + TasksMax *p, + sd_bus_message *message, + UnitWriteFlags flags, + sd_bus_error *error) { + + uint64_t v; + int r; + + assert(p); + + r = sd_bus_message_read(message, "t", &v); + if (r < 0) + return r; + + if (v < 1) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, + "Value specified in %s is out of range", name); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + *p = (TasksMax) { .value = v, .scale = 0 }; /* When .scale==0, .value is the absolute value */ + unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); + + if (v == CGROUP_LIMIT_MAX) + unit_write_settingf(u, flags, name, + "%s=infinity", name); + else + unit_write_settingf(u, flags, name, + "%s=%" PRIu64, name, v); + } + + return 1; +} + +static int bus_cgroup_set_tasks_max_scale( + Unit *u, + const char *name, + TasksMax *p, + sd_bus_message *message, + UnitWriteFlags flags, + sd_bus_error *error) { + + uint32_t v; + int r; + + assert(p); + + r = sd_bus_message_read(message, "u", &v); + if (r < 0) + return r; + + if (v < 1 || v >= UINT32_MAX) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, + "Value specified in %s is out of range", name); + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + *p = (TasksMax) { v, UINT32_MAX }; /* .scale is not 0, so this is interpreted as v/UINT32_MAX. */ + unit_invalidate_cgroup(u, CGROUP_MASK_PIDS); + + unit_write_settingf(u, flags, name, "%s=%" PRIu32 "%%", "TasksMax", + (uint32_t) (DIV_ROUND_UP((uint64_t) v * 100U, (uint64_t) UINT32_MAX))); + } + + return 1; +} + int bus_cgroup_set_property( Unit *u, CGroupContext *c, diff --git a/src/core/dbus-cgroup.h b/src/core/dbus-cgroup.h index 188baa99e94..5ca68a63bf3 100644 --- a/src/core/dbus-cgroup.h +++ b/src/core/dbus-cgroup.h @@ -9,4 +9,6 @@ extern const sd_bus_vtable bus_cgroup_vtable[]; +int bus_property_get_tasks_max(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); + int bus_cgroup_set_property(Unit *u, CGroupContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 76d8a037023..64a612a7f0f 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -9,6 +9,7 @@ #include "architecture.h" #include "build.h" #include "bus-common-errors.h" +#include "dbus-cgroup.h" #include "dbus-execute.h" #include "dbus-job.h" #include "dbus-manager.h" @@ -2465,7 +2466,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("DefaultLimitRTPRIOSoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTPRIO]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultLimitRTTIME", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0), SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 288de616b65..0631ea5ce20 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3263,36 +3263,39 @@ int config_parse_tasks_max( void *data, void *userdata) { - uint64_t *tasks_max = data, v; const Unit *u = userdata; + TasksMax *tasks_max = data; + uint64_t v; int r; if (isempty(rvalue)) { - *tasks_max = u ? u->manager->default_tasks_max : UINT64_MAX; + *tasks_max = u ? u->manager->default_tasks_max : TASKS_MAX_UNSET; return 0; } if (streq(rvalue, "infinity")) { - *tasks_max = CGROUP_LIMIT_MAX; + *tasks_max = TASKS_MAX_UNSET; return 0; } r = parse_permille(rvalue); - if (r < 0) { + if (r >= 0) + *tasks_max = (TasksMax) { r, 1000U }; /* r‰ */ + else { r = safe_atou64(rvalue, &v); if (r < 0) { log_syntax(unit, LOG_ERR, filename, line, r, "Invalid maximum tasks value '%s', ignoring: %m", rvalue); return 0; } - } else - v = system_tasks_max_scale(r, 1000U); - if (v <= 0 || v >= UINT64_MAX) { - log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue); - return 0; + if (v <= 0 || v >= UINT64_MAX) { + log_syntax(unit, LOG_ERR, filename, line, 0, "Maximum tasks value '%s' out of range, ignoring.", rvalue); + return 0; + } + + *tasks_max = (TasksMax) { v }; } - *tasks_max = v; return 0; } diff --git a/src/core/main.c b/src/core/main.c index bfbb9331f32..6e101f02785 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -84,6 +84,8 @@ #include #endif +#define DEFAULT_TASKS_MAX ((TasksMax) { 15U, 100U }) /* 15% */ + static enum { ACTION_RUN, ACTION_HELP, @@ -135,7 +137,7 @@ static bool arg_default_ip_accounting; static bool arg_default_blockio_accounting; static bool arg_default_memory_accounting; static bool arg_default_tasks_accounting; -static uint64_t arg_default_tasks_max; +static TasksMax arg_default_tasks_max; static sd_id128_t arg_machine_id; static EmergencyAction arg_cad_burst_action; static OOMPolicy arg_default_oom_policy; @@ -2131,7 +2133,7 @@ static void reset_arguments(void) { arg_default_blockio_accounting = false; arg_default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT; arg_default_tasks_accounting = true; - arg_default_tasks_max = system_tasks_max_scale(DEFAULT_TASKS_MAX_PERCENTAGE, 100U); + arg_default_tasks_max = DEFAULT_TASKS_MAX; arg_machine_id = (sd_id128_t) {}; arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; arg_default_oom_policy = OOM_STOP; diff --git a/src/core/manager.c b/src/core/manager.c index aaf894c9ecf..13a6b49a8f2 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -762,7 +762,7 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager .default_timer_accuracy_usec = USEC_PER_MINUTE, .default_memory_accounting = MEMORY_ACCOUNTING_DEFAULT, .default_tasks_accounting = true, - .default_tasks_max = UINT64_MAX, + .default_tasks_max = TASKS_MAX_UNSET, .default_timeout_start_usec = DEFAULT_TIMEOUT_USEC, .default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC, .default_restart_usec = DEFAULT_RESTART_USEC, diff --git a/src/core/manager.h b/src/core/manager.h index 424bf717041..51df7f8cd9d 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -9,6 +9,7 @@ #include "sd-event.h" #include "cgroup-util.h" +#include "cgroup.h" #include "fdset.h" #include "hashmap.h" #include "ip-address-access.h" @@ -349,7 +350,7 @@ struct Manager { bool default_tasks_accounting; bool default_ip_accounting; - uint64_t default_tasks_max; + TasksMax default_tasks_max; usec_t default_timer_accuracy_usec; OOMPolicy default_oom_policy; diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c index 7fad58f690e..02c8549b4b6 100644 --- a/src/test/test-cgroup-mask.c +++ b/src/test/test-cgroup-mask.c @@ -56,7 +56,7 @@ static int test_cgroup_mask(void) { m->default_blockio_accounting = m->default_io_accounting = m->default_tasks_accounting = false; - m->default_tasks_max = (uint64_t) -1; + m->default_tasks_max = TASKS_MAX_UNSET; assert_se(manager_startup(m, NULL, NULL) >= 0);