From: Matt Fleming Date: Thu, 23 Oct 2025 09:13:49 +0000 (+0100) Subject: process-util: Add support for the SCHED_EXT scheduling policy X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4dcbfbb1adaeefbf2ed9d78f61a56fbcd0251ead;p=thirdparty%2Fsystemd.git process-util: Add support for the SCHED_EXT scheduling policy Allow CPUSchedulingPolicy to be set to "ext". SCHED_EXT is a new scheduling policy in Linux v6.12 that allows processes to be scheduled using custom BPF schedulers instead of the default in-kernel ones. Selectively setting the SCHED_EXT policy is useful for systems running in "partial mode" where not all processes are run using a custom scheduler. Fall back to SCHED_OTHER and print an error message for systems where SCHED_EXT isn't available. --- diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 87ee2f8d045..927fa3e0c42 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1326,7 +1326,7 @@ CapabilityBoundingSet=~CAP_B CAP_C CPUSchedulingPolicy= Sets the CPU scheduling policy for executed processes. Takes one of <option>other</option>, - <option>batch</option>, <option>idle</option>, <option>fifo</option> or <option>rr</option>. See + <option>batch</option>, <option>idle</option>, <option>fifo</option>, <option>ext</option> or <option>rr</option>. See sched_setscheduler(2) for details. 
diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 4c59ea4918f..a06380332bd 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1248,13 +1248,39 @@ bool nice_is_valid(int n) { } bool sched_policy_is_valid(int i) { - return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR); + return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR, SCHED_EXT); } bool sched_priority_is_valid(int i) { return i >= 0 && i <= sched_get_priority_max(SCHED_RR); } +bool sched_policy_supported(int policy) { + return sched_get_priority_min(policy) >= 0; +} + +/* Wrappers around sched_get_priority_{min,max}() that gracefully handle missing SCHED_EXT support in the kernel: if the underlying call fails, 0 is returned as the fallback priority instead of a negative value */ +int sched_get_priority_min_safe(int policy) { + int r; + + r = sched_get_priority_min(policy); + if (r >= 0) + return r; + + /* Fallback priority */ + return 0; +} + +int sched_get_priority_max_safe(int policy) { + int r; + + r = sched_get_priority_max(policy); + if (r >= 0) + return r; + + return 0; +} + /* The cached PID, possible values: * * == UNSET [0] → cache not initialized yet @@ -2257,9 +2283,10 @@ DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int); static const char* const sched_policy_table[] = { [SCHED_OTHER] = "other", [SCHED_BATCH] = "batch", - [SCHED_IDLE] = "idle", - [SCHED_FIFO] = "fifo", - [SCHED_RR] = "rr", + [SCHED_IDLE] = "idle", + [SCHED_FIFO] = "fifo", + [SCHED_EXT] = "ext", + [SCHED_RR] = "rr", }; DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX); diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 24873b448e2..94013ff7ede 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -128,7 +128,10 @@ int pid_compare_func(const pid_t *a, const pid_t *b); bool nice_is_valid(int n) _const_; bool sched_policy_is_valid(int i) _const_; +bool sched_policy_supported(int i); bool sched_priority_is_valid(int i) _const_; +int sched_get_priority_min_safe(int i); +int 
sched_get_priority_max_safe(int i); #define PID_AUTOMATIC ((pid_t) INT_MIN) /* special value indicating "acquire pid from connection peer" */ diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 73fa7c23dae..658fc1ee060 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -3167,7 +3167,7 @@ int bus_exec_context_set_transient_property( return r; c->cpu_sched_policy = q; - c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q)); + c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min_safe(q), sched_get_priority_max_safe(q)); c->cpu_sched_set = true; unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 01a653f2f1c..25b7f8366da 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -5473,8 +5473,12 @@ int exec_invoke( r = sched_setattr(/* pid= */ 0, &attr, /* flags= */ 0); if (r < 0) { - *exit_status = EXIT_SETSCHEDULER; - return log_error_errno(errno, "Failed to set up CPU scheduling: %m"); + if (errno != EINVAL || sched_policy_supported(attr.sched_policy)) { + *exit_status = EXIT_SETSCHEDULER; + return log_error_errno(errno, "Failed to set up CPU scheduling: %m"); + } + + log_warning_errno(errno, "CPU scheduling policy %u is not supported, ignoring: %m", attr.sched_policy); } } diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 0954631cad2..aebfb9275ca 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -1539,9 +1539,12 @@ int config_parse_exec_cpu_sched_policy( return 0; } + if (!sched_policy_supported(x)) + log_syntax(unit, LOG_WARNING, filename, line, 0, "Unsupported CPU scheduling policy: %s", rvalue); + c->cpu_sched_policy = x; /* Moving to or from real-time policy? 
We need to adjust the priority */ - c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(x), sched_get_priority_max(x)); + c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min_safe(x), sched_get_priority_max_safe(x)); c->cpu_sched_set = true; return 0; diff --git a/src/include/override/sched.h b/src/include/override/sched.h index ce40c009b45..08f4576eca1 100644 --- a/src/include/override/sched.h +++ b/src/include/override/sched.h @@ -55,3 +55,11 @@ int __clone2(int (*fn)(void *), void *stack_base, size_t stack_size, int flags, int missing_sched_setattr(pid_t pid, struct sched_attr *attr, unsigned flags); # define sched_setattr missing_sched_setattr #endif + +/* f0e1a0643a59bf1f922fa209cec86a170b784f3f (6.12), + * defined in sched.h in glibc since glibc-2.41. */ +#ifndef SCHED_EXT +# define SCHED_EXT 7 +#else +static_assert(SCHED_EXT == 7, ""); +#endif diff --git a/src/test/test-sched-prio.c b/src/test/test-sched-prio.c index 1dfa91d10a4..c1305ac4abd 100644 --- a/src/test/test-sched-prio.c +++ b/src/test/test-sched-prio.c @@ -13,7 +13,7 @@ int main(int argc, char *argv[]) { _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL; _cleanup_(manager_freep) Manager *m = NULL; - Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched; + Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched, *ext_ok; Service *ser; int r; @@ -76,5 +76,11 @@ int main(int argc, char *argv[]) { assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR); assert_se(ser->exec_context.cpu_sched_priority == 99); + /* load ext ok */ + assert_se(manager_load_startable_unit_or_warn(m, "sched_ext_ok.service", NULL, &ext_ok) >= 0); + ser = SERVICE(ext_ok); + assert_se(ser->exec_context.cpu_sched_policy == SCHED_EXT); + assert_se(ser->exec_context.cpu_sched_priority == 0); + return EXIT_SUCCESS; } diff --git a/test/test-sched-prio/sched_ext_ok.service b/test/test-sched-prio/sched_ext_ok.service new file mode 100644 index 00000000000..4541aa287d1 --- 
/dev/null +++ b/test/test-sched-prio/sched_ext_ok.service @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Description=Sched ext with prio 0 + +[Service] +ExecStart=true +CPUSchedulingPriority=0 +CPUSchedulingPolicy=ext