]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
process-util: Add support for SCHED_EXT scheduling policy
author Matt Fleming <mfleming@cloudflare.com>
Thu, 23 Oct 2025 09:13:49 +0000 (10:13 +0100)
committer Mike Yuan <me@yhndnzj.com>
Sat, 20 Dec 2025 17:31:55 +0000 (18:31 +0100)
Allow CPUSchedulingPolicy to be set to "ext". SCHED_EXT is a new
scheduling policy in Linux v6.12 that allows processes to be scheduled
using custom BPF schedulers instead of the default in-kernel ones.

Selectively setting the SCHED_EXT policy is useful for systems running
in "partial mode" where not all processes are run using a custom
scheduler.

Fall back to SCHED_OTHER and print an error message for systems where
SCHED_EXT isn't available.

man/systemd.exec.xml
src/basic/process-util.c
src/basic/process-util.h
src/core/dbus-execute.c
src/core/exec-invoke.c
src/core/load-fragment.c
src/include/override/sched.h
src/test/test-sched-prio.c
test/test-sched-prio/sched_ext_ok.service [new file with mode: 0644]

index 87ee2f8d04533cc35e8851966f9bf0d2b4728d6c..927fa3e0c42329340e0ae6eff56ea62a002e4f9f 100644 (file)
@@ -1326,7 +1326,7 @@ CapabilityBoundingSet=~CAP_B CAP_C</programlisting>
         <term><varname>CPUSchedulingPolicy=</varname></term>
 
         <listitem><para>Sets the CPU scheduling policy for executed processes. Takes one of <option>other</option>,
-        <option>batch</option>, <option>idle</option>, <option>fifo</option> or <option>rr</option>. See
+        <option>batch</option>, <option>idle</option>, <option>fifo</option>, <option>rr</option> or <option>ext</option>. See
         <citerefentry project='man-pages'><refentrytitle>sched_setscheduler</refentrytitle><manvolnum>2</manvolnum></citerefentry> for
         details.</para></listitem>
       </varlistentry>
index 4c59ea4918f80b836677fbc02291da4b3df841a3..a06380332bd46a32788695b9bba73b60f4214b06 100644 (file)
@@ -1248,13 +1248,39 @@ bool nice_is_valid(int n) {
 }
 
 bool sched_policy_is_valid(int i) {
-        return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR);
+        return IN_SET(i, SCHED_OTHER, SCHED_BATCH, SCHED_IDLE, SCHED_FIFO, SCHED_RR, SCHED_EXT);
 }
 
 bool sched_priority_is_valid(int i) {
         return i >= 0 && i <= sched_get_priority_max(SCHED_RR);
 }
 
+bool sched_policy_supported(int policy) {
+        return sched_get_priority_min(policy) >= 0;
+}
+
+/* Wrappers around sched_get_priority_{min,max}() that gracefully handle missing SCHED_EXT support in the kernel */
+int sched_get_priority_min_safe(int policy) {
+        int r;
+
+        r = sched_get_priority_min(policy);
+        if (r >= 0)
+                return r;
+
+        /* Fallback priority */
+        return 0;
+}
+
+int sched_get_priority_max_safe(int policy) {
+        int r;
+
+        r = sched_get_priority_max(policy);
+        if (r >= 0)
+                return r;
+
+        return 0;
+}
+
 /* The cached PID, possible values:
  *
  *     == UNSET [0]  → cache not initialized yet
@@ -2257,9 +2283,10 @@ DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
 static const char* const sched_policy_table[] = {
         [SCHED_OTHER] = "other",
         [SCHED_BATCH] = "batch",
-        [SCHED_IDLE] = "idle",
-        [SCHED_FIFO] = "fifo",
-        [SCHED_RR] = "rr",
+        [SCHED_IDLE]  = "idle",
+        [SCHED_FIFO]  = "fifo",
+        [SCHED_EXT]   = "ext",
+        [SCHED_RR]    = "rr",
 };
 
 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);
index 24873b448e2e287e4aeb1ee48e294d52d8a43374..94013ff7ede845d4038237e1b0d21ead35b97aa3 100644 (file)
@@ -128,7 +128,10 @@ int pid_compare_func(const pid_t *a, const pid_t *b);
 bool nice_is_valid(int n) _const_;
 
 bool sched_policy_is_valid(int i) _const_;
+bool sched_policy_supported(int i);
 bool sched_priority_is_valid(int i) _const_;
+int sched_get_priority_min_safe(int i);
+int sched_get_priority_max_safe(int i);
 
 #define PID_AUTOMATIC ((pid_t) INT_MIN) /* special value indicating "acquire pid from connection peer" */
 
index 73fa7c23dae597ffd470bad37d14d486655bfb24..658fc1ee060d3ea7d1631f9bdb2f213a593835e7 100644 (file)
@@ -3167,7 +3167,7 @@ int bus_exec_context_set_transient_property(
                                 return r;
 
                         c->cpu_sched_policy = q;
-                        c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(q), sched_get_priority_max(q));
+                        c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min_safe(q), sched_get_priority_max_safe(q));
                         c->cpu_sched_set = true;
 
                         unit_write_settingf(u, flags, name, "CPUSchedulingPolicy=%s", s);
index 01a653f2f1c429278898c9a141d40ff2a4529138..25b7f8366dace6da9d2e3c4008b7538b2fb94113 100644 (file)
@@ -5473,8 +5473,12 @@ int exec_invoke(
 
                 r = sched_setattr(/* pid= */ 0, &attr, /* flags= */ 0);
                 if (r < 0) {
-                        *exit_status = EXIT_SETSCHEDULER;
-                        return log_error_errno(errno, "Failed to set up CPU scheduling: %m");
+                        if (errno != EINVAL || sched_policy_supported(attr.sched_policy)) {
+                                *exit_status = EXIT_SETSCHEDULER;
+                                return log_error_errno(errno, "Failed to set up CPU scheduling: %m");
+                        }
+
+                        log_warning_errno(errno, "CPU scheduling policy %u is not supported, ignoring: %m", attr.sched_policy);
                 }
         }
 
index 0954631cad240fbdf02fb074c0a6a9d9057c2d23..aebfb9275caec0a4ecb311cf825ae579ef0b79c4 100644 (file)
@@ -1539,9 +1539,12 @@ int config_parse_exec_cpu_sched_policy(
                 return 0;
         }
 
+        if (!sched_policy_supported(x))
+                log_syntax(unit, LOG_WARNING, filename, line, x, "Unsupported CPU scheduling policy: %s", rvalue);
+
         c->cpu_sched_policy = x;
         /* Moving to or from real-time policy? We need to adjust the priority */
-        c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min(x), sched_get_priority_max(x));
+        c->cpu_sched_priority = CLAMP(c->cpu_sched_priority, sched_get_priority_min_safe(x), sched_get_priority_max_safe(x));
         c->cpu_sched_set = true;
 
         return 0;
index ce40c009b45feac6801fe6406e43945c1a7ca258..08f4576eca1e322ee404f046bb15a2cbf8b7d085 100644 (file)
@@ -55,3 +55,11 @@ int __clone2(int (*fn)(void *), void *stack_base, size_t stack_size, int flags,
 int missing_sched_setattr(pid_t pid, struct sched_attr *attr, unsigned flags);
 #  define sched_setattr missing_sched_setattr
 #endif
+
+/* f0e1a0643a59bf1f922fa209cec86a170b784f3f (6.12),
+ * defined in sched.h in glibc since glibc-2.41. */
+#ifndef SCHED_EXT
+#  define SCHED_EXT 7
+#else
+static_assert(SCHED_EXT == 7, "");
+#endif
index 1dfa91d10a43b748e45deaebfa6a96f8cbf7f16d..c1305ac4abd21bfd5f29bcced460829a7998001f 100644 (file)
@@ -13,7 +13,7 @@
 int main(int argc, char *argv[]) {
         _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
         _cleanup_(manager_freep) Manager *m = NULL;
-        Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched;
+        Unit *idle_ok, *idle_bad, *rr_ok, *rr_bad, *rr_sched, *ext_ok;
         Service *ser;
         int r;
 
@@ -76,5 +76,11 @@ int main(int argc, char *argv[]) {
         assert_se(ser->exec_context.cpu_sched_policy == SCHED_RR);
         assert_se(ser->exec_context.cpu_sched_priority == 99);
 
+        /* load ext ok */
+        assert_se(manager_load_startable_unit_or_warn(m, "sched_ext_ok.service", NULL, &ext_ok) >= 0);
+        ser = SERVICE(ext_ok);
+        assert_se(ser->exec_context.cpu_sched_policy == SCHED_EXT);
+        assert_se(ser->exec_context.cpu_sched_priority == 0);
+
         return EXIT_SUCCESS;
 }
diff --git a/test/test-sched-prio/sched_ext_ok.service b/test/test-sched-prio/sched_ext_ok.service
new file mode 100644 (file)
index 0000000..4541aa2
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: LGPL-2.1-or-later
+[Unit]
+Description=Sched ext with prio 0
+
+[Service]
+ExecStart=true
+CPUSchedulingPriority=0
+CPUSchedulingPolicy=ext