selftests/sched_ext: Add test for DL server total_bw consistency

author Joel Fernandes <joelagnelf@nvidia.com>

Mon, 26 Jan 2026 09:59:05 +0000 (10:59 +0100)

committer Peter Zijlstra <peterz@infradead.org>

Tue, 3 Feb 2026 11:04:18 +0000 (12:04 +0100)
author Joel Fernandes <joelagnelf@nvidia.com>
Mon, 26 Jan 2026 09:59:05 +0000 (10:59 +0100)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 3 Feb 2026 11:04:18 +0000 (12:04 +0100)
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile

index c9255d1499b6ed8a3cea39387ab81be4a003bfb2..2c601a7eaff5f0fee2198ec93770d91182a6de7d 100644 (file)
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -185,6 +185,7 @@ auto-test-targets :=                        \
         select_cpu_vtime                \
         rt_stall                        \
         test_example                    \
+       total_bw                        \
  
  testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets)))
  
diff --git a/tools/testing/selftests/sched_ext/total_bw.c b/tools/testing/selftests/sched_ext/total_bw.c

new file mode 100644 (file)

index 0000000..5b0a619
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/total_bw.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test to verify that total_bw value remains consistent across all CPUs
+ * in different BPF program states.
+ *
+ * Copyright (C) 2025 NVIDIA Corporation.
+ */
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <pthread.h>
+#include <scx/common.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "minimal.bpf.skel.h"
+#include "scx_test.h"
+
+#define MAX_CPUS 512
+#define STRESS_DURATION_SEC 5
+
+/*
+ * Per-test state allocated in setup(), consumed by run() and released in
+ * cleanup().
+ */
+struct total_bw_ctx {
+       /* BPF skeleton opened and loaded in setup() */
+       struct minimal *skel;
+       /* total_bw values read in setup(), before the scheduler is attached */
+       long baseline_bw[MAX_CPUS];
+       /* Number of CPUs under test: online CPU count, capped at MAX_CPUS */
+       int nr_cpus;
+};
+
+/*
+ * Thread body that busy-loops until roughly STRESS_DURATION_SEC seconds of
+ * wall-clock time have passed. @arg is unused; the return value is always
+ * NULL.
+ *
+ * NOTE(review): time() is declared in <time.h>, which this file does not
+ * include directly - presumably it arrives via another header; confirm.
+ */
+static void *cpu_stress_thread(void *arg)
+{
+       const time_t deadline = time(NULL) + STRESS_DURATION_SEC;
+       /* volatile keeps the compiler from eliding the otherwise empty spin */
+       volatile int spin;
+
+       for (;;) {
+               if (time(NULL) >= deadline)
+                       break;
+
+               spin = 0;
+               while (spin < 1000000)
+                       spin++;
+       }
+
+       return NULL;
+}
+
+/*
+ * Spawn @nr_cpus busy-looping threads and wait for all of them to finish.
+ *
+ * The first enqueue on a CPU causes the DL server to start, so run stressor
+ * threads in the hope that they get scheduled on all CPUs. The threads are
+ * not pinned, so covering every CPU is not guaranteed.
+ *
+ * Returns 0 on success, -ENOMEM if the thread array cannot be allocated, or
+ * the negated pthread_create() error number if a thread cannot be created.
+ * Threads created before a failure are still joined.
+ */
+static int run_cpu_stress(int nr_cpus)
+{
+       pthread_t *threads;
+       int nr_started, i, err, ret = 0;
+
+       threads = calloc(nr_cpus, sizeof(*threads));
+       if (!threads)
+               return -ENOMEM;
+
+       /* Create one (unpinned) stressor thread per CPU under test */
+       for (nr_started = 0; nr_started < nr_cpus; nr_started++) {
+               /*
+                * pthread_create() returns the error number directly and does
+                * not set errno, so report its return value rather than errno.
+                */
+               err = pthread_create(&threads[nr_started], NULL, cpu_stress_thread, NULL);
+               if (err) {
+                       ret = -err;
+                       fprintf(stderr, "Failed to create thread %d: %s\n", nr_started, strerror(err));
+                       break;
+               }
+       }
+
+       /* Join only the threads that were actually created */
+       for (i = 0; i < nr_started; i++)
+               pthread_join(threads[i], NULL);
+
+       free(threads);
+       return ret;
+}
+
+/*
+ * Scan /sys/kernel/debug/sched/debug for lines containing "total_bw" and
+ * parse the number that follows the first ':' after that keyword.
+ *
+ * At most @max_cpus values are stored in @bw_values, in the order they appear
+ * in the file; further matches are counted but not stored.
+ * NOTE(review): callers treat the Nth match as CPU N - confirm the file lists
+ * exactly one total_bw entry per CPU, in CPU order.
+ *
+ * Returns the total number of matching lines, or -1 if the file cannot be
+ * opened.
+ */
+static int read_total_bw_values(long *bw_values, int max_cpus)
+{
+       char buf[256];
+       int nr_found = 0;
+       FILE *dbg;
+
+       dbg = fopen("/sys/kernel/debug/sched/debug", "r");
+       if (!dbg) {
+               SCX_ERR("Failed to open debug file");
+               return -1;
+       }
+
+       while (fgets(buf, sizeof(buf), dbg)) {
+               char *pos = strstr(buf, "total_bw");
+
+               if (!pos)
+                       continue;
+
+               pos = strchr(pos, ':');
+               if (!pos)
+                       continue;
+
+               /* Keep counting past max_cpus, but never write out of bounds */
+               if (nr_found < max_cpus)
+                       bw_values[nr_found] = atol(pos + 1);
+               nr_found++;
+       }
+
+       fclose(dbg);
+       return nr_found;
+}
+
+/*
+ * Check that the first @count entries of @bw_values are all equal.
+ *
+ * Returns true when they all match. Returns false when @count is not
+ * positive, or when an entry differs from bw_values[0]; the first mismatch
+ * is reported through SCX_ERR().
+ */
+static bool verify_total_bw_consistency(long *bw_values, int count)
+{
+       int cpu;
+
+       if (count <= 0)
+               return false;
+
+       for (cpu = 1; cpu < count; cpu++) {
+               if (bw_values[cpu] == bw_values[0])
+                       continue;
+
+               SCX_ERR("Inconsistent total_bw: CPU0=%ld, CPU%d=%ld",
+                       bw_values[0], cpu, bw_values[cpu]);
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Run the CPU stressor, then read total_bw into @bw_values and check that the
+ * first @nr_cpus values agree with each other.
+ *
+ * The read-and-verify step is attempted up to 10 times, one second apart; it
+ * is retried both when fewer than @nr_cpus values are found and when the
+ * values disagree.
+ *
+ * Returns 0 once a consistent set of values has been read, -1 if the
+ * stressor fails or no attempt succeeds.
+ */
+static int fetch_verify_total_bw(long *bw_values, int nr_cpus)
+{
+       const int max_attempts = 10;
+       int attempt, count;
+
+       /*
+        * The first enqueue on a CPU causes the DL server to start, for that
+        * reason run stressor threads in the hopes it schedules on all CPUs.
+        */
+       if (run_cpu_stress(nr_cpus) < 0) {
+               SCX_ERR("Failed to run CPU stress");
+               return -1;
+       }
+
+       /* Try multiple times to get stable values */
+       for (attempt = 0; attempt < max_attempts; attempt++) {
+               count = read_total_bw_values(bw_values, nr_cpus);
+               fprintf(stderr, "Read %d total_bw values (testing %d CPUs)\n", count, nr_cpus);
+
+               if (count < nr_cpus) {
+                       /* More entries than tested CPUs is fine; fewer is not */
+                       SCX_ERR("Expected at least %d CPUs, got %d", nr_cpus, count);
+               } else if (verify_total_bw_consistency(bw_values, nr_cpus)) {
+                       /* Only the CPUs under test were compared */
+                       fprintf(stderr, "Values are consistent: %ld\n", bw_values[0]);
+                       return 0;
+               }
+
+               sleep(1);
+       }
+
+       return -1;
+}
+
+/*
+ * Prepare the test: record baseline total_bw values with no BPF scheduler
+ * attached, then open and load the minimal BPF skeleton.
+ *
+ * Returns SCX_TEST_SKIP when the sched debug file is not readable,
+ * SCX_TEST_FAIL on any allocation, measurement or skeleton failure, and
+ * SCX_TEST_PASS with *ctx pointing at the new context otherwise. On failure
+ * everything allocated here is released and *ctx is left untouched.
+ */
+static enum scx_test_status setup(void **ctx)
+{
+       struct total_bw_ctx *test_ctx;
+
+       if (access("/sys/kernel/debug/sched/debug", R_OK) != 0) {
+               fprintf(stderr, "Skipping test: debugfs sched/debug not accessible\n");
+               return SCX_TEST_SKIP;
+       }
+
+       test_ctx = calloc(1, sizeof(*test_ctx));
+       if (!test_ctx)
+               return SCX_TEST_FAIL;
+
+       test_ctx->nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+       if (test_ctx->nr_cpus <= 0)
+               goto err_free;
+
+       /* Systems with more than MAX_CPUS CPUs only get the first MAX_CPUS tested */
+       if (test_ctx->nr_cpus > MAX_CPUS)
+               test_ctx->nr_cpus = MAX_CPUS;
+
+       /* Test scenario 1: BPF program not loaded, capture the baseline total_bw */
+       fprintf(stderr, "BPF prog initially not loaded, reading total_bw values\n");
+       if (fetch_verify_total_bw(test_ctx->baseline_bw, test_ctx->nr_cpus) < 0) {
+               SCX_ERR("Failed to get stable baseline values");
+               goto err_free;
+       }
+
+       /* Open and load the BPF skeleton; attaching happens later in run() */
+       test_ctx->skel = minimal__open();
+       if (!test_ctx->skel)
+               goto err_free;
+
+       SCX_ENUM_INIT(test_ctx->skel);
+       if (minimal__load(test_ctx->skel))
+               goto err_destroy;
+
+       *ctx = test_ctx;
+       return SCX_TEST_PASS;
+
+err_destroy:
+       minimal__destroy(test_ctx->skel);
+err_free:
+       free(test_ctx);
+       return SCX_TEST_FAIL;
+}
+
+/*
+ * Compare total_bw across three states: the baseline captured in setup()
+ * (nothing attached), with the minimal_ops struct_ops map attached, and after
+ * that link has been destroyed again.
+ *
+ * Returns SCX_TEST_PASS when every tested CPU reports the same value in all
+ * three states, SCX_TEST_FAIL on an attach failure, an unstable reading, or
+ * the first mismatch found.
+ */
+static enum scx_test_status run(void *ctx)
+{
+       struct total_bw_ctx *test_ctx = ctx;
+       const long *baseline = test_ctx->baseline_bw;
+       const int nr_cpus = test_ctx->nr_cpus;
+       long loaded_bw[MAX_CPUS];
+       long unloaded_bw[MAX_CPUS];
+       struct bpf_link *link;
+       int cpu;
+
+       /* Test scenario 2: BPF program loaded */
+       link = bpf_map__attach_struct_ops(test_ctx->skel->maps.minimal_ops);
+       if (!link) {
+               SCX_ERR("Failed to attach scheduler");
+               return SCX_TEST_FAIL;
+       }
+
+       fprintf(stderr, "BPF program loaded, reading total_bw values\n");
+       if (fetch_verify_total_bw(loaded_bw, nr_cpus) < 0) {
+               SCX_ERR("Failed to get stable values with BPF loaded");
+               bpf_link__destroy(link);
+               return SCX_TEST_FAIL;
+       }
+       bpf_link__destroy(link);
+
+       /* Test scenario 3: BPF program unloaded */
+       fprintf(stderr, "BPF program unloaded, reading total_bw values\n");
+       if (fetch_verify_total_bw(unloaded_bw, nr_cpus) < 0) {
+               SCX_ERR("Failed to get stable values after BPF unload");
+               return SCX_TEST_FAIL;
+       }
+
+       /* All three scenarios must report the same total_bw on every tested CPU */
+       for (cpu = 0; cpu < nr_cpus; cpu++) {
+               const long base = baseline[cpu];
+
+               if (loaded_bw[cpu] != base) {
+                       SCX_ERR("CPU%d: baseline_bw=%ld != loaded_bw=%ld",
+                               cpu, base, loaded_bw[cpu]);
+                       return SCX_TEST_FAIL;
+               }
+
+               if (unloaded_bw[cpu] != base) {
+                       SCX_ERR("CPU%d: baseline_bw=%ld != unloaded_bw=%ld",
+                               cpu, base, unloaded_bw[cpu]);
+                       return SCX_TEST_FAIL;
+               }
+       }
+
+       fprintf(stderr, "All total_bw values are consistent across all scenarios\n");
+       return SCX_TEST_PASS;
+}
+
+/*
+ * Release what setup() created: destroy the BPF skeleton if one is present,
+ * then free the context. A NULL @ctx is ignored.
+ */
+static void cleanup(void *ctx)
+{
+       struct total_bw_ctx *tctx = ctx;
+
+       if (!tctx)
+               return;
+
+       if (tctx->skel)
+               minimal__destroy(tctx->skel);
+
+       free(tctx);
+}
+
+/*
+ * Test descriptor: names the test and hooks up its setup/run/cleanup
+ * callbacks. It is handed to REGISTER_SCX_TEST() below.
+ */
+struct scx_test total_bw = {
+       .name = "total_bw",
+       .description = "Verify total_bw consistency across BPF program states",
+       .setup = setup,
+       .run = run,
+       .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&total_bw)
author	Joel Fernandes <joelagnelf@nvidia.com>
	Mon, 26 Jan 2026 09:59:05 +0000 (10:59 +0100)
committer	Peter Zijlstra <peterz@infradead.org>
	Tue, 3 Feb 2026 11:04:18 +0000 (12:04 +0100)
tools/testing/selftests/sched_ext/Makefile		patch \| blob \| blame \| history
tools/testing/selftests/sched_ext/total_bw.c	[new file with mode: 0644]	patch \| blob