tools/sched_ext: Receive updates from SCX repo
Author:    Andrea Righi <arighi@nvidia.com>
           Mon, 4 Aug 2025 11:04:49 +0000 (13:04 +0200)
Committer: Tejun Heo <tj@kernel.org>
           Mon, 11 Aug 2025 18:21:57 +0000 (08:21 -1000)
Receive tools/sched_ext updates from https://github.com/sched-ext/scx to
sync userspace bits:

 - basic BPF arena allocator abstractions,

 - additional process flag definitions,

 - a fixed is_migration_disabled() helper,

 - separation of the user_exit_info BPF and user-space code.

This also fixes the following warning when building the selftests:

 tools/sched_ext/include/scx/common.bpf.h:550:9: warning: 'likely' macro redefined [-Wmacro-redefined]
  550 | #define likely(x) __builtin_expect(!!(x), 1)
      |         ^
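
The warning is resolved by wrapping each of these definitions in a guard so
that a pre-existing definition wins, as common.bpf.h now does:

  #ifndef likely
  #define likely(x) __builtin_expect(!!(x), 1)
  #endif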

Co-developed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
13 files changed:
tools/sched_ext/include/scx/bpf_arena_common.bpf.h [new file with mode: 0644]
tools/sched_ext/include/scx/bpf_arena_common.h [new file with mode: 0644]
tools/sched_ext/include/scx/common.bpf.h
tools/sched_ext/include/scx/common.h
tools/sched_ext/include/scx/compat.bpf.h
tools/sched_ext/include/scx/user_exit_info.bpf.h [new file with mode: 0644]
tools/sched_ext/include/scx/user_exit_info.h
tools/sched_ext/include/scx/user_exit_info_common.h [new file with mode: 0644]
tools/sched_ext/scx_central.bpf.c
tools/sched_ext/scx_central.c
tools/sched_ext/scx_flatcg.bpf.c
tools/sched_ext/scx_flatcg.c
tools/sched_ext/scx_simple.c

diff --git a/tools/sched_ext/include/scx/bpf_arena_common.bpf.h b/tools/sched_ext/include/scx/bpf_arena_common.bpf.h
new file mode 100644
index 0000000..4366fb3
--- /dev/null
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ *   return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ *
+ * This is a workaround for LLVM compiler versions without
+ * __BPF_FEATURE_ADDR_SPACE_CAST that do not automatically cast between arena
+ * pointers and native kernel/userspace ones. In this case we explicitly do so
+ * with cast_kern() and cast_user(). E.g., in the Linux kernel tree,
+ * tools/testing/selftests/bpf includes tests that use these macros to implement
+ * linked lists and hashtables backed by arena memory. In sched_ext, we use
+ * cast_kern() and cast_user() for compatibility with older LLVM toolchains.
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+       asm volatile(".byte 0xBF;               \
+                    .ifc %[reg], r0;           \
+                    .byte 0x00;                \
+                    .endif;                    \
+                    .ifc %[reg], r1;           \
+                    .byte 0x11;                \
+                    .endif;                    \
+                    .ifc %[reg], r2;           \
+                    .byte 0x22;                \
+                    .endif;                    \
+                    .ifc %[reg], r3;           \
+                    .byte 0x33;                \
+                    .endif;                    \
+                    .ifc %[reg], r4;           \
+                    .byte 0x44;                \
+                    .endif;                    \
+                    .ifc %[reg], r5;           \
+                    .byte 0x55;                \
+                    .endif;                    \
+                    .ifc %[reg], r6;           \
+                    .byte 0x66;                \
+                    .endif;                    \
+                    .ifc %[reg], r7;           \
+                    .byte 0x77;                \
+                    .endif;                    \
+                    .ifc %[reg], r8;           \
+                    .byte 0x88;                \
+                    .endif;                    \
+                    .ifc %[reg], r9;           \
+                    .byte 0x99;                \
+                    .endif;                    \
+                    .short %[off];             \
+                    .long %[as]"               \
+                    : [reg]"+r"(var)           \
+                    : [off]"i"(BPF_ADDR_SPACE_CAST) \
+                    , [as]"i"((dst_as << 16) | src_as));
+#endif
+
+#define __arena
+#define __arena_global SEC(".addr_space.1")
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+                                   int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef TEST
+#define can_loop true
+#define __cond_break(expr) expr
+#else
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop                                       \
+       ({ __label__ l_break, l_continue;               \
+       bool ret = true;                                \
+       asm volatile goto("may_goto %l[l_break]"        \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: ret = false;                           \
+       l_continue:;                                    \
+       ret;                                            \
+       })
+
+#define __cond_break(expr)                             \
+       ({ __label__ l_break, l_continue;               \
+       asm volatile goto("may_goto %l[l_break]"        \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: expr;                                  \
+       l_continue:;                                    \
+       })
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define can_loop                                       \
+       ({ __label__ l_break, l_continue;               \
+       bool ret = true;                                \
+       asm volatile goto("1:.byte 0xe5;                \
+                     .byte 0;                          \
+                     .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;      \
+                     .short 0"                         \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: ret = false;                           \
+       l_continue:;                                    \
+       ret;                                            \
+       })
+
+#define __cond_break(expr)                             \
+       ({ __label__ l_break, l_continue;               \
+       asm volatile goto("1:.byte 0xe5;                \
+                     .byte 0;                          \
+                     .long ((%l[l_break] - 1b - 8) / 8) & 0xffff;      \
+                     .short 0"                         \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: expr;                                  \
+       l_continue:;                                    \
+       })
+#else
+#define can_loop                                       \
+       ({ __label__ l_break, l_continue;               \
+       bool ret = true;                                \
+       asm volatile goto("1:.byte 0xe5;                \
+                     .byte 0;                          \
+                     .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;      \
+                     .short 0"                         \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: ret = false;                           \
+       l_continue:;                                    \
+       ret;                                            \
+       })
+
+#define __cond_break(expr)                             \
+       ({ __label__ l_break, l_continue;               \
+       asm volatile goto("1:.byte 0xe5;                \
+                     .byte 0;                          \
+                     .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16;      \
+                     .short 0"                         \
+                     :::: l_break);                    \
+       goto l_continue;                                \
+       l_break: expr;                                  \
+       l_continue:;                                    \
+       })
+#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#endif /* __BPF_FEATURE_MAY_GOTO */
+#endif /* TEST */
+
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
+
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
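
For reference, a minimal sketch of how the arena and loop helpers above might
be used from BPF code. The map definition, names, and sizes are illustrative
assumptions, not part of this commit; SEC(), __uint() and NUMA_NO_NODE are
assumed to come from the surrounding scx/libbpf headers:

  /* Hypothetical arena map; the page count is arbitrary. */
  struct {
          __uint(type, BPF_MAP_TYPE_ARENA);
          __uint(map_flags, BPF_F_MMAPABLE);
          __uint(max_entries, 64);        /* arena size in pages */
  } arena SEC(".maps");

  static int fill_one_page(void)
  {
          u32 __arena *p;
          int i;

          /* Allocate one page from the arena on any NUMA node. */
          p = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
          if (!p)
                  return -ENOMEM;

          cast_kern(p);   /* nop when LLVM emits the cast itself */

          /* can_loop bounds the loop for the verifier via may_goto. */
          for (i = 0; i < 1024 && can_loop; i++)
                  p[i] = i;

          bpf_arena_free_pages(&arena, p, 1);
          return 0;
  }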
diff --git a/tools/sched_ext/include/scx/bpf_arena_common.h b/tools/sched_ext/include/scx/bpf_arena_common.h
new file mode 100644
index 0000000..10141db
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member)                  \
+       ({                                                      \
+               void __arena *__mptr = (void __arena *)(ptr);   \
+               ((type *)(__mptr - offsetof(type, member)));    \
+       })
+#endif
+
+/* Provide the definition of PAGE_SIZE. */
+#include <sys/user.h>
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+char __attribute__((weak)) arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member)  ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+                                                 int node_id, __u64 flags)
+{
+       return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
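
A short sketch of arena_container_of() from the user-space side; the struct
and field names are hypothetical:

  struct elem {
          long key;
          long link[2];   /* member embedded in an arena data structure */
  };

  /* Recover the enclosing elem from a pointer to its embedded member. */
  static struct elem *elem_of(void *link_ptr)
  {
          return arena_container_of(link_ptr, struct elem, link);
  }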
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index d4e21558e98269a70e73c0c77a311ab0b8bf5815..86abdb3c3142ac8f0416808a6594e445b00afcce 100644
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <asm-generic/errno.h>
-#include "user_exit_info.h"
+#include "user_exit_info.bpf.h"
 #include "enum_defs.autogen.h"
 
+#define PF_IDLE                                0x00000002      /* I am an IDLE thread */
+#define PF_IO_WORKER                   0x00000010      /* Task is an IO worker */
 #define PF_WQ_WORKER                   0x00000020      /* I'm a workqueue worker */
+#define PF_KCOMPACTD                   0x00010000      /* I am kcompactd */
+#define PF_KSWAPD                      0x00020000      /* I am kswapd */
 #define PF_KTHREAD                     0x00200000      /* I am a kernel thread */
 #define PF_EXITING                     0x00000004
 #define CLOCK_MONOTONIC                        1
 
+#ifndef NR_CPUS
+#define NR_CPUS 1024
+#endif
+
+#ifndef NUMA_NO_NODE
+#define        NUMA_NO_NODE    (-1)
+#endif
+
 extern int LINUX_KERNEL_VERSION __kconfig;
 extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
 extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
@@ -107,6 +119,9 @@ void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __
 static inline __attribute__((format(printf, 1, 2)))
 void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 
+#define SCX_STRINGIFY(x) #x
+#define SCX_TOSTRING(x) SCX_STRINGIFY(x)
+
 /*
  * Helper macro for initializing the fmt and variadic argument inputs to both
  * bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
@@ -141,13 +156,15 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
  * scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
  * instead of an array of u64. Invoking this macro will cause the scheduler to
  * exit in an erroneous state, with diagnostic information being passed to the
- * user.
+ * user. It appends the file and line number to aid debugging.
  */
 #define scx_bpf_error(fmt, args...)                                            \
 ({                                                                             \
-       scx_bpf_bstr_preamble(fmt, args)                                        \
+       scx_bpf_bstr_preamble(                                                  \
+               __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args)           \
        scx_bpf_error_bstr(___fmt, ___param, sizeof(___param));                 \
-       ___scx_bpf_bstr_format_checker(fmt, ##args);                            \
+       ___scx_bpf_bstr_format_checker(                                         \
+               __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args);          \
 })
 
 /*
@@ -229,6 +246,7 @@ BPF_PROG(name, ##args)
  * be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
  * `MEMBER_VPTR(ptr, ->member)`.
  */
+#ifndef MEMBER_VPTR
 #define MEMBER_VPTR(base, member) (typeof((base) member) *)                    \
 ({                                                                             \
        u64 __base = (u64)&(base);                                              \
@@ -245,6 +263,7 @@ BPF_PROG(name, ##args)
                  [max]"i"(sizeof(base) - sizeof((base) member)));              \
        __addr;                                                                 \
 })
+#endif /* MEMBER_VPTR */
 
 /**
  * ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
@@ -260,6 +279,7 @@ BPF_PROG(name, ##args)
  * size of the array to compute the max, which will result in rejection by
  * the verifier.
  */
+#ifndef ARRAY_ELEM_PTR
 #define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *)                           \
 ({                                                                             \
        u64 __base = (u64)arr;                                                  \
@@ -274,7 +294,7 @@ BPF_PROG(name, ##args)
                  [max]"r"(sizeof(arr[0]) * ((n) - 1)));                        \
        __addr;                                                                 \
 })
-
+#endif /* ARRAY_ELEM_PTR */
 
 /*
  * BPF declarations and helpers
@@ -438,8 +458,27 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
  */
 static inline bool is_migration_disabled(const struct task_struct *p)
 {
-       if (bpf_core_field_exists(p->migration_disabled))
-               return p->migration_disabled;
+       /*
+        * Testing p->migration_disabled in BPF code is tricky because
+        * migration is _always_ disabled while BPF code runs: the prolog
+        * (__bpf_prog_enter) and epilog (__bpf_prog_exit) of BPF program
+        * execution disable and re-enable migration of the current task,
+        * respectively. So the _current_ task of a sched_ext ops callback is
+        * always migration-disabled. Moreover, p->migration_disabled can be
+        * two or greater when one sched_ext ops BPF program (e.g., ops.tick)
+        * runs in the middle of another BPF program's execution.
+        *
+        * Therefore, consider the _current_ task migration-disabled only
+        * when its migration_disabled count is greater than one. When
+        * p->migration_disabled == 1 there is an ambiguity, so check whether
+        * @p is the current task.
+        */
+       if (bpf_core_field_exists(p->migration_disabled)) {
+               if (p->migration_disabled == 1)
+                       return bpf_get_current_task_btf() != p;
+               else
+                       return p->migration_disabled;
+       }
        return false;
 }
 
@@ -476,7 +515,7 @@ static inline s64 time_delta(u64 after, u64 before)
  */
 static inline bool time_after(u64 a, u64 b)
 {
-        return (s64)(b - a) < 0;
+       return (s64)(b - a) < 0;
 }
 
 /**
@@ -500,7 +539,7 @@ static inline bool time_before(u64 a, u64 b)
  */
 static inline bool time_after_eq(u64 a, u64 b)
 {
-        return (s64)(a - b) >= 0;
+       return (s64)(a - b) >= 0;
 }
 
 /**
@@ -547,9 +586,15 @@ static inline bool time_in_range_open(u64 a, u64 b, u64 c)
  */
 
 /* useful compiler attributes */
+#ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
 #define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#ifndef __maybe_unused
 #define __maybe_unused __attribute__((__unused__))
+#endif
 
 /*
  * READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
@@ -632,6 +677,26 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        __u.__val;                                                              \
 })
 
+/*
+ * __calc_avg - Calculate exponential weighted moving average (EWMA) with
+ * @old and @new values. @decay represents how large the @old value remains.
+ * With a larger @decay value, the moving average changes slowly, exhibiting
+ * fewer fluctuations.
+ */
+#define __calc_avg(old, new, decay) ({                                         \
+       typeof(decay) thr = 1 << (decay);                                       \
+       typeof(old) ret;                                                        \
+       if (((old) < thr) || ((new) < thr)) {                                   \
+               if (((old) == 1) && ((new) == 0))                               \
+                       ret = 0;                                                \
+               else                                                            \
+                       ret = ((old) - ((old) >> 1)) + ((new) >> 1);            \
+       } else {                                                                \
+               ret = ((old) - ((old) >> (decay))) + ((new) >> (decay));        \
+       }                                                                       \
+       ret;                                                                    \
+})
+
 /*
  * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
  * @v: The value for which we're computing the base 2 logarithm.
@@ -662,6 +727,25 @@ static inline u32 log2_u64(u64 v)
                 return log2_u32(v) + 1;
 }
 
+/*
+ * __sqrt_u64 - Calculate the square root of value @x using Newton's method.
+ */
+static inline u64 __sqrt_u64(u64 x)
+{
+       if (x == 0 || x == 1)
+               return x;
+
+       u64 r = ((1ULL << 32) > x) ? x : (1ULL << 32);
+
+       for (int i = 0; i < 8; ++i) {
+               u64 q = x / r;
+               if (r <= q)
+                       break;
+               r = (r + q) >> 1;
+       }
+       return r;
+}
+
 /*
  * Return a value proportionally scaled to the task's weight.
  */
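
For reference, a hedged sketch of the new __calc_avg() helper in use; the
load-tracking context is hypothetical. With @decay = 2 each update keeps 3/4
of the old average and mixes in 1/4 of the new sample:

  static u64 load_avg;    /* hypothetical running load estimate */

  static void update_load(u64 sample)
  {
          /* For values >= (1 << 2): avg = avg - avg/4 + sample/4. */
          load_avg = __calc_avg(load_avg, sample, 2);
  }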
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index 1dc76bd842966026cdc52fa9be4789401a9d206d..b3c6372bcf810b7396a78946bb1f6f143ac84128 100644
@@ -75,8 +75,9 @@ typedef int64_t s64;
 #include "enums.h"
 
 /* not available when building kernel tools/sched_ext */
-#if __has_include(<lib/sdt_task.h>)
-#include <lib/sdt_task.h>
+#if __has_include(<lib/sdt_task_defs.h>)
+#include "bpf_arena_common.h"
+#include <lib/sdt_task_defs.h>
 #endif
 
 #endif /* __SCHED_EXT_COMMON_H */
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 9252e1a00556f555db5359efb5a10fe9f47b625a..36e0cd2fd4edafb55726bd24ba21824882ce54ac 100644
@@ -38,6 +38,7 @@ void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__i
 void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
 bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
 
 #define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags)                                \
        (bpf_ksym_exists(scx_bpf_dsq_insert) ?                                  \
@@ -82,6 +83,10 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
          scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
          false))
 
+#define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz)          \
+       (bpf_ksym_exists(bpf_cpumask_populate) ?                        \
+        (bpf_cpumask_populate(cpumask, src, size__sz)) : -EOPNOTSUPP)
+
 #define scx_bpf_dispatch(p, dsq_id, slice, enq_flags)                          \
        _Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")
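
A minimal usage sketch for the new __COMPAT_bpf_cpumask_populate() wrapper;
the source bitmap is a hypothetical example. On kernels without the kfunc the
wrapper returns -EOPNOTSUPP and the caller must fall back:

  static int populate_mask(struct cpumask *dst)
  {
          u64 bits = ~0ULL;       /* hypothetical bitmap: CPUs 0-63 set */
          int ret;

          ret = __COMPAT_bpf_cpumask_populate(dst, &bits, sizeof(bits));
          if (ret == -EOPNOTSUPP) {
                  /* kfunc unavailable: e.g., set CPUs one by one instead */
          }
          return ret;
  }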
 
diff --git a/tools/sched_ext/include/scx/user_exit_info.bpf.h b/tools/sched_ext/include/scx/user_exit_info.bpf.h
new file mode 100644
index 0000000..e7ac661
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+
+#ifndef __USER_EXIT_INFO_BPF_H
+#define __USER_EXIT_INFO_BPF_H
+
+#ifndef LSP
+#include "vmlinux.h"
+#endif
+#include <bpf/bpf_core_read.h>
+
+#include "user_exit_info_common.h"
+
+#define UEI_DEFINE(__name)                                                     \
+       char RESIZABLE_ARRAY(data, __name##_dump);                              \
+       const volatile u32 __name##_dump_len;                                   \
+       struct user_exit_info __name SEC(".data")
+
+#define UEI_RECORD(__uei_name, __ei) ({                                                \
+       bpf_probe_read_kernel_str(__uei_name.reason,                            \
+                                 sizeof(__uei_name.reason), (__ei)->reason);   \
+       bpf_probe_read_kernel_str(__uei_name.msg,                               \
+                                 sizeof(__uei_name.msg), (__ei)->msg);         \
+       bpf_probe_read_kernel_str(__uei_name##_dump,                            \
+                                 __uei_name##_dump_len, (__ei)->dump);         \
+       if (bpf_core_field_exists((__ei)->exit_code))                           \
+               __uei_name.exit_code = (__ei)->exit_code;                       \
+       /* use __sync to force memory barrier */                                \
+       __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,          \
+                                   (__ei)->kind);                              \
+})
+
+#endif /* __USER_EXIT_INFO_BPF_H */
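
For reference, the usual pattern on the BPF side, following the in-tree
example schedulers:

  UEI_DEFINE(uei);

  void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
  {
          UEI_RECORD(uei, ei);
  }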
diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
index 66f856640ee7e29fc1ce050000c5807f69ef7395..399697fa372fb1ecac9b3296c175f3444bc9381a 100644
 #ifndef __USER_EXIT_INFO_H
 #define __USER_EXIT_INFO_H
 
-#ifdef LSP
-#define __bpf__
-#include "../vmlinux.h"
-#endif
-
-enum uei_sizes {
-       UEI_REASON_LEN          = 128,
-       UEI_MSG_LEN             = 1024,
-       UEI_DUMP_DFL_LEN        = 32768,
-};
-
-struct user_exit_info {
-       int             kind;
-       s64             exit_code;
-       char            reason[UEI_REASON_LEN];
-       char            msg[UEI_MSG_LEN];
-};
-
-#ifdef __bpf__
-
-#ifndef LSP
-#include "vmlinux.h"
-#endif
-#include <bpf/bpf_core_read.h>
-
-#define UEI_DEFINE(__name)                                                     \
-       char RESIZABLE_ARRAY(data, __name##_dump);                              \
-       const volatile u32 __name##_dump_len;                                   \
-       struct user_exit_info __name SEC(".data")
-
-#define UEI_RECORD(__uei_name, __ei) ({                                                \
-       bpf_probe_read_kernel_str(__uei_name.reason,                            \
-                                 sizeof(__uei_name.reason), (__ei)->reason);   \
-       bpf_probe_read_kernel_str(__uei_name.msg,                               \
-                                 sizeof(__uei_name.msg), (__ei)->msg);         \
-       bpf_probe_read_kernel_str(__uei_name##_dump,                            \
-                                 __uei_name##_dump_len, (__ei)->dump);         \
-       if (bpf_core_field_exists((__ei)->exit_code))                           \
-               __uei_name.exit_code = (__ei)->exit_code;                       \
-       /* use __sync to force memory barrier */                                \
-       __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind,          \
-                                   (__ei)->kind);                              \
-})
-
-#else  /* !__bpf__ */
-
 #include <stdio.h>
 #include <stdbool.h>
 
+#include "user_exit_info_common.h"
+
 /* no need to call the following explicitly if SCX_OPS_LOAD() is used */
 #define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({                                        \
        u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
@@ -114,5 +70,4 @@ enum uei_ecode_mask {
 
 #define UEI_ECODE_RESTART(__ecode)     (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
 
-#endif /* __bpf__ */
 #endif /* __USER_EXIT_INFO_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info_common.h b/tools/sched_ext/include/scx/user_exit_info_common.h
new file mode 100644
index 0000000..2d0981a
--- /dev/null
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+#ifndef __USER_EXIT_INFO_COMMON_H
+#define __USER_EXIT_INFO_COMMON_H
+
+#ifdef LSP
+#include "../vmlinux.h"
+#endif
+
+enum uei_sizes {
+       UEI_REASON_LEN          = 128,
+       UEI_MSG_LEN             = 1024,
+       UEI_DUMP_DFL_LEN        = 32768,
+};
+
+struct user_exit_info {
+       int             kind;
+       s64             exit_code;
+       char            reason[UEI_REASON_LEN];
+       char            msg[UEI_MSG_LEN];
+};
+
+#endif /* __USER_EXIT_INFO_COMMON_H */
diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
index 50bc1737c167a174955458fc1b608a592b43c11d..55df8b7988657bf145c871d7aec05a10fdc08bca 100644
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * A central FIFO sched_ext scheduler which demonstrates the followings:
+ * A central FIFO sched_ext scheduler which demonstrates the following:
  *
  * a. Making all scheduling decisions from one CPU:
  *
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 6ba6e610eeaa0382d625b2e4a27277bfe53eb8f9..55931a4cd71c7c9c0eee7d44ddb0f1861f780cfd 100644
@@ -61,6 +61,7 @@ restart:
        skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
        skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
+       assert(skel->rodata->nr_cpu_ids > 0);
        assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
 
        while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index fdc7170639e604c2da034dc4e015a151bdd7beb8..2c720e3ecad59369eef61b98613c3670a8bcae33 100644
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
               .cgroup_move             = (void *)fcg_cgroup_move,
               .init                    = (void *)fcg_init,
               .exit                    = (void *)fcg_exit,
-              .flags                   = SCX_OPS_ENQ_EXITING,
+              .flags                   = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
               .name                    = "flatcg");
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index 6dd423eeb4ff98e13e4a02fe098cbff26b01c118..cd85eb4011793c79813fff525e96c35d14555ce6 100644
@@ -6,6 +6,7 @@
  */
 #include <stdio.h>
 #include <signal.h>
+#include <assert.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <limits.h>
@@ -137,6 +138,7 @@ restart:
        skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 
        skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+       assert(skel->rodata->nr_cpus > 0);
        skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
        while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c
index 76d83199545cb27673a42a236a6d7e5940ecd550..06d4b13bf76bcc4121954da78fa692823a9ae7ed 100644
@@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <signal.h>
+#include <assert.h>
 #include <libgen.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
@@ -41,6 +42,7 @@ static void sigint_handler(int simple)
 static void read_stats(struct scx_simple *skel, __u64 *stats)
 {
        int nr_cpus = libbpf_num_possible_cpus();
+       assert(nr_cpus > 0);
        __u64 cnts[2][nr_cpus];
        __u32 idx;