sched/numa: add tracepoint that tracks the skipping of numa balancing due to cpuset...
author    Libo Chen <libo.chen@oracle.com>
          Thu, 24 Apr 2025 02:45:23 +0000 (19:45 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
          Tue, 13 May 2025 06:50:46 +0000 (23:50 -0700)
Unlike the sched_skip_vma_numa tracepoint, which tracks skipped VMAs, this
one tracks the task subjected to cpuset.mems pinning and prints out its
allowed memory node mask.
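
For reference, a minimal sketch of exercising the new tracepoint from
tracefs (assuming tracefs is mounted at /sys/kernel/tracing; the field
values in the sample line are illustrative, not real output):

  echo 1 > /sys/kernel/tracing/events/sched/sched_skip_cpuset_numa/enable
  cat /sys/kernel/tracing/trace_pipe

Per the TP_printk format added below, each hit emits a line ending in:

  comm=stress pid=1234 tgid=1234 ngid=0 mem_nodes_allowed=0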

Link: https://lkml.kernel.org/r/20250424024523.2298272-3-libo.chen@oracle.com
Signed-off-by: Libo Chen <libo.chen@oracle.com>
Cc: "Chen, Tim C" <tim.c.chen@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: Chris Hyser <chris.hyser@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Srikanth Aithal <sraithal@amd.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/trace/events/sched.h
kernel/sched/fair.c

index 8994e97d86c13a3c43238aa3ee2570abd5fda421..ff3990318aecd8922b3360aedd79786fd1fc1208 100644
@@ -745,6 +745,39 @@ TRACE_EVENT(sched_skip_vma_numa,
                  __entry->vm_end,
                  __print_symbolic(__entry->reason, NUMAB_SKIP_REASON))
 );
+
+TRACE_EVENT(sched_skip_cpuset_numa,
+
+       TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr),
+
+       TP_ARGS(tsk, mem_allowed_ptr),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,           TASK_COMM_LEN           )
+               __field( pid_t,         pid                                     )
+               __field( pid_t,         tgid                                    )
+               __field( pid_t,         ngid                                    )
+               __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES))
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid             = task_pid_nr(tsk);
+               __entry->tgid            = task_tgid_nr(tsk);
+               __entry->ngid            = task_numa_group_id(tsk);
+               BUILD_BUG_ON(sizeof(nodemask_t) != \
+                            BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long));
+               memcpy(__entry->mem_allowed, mem_allowed_ptr->bits,
+                      sizeof(__entry->mem_allowed));
+       ),
+
+       TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl",
+                 __entry->comm,
+                 __entry->pid,
+                 __entry->tgid,
+                 __entry->ngid,
+                 MAX_NUMNODES, __entry->mem_allowed)
+);
 #endif /* CONFIG_NUMA_BALANCING */
 
 /*
index b3b715e8a7cbc6b8d3ac2bae9ebbef8e1e693e6c..cef163c174bd09828602638aff676917278ae118 100644
@@ -3333,8 +3333,10 @@ static void task_numa_work(struct callback_head *work)
         * Memory is pinned to only one NUMA node via cpuset.mems, naturally
         * no page can be migrated.
         */
-       if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1)
+       if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1) {
+               trace_sched_skip_cpuset_numa(current, &cpuset_current_mems_allowed);
                return;
+       }
 
        if (!mm->numa_next_scan) {
                mm->numa_next_scan = now +
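
As a usage note, the skip path above fires when a task's cpuset pins its
memory to a single NUMA node. A hypothetical cgroup v2 setup that creates
that condition (assuming cgroup2 is mounted at /sys/fs/cgroup and the
cpuset controller is available; the "pin0" name is made up):

  echo +cpuset > /sys/fs/cgroup/cgroup.subtree_control
  mkdir /sys/fs/cgroup/pin0
  echo 0 > /sys/fs/cgroup/pin0/cpuset.mems
  echo $$ > /sys/fs/cgroup/pin0/cgroup.procs

With mems_allowed reduced to node 0 alone,
nodes_weight(cpuset_current_mems_allowed) is 1, so task_numa_work() now
emits sched_skip_cpuset_numa before returning instead of bailing out
silently.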