sched/numa: add tracepoint that tracks the skipping of numa balancing due to cpuset...
author    Libo Chen <libo.chen@oracle.com>
          Thu, 24 Apr 2025 02:45:23 +0000 (19:45 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
          Tue, 13 May 2025 06:50:46 +0000 (23:50 -0700)
Unlike the sched_skip_vma_numa tracepoint, which tracks skipped VMAs, this
one tracks the task subjected to cpuset.mems pinning and prints out its
allowed memory node mask.
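
For reference, a minimal sketch of exercising the new tracepoint from
tracefs (assuming tracefs is mounted at /sys/kernel/tracing; the field
values in the sample line are illustrative, not real output):

  echo 1 > /sys/kernel/tracing/events/sched/sched_skip_cpuset_numa/enable
  cat /sys/kernel/tracing/trace_pipe

Per the TP_printk format added below, each hit emits a line ending in:

  comm=stress pid=1234 tgid=1234 ngid=0 mem_nodes_allowed=0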

Link: https://lkml.kernel.org/r/20250424024523.2298272-3-libo.chen@oracle.com
Signed-off-by: Libo Chen <libo.chen@oracle.com>
Cc: "Chen, Tim C" <tim.c.chen@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: Chris Hyser <chris.hyser@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Srikanth Aithal <sraithal@amd.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/trace/events/sched.h
kernel/sched/fair.c

index 8994e97d86c13a3c43238aa3ee2570abd5fda421..ff3990318aecd8922b3360aedd79786fd1fc1208 100644
@@ -745,6 +745,39 @@ TRACE_EVENT(sched_skip_vma_numa,
                  __entry->vm_end,
                  __print_symbolic(__entry->reason, NUMAB_SKIP_REASON))
 );
+
+TRACE_EVENT(sched_skip_cpuset_numa,
+
+       TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr),
+
+       TP_ARGS(tsk, mem_allowed_ptr),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,           TASK_COMM_LEN           )
+               __field( pid_t,         pid                                     )
+               __field( pid_t,         tgid                                    )
+               __field( pid_t,         ngid                                    )
+               __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES))
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid             = task_pid_nr(tsk);
+               __entry->tgid            = task_tgid_nr(tsk);
+               __entry->ngid            = task_numa_group_id(tsk);
+               BUILD_BUG_ON(sizeof(nodemask_t) != \
+                            BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long));
+               memcpy(__entry->mem_allowed, mem_allowed_ptr->bits,
+                      sizeof(__entry->mem_allowed));
+       ),
+
+       TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl",
+                 __entry->comm,
+                 __entry->pid,
+                 __entry->tgid,
+                 __entry->ngid,
+                 MAX_NUMNODES, __entry->mem_allowed)
+);
 #endif /* CONFIG_NUMA_BALANCING */
 
 /*
index b3b715e8a7cbc6b8d3ac2bae9ebbef8e1e693e6c..cef163c174bd09828602638aff676917278ae118 100644
@@ -3333,8 +3333,10 @@ static void task_numa_work(struct callback_head *work)
         * Memory is pinned to only one NUMA node via cpuset.mems, naturally
         * no page can be migrated.
         */
-       if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1)
+       if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1) {
+               trace_sched_skip_cpuset_numa(current, &cpuset_current_mems_allowed);
                return;
+       }
 
        if (!mm->numa_next_scan) {
                mm->numa_next_scan = now +
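
As a usage note, the skip path above fires when a task's cpuset pins its
memory to a single NUMA node. A hypothetical cgroup v2 setup that creates
that condition (assuming cgroup2 is mounted at /sys/fs/cgroup and the
cpuset controller is available; the "pin0" name is made up):

  echo +cpuset > /sys/fs/cgroup/cgroup.subtree_control
  mkdir /sys/fs/cgroup/pin0
  echo 0 > /sys/fs/cgroup/pin0/cpuset.mems
  echo $$ > /sys/fs/cgroup/pin0/cgroup.procs

With mems_allowed reduced to node 0 alone,
nodes_weight(cpuset_current_mems_allowed) is 1, so task_numa_work() now
emits sched_skip_cpuset_numa before returning instead of bailing out
silently.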