git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
ring-buffer: Use a housekeeping CPU to wake up waiters
author Petr Tesarik <ptesarik@suse.com>
Thu, 8 Jan 2026 13:21:32 +0000 (14:21 +0100)
committer Steven Rostedt (Google) <rostedt@goodmis.org>
Mon, 26 Jan 2026 22:44:53 +0000 (17:44 -0500)
Avoid running the wakeup irq_work on an isolated CPU. Since the wakeup can
run on any CPU, let's pick a housekeeping CPU to do the job.

This change reduces additional noise when tracing isolated CPUs. For
example, the following ipi_send_cpu stack trace was captured with
nohz_full=2 on the isolated CPU:

          <idle>-0       [002] d.h4.  1255.379293: ipi_send_cpu: cpu=2 callsite=irq_work_queue+0x2d/0x50 callback=rb_wake_up_waiters+0x0/0x80
          <idle>-0       [002] d.h4.  1255.379329: <stack trace>
 => trace_event_raw_event_ipi_send_cpu
 => __irq_work_queue_local
 => irq_work_queue
 => ring_buffer_unlock_commit
 => trace_buffer_unlock_commit_regs
 => trace_event_buffer_commit
 => trace_event_raw_event_x86_irq_vector
 => __sysvec_apic_timer_interrupt
 => sysvec_apic_timer_interrupt
 => asm_sysvec_apic_timer_interrupt
 => pv_native_safe_halt
 => default_idle
 => default_idle_call
 => do_idle
 => cpu_startup_entry
 => start_secondary
 => common_startup_64

The IRQ work interrupt alone adds considerable noise, but the impact can
get even worse with PREEMPT_RT, because the IRQ work interrupt is then
handled by a separate kernel thread. This requires a task switch and makes
tracing useless for analyzing latency on an isolated CPU.

After applying the patch, the trace is similar, but ipi_send_cpu always
targets a non-isolated CPU.

Unfortunately, irq_work_queue_on() is not NMI-safe. When running in NMI
context, fall back to queuing the irq work on the local CPU.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Clark Williams <clrkwllms@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Link: https://patch.msgid.link/20260108132132.2473515-1-ptesarik@suse.com
Signed-off-by: Petr Tesarik <ptesarik@suse.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
kernel/trace/ring_buffer.c

index 630221b00838ef3c1428352aa5a07753443e6e7b..d331034089552d081f86be4e79d7725fa95997a9 100644 (file)
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
+#include <linux/sched/isolation.h>
 #include <linux/trace_recursion.h>
 #include <linux/trace_events.h>
 #include <linux/ring_buffer.h>
@@ -4013,19 +4014,36 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
        rb_end_commit(cpu_buffer);
 }
 
+static bool
+rb_irq_work_queue(struct rb_irq_work *irq_work)
+{
+       int cpu;
+
+       /* irq_work_queue_on() is not NMI-safe */
+       if (unlikely(in_nmi()))
+               return irq_work_queue(&irq_work->work);
+
+       /*
+        * If CPU isolation is not active, cpu is always the current
+        * CPU, and the following is equivalent to irq_work_queue().
+        */
+       cpu = housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE);
+       return irq_work_queue_on(&irq_work->work, cpu);
+}
+
 static __always_inline void
 rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 {
        if (buffer->irq_work.waiters_pending) {
                buffer->irq_work.waiters_pending = false;
                /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&buffer->irq_work.work);
+               rb_irq_work_queue(&buffer->irq_work);
        }
 
        if (cpu_buffer->irq_work.waiters_pending) {
                cpu_buffer->irq_work.waiters_pending = false;
                /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&cpu_buffer->irq_work.work);
+               rb_irq_work_queue(&cpu_buffer->irq_work);
        }
 
        if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
@@ -4045,7 +4063,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->irq_work.wakeup_full = true;
        cpu_buffer->irq_work.full_waiters_pending = false;
        /* irq_work_queue() supplies it's own memory barriers */
-       irq_work_queue(&cpu_buffer->irq_work.work);
+       rb_irq_work_queue(&cpu_buffer->irq_work);
 }
 
 #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION