git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf: Split __perf_pending_irq() out of perf_pending_irq()
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>
Thu, 4 Jul 2024 17:03:41 +0000 (19:03 +0200)
committerPeter Zijlstra <peterz@infradead.org>
Tue, 9 Jul 2024 11:26:37 +0000 (13:26 +0200)
perf_pending_irq() invokes perf_event_wakeup() and __perf_pending_irq().
The former is in charge of waking any tasks which wait to be woken up
while the latter disables perf-events.

perf_pending_irq() is an irq_work and, as such, its callback is
invoked in thread context on PREEMPT_RT. This is needed because all
the waking functions (wake_up_all(), kill_fasync()) acquire sleep locks
which must not be used with disabled interrupts.
Disabling events, as done by __perf_pending_irq(), expects a hardirq
context and disabled interrupts. This requirement is not fulfilled on
PREEMPT_RT.

Split the functionality based on perf_event::pending_disable into an
irq_work named `pending_disable_irq' and invoke it in hardirq context on
PREEMPT_RT. Rename the split out callback to perf_pending_disable().

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-8-bigeasy@linutronix.de
include/linux/perf_event.h
kernel/events/core.c

index 99a7ea1d29ed554cf2a5abe51861afe275b686d8..65ece0d5b4b6d1a1660837eb2c2758c0c58e76fd 100644 (file)
@@ -783,6 +783,7 @@ struct perf_event {
        unsigned int                    pending_disable;
        unsigned long                   pending_addr;   /* SIGTRAP */
        struct irq_work                 pending_irq;
+       struct irq_work                 pending_disable_irq;
        struct callback_head            pending_task;
        unsigned int                    pending_work;
        struct rcuwait                  pending_work_wait;
index 96e03d6b52d182fc0486d304977425cdc4d0055a..f64c30e7d5dad535c3f6c910a8e9c2fed0897a8e 100644 (file)
@@ -2451,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event,
  * hold the top-level event's child_mutex, so any descendant that
  * goes to exit will block in perf_event_exit_event().
  *
- * When called from perf_pending_irq it's OK because event->ctx
+ * When called from perf_pending_disable it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
@@ -2491,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
 void perf_event_disable_inatomic(struct perf_event *event)
 {
        event->pending_disable = 1;
-       irq_work_queue(&event->pending_irq);
+       irq_work_queue(&event->pending_disable_irq);
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -5218,6 +5218,7 @@ static void perf_pending_task_sync(struct perf_event *event)
 static void _free_event(struct perf_event *event)
 {
        irq_work_sync(&event->pending_irq);
+       irq_work_sync(&event->pending_disable_irq);
        perf_pending_task_sync(event);
 
        unaccount_event(event);
@@ -6749,7 +6750,7 @@ static void perf_sigtrap(struct perf_event *event)
 /*
  * Deliver the pending work in-event-context or follow the context.
  */
-static void __perf_pending_irq(struct perf_event *event)
+static void __perf_pending_disable(struct perf_event *event)
 {
        int cpu = READ_ONCE(event->oncpu);
 
@@ -6787,11 +6788,26 @@ static void __perf_pending_irq(struct perf_event *event)
         *                                irq_work_queue(); // FAILS
         *
         *  irq_work_run()
-        *    perf_pending_irq()
+        *    perf_pending_disable()
         *
         * But the event runs on CPU-B and wants disabling there.
         */
-       irq_work_queue_on(&event->pending_irq, cpu);
+       irq_work_queue_on(&event->pending_disable_irq, cpu);
+}
+
+static void perf_pending_disable(struct irq_work *entry)
+{
+       struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
+       int rctx;
+
+       /*
+        * If we 'fail' here, that's OK, it means recursion is already disabled
+        * and we won't recurse 'further'.
+        */
+       rctx = perf_swevent_get_recursion_context();
+       __perf_pending_disable(event);
+       if (rctx >= 0)
+               perf_swevent_put_recursion_context(rctx);
 }
 
 static void perf_pending_irq(struct irq_work *entry)
@@ -6814,8 +6830,6 @@ static void perf_pending_irq(struct irq_work *entry)
                perf_event_wakeup(event);
        }
 
-       __perf_pending_irq(event);
-
        if (rctx >= 0)
                perf_swevent_put_recursion_context(rctx);
 }
@@ -11956,6 +11970,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        init_waitqueue_head(&event->waitq);
        init_irq_work(&event->pending_irq, perf_pending_irq);
+       event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
        init_task_work(&event->pending_task, perf_pending_task);
        rcuwait_init(&event->pending_work_wait);