git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf: Unify perf_event_free_task() / perf_event_exit_task_context()
author: Peter Zijlstra <peterz@infradead.org>
Thu, 13 Feb 2025 13:04:07 +0000 (14:04 +0100)
committer: Peter Zijlstra <peterz@infradead.org>
Tue, 8 Apr 2025 18:55:47 +0000 (20:55 +0200)
Both perf_event_free_task() and perf_event_exit_task_context() are
very similar, except perf_event_exit_task_context() is a little more
generic / makes less assumptions.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lkml.kernel.org/r/20250307193723.274039710@infradead.org
kernel/events/core.c

index f75b0d38a40f295b7f7fd6a5508ae6337504acc6..85c8b795cad86e0f4dd18fa251acfd3bf4a558e6 100644 (file)
@@ -13742,13 +13742,11 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
        perf_event_wakeup(event);
 }
 
-static void perf_event_exit_task_context(struct task_struct *child)
+static void perf_event_exit_task_context(struct task_struct *child, bool exit)
 {
        struct perf_event_context *child_ctx, *clone_ctx = NULL;
        struct perf_event *child_event, *next;
 
-       WARN_ON_ONCE(child != current);
-
        child_ctx = perf_pin_task_context(child);
        if (!child_ctx)
                return;
@@ -13771,7 +13769,8 @@ static void perf_event_exit_task_context(struct task_struct *child)
         * in.
         */
        raw_spin_lock_irq(&child_ctx->lock);
-       task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
+       if (exit)
+               task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
 
        /*
         * Now that the context is inactive, destroy the task <-> ctx relation
@@ -13780,7 +13779,7 @@ static void perf_event_exit_task_context(struct task_struct *child)
        RCU_INIT_POINTER(child->perf_event_ctxp, NULL);
        put_ctx(child_ctx); /* cannot be last */
        WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
-       put_task_struct(current); /* cannot be last */
+       put_task_struct(child); /* cannot be last */
 
        clone_ctx = unclone_ctx(child_ctx);
        raw_spin_unlock_irq(&child_ctx->lock);
@@ -13793,13 +13792,31 @@ static void perf_event_exit_task_context(struct task_struct *child)
         * won't get any samples after PERF_RECORD_EXIT. We can however still
         * get a few PERF_RECORD_READ events.
         */
-       perf_event_task(child, child_ctx, 0);
+       if (exit)
+               perf_event_task(child, child_ctx, 0);
 
        list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
                perf_event_exit_event(child_event, child_ctx);
 
        mutex_unlock(&child_ctx->mutex);
 
+       if (!exit) {
+               /*
+                * perf_event_release_kernel() could still have a reference on
+                * this context. In that case we must wait for these events to
+                * have been freed (in particular all their references to this
+                * task must've been dropped).
+                *
+                * Without this copy_process() will unconditionally free this
+                * task (irrespective of its reference count) and
+                * _free_event()'s put_task_struct(event->hw.target) will be a
+                * use-after-free.
+                *
+                * Wait for all events to drop their context reference.
+                */
+               wait_var_event(&child_ctx->refcount,
+                              refcount_read(&child_ctx->refcount) == 1);
+       }
        put_ctx(child_ctx);
 }
 
@@ -13827,7 +13844,7 @@ void perf_event_exit_task(struct task_struct *child)
        }
        mutex_unlock(&child->perf_event_mutex);
 
-       perf_event_exit_task_context(child);
+       perf_event_exit_task_context(child, true);
 
        /*
         * The perf_event_exit_task_context calls perf_event_task
@@ -13844,25 +13861,6 @@ void perf_event_exit_task(struct task_struct *child)
        detach_task_ctx_data(child);
 }
 
-static void perf_free_event(struct perf_event *event,
-                           struct perf_event_context *ctx)
-{
-       struct perf_event *parent = event->parent;
-
-       if (WARN_ON_ONCE(!parent))
-               return;
-
-       mutex_lock(&parent->child_mutex);
-       list_del_init(&event->child_list);
-       mutex_unlock(&parent->child_mutex);
-
-       raw_spin_lock_irq(&ctx->lock);
-       perf_group_detach(event);
-       list_del_event(event, ctx);
-       raw_spin_unlock_irq(&ctx->lock);
-       put_event(event);
-}
-
 /*
  * Free a context as created by inheritance by perf_event_init_task() below,
  * used by fork() in case of fail.
@@ -13872,48 +13870,7 @@ static void perf_free_event(struct perf_event *event,
  */
 void perf_event_free_task(struct task_struct *task)
 {
-       struct perf_event_context *ctx;
-       struct perf_event *event, *tmp;
-
-       ctx = rcu_access_pointer(task->perf_event_ctxp);
-       if (!ctx)
-               return;
-
-       mutex_lock(&ctx->mutex);
-       raw_spin_lock_irq(&ctx->lock);
-       /*
-        * Destroy the task <-> ctx relation and mark the context dead.
-        *
-        * This is important because even though the task hasn't been
-        * exposed yet the context has been (through child_list).
-        */
-       RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
-       WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
-       put_task_struct(task); /* cannot be last */
-       raw_spin_unlock_irq(&ctx->lock);
-
-
-       list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry)
-               perf_free_event(event, ctx);
-
-       mutex_unlock(&ctx->mutex);
-
-       /*
-        * perf_event_release_kernel() could've stolen some of our
-        * child events and still have them on its free_list. In that
-        * case we must wait for these events to have been freed (in
-        * particular all their references to this task must've been
-        * dropped).
-        *
-        * Without this copy_process() will unconditionally free this
-        * task (irrespective of its reference count) and
-        * _free_event()'s put_task_struct(event->hw.target) will be a
-        * use-after-free.
-        *
-        * Wait for all events to drop their context reference.
-        */
-       wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
-       put_ctx(ctx); /* must be last */
+       perf_event_exit_task_context(task, false);
 }
 
 void perf_event_delayed_put(struct task_struct *task)