From: Peter Zijlstra Date: Thu, 13 Feb 2025 13:04:07 +0000 (+0100) Subject: perf: Unify perf_event_free_task() / perf_event_exit_task_context() X-Git-Tag: v6.16-rc1~196^2~57 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=90661365021a6d0d7f3a2c5046ebe33e4df53b92;p=thirdparty%2Fkernel%2Flinux.git perf: Unify perf_event_free_task() / perf_event_exit_task_context() Both perf_event_free_task() and perf_event_exit_task_context() are very similar, except perf_event_exit_task_context() is a little more generic / makes less assumptions. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ravi Bangoria Link: https://lkml.kernel.org/r/20250307193723.274039710@infradead.org --- diff --git a/kernel/events/core.c b/kernel/events/core.c index f75b0d38a40f2..85c8b795cad86 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -13742,13 +13742,11 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) perf_event_wakeup(event); } -static void perf_event_exit_task_context(struct task_struct *child) +static void perf_event_exit_task_context(struct task_struct *child, bool exit) { struct perf_event_context *child_ctx, *clone_ctx = NULL; struct perf_event *child_event, *next; - WARN_ON_ONCE(child != current); - child_ctx = perf_pin_task_context(child); if (!child_ctx) return; @@ -13771,7 +13769,8 @@ static void perf_event_exit_task_context(struct task_struct *child) * in. */ raw_spin_lock_irq(&child_ctx->lock); - task_ctx_sched_out(child_ctx, NULL, EVENT_ALL); + if (exit) + task_ctx_sched_out(child_ctx, NULL, EVENT_ALL); /* * Now that the context is inactive, destroy the task <-> ctx relation @@ -13780,7 +13779,7 @@ static void perf_event_exit_task_context(struct task_struct *child) RCU_INIT_POINTER(child->perf_event_ctxp, NULL); put_ctx(child_ctx); /* cannot be last */ WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE); - put_task_struct(current); /* cannot be last */ + put_task_struct(child); /* cannot be last */ clone_ctx = unclone_ctx(child_ctx); raw_spin_unlock_irq(&child_ctx->lock); @@ -13793,13 +13792,31 @@ static void perf_event_exit_task_context(struct task_struct *child) * won't get any samples after PERF_RECORD_EXIT. We can however still * get a few PERF_RECORD_READ events. */ - perf_event_task(child, child_ctx, 0); + if (exit) + perf_event_task(child, child_ctx, 0); list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) perf_event_exit_event(child_event, child_ctx); mutex_unlock(&child_ctx->mutex); + if (!exit) { + /* + * perf_event_release_kernel() could still have a reference on + * this context. In that case we must wait for these events to + * have been freed (in particular all their references to this + * task must've been dropped). + * + * Without this copy_process() will unconditionally free this + * task (irrespective of its reference count) and + * _free_event()'s put_task_struct(event->hw.target) will be a + * use-after-free. + * + * Wait for all events to drop their context reference. + */ + wait_var_event(&child_ctx->refcount, + refcount_read(&child_ctx->refcount) == 1); + } put_ctx(child_ctx); } @@ -13827,7 +13844,7 @@ void perf_event_exit_task(struct task_struct *child) } mutex_unlock(&child->perf_event_mutex); - perf_event_exit_task_context(child); + perf_event_exit_task_context(child, true); /* * The perf_event_exit_task_context calls perf_event_task @@ -13844,25 +13861,6 @@ void perf_event_exit_task(struct task_struct *child) detach_task_ctx_data(child); } -static void perf_free_event(struct perf_event *event, - struct perf_event_context *ctx) -{ - struct perf_event *parent = event->parent; - - if (WARN_ON_ONCE(!parent)) - return; - - mutex_lock(&parent->child_mutex); - list_del_init(&event->child_list); - mutex_unlock(&parent->child_mutex); - - raw_spin_lock_irq(&ctx->lock); - perf_group_detach(event); - list_del_event(event, ctx); - raw_spin_unlock_irq(&ctx->lock); - put_event(event); -} - /* * Free a context as created by inheritance by perf_event_init_task() below, * used by fork() in case of fail. @@ -13872,48 +13870,7 @@ static void perf_free_event(struct perf_event *event, */ void perf_event_free_task(struct task_struct *task) { - struct perf_event_context *ctx; - struct perf_event *event, *tmp; - - ctx = rcu_access_pointer(task->perf_event_ctxp); - if (!ctx) - return; - - mutex_lock(&ctx->mutex); - raw_spin_lock_irq(&ctx->lock); - /* - * Destroy the task <-> ctx relation and mark the context dead. - * - * This is important because even though the task hasn't been - * exposed yet the context has been (through child_list). - */ - RCU_INIT_POINTER(task->perf_event_ctxp, NULL); - WRITE_ONCE(ctx->task, TASK_TOMBSTONE); - put_task_struct(task); /* cannot be last */ - raw_spin_unlock_irq(&ctx->lock); - - - list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) - perf_free_event(event, ctx); - - mutex_unlock(&ctx->mutex); - - /* - * perf_event_release_kernel() could've stolen some of our - * child events and still have them on its free_list. In that - * case we must wait for these events to have been freed (in - * particular all their references to this task must've been - * dropped). - * - * Without this copy_process() will unconditionally free this - * task (irrespective of its reference count) and - * _free_event()'s put_task_struct(event->hw.target) will be a - * use-after-free. - * - * Wait for all events to drop their context reference. - */ - wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1); - put_ctx(ctx); /* must be last */ + perf_event_exit_task_context(task, false); } void perf_event_delayed_put(struct task_struct *task)