Like attach_global_ctx_data(), it has an O(N^2) loop to delete the task
context data for each thread.  But perf_free_ctx_data_rcu() can be
called under the RCU read lock, so just call it directly rather than
iterating over the whole thread list again.
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260211223222.3119790-4-namhyung@kernel.org
struct task_struct *g, *p;
struct perf_ctx_data *cd;
-again:
scoped_guard (rcu) {
for_each_process_thread(g, p) {
cd = rcu_dereference(p->perf_ctx_data);
- if (!cd || !cd->global)
- continue;
- cd->global = 0;
- get_task_struct(p);
- goto detach;
+ if (cd && cd->global) {
+ cd->global = 0;
+ detach_task_ctx_data(p);
+ }
}
}
- return;
-detach:
- detach_task_ctx_data(p);
- put_task_struct(p);
- goto again;
}
static void detach_global_ctx_data(void)