From: Sasha Levin Date: Tue, 8 Apr 2025 00:42:38 +0000 (-0400) Subject: Fixes for 6.6 X-Git-Tag: v5.4.292~72 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=544462ddb59193392b1520c8798297003947a80c;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch b/queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch new file mode 100644 index 0000000000..3be4d6920f --- /dev/null +++ b/queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch @@ -0,0 +1,35 @@ +From 51f68674ad424849d672d392f06a9e00343f2fb8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Mar 2025 09:35:02 -0400 +Subject: drm/amdgpu/gfx11: fix num_mec + +From: Alex Deucher + +[ Upstream commit 4161050d47e1b083a7e1b0b875c9907e1a6f1f1f ] + +GC11 only has 1 mec. + +Fixes: 3d879e81f0f9 ("drm/amdgpu: add init support for GFX11 (v2)") +Reviewed-by: Sunil Khatri +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +index 54ec9b32562c2..480d718d09cb6 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +@@ -1318,7 +1318,7 @@ static int gfx_v11_0_sw_init(void *handle) + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; +- adev->gfx.mec.num_mec = 2; ++ adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 4; + break; +-- +2.39.5 + diff --git a/queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch b/queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch new file mode 100644 index 0000000000..7ae4fcb0e6 --- /dev/null +++ b/queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch @@ -0,0 +1,143 @@ +From dc733cc133ee7a2d1d5487e7b108a309667252e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Mar 2025 08:20:03 +0000 +Subject: perf/core: Fix child_total_time_enabled accounting bug at task exit + +From: Yeoreum Yun + +[ Upstream commit a3c3c66670cee11eb13aa43905904bf29cb92d32 ] + +The perf events code fails to account for total_time_enabled of +inactive events. + +Here is a failure case for accounting total_time_enabled for +CPU PMU events: + + sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 2s + ... + + armv8_pmuv3_0/event=0x08/: 1138698008 2289429840 2174835740 + armv8_pmuv3_1/event=0x08/: 1826791390 1950025700 847648440 + ` ` ` + ` ` > total_time_running with child + ` > total_time_enabled with child + > count with child + + Performance counter stats for 'stress-ng --pthread=2 -t 2s': + + 1,138,698,008 armv8_pmuv3_0/event=0x08/ (94.99%) + 1,826,791,390 armv8_pmuv3_1/event=0x08/ (43.47%) + +The two events above are opened on two different CPU PMUs, for example, +each event is opened for a cluster in an Arm big.LITTLE system, they +will never run on the same CPU. In theory, the total enabled time should +be same for both events, as two events are opened and closed together. + +As the result show, the two events' total enabled time including +child event is different (2289429840 vs 1950025700). 
+ +This is because child events are not accounted properly +if a event is INACTIVE state when the task exits: + + perf_event_exit_event() + `> perf_remove_from_context() + `> __perf_remove_from_context() + `> perf_child_detach() -> Accumulate child_total_time_enabled + `> list_del_event() -> Update child event's time + +The problem is the time accumulation happens prior to child event's +time updating. Thus, it misses to account the last period's time when +the event exits. + +The perf core layer follows the rule that timekeeping is tied to state +change. To address the issue, make __perf_remove_from_context() +handle the task exit case by passing 'DETACH_EXIT' to it and +invoke perf_event_state() for state alongside with accounting the time. + +Then, perf_child_detach() populates the time into the parent's time metrics. + +After this patch, the bug is fixed: + + sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 10s + ... + armv8_pmuv3_0/event=0x08/: 15396770398 32157963940 21898169000 + armv8_pmuv3_1/event=0x08/: 22428964974 32157963940 10259794940 + + Performance counter stats for 'stress-ng --pthread=2 -t 10s': + + 15,396,770,398 armv8_pmuv3_0/event=0x08/ (68.10%) + 22,428,964,974 armv8_pmuv3_1/event=0x08/ (31.90%) + +[ mingo: Clarified the changelog. ] + +Fixes: ef54c1a476aef ("perf: Rework perf_event_exit_event()") +Suggested-by: Peter Zijlstra +Signed-off-by: Yeoreum Yun +Signed-off-by: Ingo Molnar +Tested-by: Leo Yan +Link: https://lore.kernel.org/r/20250326082003.1630986-1-yeoreum.yun@arm.com +Signed-off-by: Sasha Levin +--- + kernel/events/core.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/kernel/events/core.c b/kernel/events/core.c +index a524329149a71..b710976fb01b1 100644 +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -2333,6 +2333,7 @@ group_sched_out(struct perf_event *group_event, struct perf_event_context *ctx) + #define DETACH_GROUP 0x01UL + #define DETACH_CHILD 0x02UL + #define DETACH_DEAD 0x04UL ++#define DETACH_EXIT 0x08UL + + /* + * Cross CPU call to remove a performance event +@@ -2347,6 +2348,7 @@ __perf_remove_from_context(struct perf_event *event, + void *info) + { + struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx; ++ enum perf_event_state state = PERF_EVENT_STATE_OFF; + unsigned long flags = (unsigned long)info; + + if (ctx->is_active & EVENT_TIME) { +@@ -2358,16 +2360,19 @@ __perf_remove_from_context(struct perf_event *event, + * Ensure event_sched_out() switches to OFF, at the very least + * this avoids raising perf_pending_task() at this time. 
+ */ +- if (flags & DETACH_DEAD) ++ if (flags & DETACH_EXIT) ++ state = PERF_EVENT_STATE_EXIT; ++ if (flags & DETACH_DEAD) { + event->pending_disable = 1; ++ state = PERF_EVENT_STATE_DEAD; ++ } + event_sched_out(event, ctx); ++ perf_event_set_state(event, min(event->state, state)); + if (flags & DETACH_GROUP) + perf_group_detach(event); + if (flags & DETACH_CHILD) + perf_child_detach(event); + list_del_event(event, ctx); +- if (flags & DETACH_DEAD) +- event->state = PERF_EVENT_STATE_DEAD; + + if (!pmu_ctx->nr_events) { + pmu_ctx->rotate_necessary = 0; +@@ -13140,12 +13145,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) + mutex_lock(&parent_event->child_mutex); + } + +- perf_remove_from_context(event, detach_flags); +- +- raw_spin_lock_irq(&ctx->lock); +- if (event->state > PERF_EVENT_STATE_EXIT) +- perf_event_set_state(event, PERF_EVENT_STATE_EXIT); +- raw_spin_unlock_irq(&ctx->lock); ++ perf_remove_from_context(event, detach_flags | DETACH_EXIT); + + /* + * Child events can be freed. +-- +2.39.5 + diff --git a/queue-6.6/series b/queue-6.6/series index 0aee58230c..a6bc43b2c9 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -219,3 +219,10 @@ ipv6-start-path-selection-from-the-first-nexthop.patch ipv6-do-not-consider-link-down-nexthops-in-path-sele.patch arcnet-add-null-check-in-com20020pci_probe.patch net-ibmveth-make-veth_pool_store-stop-hanging.patch +drm-amdgpu-gfx11-fix-num_mec.patch +perf-core-fix-child_total_time_enabled-accounting-bu.patch +tracing-allow-creating-instances-with-specified-syst.patch +tracing-switch-trace_events_hist.c-code-over-to-use-.patch +tracing-hist-add-poll-pollin-support-on-hist-file.patch +tracing-hist-support-pollpri-event-for-poll-on-histo.patch +tracing-correct-the-refcount-if-the-hist-hist_debug-.patch diff --git a/queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch b/queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch new file mode 100644 index 0000000000..49c7ef0d32 --- /dev/null +++ b/queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch @@ -0,0 +1,296 @@ +From 2c72520341d2505b952e8ec7f48d12a81778c5c6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 13 Dec 2023 09:37:01 -0500 +Subject: tracing: Allow creating instances with specified system events + +From: Steven Rostedt (Google) + +[ Upstream commit d23569979ca1cd139a42c410e0c7b9e6014c3b3a ] + +A trace instance may only need to enable specific events. As the eventfs +directory of an instance currently creates all events which adds overhead, +allow internal instances to be created with just the events in systems +that they care about. This currently only deals with systems and not +individual events, but this should bring down the overhead of creating +instances for specific use cases quite bit. + +The trace_array_get_by_name() now has another parameter "systems". This +parameter is a const string pointer of a comma/space separated list of +event systems that should be created by the trace_array. (Note if the +trace_array already exists, this parameter is ignored). + +The list of systems is saved and if a module is loaded, its events will +not be added unless the system for those events also match the systems +string. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20231213093701.03fddec0@gandalf.local.home + +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Sean Paul +Cc: Arun Easi +Cc: Daniel Wagner +Tested-by: Dmytro Maluka +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open") +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_os.c | 2 +- + include/linux/trace.h | 4 +-- + kernel/trace/trace.c | 23 +++++++++++--- + kernel/trace/trace.h | 1 + + kernel/trace/trace_boot.c | 2 +- + kernel/trace/trace_events.c | 48 +++++++++++++++++++++++++++-- + samples/ftrace/sample-trace-array.c | 2 +- + 7 files changed, 70 insertions(+), 12 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index 91d12198cc6c8..0a3a5af67f0ae 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -2883,7 +2883,7 @@ static void qla2x00_iocb_work_fn(struct work_struct *work) + static void + qla_trace_init(void) + { +- qla_trc_array = trace_array_get_by_name("qla2xxx"); ++ qla_trc_array = trace_array_get_by_name("qla2xxx", NULL); + if (!qla_trc_array) { + ql_log(ql_log_fatal, NULL, 0x0001, + "Unable to create qla2xxx trace instance, instance logging will be disabled.\n"); +diff --git a/include/linux/trace.h b/include/linux/trace.h +index 2a70a447184c9..fdcd76b7be83d 100644 +--- a/include/linux/trace.h ++++ b/include/linux/trace.h +@@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip, + const char *fmt, ...); + int trace_array_init_printk(struct trace_array *tr); + void trace_array_put(struct trace_array *tr); +-struct trace_array *trace_array_get_by_name(const char *name); ++struct trace_array *trace_array_get_by_name(const char *name, const char *systems); + int trace_array_destroy(struct trace_array *tr); + + /* For osnoise tracer */ +@@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr) + static inline void trace_array_put(struct trace_array *tr) + { + } +-static inline struct trace_array *trace_array_get_by_name(const char *name) ++static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems) + { + return NULL; + } +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 9d9af60b238e2..a41c99350a5bf 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -9417,7 +9417,8 @@ static int trace_array_create_dir(struct trace_array *tr) + return ret; + } + +-static struct trace_array *trace_array_create(const char *name) ++static struct trace_array * ++trace_array_create_systems(const char *name, const char *systems) + { + struct trace_array *tr; + int ret; +@@ -9437,6 +9438,12 @@ static struct trace_array *trace_array_create(const char *name) + if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + goto out_free_tr; + ++ if (systems) { ++ tr->system_names = kstrdup_const(systems, GFP_KERNEL); ++ if (!tr->system_names) ++ goto out_free_tr; ++ } ++ + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; + + cpumask_copy(tr->tracing_cpumask, cpu_all_mask); +@@ -9480,12 +9487,18 @@ static struct trace_array *trace_array_create(const char *name) + free_trace_buffers(tr); + free_cpumask_var(tr->pipe_cpumask); + free_cpumask_var(tr->tracing_cpumask); ++ kfree_const(tr->system_names); + kfree(tr->name); + kfree(tr); + + return ERR_PTR(ret); + } + ++static struct trace_array *trace_array_create(const char *name) ++{ ++ return 
trace_array_create_systems(name, NULL); ++} ++ + static int instance_mkdir(const char *name) + { + struct trace_array *tr; +@@ -9511,6 +9524,7 @@ static int instance_mkdir(const char *name) + /** + * trace_array_get_by_name - Create/Lookup a trace array, given its name. + * @name: The name of the trace array to be looked up/created. ++ * @systems: A list of systems to create event directories for (NULL for all) + * + * Returns pointer to trace array with given name. + * NULL, if it cannot be created. +@@ -9524,7 +9538,7 @@ static int instance_mkdir(const char *name) + * trace_array_put() is called, user space can not delete it. + * + */ +-struct trace_array *trace_array_get_by_name(const char *name) ++struct trace_array *trace_array_get_by_name(const char *name, const char *systems) + { + struct trace_array *tr; + +@@ -9536,7 +9550,7 @@ struct trace_array *trace_array_get_by_name(const char *name) + goto out_unlock; + } + +- tr = trace_array_create(name); ++ tr = trace_array_create_systems(name, systems); + + if (IS_ERR(tr)) + tr = NULL; +@@ -9583,6 +9597,7 @@ static int __remove_instance(struct trace_array *tr) + + free_cpumask_var(tr->pipe_cpumask); + free_cpumask_var(tr->tracing_cpumask); ++ kfree_const(tr->system_names); + kfree(tr->name); + kfree(tr); + +@@ -10301,7 +10316,7 @@ __init static void enable_instances(void) + if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) + do_allocate_snapshot(tok); + +- tr = trace_array_get_by_name(tok); ++ tr = trace_array_get_by_name(tok, NULL); + if (!tr) { + pr_warn("Failed to create instance buffer %s\n", curr_str); + continue; +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index e45756f1ac2b1..db0d2641125e7 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -377,6 +377,7 @@ struct trace_array { + unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE]; + unsigned int flags; + raw_spinlock_t start_lock; ++ const char *system_names; + struct list_head err_log; + struct dentry *dir; + struct dentry *options; +diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c +index 7ccc7a8e155b9..dbe29b4c6a7a0 100644 +--- a/kernel/trace/trace_boot.c ++++ b/kernel/trace/trace_boot.c +@@ -633,7 +633,7 @@ trace_boot_init_instances(struct xbc_node *node) + if (!p || *p == '\0') + continue; + +- tr = trace_array_get_by_name(p); ++ tr = trace_array_get_by_name(p, NULL); + if (!tr) { + pr_err("Failed to get trace instance %s\n", p); + continue; +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 9d22745cdea5a..15041912c277d 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -3056,6 +3056,27 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) + up_write(&trace_event_sem); + } + ++static bool event_in_systems(struct trace_event_call *call, ++ const char *systems) ++{ ++ const char *system; ++ const char *p; ++ ++ if (!systems) ++ return true; ++ ++ system = call->class->system; ++ p = strstr(systems, system); ++ if (!p) ++ return false; ++ ++ if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',') ++ return false; ++ ++ p += strlen(system); ++ return !*p || isspace(*p) || *p == ','; ++} ++ + static struct trace_event_file * + trace_create_new_event(struct trace_event_call *call, + struct trace_array *tr) +@@ -3065,9 +3086,12 @@ trace_create_new_event(struct trace_event_call *call, + struct trace_event_file *file; + unsigned int first; + ++ if (!event_in_systems(call, tr->system_names)) ++ return NULL; ++ + file = kmem_cache_alloc(file_cachep, GFP_TRACE); + if 
(!file) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); +@@ -3132,8 +3156,17 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) + struct trace_event_file *file; + + file = trace_create_new_event(call, tr); ++ /* ++ * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed ++ * allocation, or NULL if the event is not part of the tr->system_names. ++ * When the event is not part of the tr->system_names, return zero, not ++ * an error. ++ */ + if (!file) +- return -ENOMEM; ++ return 0; ++ ++ if (IS_ERR(file)) ++ return PTR_ERR(file); + + if (eventdir_initialized) + return event_create_dir(tr->event_dir, file); +@@ -3172,8 +3205,17 @@ __trace_early_add_new_event(struct trace_event_call *call, + int ret; + + file = trace_create_new_event(call, tr); ++ /* ++ * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed ++ * allocation, or NULL if the event is not part of the tr->system_names. ++ * When the event is not part of the tr->system_names, return zero, not ++ * an error. ++ */ + if (!file) +- return -ENOMEM; ++ return 0; ++ ++ if (IS_ERR(file)) ++ return PTR_ERR(file); + + ret = event_define_fields(call); + if (ret) +diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c +index 6aba02a31c96c..d0ee9001c7b37 100644 +--- a/samples/ftrace/sample-trace-array.c ++++ b/samples/ftrace/sample-trace-array.c +@@ -105,7 +105,7 @@ static int __init sample_trace_array_init(void) + * NOTE: This function increments the reference counter + * associated with the trace array - "tr". + */ +- tr = trace_array_get_by_name("sample-instance"); ++ tr = trace_array_get_by_name("sample-instance", "sched,timer,kprobes"); + + if (!tr) + return -1; +-- +2.39.5 + diff --git a/queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch b/queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch new file mode 100644 index 0000000000..b6a89e10c4 --- /dev/null +++ b/queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch @@ -0,0 +1,92 @@ +From 40ff4fd352472b39d5987f49c31767e7f9ea8c54 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Mar 2025 06:53:35 +0000 +Subject: tracing: Correct the refcount if the hist/hist_debug file fails to + open + +From: Tengda Wu + +[ Upstream commit 0b4ffbe4888a2c71185eaf5c1a02dd3586a9bc04 ] + +The function event_{hist,hist_debug}_open() maintains the refcount of +'file->tr' and 'file' through tracing_open_file_tr(). However, it does +not roll back these counts on subsequent failure paths, resulting in a +refcount leak. + +A very obvious case is that if the hist/hist_debug file belongs to a +specific instance, the refcount leak will prevent the deletion of that +instance, as it relies on the condition 'tr->ref == 1' within +__remove_instance(). + +Fix this by calling tracing_release_file_tr() on all failure paths in +event_{hist,hist_debug}_open() to correct the refcount. 
+ +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Cc: Mathieu Desnoyers +Cc: Zheng Yejian +Link: https://lore.kernel.org/20250314065335.1202817-1-wutengda@huaweicloud.com +Fixes: 1cc111b9cddc ("tracing: Fix uaf issue when open the hist or hist_debug file") +Signed-off-by: Tengda Wu +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_hist.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 08cc6405b8837..e6f9cbc622c75 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -5700,12 +5700,16 @@ static int event_hist_open(struct inode *inode, struct file *file) + guard(mutex)(&event_mutex); + + event_file = event_file_data(file); +- if (!event_file) +- return -ENODEV; ++ if (!event_file) { ++ ret = -ENODEV; ++ goto err; ++ } + + hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL); +- if (!hist_file) +- return -ENOMEM; ++ if (!hist_file) { ++ ret = -ENOMEM; ++ goto err; ++ } + + hist_file->file = file; + hist_file->last_act = get_hist_hit_count(event_file); +@@ -5713,9 +5717,14 @@ static int event_hist_open(struct inode *inode, struct file *file) + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; + ret = single_open(file, hist_show, hist_file); +- if (ret) ++ if (ret) { + kfree(hist_file); ++ goto err; ++ } + ++ return 0; ++err: ++ tracing_release_file_tr(inode, file); + return ret; + } + +@@ -5990,7 +5999,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file) + + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; +- return single_open(file, hist_debug_show, file); ++ ret = single_open(file, hist_debug_show, file); ++ if (ret) ++ tracing_release_file_tr(inode, file); ++ return ret; + } + + const struct file_operations event_hist_debug_fops = { +-- +2.39.5 + diff --git a/queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch b/queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch new file mode 100644 index 0000000000..3f5a2c585e --- /dev/null +++ b/queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch @@ -0,0 +1,216 @@ +From bf44cefa0aea8adae9af1dad9b1396b0dc20a0a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Dec 2024 13:07:57 +0900 +Subject: tracing/hist: Add poll(POLLIN) support on hist file + +From: Masami Hiramatsu (Google) + +[ Upstream commit 1bd13edbbed6e7e396f1aab92b224a4775218e68 ] + +Add poll syscall support on the `hist` file. The Waiter will be waken +up when the histogram is updated with POLLIN. + +Currently, there is no way to wait for a specific event in userspace. +So user needs to peek the `trace` periodicaly, or wait on `trace_pipe`. +But it is not a good idea to peek at the `trace` for an event that +randomly happens. And `trace_pipe` is not coming back until a page is +filled with events. + +This allows a user to wait for a specific event on the `hist` file. User +can set a histogram trigger on the event which they want to monitor +and poll() on its `hist` file. Since this poll() returns POLLIN, the next +poll() will return soon unless a read() happens on that hist file. + +NOTE: To read the hist file again, you must set the file offset to 0, +but just for monitoring the event, you may not need to read the +histogram. 
+ +Cc: Shuah Khan +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/173527247756.464571.14236296701625509931.stgit@devnote2 +Signed-off-by: Masami Hiramatsu (Google) +Reviewed-by: Tom Zanussi +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open") +Signed-off-by: Sasha Levin +--- + include/linux/trace_events.h | 14 +++++++ + kernel/trace/trace_events.c | 14 +++++++ + kernel/trace/trace_events_hist.c | 70 ++++++++++++++++++++++++++++++-- + 3 files changed, 95 insertions(+), 3 deletions(-) + +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index aa1bc41726620..fe95d13c5e4d8 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -683,6 +683,20 @@ struct trace_event_file { + atomic_t tm_ref; /* trigger-mode reference counter */ + }; + ++#ifdef CONFIG_HIST_TRIGGERS ++extern struct irq_work hist_poll_work; ++extern wait_queue_head_t hist_poll_wq; ++ ++static inline void hist_poll_wakeup(void) ++{ ++ if (wq_has_sleeper(&hist_poll_wq)) ++ irq_work_queue(&hist_poll_work); ++} ++ ++#define hist_poll_wait(file, wait) \ ++ poll_wait(file, &hist_poll_wq, wait) ++#endif ++ + #define __TRACE_EVENT_FLAGS(name, value) \ + static int __init trace_init_flags_##name(void) \ + { \ +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index 15041912c277d..562efd6685726 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -3077,6 +3077,20 @@ static bool event_in_systems(struct trace_event_call *call, + return !*p || isspace(*p) || *p == ','; + } + ++#ifdef CONFIG_HIST_TRIGGERS ++/* ++ * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger ++ * may happen in any context. 
++ */ ++static void hist_poll_event_irq_work(struct irq_work *work) ++{ ++ wake_up_all(&hist_poll_wq); ++} ++ ++DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work); ++DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq); ++#endif ++ + static struct trace_event_file * + trace_create_new_event(struct trace_event_call *call, + struct trace_array *tr) +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 755db2451fb2d..49b7811dec9f8 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -5322,6 +5322,8 @@ static void event_hist_trigger(struct event_trigger_data *data, + + if (resolve_var_refs(hist_data, key, var_ref_vals, true)) + hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals); ++ ++ hist_poll_wakeup(); + } + + static void hist_trigger_stacktrace_print(struct seq_file *m, +@@ -5601,15 +5603,36 @@ static void hist_trigger_show(struct seq_file *m, + n_entries, (u64)atomic64_read(&hist_data->map->drops)); + } + ++struct hist_file_data { ++ struct file *file; ++ u64 last_read; ++}; ++ ++static u64 get_hist_hit_count(struct trace_event_file *event_file) ++{ ++ struct hist_trigger_data *hist_data; ++ struct event_trigger_data *data; ++ u64 ret = 0; ++ ++ list_for_each_entry(data, &event_file->triggers, list) { ++ if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) { ++ hist_data = data->private_data; ++ ret += atomic64_read(&hist_data->map->hits); ++ } ++ } ++ return ret; ++} ++ + static int hist_show(struct seq_file *m, void *v) + { ++ struct hist_file_data *hist_file = m->private; + struct event_trigger_data *data; + struct trace_event_file *event_file; + int n = 0; + + guard(mutex)(&event_mutex); + +- event_file = event_file_file(m->private); ++ event_file = event_file_file(hist_file->file); + if (unlikely(!event_file)) + return -ENODEV; + +@@ -5617,27 +5640,68 @@ static int hist_show(struct seq_file *m, void *v) + if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) + hist_trigger_show(m, data, n++); + } ++ hist_file->last_read = get_hist_hit_count(event_file); ++ + return 0; + } + ++static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wait) ++{ ++ struct trace_event_file *event_file; ++ struct seq_file *m = file->private_data; ++ struct hist_file_data *hist_file = m->private; ++ ++ guard(mutex)(&event_mutex); ++ ++ event_file = event_file_data(file); ++ if (!event_file) ++ return EPOLLERR; ++ ++ hist_poll_wait(file, wait); ++ ++ if (hist_file->last_read != get_hist_hit_count(event_file)) ++ return EPOLLIN | EPOLLRDNORM; ++ ++ return 0; ++} ++ ++static int event_hist_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m = file->private_data; ++ struct hist_file_data *hist_file = m->private; ++ ++ kfree(hist_file); ++ return tracing_single_release_file_tr(inode, file); ++} ++ + static int event_hist_open(struct inode *inode, struct file *file) + { ++ struct hist_file_data *hist_file; + int ret; + + ret = tracing_open_file_tr(inode, file); + if (ret) + return ret; + ++ hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL); ++ if (!hist_file) ++ return -ENOMEM; ++ hist_file->file = file; ++ + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; +- return single_open(file, hist_show, file); ++ ret = single_open(file, hist_show, hist_file); ++ if (ret) ++ kfree(hist_file); ++ return ret; + } + + const struct file_operations event_hist_fops = { + .open = event_hist_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = 
tracing_single_release_file_tr, ++ .release = event_hist_release, ++ .poll = event_hist_poll, + }; + + #ifdef CONFIG_HIST_TRIGGERS_DEBUG +-- +2.39.5 + diff --git a/queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch b/queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch new file mode 100644 index 0000000000..ea084ff038 --- /dev/null +++ b/queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch @@ -0,0 +1,119 @@ +From d5bd37a7fed7a40737957cb6ce474ba2c4cc96d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Dec 2024 13:08:07 +0900 +Subject: tracing/hist: Support POLLPRI event for poll on histogram + +From: Masami Hiramatsu (Google) + +[ Upstream commit 66fc6f521a0b91051ce6968a216a30bc52267bf8 ] + +Since POLLIN will not be flushed until the hist file is read, the user +needs to repeatedly read() and poll() on the hist file for monitoring the +event continuously. But the read() is somewhat redundant when the user is +only monitoring for event updates. + +Add POLLPRI poll event on the hist file so the event returns when a +histogram is updated after open(), poll() or read(). Thus it is possible +to wait for the next event without having to issue a read(). + +Cc: Shuah Khan +Cc: Mathieu Desnoyers +Link: https://lore.kernel.org/173527248770.464571.2536902137325258133.stgit@devnote2 +Signed-off-by: Masami Hiramatsu (Google) +Reviewed-by: Tom Zanussi +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_hist.c | 29 ++++++++++++++++++++++++++--- + 1 file changed, 26 insertions(+), 3 deletions(-) + +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 49b7811dec9f8..08cc6405b8837 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -5606,6 +5606,7 @@ static void hist_trigger_show(struct seq_file *m, + struct hist_file_data { + struct file *file; + u64 last_read; ++ u64 last_act; + }; + + static u64 get_hist_hit_count(struct trace_event_file *event_file) +@@ -5641,6 +5642,11 @@ static int hist_show(struct seq_file *m, void *v) + hist_trigger_show(m, data, n++); + } + hist_file->last_read = get_hist_hit_count(event_file); ++ /* ++ * Update last_act too so that poll()/POLLPRI can wait for the next ++ * event after any syscall on hist file. 
++ */ ++ hist_file->last_act = hist_file->last_read; + + return 0; + } +@@ -5650,6 +5656,8 @@ static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wai + struct trace_event_file *event_file; + struct seq_file *m = file->private_data; + struct hist_file_data *hist_file = m->private; ++ __poll_t ret = 0; ++ u64 cnt; + + guard(mutex)(&event_mutex); + +@@ -5659,10 +5667,15 @@ static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wai + + hist_poll_wait(file, wait); + +- if (hist_file->last_read != get_hist_hit_count(event_file)) +- return EPOLLIN | EPOLLRDNORM; ++ cnt = get_hist_hit_count(event_file); ++ if (hist_file->last_read != cnt) ++ ret |= EPOLLIN | EPOLLRDNORM; ++ if (hist_file->last_act != cnt) { ++ hist_file->last_act = cnt; ++ ret |= EPOLLPRI; ++ } + +- return 0; ++ return ret; + } + + static int event_hist_release(struct inode *inode, struct file *file) +@@ -5676,6 +5689,7 @@ static int event_hist_release(struct inode *inode, struct file *file) + + static int event_hist_open(struct inode *inode, struct file *file) + { ++ struct trace_event_file *event_file; + struct hist_file_data *hist_file; + int ret; + +@@ -5683,16 +5697,25 @@ static int event_hist_open(struct inode *inode, struct file *file) + if (ret) + return ret; + ++ guard(mutex)(&event_mutex); ++ ++ event_file = event_file_data(file); ++ if (!event_file) ++ return -ENODEV; ++ + hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL); + if (!hist_file) + return -ENOMEM; ++ + hist_file->file = file; ++ hist_file->last_act = get_hist_hit_count(event_file); + + /* Clear private_data to avoid warning in single_open() */ + file->private_data = NULL; + ret = single_open(file, hist_show, hist_file); + if (ret) + kfree(hist_file); ++ + return ret; + } + +-- +2.39.5 + diff --git a/queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch b/queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch new file mode 100644 index 0000000000..9184bfb0ec --- /dev/null +++ b/queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch @@ -0,0 +1,99 @@ +From ee360b3c21c20d4012033fbdbcd63d74c5cbfb01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 19 Dec 2024 15:12:05 -0500 +Subject: tracing: Switch trace_events_hist.c code over to use guard() + +From: Steven Rostedt + +[ Upstream commit 2b36a97aeeb71b1e4a48bfedc7f21f44aeb1e6fb ] + +There are a couple functions in trace_events_hist.c that have "goto out" or +equivalent on error in order to release locks that were taken. This can be +error prone or just simply make the code more complex. + +Switch every location that ends with unlocking a mutex on error over to +using the guard(mutex)() infrastructure to let the compiler worry about +releasing locks. This makes the code easier to read and understand. 
+ +Cc: Masami Hiramatsu +Cc: Mark Rutland +Cc: Mathieu Desnoyers +Cc: Andrew Morton +Cc: Peter Zijlstra +Link: https://lore.kernel.org/20241219201345.694601480@goodmis.org +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open") +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_hist.c | 32 ++++++++++---------------------- + 1 file changed, 10 insertions(+), 22 deletions(-) + +diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c +index 604d63380a90b..755db2451fb2d 100644 +--- a/kernel/trace/trace_events_hist.c ++++ b/kernel/trace/trace_events_hist.c +@@ -5605,25 +5605,19 @@ static int hist_show(struct seq_file *m, void *v) + { + struct event_trigger_data *data; + struct trace_event_file *event_file; +- int n = 0, ret = 0; ++ int n = 0; + +- mutex_lock(&event_mutex); ++ guard(mutex)(&event_mutex); + + event_file = event_file_file(m->private); +- if (unlikely(!event_file)) { +- ret = -ENODEV; +- goto out_unlock; +- } ++ if (unlikely(!event_file)) ++ return -ENODEV; + + list_for_each_entry(data, &event_file->triggers, list) { + if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) + hist_trigger_show(m, data, n++); + } +- +- out_unlock: +- mutex_unlock(&event_mutex); +- +- return ret; ++ return 0; + } + + static int event_hist_open(struct inode *inode, struct file *file) +@@ -5884,25 +5878,19 @@ static int hist_debug_show(struct seq_file *m, void *v) + { + struct event_trigger_data *data; + struct trace_event_file *event_file; +- int n = 0, ret = 0; ++ int n = 0; + +- mutex_lock(&event_mutex); ++ guard(mutex)(&event_mutex); + + event_file = event_file_file(m->private); +- if (unlikely(!event_file)) { +- ret = -ENODEV; +- goto out_unlock; +- } ++ if (unlikely(!event_file)) ++ return -ENODEV; + + list_for_each_entry(data, &event_file->triggers, list) { + if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) + hist_trigger_debug_show(m, data, n++); + } +- +- out_unlock: +- mutex_unlock(&event_mutex); +- +- return ret; ++ return 0; + } + + static int event_hist_debug_open(struct inode *inode, struct file *file) +-- +2.39.5 +
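
For reference, below is a minimal userspace sketch of how the poll() support added by the tracing/hist patches queued above could be consumed: block in poll() until the histogram is updated (POLLPRI), rewind the file offset to 0 as the hist-poll change requires, and read the histogram again. The tracefs path, the sched/sched_switch event, and the assumption that a hist trigger is already attached to that event are illustrative only; this sketch is not part of the queued patches.

/*
 * Minimal userspace sketch (not part of the queued patches): wait for
 * histogram updates by polling a hist file for POLLPRI, then rewind and
 * re-read it.  The tracefs mount point and the sched/sched_switch event
 * are illustrative assumptions; a hist trigger is assumed to be already
 * attached to the event.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/kernel/tracing/events/sched/sched_switch/hist";
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("open hist");
		return 1;
	}

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLPRI };

		/* POLLPRI is raised on the next histogram update. */
		if (poll(&pfd, 1, -1) < 0) {
			perror("poll");
			break;
		}

		if (pfd.revents & POLLPRI) {
			/* Re-reading requires resetting the offset to 0. */
			if (lseek(fd, 0, SEEK_SET) < 0) {
				perror("lseek");
				break;
			}
			while ((n = read(fd, buf, sizeof(buf))) > 0)
				fwrite(buf, 1, (size_t)n, stdout);
		}
	}

	close(fd);
	return 0;
}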