git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.6
author Sasha Levin <sashal@kernel.org>
Tue, 8 Apr 2025 00:42:38 +0000 (20:42 -0400)
committer Sasha Levin <sashal@kernel.org>
Tue, 8 Apr 2025 00:42:38 +0000 (20:42 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch [new file with mode: 0644]
queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch [new file with mode: 0644]
queue-6.6/series
queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch [new file with mode: 0644]
queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch [new file with mode: 0644]
queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch [new file with mode: 0644]
queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch [new file with mode: 0644]
queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch [new file with mode: 0644]

diff --git a/queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch b/queue-6.6/drm-amdgpu-gfx11-fix-num_mec.patch
new file mode 100644 (file)
index 0000000..3be4d69
--- /dev/null
@@ -0,0 +1,35 @@
+From 51f68674ad424849d672d392f06a9e00343f2fb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Mar 2025 09:35:02 -0400
+Subject: drm/amdgpu/gfx11: fix num_mec
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit 4161050d47e1b083a7e1b0b875c9907e1a6f1f1f ]
+
+GC11 only has 1 mec.
+
+Fixes: 3d879e81f0f9 ("drm/amdgpu: add init support for GFX11 (v2)")
+Reviewed-by: Sunil Khatri <sunil.khatri@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+index 54ec9b32562c2..480d718d09cb6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+@@ -1318,7 +1318,7 @@ static int gfx_v11_0_sw_init(void *handle)
+               adev->gfx.me.num_me = 1;
+               adev->gfx.me.num_pipe_per_me = 1;
+               adev->gfx.me.num_queue_per_pipe = 1;
+-              adev->gfx.mec.num_mec = 2;
++              adev->gfx.mec.num_mec = 1;
+               adev->gfx.mec.num_pipe_per_mec = 4;
+               adev->gfx.mec.num_queue_per_pipe = 4;
+               break;
+-- 
+2.39.5
+
diff --git a/queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch b/queue-6.6/perf-core-fix-child_total_time_enabled-accounting-bu.patch
new file mode 100644 (file)
index 0000000..7ae4fcb
--- /dev/null
@@ -0,0 +1,143 @@
+From dc733cc133ee7a2d1d5487e7b108a309667252e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Mar 2025 08:20:03 +0000
+Subject: perf/core: Fix child_total_time_enabled accounting bug at task exit
+
+From: Yeoreum Yun <yeoreum.yun@arm.com>
+
+[ Upstream commit a3c3c66670cee11eb13aa43905904bf29cb92d32 ]
+
+The perf events code fails to account for total_time_enabled of
+inactive events.
+
+Here is a failure case for accounting total_time_enabled for
+CPU PMU events:
+
+  sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 2s
+  ...
+
+  armv8_pmuv3_0/event=0x08/: 1138698008 2289429840 2174835740
+  armv8_pmuv3_1/event=0x08/: 1826791390 1950025700 847648440
+                             `          `          `
+                             `          `          > total_time_running with child
+                             `          > total_time_enabled with child
+                             > count with child
+
+  Performance counter stats for 'stress-ng --pthread=2 -t 2s':
+
+       1,138,698,008      armv8_pmuv3_0/event=0x08/                                               (94.99%)
+       1,826,791,390      armv8_pmuv3_1/event=0x08/                                               (43.47%)
+
+The two events above are opened on two different CPU PMUs, for example,
+each event is opened for a cluster in an Arm big.LITTLE system, they
+will never run on the same CPU.  In theory, the total enabled time should
+be same for both events, as two events are opened and closed together.
+
+As the results show, the two events' total enabled time including
+child events is different (2289429840 vs 1950025700).
+
+This is because child events are not accounted for properly
+if an event is in the INACTIVE state when the task exits:
+
+  perf_event_exit_event()
+   `> perf_remove_from_context()
+     `> __perf_remove_from_context()
+       `> perf_child_detach()   -> Accumulate child_total_time_enabled
+         `> list_del_event()    -> Update child event's time
+
+The problem is that the time accumulation happens before the child event's
+time is updated. Thus, it fails to account for the last period's time when
+the event exits.
+
+The perf core layer follows the rule that timekeeping is tied to state
+change. To address the issue, make __perf_remove_from_context()
+handle the task exit case by passing 'DETACH_EXIT' to it and
+invoke perf_event_set_state() to change the state along with accounting the time.
+
+Then, perf_child_detach() populates the time into the parent's time metrics.
+
+After this patch, the bug is fixed:
+
+  sudo ./perf stat -vvv -e armv8_pmuv3_0/event=0x08/ -e armv8_pmuv3_1/event=0x08/ -- stress-ng --pthread=2 -t 10s
+  ...
+  armv8_pmuv3_0/event=0x08/: 15396770398 32157963940 21898169000
+  armv8_pmuv3_1/event=0x08/: 22428964974 32157963940 10259794940
+
+   Performance counter stats for 'stress-ng --pthread=2 -t 10s':
+
+      15,396,770,398      armv8_pmuv3_0/event=0x08/                                               (68.10%)
+      22,428,964,974      armv8_pmuv3_1/event=0x08/                                               (31.90%)
+
+[ mingo: Clarified the changelog. ]
+
+Fixes: ef54c1a476aef ("perf: Rework perf_event_exit_event()")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Leo Yan <leo.yan@arm.com>
+Link: https://lore.kernel.org/r/20250326082003.1630986-1-yeoreum.yun@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index a524329149a71..b710976fb01b1 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2333,6 +2333,7 @@ group_sched_out(struct perf_event *group_event, struct perf_event_context *ctx)
+ #define DETACH_GROUP  0x01UL
+ #define DETACH_CHILD  0x02UL
+ #define DETACH_DEAD   0x04UL
++#define DETACH_EXIT   0x08UL
+ /*
+  * Cross CPU call to remove a performance event
+@@ -2347,6 +2348,7 @@ __perf_remove_from_context(struct perf_event *event,
+                          void *info)
+ {
+       struct perf_event_pmu_context *pmu_ctx = event->pmu_ctx;
++      enum perf_event_state state = PERF_EVENT_STATE_OFF;
+       unsigned long flags = (unsigned long)info;
+       if (ctx->is_active & EVENT_TIME) {
+@@ -2358,16 +2360,19 @@ __perf_remove_from_context(struct perf_event *event,
+        * Ensure event_sched_out() switches to OFF, at the very least
+        * this avoids raising perf_pending_task() at this time.
+        */
+-      if (flags & DETACH_DEAD)
++      if (flags & DETACH_EXIT)
++              state = PERF_EVENT_STATE_EXIT;
++      if (flags & DETACH_DEAD) {
+               event->pending_disable = 1;
++              state = PERF_EVENT_STATE_DEAD;
++      }
+       event_sched_out(event, ctx);
++      perf_event_set_state(event, min(event->state, state));
+       if (flags & DETACH_GROUP)
+               perf_group_detach(event);
+       if (flags & DETACH_CHILD)
+               perf_child_detach(event);
+       list_del_event(event, ctx);
+-      if (flags & DETACH_DEAD)
+-              event->state = PERF_EVENT_STATE_DEAD;
+       if (!pmu_ctx->nr_events) {
+               pmu_ctx->rotate_necessary = 0;
+@@ -13140,12 +13145,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+               mutex_lock(&parent_event->child_mutex);
+       }
+-      perf_remove_from_context(event, detach_flags);
+-
+-      raw_spin_lock_irq(&ctx->lock);
+-      if (event->state > PERF_EVENT_STATE_EXIT)
+-              perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
+-      raw_spin_unlock_irq(&ctx->lock);
++      perf_remove_from_context(event, detach_flags | DETACH_EXIT);
+       /*
+        * Child events can be freed.
+-- 
+2.39.5
+
index 0aee58230c9f846f9761ab133d3db95027fcf1d5..a6bc43b2c9842ebbf796c57330b984a90cbf1a8f 100644 (file)
@@ -219,3 +219,10 @@ ipv6-start-path-selection-from-the-first-nexthop.patch
 ipv6-do-not-consider-link-down-nexthops-in-path-sele.patch
 arcnet-add-null-check-in-com20020pci_probe.patch
 net-ibmveth-make-veth_pool_store-stop-hanging.patch
+drm-amdgpu-gfx11-fix-num_mec.patch
+perf-core-fix-child_total_time_enabled-accounting-bu.patch
+tracing-allow-creating-instances-with-specified-syst.patch
+tracing-switch-trace_events_hist.c-code-over-to-use-.patch
+tracing-hist-add-poll-pollin-support-on-hist-file.patch
+tracing-hist-support-pollpri-event-for-poll-on-histo.patch
+tracing-correct-the-refcount-if-the-hist-hist_debug-.patch
diff --git a/queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch b/queue-6.6/tracing-allow-creating-instances-with-specified-syst.patch
new file mode 100644 (file)
index 0000000..49c7ef0
--- /dev/null
@@ -0,0 +1,296 @@
+From 2c72520341d2505b952e8ec7f48d12a81778c5c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Dec 2023 09:37:01 -0500
+Subject: tracing: Allow creating instances with specified system events
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit d23569979ca1cd139a42c410e0c7b9e6014c3b3a ]
+
+A trace instance may only need to enable specific events. As the eventfs
+directory of an instance currently creates all events which adds overhead,
+allow internal instances to be created with just the events in systems
+that they care about. This currently only deals with systems and not
+individual events, but this should bring down the overhead of creating
+instances for specific use cases quite a bit.
+
+The trace_array_get_by_name() now has another parameter "systems". This
+parameter is a const string pointer of a comma/space separated list of
+event systems that should be created by the trace_array. (Note if the
+trace_array already exists, this parameter is ignored).
+
+The list of systems is saved and if a module is loaded, its events will
+not be added unless the system for those events also matches the systems
+string.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20231213093701.03fddec0@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Sean Paul <seanpaul@chromium.org>
+Cc: Arun Easi   <aeasi@marvell.com>
+Cc: Daniel Wagner <dwagner@suse.de>
+Tested-by: Dmytro Maluka <dmaluka@chromium.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_os.c       |  2 +-
+ include/linux/trace.h               |  4 +--
+ kernel/trace/trace.c                | 23 +++++++++++---
+ kernel/trace/trace.h                |  1 +
+ kernel/trace/trace_boot.c           |  2 +-
+ kernel/trace/trace_events.c         | 48 +++++++++++++++++++++++++++--
+ samples/ftrace/sample-trace-array.c |  2 +-
+ 7 files changed, 70 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 91d12198cc6c8..0a3a5af67f0ae 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -2883,7 +2883,7 @@ static void qla2x00_iocb_work_fn(struct work_struct *work)
+ static void
+ qla_trace_init(void)
+ {
+-      qla_trc_array = trace_array_get_by_name("qla2xxx");
++      qla_trc_array = trace_array_get_by_name("qla2xxx", NULL);
+       if (!qla_trc_array) {
+               ql_log(ql_log_fatal, NULL, 0x0001,
+                      "Unable to create qla2xxx trace instance, instance logging will be disabled.\n");
+diff --git a/include/linux/trace.h b/include/linux/trace.h
+index 2a70a447184c9..fdcd76b7be83d 100644
+--- a/include/linux/trace.h
++++ b/include/linux/trace.h
+@@ -51,7 +51,7 @@ int trace_array_printk(struct trace_array *tr, unsigned long ip,
+                      const char *fmt, ...);
+ int trace_array_init_printk(struct trace_array *tr);
+ void trace_array_put(struct trace_array *tr);
+-struct trace_array *trace_array_get_by_name(const char *name);
++struct trace_array *trace_array_get_by_name(const char *name, const char *systems);
+ int trace_array_destroy(struct trace_array *tr);
+ /* For osnoise tracer */
+@@ -84,7 +84,7 @@ static inline int trace_array_init_printk(struct trace_array *tr)
+ static inline void trace_array_put(struct trace_array *tr)
+ {
+ }
+-static inline struct trace_array *trace_array_get_by_name(const char *name)
++static inline struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
+ {
+       return NULL;
+ }
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 9d9af60b238e2..a41c99350a5bf 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -9417,7 +9417,8 @@ static int trace_array_create_dir(struct trace_array *tr)
+       return ret;
+ }
+-static struct trace_array *trace_array_create(const char *name)
++static struct trace_array *
++trace_array_create_systems(const char *name, const char *systems)
+ {
+       struct trace_array *tr;
+       int ret;
+@@ -9437,6 +9438,12 @@ static struct trace_array *trace_array_create(const char *name)
+       if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+               goto out_free_tr;
++      if (systems) {
++              tr->system_names = kstrdup_const(systems, GFP_KERNEL);
++              if (!tr->system_names)
++                      goto out_free_tr;
++      }
++
+       tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
+       cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
+@@ -9480,12 +9487,18 @@ static struct trace_array *trace_array_create(const char *name)
+       free_trace_buffers(tr);
+       free_cpumask_var(tr->pipe_cpumask);
+       free_cpumask_var(tr->tracing_cpumask);
++      kfree_const(tr->system_names);
+       kfree(tr->name);
+       kfree(tr);
+       return ERR_PTR(ret);
+ }
++static struct trace_array *trace_array_create(const char *name)
++{
++      return trace_array_create_systems(name, NULL);
++}
++
+ static int instance_mkdir(const char *name)
+ {
+       struct trace_array *tr;
+@@ -9511,6 +9524,7 @@ static int instance_mkdir(const char *name)
+ /**
+  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
+  * @name: The name of the trace array to be looked up/created.
++ * @systems: A list of systems to create event directories for (NULL for all)
+  *
+  * Returns pointer to trace array with given name.
+  * NULL, if it cannot be created.
+@@ -9524,7 +9538,7 @@ static int instance_mkdir(const char *name)
+  * trace_array_put() is called, user space can not delete it.
+  *
+  */
+-struct trace_array *trace_array_get_by_name(const char *name)
++struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
+ {
+       struct trace_array *tr;
+@@ -9536,7 +9550,7 @@ struct trace_array *trace_array_get_by_name(const char *name)
+                       goto out_unlock;
+       }
+-      tr = trace_array_create(name);
++      tr = trace_array_create_systems(name, systems);
+       if (IS_ERR(tr))
+               tr = NULL;
+@@ -9583,6 +9597,7 @@ static int __remove_instance(struct trace_array *tr)
+       free_cpumask_var(tr->pipe_cpumask);
+       free_cpumask_var(tr->tracing_cpumask);
++      kfree_const(tr->system_names);
+       kfree(tr->name);
+       kfree(tr);
+@@ -10301,7 +10316,7 @@ __init static void enable_instances(void)
+               if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+                       do_allocate_snapshot(tok);
+-              tr = trace_array_get_by_name(tok);
++              tr = trace_array_get_by_name(tok, NULL);
+               if (!tr) {
+                       pr_warn("Failed to create instance buffer %s\n", curr_str);
+                       continue;
+diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
+index e45756f1ac2b1..db0d2641125e7 100644
+--- a/kernel/trace/trace.h
++++ b/kernel/trace/trace.h
+@@ -377,6 +377,7 @@ struct trace_array {
+       unsigned char           trace_flags_index[TRACE_FLAGS_MAX_SIZE];
+       unsigned int            flags;
+       raw_spinlock_t          start_lock;
++      const char              *system_names;
+       struct list_head        err_log;
+       struct dentry           *dir;
+       struct dentry           *options;
+diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
+index 7ccc7a8e155b9..dbe29b4c6a7a0 100644
+--- a/kernel/trace/trace_boot.c
++++ b/kernel/trace/trace_boot.c
+@@ -633,7 +633,7 @@ trace_boot_init_instances(struct xbc_node *node)
+               if (!p || *p == '\0')
+                       continue;
+-              tr = trace_array_get_by_name(p);
++              tr = trace_array_get_by_name(p, NULL);
+               if (!tr) {
+                       pr_err("Failed to get trace instance %s\n", p);
+                       continue;
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 9d22745cdea5a..15041912c277d 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -3056,6 +3056,27 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
+       up_write(&trace_event_sem);
+ }
++static bool event_in_systems(struct trace_event_call *call,
++                           const char *systems)
++{
++      const char *system;
++      const char *p;
++
++      if (!systems)
++              return true;
++
++      system = call->class->system;
++      p = strstr(systems, system);
++      if (!p)
++              return false;
++
++      if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',')
++              return false;
++
++      p += strlen(system);
++      return !*p || isspace(*p) || *p == ',';
++}
++
+ static struct trace_event_file *
+ trace_create_new_event(struct trace_event_call *call,
+                      struct trace_array *tr)
+@@ -3065,9 +3086,12 @@ trace_create_new_event(struct trace_event_call *call,
+       struct trace_event_file *file;
+       unsigned int first;
++      if (!event_in_systems(call, tr->system_names))
++              return NULL;
++
+       file = kmem_cache_alloc(file_cachep, GFP_TRACE);
+       if (!file)
+-              return NULL;
++              return ERR_PTR(-ENOMEM);
+       pid_list = rcu_dereference_protected(tr->filtered_pids,
+                                            lockdep_is_held(&event_mutex));
+@@ -3132,8 +3156,17 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
+       struct trace_event_file *file;
+       file = trace_create_new_event(call, tr);
++      /*
++       * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed
++       * allocation, or NULL if the event is not part of the tr->system_names.
++       * When the event is not part of the tr->system_names, return zero, not
++       * an error.
++       */
+       if (!file)
+-              return -ENOMEM;
++              return 0;
++
++      if (IS_ERR(file))
++              return PTR_ERR(file);
+       if (eventdir_initialized)
+               return event_create_dir(tr->event_dir, file);
+@@ -3172,8 +3205,17 @@ __trace_early_add_new_event(struct trace_event_call *call,
+       int ret;
+       file = trace_create_new_event(call, tr);
++      /*
++       * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed
++       * allocation, or NULL if the event is not part of the tr->system_names.
++       * When the event is not part of the tr->system_names, return zero, not
++       * an error.
++       */
+       if (!file)
+-              return -ENOMEM;
++              return 0;
++
++      if (IS_ERR(file))
++              return PTR_ERR(file);
+       ret = event_define_fields(call);
+       if (ret)
+diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c
+index 6aba02a31c96c..d0ee9001c7b37 100644
+--- a/samples/ftrace/sample-trace-array.c
++++ b/samples/ftrace/sample-trace-array.c
+@@ -105,7 +105,7 @@ static int __init sample_trace_array_init(void)
+        * NOTE: This function increments the reference counter
+        * associated with the trace array - "tr".
+        */
+-      tr = trace_array_get_by_name("sample-instance");
++      tr = trace_array_get_by_name("sample-instance", "sched,timer,kprobes");
+       if (!tr)
+               return -1;
+-- 
+2.39.5
+
diff --git a/queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch b/queue-6.6/tracing-correct-the-refcount-if-the-hist-hist_debug-.patch
new file mode 100644 (file)
index 0000000..b6a89e1
--- /dev/null
@@ -0,0 +1,92 @@
+From 40ff4fd352472b39d5987f49c31767e7f9ea8c54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Mar 2025 06:53:35 +0000
+Subject: tracing: Correct the refcount if the hist/hist_debug file fails to
+ open
+
+From: Tengda Wu <wutengda@huaweicloud.com>
+
+[ Upstream commit 0b4ffbe4888a2c71185eaf5c1a02dd3586a9bc04 ]
+
+The function event_{hist,hist_debug}_open() maintains the refcount of
+'file->tr' and 'file' through tracing_open_file_tr(). However, it does
+not roll back these counts on subsequent failure paths, resulting in a
+refcount leak.
+
+A very obvious case is that if the hist/hist_debug file belongs to a
+specific instance, the refcount leak will prevent the deletion of that
+instance, as it relies on the condition 'tr->ref == 1' within
+__remove_instance().
+
+Fix this by calling tracing_release_file_tr() on all failure paths in
+event_{hist,hist_debug}_open() to correct the refcount.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Zheng Yejian <zhengyejian1@huawei.com>
+Link: https://lore.kernel.org/20250314065335.1202817-1-wutengda@huaweicloud.com
+Fixes: 1cc111b9cddc ("tracing: Fix uaf issue when open the hist or hist_debug file")
+Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events_hist.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index 08cc6405b8837..e6f9cbc622c75 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -5700,12 +5700,16 @@ static int event_hist_open(struct inode *inode, struct file *file)
+       guard(mutex)(&event_mutex);
+       event_file = event_file_data(file);
+-      if (!event_file)
+-              return -ENODEV;
++      if (!event_file) {
++              ret = -ENODEV;
++              goto err;
++      }
+       hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
+-      if (!hist_file)
+-              return -ENOMEM;
++      if (!hist_file) {
++              ret = -ENOMEM;
++              goto err;
++      }
+       hist_file->file = file;
+       hist_file->last_act = get_hist_hit_count(event_file);
+@@ -5713,9 +5717,14 @@ static int event_hist_open(struct inode *inode, struct file *file)
+       /* Clear private_data to avoid warning in single_open() */
+       file->private_data = NULL;
+       ret = single_open(file, hist_show, hist_file);
+-      if (ret)
++      if (ret) {
+               kfree(hist_file);
++              goto err;
++      }
++      return 0;
++err:
++      tracing_release_file_tr(inode, file);
+       return ret;
+ }
+@@ -5990,7 +5999,10 @@ static int event_hist_debug_open(struct inode *inode, struct file *file)
+       /* Clear private_data to avoid warning in single_open() */
+       file->private_data = NULL;
+-      return single_open(file, hist_debug_show, file);
++      ret = single_open(file, hist_debug_show, file);
++      if (ret)
++              tracing_release_file_tr(inode, file);
++      return ret;
+ }
+ const struct file_operations event_hist_debug_fops = {
+-- 
+2.39.5
+
diff --git a/queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch b/queue-6.6/tracing-hist-add-poll-pollin-support-on-hist-file.patch
new file mode 100644 (file)
index 0000000..3f5a2c5
--- /dev/null
@@ -0,0 +1,216 @@
+From bf44cefa0aea8adae9af1dad9b1396b0dc20a0a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Dec 2024 13:07:57 +0900
+Subject: tracing/hist: Add poll(POLLIN) support on hist file
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 1bd13edbbed6e7e396f1aab92b224a4775218e68 ]
+
+Add poll syscall support on the `hist` file. The waiter will be woken
+up with POLLIN when the histogram is updated.
+
+Currently, there is no way to wait for a specific event in userspace.
+So the user needs to peek the `trace` periodically, or wait on `trace_pipe`.
+But it is not a good idea to peek at the `trace` for an event that
+randomly happens. And `trace_pipe` is not coming back until a page is
+filled with events.
+
+This allows a user to wait for a specific event on the `hist` file. User
+can set a histogram trigger on the event which they want to monitor
+and poll() on its `hist` file. Since this poll() returns POLLIN, the next
+poll() will return soon unless a read() happens on that hist file.
+
+NOTE: To read the hist file again, you must set the file offset to 0,
+but just for monitoring the event, you may not need to read the
+histogram.
+
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Link: https://lore.kernel.org/173527247756.464571.14236296701625509931.stgit@devnote2
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reviewed-by: Tom Zanussi <zanussi@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/trace_events.h     | 14 +++++++
+ kernel/trace/trace_events.c      | 14 +++++++
+ kernel/trace/trace_events_hist.c | 70 ++++++++++++++++++++++++++++++--
+ 3 files changed, 95 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index aa1bc41726620..fe95d13c5e4d8 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -683,6 +683,20 @@ struct trace_event_file {
+       atomic_t                tm_ref; /* trigger-mode reference counter */
+ };
++#ifdef CONFIG_HIST_TRIGGERS
++extern struct irq_work hist_poll_work;
++extern wait_queue_head_t hist_poll_wq;
++
++static inline void hist_poll_wakeup(void)
++{
++      if (wq_has_sleeper(&hist_poll_wq))
++              irq_work_queue(&hist_poll_work);
++}
++
++#define hist_poll_wait(file, wait)    \
++      poll_wait(file, &hist_poll_wq, wait)
++#endif
++
+ #define __TRACE_EVENT_FLAGS(name, value)                              \
+       static int __init trace_init_flags_##name(void)                 \
+       {                                                               \
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 15041912c277d..562efd6685726 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -3077,6 +3077,20 @@ static bool event_in_systems(struct trace_event_call *call,
+       return !*p || isspace(*p) || *p == ',';
+ }
++#ifdef CONFIG_HIST_TRIGGERS
++/*
++ * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger
++ * may happen in any context.
++ */
++static void hist_poll_event_irq_work(struct irq_work *work)
++{
++      wake_up_all(&hist_poll_wq);
++}
++
++DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work);
++DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq);
++#endif
++
+ static struct trace_event_file *
+ trace_create_new_event(struct trace_event_call *call,
+                      struct trace_array *tr)
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index 755db2451fb2d..49b7811dec9f8 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -5322,6 +5322,8 @@ static void event_hist_trigger(struct event_trigger_data *data,
+       if (resolve_var_refs(hist_data, key, var_ref_vals, true))
+               hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
++
++      hist_poll_wakeup();
+ }
+ static void hist_trigger_stacktrace_print(struct seq_file *m,
+@@ -5601,15 +5603,36 @@ static void hist_trigger_show(struct seq_file *m,
+                  n_entries, (u64)atomic64_read(&hist_data->map->drops));
+ }
++struct hist_file_data {
++      struct file *file;
++      u64 last_read;
++};
++
++static u64 get_hist_hit_count(struct trace_event_file *event_file)
++{
++      struct hist_trigger_data *hist_data;
++      struct event_trigger_data *data;
++      u64 ret = 0;
++
++      list_for_each_entry(data, &event_file->triggers, list) {
++              if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) {
++                      hist_data = data->private_data;
++                      ret += atomic64_read(&hist_data->map->hits);
++              }
++      }
++      return ret;
++}
++
+ static int hist_show(struct seq_file *m, void *v)
+ {
++      struct hist_file_data *hist_file = m->private;
+       struct event_trigger_data *data;
+       struct trace_event_file *event_file;
+       int n = 0;
+       guard(mutex)(&event_mutex);
+-      event_file = event_file_file(m->private);
++      event_file = event_file_file(hist_file->file);
+       if (unlikely(!event_file))
+               return -ENODEV;
+@@ -5617,27 +5640,68 @@ static int hist_show(struct seq_file *m, void *v)
+               if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
+                       hist_trigger_show(m, data, n++);
+       }
++      hist_file->last_read = get_hist_hit_count(event_file);
++
+       return 0;
+ }
++static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wait)
++{
++      struct trace_event_file *event_file;
++      struct seq_file *m = file->private_data;
++      struct hist_file_data *hist_file = m->private;
++
++      guard(mutex)(&event_mutex);
++
++      event_file = event_file_data(file);
++      if (!event_file)
++              return EPOLLERR;
++
++      hist_poll_wait(file, wait);
++
++      if (hist_file->last_read != get_hist_hit_count(event_file))
++              return EPOLLIN | EPOLLRDNORM;
++
++      return 0;
++}
++
++static int event_hist_release(struct inode *inode, struct file *file)
++{
++      struct seq_file *m = file->private_data;
++      struct hist_file_data *hist_file = m->private;
++
++      kfree(hist_file);
++      return tracing_single_release_file_tr(inode, file);
++}
++
+ static int event_hist_open(struct inode *inode, struct file *file)
+ {
++      struct hist_file_data *hist_file;
+       int ret;
+       ret = tracing_open_file_tr(inode, file);
+       if (ret)
+               return ret;
++      hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
++      if (!hist_file)
++              return -ENOMEM;
++      hist_file->file = file;
++
+       /* Clear private_data to avoid warning in single_open() */
+       file->private_data = NULL;
+-      return single_open(file, hist_show, file);
++      ret = single_open(file, hist_show, hist_file);
++      if (ret)
++              kfree(hist_file);
++      return ret;
+ }
+ const struct file_operations event_hist_fops = {
+       .open = event_hist_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+-      .release = tracing_single_release_file_tr,
++      .release = event_hist_release,
++      .poll = event_hist_poll,
+ };
+ #ifdef CONFIG_HIST_TRIGGERS_DEBUG
+-- 
+2.39.5
+
diff --git a/queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch b/queue-6.6/tracing-hist-support-pollpri-event-for-poll-on-histo.patch
new file mode 100644 (file)
index 0000000..ea084ff
--- /dev/null
@@ -0,0 +1,119 @@
+From d5bd37a7fed7a40737957cb6ce474ba2c4cc96d4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Dec 2024 13:08:07 +0900
+Subject: tracing/hist: Support POLLPRI event for poll on histogram
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 66fc6f521a0b91051ce6968a216a30bc52267bf8 ]
+
+Since POLLIN will not be flushed until the hist file is read, the user
+needs to repeatedly read() and poll() on the hist file for monitoring the
+event continuously. But the read() is somewhat redundant when the user is
+only monitoring for event updates.
+
+Add a POLLPRI poll event on the hist file so the event returns when a
+histogram is updated after open(), poll() or read(). Thus it is possible
+to wait for the next event without having to issue a read().
+
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Link: https://lore.kernel.org/173527248770.464571.2536902137325258133.stgit@devnote2
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Reviewed-by: Tom Zanussi <zanussi@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events_hist.c | 29 ++++++++++++++++++++++++++---
+ 1 file changed, 26 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index 49b7811dec9f8..08cc6405b8837 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -5606,6 +5606,7 @@ static void hist_trigger_show(struct seq_file *m,
+ struct hist_file_data {
+       struct file *file;
+       u64 last_read;
++      u64 last_act;
+ };
+ static u64 get_hist_hit_count(struct trace_event_file *event_file)
+@@ -5641,6 +5642,11 @@ static int hist_show(struct seq_file *m, void *v)
+                       hist_trigger_show(m, data, n++);
+       }
+       hist_file->last_read = get_hist_hit_count(event_file);
++      /*
++       * Update last_act too so that poll()/POLLPRI can wait for the next
++       * event after any syscall on hist file.
++       */
++      hist_file->last_act = hist_file->last_read;
+       return 0;
+ }
+@@ -5650,6 +5656,8 @@ static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wai
+       struct trace_event_file *event_file;
+       struct seq_file *m = file->private_data;
+       struct hist_file_data *hist_file = m->private;
++      __poll_t ret = 0;
++      u64 cnt;
+       guard(mutex)(&event_mutex);
+@@ -5659,10 +5667,15 @@ static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wai
+       hist_poll_wait(file, wait);
+-      if (hist_file->last_read != get_hist_hit_count(event_file))
+-              return EPOLLIN | EPOLLRDNORM;
++      cnt = get_hist_hit_count(event_file);
++      if (hist_file->last_read != cnt)
++              ret |= EPOLLIN | EPOLLRDNORM;
++      if (hist_file->last_act != cnt) {
++              hist_file->last_act = cnt;
++              ret |= EPOLLPRI;
++      }
+-      return 0;
++      return ret;
+ }
+ static int event_hist_release(struct inode *inode, struct file *file)
+@@ -5676,6 +5689,7 @@ static int event_hist_release(struct inode *inode, struct file *file)
+ static int event_hist_open(struct inode *inode, struct file *file)
+ {
++      struct trace_event_file *event_file;
+       struct hist_file_data *hist_file;
+       int ret;
+@@ -5683,16 +5697,25 @@ static int event_hist_open(struct inode *inode, struct file *file)
+       if (ret)
+               return ret;
++      guard(mutex)(&event_mutex);
++
++      event_file = event_file_data(file);
++      if (!event_file)
++              return -ENODEV;
++
+       hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
+       if (!hist_file)
+               return -ENOMEM;
++
+       hist_file->file = file;
++      hist_file->last_act = get_hist_hit_count(event_file);
+       /* Clear private_data to avoid warning in single_open() */
+       file->private_data = NULL;
+       ret = single_open(file, hist_show, hist_file);
+       if (ret)
+               kfree(hist_file);
++
+       return ret;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch b/queue-6.6/tracing-switch-trace_events_hist.c-code-over-to-use-.patch
new file mode 100644 (file)
index 0000000..9184bfb
--- /dev/null
@@ -0,0 +1,99 @@
+From ee360b3c21c20d4012033fbdbcd63d74c5cbfb01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 19 Dec 2024 15:12:05 -0500
+Subject: tracing: Switch trace_events_hist.c code over to use guard()
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 2b36a97aeeb71b1e4a48bfedc7f21f44aeb1e6fb ]
+
+There are a couple of functions in trace_events_hist.c that have "goto out" or
+equivalent on error in order to release locks that were taken. This can be
+error prone or just simply make the code more complex.
+
+Switch every location that ends with unlocking a mutex on error over to
+using the guard(mutex)() infrastructure to let the compiler worry about
+releasing locks. This makes the code easier to read and understand.
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/20241219201345.694601480@goodmis.org
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 0b4ffbe4888a ("tracing: Correct the refcount if the hist/hist_debug file fails to open")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace_events_hist.c | 32 ++++++++++----------------------
+ 1 file changed, 10 insertions(+), 22 deletions(-)
+
+diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
+index 604d63380a90b..755db2451fb2d 100644
+--- a/kernel/trace/trace_events_hist.c
++++ b/kernel/trace/trace_events_hist.c
+@@ -5605,25 +5605,19 @@ static int hist_show(struct seq_file *m, void *v)
+ {
+       struct event_trigger_data *data;
+       struct trace_event_file *event_file;
+-      int n = 0, ret = 0;
++      int n = 0;
+-      mutex_lock(&event_mutex);
++      guard(mutex)(&event_mutex);
+       event_file = event_file_file(m->private);
+-      if (unlikely(!event_file)) {
+-              ret = -ENODEV;
+-              goto out_unlock;
+-      }
++      if (unlikely(!event_file))
++              return -ENODEV;
+       list_for_each_entry(data, &event_file->triggers, list) {
+               if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
+                       hist_trigger_show(m, data, n++);
+       }
+-
+- out_unlock:
+-      mutex_unlock(&event_mutex);
+-
+-      return ret;
++      return 0;
+ }
+ static int event_hist_open(struct inode *inode, struct file *file)
+@@ -5884,25 +5878,19 @@ static int hist_debug_show(struct seq_file *m, void *v)
+ {
+       struct event_trigger_data *data;
+       struct trace_event_file *event_file;
+-      int n = 0, ret = 0;
++      int n = 0;
+-      mutex_lock(&event_mutex);
++      guard(mutex)(&event_mutex);
+       event_file = event_file_file(m->private);
+-      if (unlikely(!event_file)) {
+-              ret = -ENODEV;
+-              goto out_unlock;
+-      }
++      if (unlikely(!event_file))
++              return -ENODEV;
+       list_for_each_entry(data, &event_file->triggers, list) {
+               if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
+                       hist_trigger_debug_show(m, data, n++);
+       }
+-
+- out_unlock:
+-      mutex_unlock(&event_mutex);
+-
+-      return ret;
++      return 0;
+ }
+ static int event_hist_debug_open(struct inode *inode, struct file *file)
+-- 
+2.39.5
+