From e4410a5b3b89fad247246464c0aa17559f8e52b3 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Fri, 8 Mar 2024 15:24:04 -0500
Subject: ring-buffer: Fix resetting of shortest_full

From: Steven Rostedt (Google) <rostedt@goodmis.org>

[ Upstream commit 68282dd930ea38b068ce2c109d12405f40df3f93 ]
10 The "shortest_full" variable is used to keep track of the waiter that is
11 waiting for the smallest amount on the ring buffer before being woken up.
12 When a tasks waits on the ring buffer, it passes in a "full" value that is
13 a percentage. 0 means wake up on any data. 1-100 means wake up from 1% to

As all waiters are on the same wait queue, the wake up happens for the
waiter with the smallest percentage.
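
For illustration only (this is not the kernel code, and the helper names
here are hypothetical), the watermark check behind that wake up looks
roughly like this:

	/* Sketch only: hypothetical helpers, not the ring buffer code */
	static int percent_filled(size_t used, size_t size)
	{
		return (int)((used * 100) / size);
	}

	/*
	 * The full_waiters queue is worth waking once the buffer has
	 * reached the smallest percentage any waiter asked for.
	 */
	static bool watermark_hit(size_t used, size_t size, int shortest_full)
	{
		return percent_filled(used, size) >= shortest_full;
	}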

The problem is that the shortest_full value on the cpu_buffer that stores
the smallest amount doesn't get reset when all the waiters are woken up.
It only gets reset when the ring buffer itself is reset
(echo > /sys/kernel/tracing/trace).

This means that tasks may be woken up more often than they want to be.
Instead, have the shortest_full field get reset just before waking up all
the tasks. If the tasks wait again, they will update the shortest_full
value as needed.
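
In short, the fix applies the following pattern in rb_wake_up_waiters()
(shown in full in the first hunk below): clear shortest_full under the
reader_lock before waking the full_waiters queue:

	raw_spin_lock(&cpu_buffer->reader_lock);
	rbwork->wakeup_full = false;
	rbwork->full_waiters_pending = false;
	/* Waking up all waiters, they will reset the shortest full */
	cpu_buffer->shortest_full = 0;
	raw_spin_unlock(&cpu_buffer->reader_lock);
	wake_up_all(&rbwork->full_waiters);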

Also add locking around the setting of shortest_full in the poll logic,
and rename "work" to "rbwork" to match the variable name used for
rb_irq_work structures in other places.
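
The poll path takes the same reader_lock, using the irqsave variant since
it can run with interrupts enabled (see the second hunk below):

	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
	rbwork->full_waiters_pending = true;
	if (!cpu_buffer->shortest_full ||
	    cpu_buffer->shortest_full > full)
		cpu_buffer->shortest_full = full;
	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);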

Link: https://lore.kernel.org/linux-trace-kernel/20240308202431.948914369@goodmis.org

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linke li <lilinke99@qq.com>
Cc: Rabin Vincent <rabin@rab.in>
Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Stable-dep-of: 8145f1c35fa6 ("ring-buffer: Fix full_waiters_pending in poll")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/trace/ring_buffer.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5b665e5991bf7..5465f4c950f27 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -831,8 +831,19 @@ static void rb_wake_up_waiters(struct irq_work *work)
 
 	wake_up_all(&rbwork->waiters);
 	if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+		/* Only cpu_buffer sets the above flags */
+		struct ring_buffer_per_cpu *cpu_buffer =
+			container_of(rbwork, struct ring_buffer_per_cpu, irq_work);
+
+		/* Called from interrupt context */
+		raw_spin_lock(&cpu_buffer->reader_lock);
 		rbwork->wakeup_full = false;
 		rbwork->full_waiters_pending = false;
+
+		/* Waking up all waiters, they will reset the shortest full */
+		cpu_buffer->shortest_full = 0;
+		raw_spin_unlock(&cpu_buffer->reader_lock);
+
 		wake_up_all(&rbwork->full_waiters);
 	}
 }
@@ -999,28 +1010,33 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table, int full)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	struct rb_irq_work *work;
+	struct rb_irq_work *rbwork;
 
 	if (cpu == RING_BUFFER_ALL_CPUS) {
-		work = &buffer->irq_work;
+		rbwork = &buffer->irq_work;
 		full = 0;
 	} else {
 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
 			return EPOLLERR;
 
 		cpu_buffer = buffer->buffers[cpu];
-		work = &cpu_buffer->irq_work;
+		rbwork = &cpu_buffer->irq_work;
 	}
 
 	if (full) {
-		poll_wait(filp, &work->full_waiters, poll_table);
-		work->full_waiters_pending = true;
+		unsigned long flags;
+
+		poll_wait(filp, &rbwork->full_waiters, poll_table);
+
+		raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+		rbwork->full_waiters_pending = true;
 		if (!cpu_buffer->shortest_full ||
 		    cpu_buffer->shortest_full > full)
 			cpu_buffer->shortest_full = full;
+		raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 	} else {
-		poll_wait(filp, &work->waiters, poll_table);
-		work->waiters_pending = true;
+		poll_wait(filp, &rbwork->waiters, poll_table);
+		rbwork->waiters_pending = true;
 	}
 
 	return 0;