git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
tracing/ring-buffer: Have polling block on watermark
author Steven Rostedt (Google) <rostedt@goodmis.org>
Fri, 21 Oct 2022 03:14:27 +0000 (23:14 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 8 Dec 2022 10:23:05 +0000 (11:23 +0100)
commit 42fb0a1e84ff525ebe560e2baf9451ab69127e2b upstream.

Currently the way polling works on the ring buffer is broken. It will
return immediately if there's any data in the ring buffer whereas a read
will block until the watermark (defined by the tracefs buffer_percent file)
is hit.

That is, a select() or poll() will return as if there's data available,
but then the following read will block. This is broken for the way
select()s and poll()s are supposed to work.

Have the polling on the ring buffer also block the same way reads and
splices do on the ring buffer.

Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home
Cc: Linux Trace Kernel <linux-trace-kernel@vger.kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Primiano Tucci <primiano@google.com>
Cc: stable@vger.kernel.org
Fixes: 1e0d6714aceb7 ("ring-buffer: Do not wake up a splice waiter when page is not full")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/linux/ring_buffer.h
kernel/trace/ring_buffer.c
kernel/trace/trace.c

index 1a40277b512c959813f87e000810d40d719b0071..b739507722998c337f4aef1ba5acdb3d424186b1 100644 (file)
@@ -99,7 +99,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 
 int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full);
 __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
-                         struct file *filp, poll_table *poll_table);
+                         struct file *filp, poll_table *poll_table, int full);
 
 
 #define RING_BUFFER_ALL_CPUS -1
index c4234430afeacdda5d0c25e6b8b359cc1e9d6d78..176d858903bdba444d36a36d628bd50fea7ff691 100644 (file)
@@ -566,6 +566,21 @@ size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
        return cnt - read;
 }
 
+static __always_inline bool full_hit(struct ring_buffer *buffer, int cpu, int full)
+{
+       struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+       size_t nr_pages;
+       size_t dirty;
+
+       nr_pages = cpu_buffer->nr_pages;
+       if (!nr_pages || !full)
+               return true;
+
+       dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+
+       return (dirty * 100) > (full * nr_pages);
+}
+
 /*
  * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
  *
@@ -661,22 +676,20 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
                    !ring_buffer_empty_cpu(buffer, cpu)) {
                        unsigned long flags;
                        bool pagebusy;
-                       size_t nr_pages;
-                       size_t dirty;
+                       bool done;
 
                        if (!full)
                                break;
 
                        raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
                        pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-                       nr_pages = cpu_buffer->nr_pages;
-                       dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+                       done = !pagebusy && full_hit(buffer, cpu, full);
+
                        if (!cpu_buffer->shortest_full ||
                            cpu_buffer->shortest_full > full)
                                cpu_buffer->shortest_full = full;
                        raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-                       if (!pagebusy &&
-                           (!nr_pages || (dirty * 100) > full * nr_pages))
+                       if (done)
                                break;
                }
 
@@ -697,6 +710,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
  * @cpu: the cpu buffer to wait on
  * @filp: the file descriptor
  * @poll_table: The poll descriptor
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
  *
  * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
  * as data is added to any of the @buffer's cpu buffers. Otherwise
@@ -706,14 +720,14 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
  * zero otherwise.
  */
 __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
-                         struct file *filp, poll_table *poll_table)
+                         struct file *filp, poll_table *poll_table, int full)
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct rb_irq_work *work;
 
-       if (cpu == RING_BUFFER_ALL_CPUS)
+       if (cpu == RING_BUFFER_ALL_CPUS) {
                work = &buffer->irq_work;
-       else {
+       } else {
                if (!cpumask_test_cpu(cpu, buffer->cpumask))
                        return -EINVAL;
 
@@ -721,8 +735,14 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
                work = &cpu_buffer->irq_work;
        }
 
-       poll_wait(filp, &work->waiters, poll_table);
-       work->waiters_pending = true;
+       if (full) {
+               poll_wait(filp, &work->full_waiters, poll_table);
+               work->full_waiters_pending = true;
+       } else {
+               poll_wait(filp, &work->waiters, poll_table);
+               work->waiters_pending = true;
+       }
+
        /*
         * There's a tight race between setting the waiters_pending and
         * checking if the ring buffer is empty.  Once the waiters_pending bit
@@ -738,6 +758,9 @@ __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
         */
        smp_mb();
 
+       if (full)
+               return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
+
        if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
            (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
                return EPOLLIN | EPOLLRDNORM;
@@ -2640,10 +2663,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 static __always_inline void
 rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 {
-       size_t nr_pages;
-       size_t dirty;
-       size_t full;
-
        if (buffer->irq_work.waiters_pending) {
                buffer->irq_work.waiters_pending = false;
                /* irq_work_queue() supplies it's own memory barriers */
@@ -2667,10 +2686,7 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 
        cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
 
-       full = cpu_buffer->shortest_full;
-       nr_pages = cpu_buffer->nr_pages;
-       dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
-       if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+       if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
                return;
 
        cpu_buffer->irq_work.wakeup_full = true;
index 62fd8798b0c4fe0c62eeb99c44181afcbeab72a5..42f45665e0597f196fe333986a121b67940f398f 100644 (file)
@@ -5993,7 +5993,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
                return EPOLLIN | EPOLLRDNORM;
        else
                return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
-                                            filp, poll_table);
+                                            filp, poll_table, iter->tr->buffer_percent);
 }
 
 static __poll_t