4.9-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)
diff --git a/queue-4.9/ring-buffer-allow-splice-to-read-previous-partially-read-pages.patch b/queue-4.9/ring-buffer-allow-splice-to-read-previous-partially-read-pages.patch

new file mode 100644 (file)

index 0000000..a037483
--- /dev/null
+++ b/queue-4.9/ring-buffer-allow-splice-to-read-previous-partially-read-pages.patch
@@ -0,0 +1,52 @@
+From fa8f4a89736b654125fb254b0db753ac68a5fced Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 27 Sep 2022 14:43:17 -0400
+Subject: ring-buffer: Allow splice to read previous partially read pages
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit fa8f4a89736b654125fb254b0db753ac68a5fced upstream.
+
+If a page is partially read, and then the splice system call is run
+against the ring buffer, it will always fail to read, no matter how much
+is in the ring buffer. That's because the code path for a partial read of
+the page will fail if the "full" flag is set.
+
+The splice system call wants full pages, so if the read of the ring buffer
+is not yet full, it should return zero, and the splice will block. But if
+a previous read was done, where the beginning has been consumed, it should
+still be given to the splice caller if the rest of the page has been
+written to.
+
+This caused the splice command to never consume data in this scenario, and
+let the ring buffer just fill up and lose events.
+
+Link: https://lkml.kernel.org/r/20220927144317.46be6b80@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Fixes: 8789a9e7df6bf ("ring-buffer: read page interface")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -4623,7 +4623,15 @@ int ring_buffer_read_page(struct ring_bu
+               unsigned int pos = 0;
+               unsigned int size;
+ 
+-              if (full)
++              /*
++               * If a full page is expected, this can still be returned
++               * if there's been a previous partial read and the
++               * rest of the page can be read and the commit page is off
++               * the reader page.
++               */
++              if (full &&
++                  (!read || (len < (commit - read)) ||
++                   cpu_buffer->reader_page == cpu_buffer->commit_page))
+                       goto out_unlock;
+ 
+               if (len > (commit - read))
diff --git a/queue-4.9/ring-buffer-check-pending-waiters-when-doing-wake-ups-as-well.patch b/queue-4.9/ring-buffer-check-pending-waiters-when-doing-wake-ups-as-well.patch

new file mode 100644 (file)

index 0000000..afe5711
--- /dev/null
+++ b/queue-4.9/ring-buffer-check-pending-waiters-when-doing-wake-ups-as-well.patch
@@ -0,0 +1,45 @@
+From ec0bbc5ec5664dcee344f79373852117dc672c86 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Tue, 27 Sep 2022 19:15:25 -0400
+Subject: ring-buffer: Check pending waiters when doing wake ups as well
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit ec0bbc5ec5664dcee344f79373852117dc672c86 upstream.
+
+The rb_wake_up_waiters() callback only checks the "wakeup_full" variable and not the
+"full_waiters_pending". The full_waiters_pending is set when a waiter is
+added to the wait queue. The wakeup_full is only set when an event is
+triggered, and it clears the full_waiters_pending to avoid multiple calls
+to irq_work_queue().
+
+The irq_work callback really needs to check both wakeup_full as well as
+full_waiters_pending such that this code can be used to wake up waiters
+when a file is closed that represents the ring buffer and the waiters need
+to be woken up.
+
+Link: https://lkml.kernel.org/r/20220927231824.209460321@goodmis.org
+
+Cc: stable@vger.kernel.org
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Fixes: 15693458c4bc0 ("tracing/ring-buffer: Move poll wake ups into ring buffer code")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -513,8 +513,9 @@ static void rb_wake_up_waiters(struct ir
+       struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+ 
+       wake_up_all(&rbwork->waiters);
+-      if (rbwork->wakeup_full) {
++      if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+               rbwork->wakeup_full = false;
++              rbwork->full_waiters_pending = false;
+               wake_up_all(&rbwork->full_waiters);
+       }
+ }
diff --git a/queue-4.9/ring-buffer-fix-race-between-reset-page-and-reading-page.patch b/queue-4.9/ring-buffer-fix-race-between-reset-page-and-reading-page.patch

new file mode 100644 (file)

index 0000000..e530809
--- /dev/null
+++ b/queue-4.9/ring-buffer-fix-race-between-reset-page-and-reading-page.patch
@@ -0,0 +1,115 @@
+From a0fcaaed0c46cf9399d3a2d6e0c87ddb3df0e044 Mon Sep 17 00:00:00 2001
+From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
+Date: Thu, 29 Sep 2022 10:49:09 -0400
+Subject: ring-buffer: Fix race between reset page and reading page
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+commit a0fcaaed0c46cf9399d3a2d6e0c87ddb3df0e044 upstream.
+
+The ring buffer is broken up into sub buffers (currently of page size).
+Each sub buffer has a pointer to its "tail" (the last event written to the
+sub buffer). When a new event is requested, the tail is locally
+incremented to cover the size of the new event. This is done in a way that
+there is no need for locking.
+
+If the tail goes past the end of the sub buffer, the process of moving to
+the next sub buffer takes place. After setting the current sub buffer to
+the next one, the previous one that had the tail go past the end of the
+sub buffer needs to be reset back to the original tail location (before
+the new event was requested) and the rest of the sub buffer needs to be
+"padded".
+
+The race happens when a reader takes control of the sub buffer. As readers
+do a "swap" of sub buffers from the ring buffer to get exclusive access to
+the sub buffer, it replaces the "head" sub buffer with an empty sub buffer
+that goes back into the writable portion of the ring buffer. This swap can
+happen as soon as the writer moves to the next sub buffer and before it
+updates the last sub buffer with padding.
+
+Because the sub buffer can be released to the reader while the writer is
+still updating the padding, it is possible for the reader to see the event
+that goes past the end of the sub buffer. This can cause obvious issues.
+
+To fix this, add a few memory barriers so that the reader definitely sees
+the updates to the sub buffer, and also waits until the writer has put
+the "tail" of the sub buffer back to the last event that was written
+on it.
+
+To be paranoid, it will only spin for 1 second, otherwise it will
+warn and shut down the ring buffer code. 1 second should be enough as
+the writer does have preemption disabled. If the writer doesn't move
+within 1 second (with preemption disabled) something is horribly
+wrong. No interrupt should last 1 second!
+
+Link: https://lore.kernel.org/all/20220830120854.7545-1-jiazi.li@transsion.com/
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216369
+Link: https://lkml.kernel.org/r/20220929104909.0650a36c@gandalf.local.home
+
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: c7b0930857e22 ("ring-buffer: prevent adding write in discarded area")
+Reported-by: Jiazi.Li <jiazi.li@transsion.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/ring_buffer.c |   33 +++++++++++++++++++++++++++++++++
+ 1 file changed, 33 insertions(+)
+
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -2122,6 +2122,9 @@ rb_reset_tail(struct ring_buffer_per_cpu
+               /* Mark the rest of the page with padding */
+               rb_event_set_padding(event);
+ 
++              /* Make sure the padding is visible before the write update */
++              smp_wmb();
++
+               /* Set the write back to the previous setting */
+               local_sub(length, &tail_page->write);
+               return;
+@@ -2133,6 +2136,9 @@ rb_reset_tail(struct ring_buffer_per_cpu
+       /* time delta must be non zero */
+       event->time_delta = 1;
+ 
++      /* Make sure the padding is visible before the tail_page->write update */
++      smp_wmb();
++
+       /* Set write to end of buffer */
+       length = (tail + length) - BUF_PAGE_SIZE;
+       local_sub(length, &tail_page->write);
+@@ -3724,6 +3730,33 @@ rb_get_reader_page(struct ring_buffer_pe
+       arch_spin_unlock(&cpu_buffer->lock);
+       local_irq_restore(flags);
+ 
++      /*
++       * The writer has preempt disable, wait for it. But not forever
++       * Although, 1 second is pretty much "forever"
++       */
++#define USECS_WAIT    1000000
++        for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
++              /* If the write is past the end of page, a writer is still updating it */
++              if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
++                      break;
++
++              udelay(1);
++
++              /* Get the latest version of the reader write value */
++              smp_rmb();
++      }
++
++      /* The writer is not moving forward? Something is wrong */
++      if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT))
++              reader = NULL;
++
++      /*
++       * Make sure we see any padding after the write update
++       * (see rb_reset_tail())
++       */
++      smp_rmb();
++
++
+       return reader;
+ }
+ 
diff --git a/queue-4.9/series b/queue-4.9/series

index 37d76ff2298fb36f78d1212a46d832758f99cb63..f8fd6aa648bb3ceba76b6d6dd43d8d525ce9c1a8 100644 (file)
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -56,3 +56,6 @@ ext4-avoid-crash-when-inline-data-creation-follows-dio-write.patch
  ext4-fix-null-ptr-deref-in-ext4_write_info.patch
  ext4-make-ext4_lazyinit_thread-freezable.patch
  ext4-place-buffer-head-allocation-before-handle-start.patch
+ring-buffer-allow-splice-to-read-previous-partially-read-pages.patch
+ring-buffer-check-pending-waiters-when-doing-wake-ups-as-well.patch
+ring-buffer-fix-race-between-reset-page-and-reading-page.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sun, 16 Oct 2022 15:39:01 +0000 (17:39 +0200)
queue-4.9/ring-buffer-allow-splice-to-read-previous-partially-read-pages.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ring-buffer-check-pending-waiters-when-doing-wake-ups-as-well.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/ring-buffer-fix-race-between-reset-page-and-reading-page.patch	[new file with mode: 0644]	patch \| blob
queue-4.9/series		patch \| blob \| blame \| history