Fixes for 5.4

author Sasha Levin <sashal@kernel.org>

Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)

committer Sasha Levin <sashal@kernel.org>

Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)
author Sasha Levin <sashal@kernel.org>
Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)
committer Sasha Levin <sashal@kernel.org>
Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)
diff --git a/queue-5.4/btrfs-record-delayed-inode-root-in-transaction.patch b/queue-5.4/btrfs-record-delayed-inode-root-in-transaction.patch

new file mode 100644 (file)

index 0000000..9e54f37
--- /dev/null
+++ b/queue-5.4/btrfs-record-delayed-inode-root-in-transaction.patch
@@ -0,0 +1,41 @@
+From eaeffc0a17ce4a49c7899c7dad37e1df366cef83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Mar 2024 10:14:24 -0700
+Subject: btrfs: record delayed inode root in transaction
+
+From: Boris Burkov <boris@bur.io>
+
+[ Upstream commit 71537e35c324ea6fbd68377a4f26bb93a831ae35 ]
+
+When running delayed inode updates, we do not record the inode's root in
+the transaction, but we do allocate PREALLOC and thus converted PERTRANS
+space for it. To be sure we free that PERTRANS meta rsv, we must ensure
+that we record the root in the transaction.
+
+Fixes: 4f5427ccce5d ("btrfs: delayed-inode: Use new qgroup meta rsv for delayed inode and item")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/delayed-inode.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
+index eacc020b1419f..95afe5ef7500c 100644
+--- a/fs/btrfs/delayed-inode.c
++++ b/fs/btrfs/delayed-inode.c
+@@ -1137,6 +1137,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
+       if (ret)
+               return ret;
+ 
++      ret = btrfs_record_root_in_trans(trans, node->root);
++      if (ret)
++              return ret;
+       ret = btrfs_update_delayed_inode(trans, node->root, path, node);
+       return ret;
+ }
+-- 
+2.43.0
+
diff --git a/queue-5.4/ring-buffer-only-update-pages_touched-when-a-new-pag.patch b/queue-5.4/ring-buffer-only-update-pages_touched-when-a-new-pag.patch

new file mode 100644 (file)

index 0000000..fed2758
--- /dev/null
+++ b/queue-5.4/ring-buffer-only-update-pages_touched-when-a-new-pag.patch
@@ -0,0 +1,88 @@
+From 54668a2133aba01b97c8945fa5eb0483c99ad8fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Apr 2024 15:13:09 -0400
+Subject: ring-buffer: Only update pages_touched when a new page is touched
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit ffe3986fece696cf65e0ef99e74c75f848be8e30 ]
+
+The "buffer_percent" logic is used by the ring buffer splice code to wake
+up tasks waiting for data only after the buffer has been filled to the
+percentage given in the "buffer_percent" file. It depends on three
+variables that determine the amount of data that is in the ring buffer:
+
+ 1) pages_read - incremented whenever a new sub-buffer is consumed
+ 2) pages_lost - incremented every time a writer overwrites a sub-buffer
+ 3) pages_touched - incremented when a write goes to a new sub-buffer
+
+The percentage is the calculation of:
+
+  (pages_touched - (pages_lost + pages_read)) / nr_pages
+
+Basically, the amount of data is the total number of sub-bufs that have been
+touched, minus the number of sub-bufs lost and sub-bufs consumed. This is
+divided by the total count to give the buffer percentage. When the
+percentage is greater than the value in the "buffer_percent" file, it
+wakes up splice readers waiting for that amount.
+
+It was observed that over time, the amount read from the splice was
+constantly decreasing the longer the trace was running. That is, if one
+asked for 60%, it would read over 60% when it first starts tracing, but
+then it would be woken up at under 60% and would slowly decrease the
+amount of data read after being woken up, where the amount becomes much
+less than the buffer percent.
+
+This was due to an accounting error in how pages_touched is incremented. This
+value is incremented whenever a writer transfers to a new sub-buffer. But
+the place where it was incremented was incorrect. If a writer overflowed
+the current sub-buffer it would go to the next one. If it gets preempted
+by an interrupt at that time, and the interrupt performs a trace, it too
+will end up going to the next sub-buffer. But only one should increment
+the counter. Unfortunately, that was not the case.
+
+Change the cmpxchg() that does the real switch of the tail-page into a
+try_cmpxchg(), and on success, perform the increment of pages_touched. This
+increments the counter only once when a writer moves to a new sub-buffer,
+rather than incrementing it twice when there's a race and a writer and
+its preempting writer both move to the same new sub-buffer.
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240409151309.0d0e5056@gandalf.local.home
+
+Cc: stable@vger.kernel.org
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader")
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index d2dba546fbbe1..1f0ef428b2f1c 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -1163,7 +1163,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
+       old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
+       old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
+ 
+-      local_inc(&cpu_buffer->pages_touched);
+       /*
+        * Just make sure we have seen our old_write and synchronize
+        * with any interrupts that come in.
+@@ -1200,8 +1199,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
+                */
+               local_set(&next_page->page->commit, 0);
+ 
+-              /* Again, either we update tail_page or an interrupt does */
+-              (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
++              /* Either we update tail_page or an interrupt does */
++              if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
++                      local_inc(&cpu_buffer->pages_touched);
+       }
+ }
+ 
+-- 
+2.43.0
+
diff --git a/queue-5.4/series b/queue-5.4/series

index 94b842c9f6e57edb994c9ae8bd9390627e6a3a9f..0619eed09096beae53acfd831f1456baed1c12fe 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -16,3 +16,5 @@ drm-client-fully-protect-modes-with-dev-mode_config.mutex.patch
  vhost-add-smp_rmb-in-vhost_vq_avail_empty.patch
  selftests-timers-fix-abs-warning-in-posix_timers-test.patch
  x86-apic-force-native_apic_mem_read-to-use-the-mov-instruction.patch
+btrfs-record-delayed-inode-root-in-transaction.patch
+ring-buffer-only-update-pages_touched-when-a-new-pag.patch
author	Sasha Levin <sashal@kernel.org>
	Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Wed, 17 Apr 2024 17:16:44 +0000 (13:16 -0400)
queue-5.4/btrfs-record-delayed-inode-root-in-transaction.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/ring-buffer-only-update-pages_touched-when-a-new-pag.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history