git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.8
author Sasha Levin <sashal@kernel.org>
Sun, 31 Mar 2024 13:26:23 +0000 (09:26 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 31 Mar 2024 13:26:23 +0000 (09:26 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch [new file with mode: 0644]
queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch [new file with mode: 0644]
queue-6.8/series
queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch [new file with mode: 0644]

diff --git a/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch b/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
new file mode 100644
index 0000000..e720147
--- /dev/null
@@ -0,0 +1,45 @@
+From 8d619ded0caaaca65facceea8842dfc41e3370b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Mar 2024 11:37:31 +0000
+Subject: btrfs: fix extent map leak in unexpected scenario at
+ unpin_extent_cache()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 8a565ec04d6c43f330e7401e5af3458431b29bc6 ]
+
+At unpin_extent_cache(), if we happen to find an extent map with an
+unexpected start offset, we jump to the 'out' label and never release the
+reference we added to the extent map through the call to
+lookup_extent_mapping(), therefore resulting in a leak. So fix this by
+moving the free_extent_map() call to below the 'out' label.
+
+Fixes: c03c89f821e5 ("btrfs: handle errors returned from unpin_extent_cache()")
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_map.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
+index c02039db5d247..76378382dd8c4 100644
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -342,9 +342,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
+               em->mod_len = em->len;
+       }
+
+-      free_extent_map(em);
+ out:
+       write_unlock(&tree->lock);
++      free_extent_map(em);
+
+       return ret;
+ }
+-- 
+2.43.0
+
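A note on the bug shape: lookup_extent_mapping() returns the extent map with an elevated reference count, so every exit path must drop it. A minimal userspace sketch of the leak and the fix, with hypothetical names standing in for the btrfs helpers (not btrfs code):

    /* refleak.c - build with: cc refleak.c */
    #include <stdio.h>

    struct obj { int refs; };

    static struct obj *lookup(struct obj *o)  /* like lookup_extent_mapping(): takes a ref */
    {
            o->refs++;
            return o;
    }

    static void put(struct obj *o)            /* like free_extent_map(): drops a ref */
    {
            o->refs--;
    }

    /* Buggy shape: the early goto jumps over put(), leaking a reference. */
    static int use_buggy(struct obj *base, int start, int expected)
    {
            struct obj *em = lookup(base);
            int ret = 0;

            if (start != expected) {
                    ret = -1;
                    goto out;                 /* put(em) never runs on this path */
            }
            put(em);
    out:
            return ret;
    }

    /* Fixed shape: put() sits below the label, so every path drops the ref. */
    static int use_fixed(struct obj *base, int start, int expected)
    {
            struct obj *em = lookup(base);
            int ret = 0;

            if (start != expected)
                    ret = -1;

            put(em);
            return ret;
    }

    int main(void)
    {
            struct obj o = { .refs = 0 };

            use_buggy(&o, 1, 2);
            printf("buggy error path: refs=%d (leaked)\n", o.refs);
            o.refs = 0;
            use_fixed(&o, 1, 2);
            printf("fixed error path: refs=%d\n", o.refs);
            return 0;
    }

In the patch itself the drop lands after write_unlock(&tree->lock), so the reference is released on both the success and the error path without holding the tree lock across it.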
diff --git a/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch b/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
new file mode 100644
index 0000000..35b6808
--- /dev/null
@@ -0,0 +1,133 @@
+From 6d42c58b93ecfefaad182e663d984429ad079d18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Mar 2024 06:31:15 -0400
+Subject: ring-buffer: Make wake once of ring_buffer_wait() more robust
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit b70f2938242a028f8e9473781ede175486a59dc8 ]
+
+The default behavior of ring_buffer_wait() when passed a NULL "cond"
+parameter is to exit the function the first time it is woken up. The
+current implementation uses a counter that starts at zero and, once it
+is greater than one, exits the wait_event_interruptible().
+
+But this relies on the internal workings of wait_event_interruptible(),
+as that code basically does:
+
+  if (cond)
+    return;
+  prepare_to_wait();
+  if (!cond)
+    schedule();
+  finish_wait();
+
+That is, cond is called twice before it sleeps. The default cond of
+ring_buffer_wait() needs to account for that and wait for its counter to
+increment twice before exiting.
+
+Instead, use the seq/atomic_inc logic that is used by the tracing code
+that calls this function. Add an atomic_t seq to rb_irq_work and, when
+cond is NULL, have the default callback take a descriptor as its data
+that holds the rbwork and the value of the seq when the wait started.
+
+The wakeups will now increment rbwork->seq, and the cond callback will
+simply check whether that number has changed, no longer relying on the
+implementation of wait_event_interruptible().
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240315063115.6cb5d205@gandalf.local.home
+
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Fixes: 7af9ded0c2ca ("ring-buffer: Use wait_event_interruptible() in ring_buffer_wait()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/ring_buffer.c | 34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index ad0d475d1f570..43060a7ae15e7 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -384,6 +384,7 @@ struct rb_irq_work {
+       struct irq_work                 work;
+       wait_queue_head_t               waiters;
+       wait_queue_head_t               full_waiters;
++      atomic_t                        seq;
+       bool                            waiters_pending;
+       bool                            full_waiters_pending;
+       bool                            wakeup_full;
+@@ -753,6 +754,9 @@ static void rb_wake_up_waiters(struct irq_work *work)
+ {
+       struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
++      /* For waiters waiting for the first wake up */
++      (void)atomic_fetch_inc_release(&rbwork->seq);
++
+       wake_up_all(&rbwork->waiters);
+       if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+               /* Only cpu_buffer sets the above flags */
+@@ -881,20 +885,21 @@ rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+       return false;
+ }
+
++struct rb_wait_data {
++      struct rb_irq_work              *irq_work;
++      int                             seq;
++};
++
+ /*
+  * The default wait condition for ring_buffer_wait() is to just to exit the
+  * wait loop the first time it is woken up.
+  */
+ static bool rb_wait_once(void *data)
+ {
+-      long *once = data;
++      struct rb_wait_data *rdata = data;
++      struct rb_irq_work *rbwork = rdata->irq_work;
+
+-      /* wait_event() actually calls this twice before scheduling*/
+-      if (*once > 1)
+-              return true;
+-
+-      (*once)++;
+-      return false;
++      return atomic_read_acquire(&rbwork->seq) != rdata->seq;
+ }
+
+ /**
+@@ -915,14 +920,9 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+       struct ring_buffer_per_cpu *cpu_buffer;
+       struct wait_queue_head *waitq;
+       struct rb_irq_work *rbwork;
+-      long once = 0;
++      struct rb_wait_data rdata;
+       int ret = 0;
+
+-      if (!cond) {
+-              cond = rb_wait_once;
+-              data = &once;
+-      }
+-
+       /*
+        * Depending on what the caller is waiting for, either any
+        * data in any cpu buffer, or a specific buffer, put the
+@@ -944,6 +944,14 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+       else
+               waitq = &rbwork->waiters;
+
++      /* Set up to exit loop as soon as it is woken */
++      if (!cond) {
++              cond = rb_wait_once;
++              rdata.irq_work = rbwork;
++              rdata.seq = atomic_read_acquire(&rbwork->seq);
++              data = &rdata;
++      }
++
+       ret = wait_event_interruptible((*waitq),
+                               rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
+
+-- 
+2.43.0
+
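The ordering in the last hunk is what makes this race-free: the waiter snapshots rbwork->seq before arming the wait, and rb_wake_up_waiters() increments it before waking, so the "seq changed" condition can be re-evaluated any number of times without losing a wakeup. A small userspace sketch of the same snapshot-then-compare pattern, using pthreads and C11 atomics as stand-ins for the kernel's wait queues (hypothetical names, not the kernel API):

    /* seqwait.c - build with: cc -pthread seqwait.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static atomic_int seq;                          /* like rb_irq_work::seq */

    static void wake_up_waiters(void)               /* like rb_wake_up_waiters() */
    {
            /* Bump the counter before waking so waiters see the change. */
            atomic_fetch_add_explicit(&seq, 1, memory_order_release);
            pthread_mutex_lock(&lock);
            pthread_cond_broadcast(&cond);
            pthread_mutex_unlock(&lock);
    }

    static void *wait_once(void *arg)
    {
            int start = *(int *)arg;                /* snapshot taken before the wait was armed */

            pthread_mutex_lock(&lock);
            /* Like rb_wait_once(): exit as soon as seq differs from the
             * snapshot. The check is stable no matter how often the wait
             * machinery evaluates it. */
            while (atomic_load_explicit(&seq, memory_order_acquire) == start)
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);
            puts("woken exactly once");
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            int start = atomic_load(&seq);          /* like rdata.seq = atomic_read_acquire() */

            pthread_create(&t, NULL, wait_once, &start);
            wake_up_waiters();                      /* a single wakeup releases the waiter */
            pthread_join(&t, NULL);
            return 0;
    }

Even if the wakeup fires before the waiter first checks, the comparison against the earlier snapshot already fails and the waiter returns immediately — the property the old once-counter only got by relying on wait_event_interruptible() internals.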
diff --git a/queue-6.8/series b/queue-6.8/series
index cf466576f1747ca469a6ee97885c66a2db435e34..c195e5697bb6f633bcc8f1f7116a95724f6a420d 100644
@@ -343,3 +343,6 @@ revert-usb-phy-generic-get-the-vbus-supply.patch
 usb-cdc-wdm-close-race-between-read-and-workqueue.patch
 usb-misc-ljca-fix-double-free-in-error-handling-path.patch
 usb-uas-return-enodev-when-submit-urbs-fail-with-device-not-attached.patch
+vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
+ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
+btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
diff --git a/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch b/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
new file mode 100644
index 0000000..135633f
--- /dev/null
@@ -0,0 +1,110 @@
+From 239660ef1c64bb092520887e11b868aa9e300ef5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Mar 2024 10:21:48 -0800
+Subject: vfio/pds: Make sure migration file isn't accessed after reset
+
+From: Brett Creeley <brett.creeley@amd.com>
+
+[ Upstream commit 457f7308254756b6e4b8fc3876cb770dcf0e7cc7 ]
+
+It's possible for the migration file to be accessed after reset, when it
+has already been cleaned up, especially when the reset is initiated by
+the device. This is because the driver doesn't rip out the filep when
+cleaning up; it only frees the related page structures and sets its local
+struct pds_vfio_lm_file pointer to NULL. This can cause a NULL pointer
+dereference, which is shown in the example below during a restore after
+a device-initiated reset:
+
+BUG: kernel NULL pointer dereference, address: 000000000000000c
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+RIP: 0010:pds_vfio_get_file_page+0x5d/0xf0 [pds_vfio_pci]
+[...]
+Call Trace:
+ <TASK>
+ pds_vfio_restore_write+0xf6/0x160 [pds_vfio_pci]
+ vfs_write+0xc9/0x3f0
+ ? __fget_light+0xc9/0x110
+ ksys_write+0xb5/0xf0
+ __x64_sys_write+0x1a/0x20
+ do_syscall_64+0x38/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+[...]
+
+Add a disabled flag to the driver's struct pds_vfio_lm_file that gets
+set during cleanup. Then make sure to check the flag when the migration
+file is accessed via its file_operations. By default this flag will be
+false, as the memory for struct pds_vfio_lm_file is kzalloc'd, which means
+the struct pds_vfio_lm_file is enabled and accessible. Also, since the
+file_operations and the driver's migration file cleanup happen under the
+protection of the same pds_vfio_lm_file.lock, using this flag is thread
+safe.
+
+Fixes: 8512ed256334 ("vfio/pds: Always clear the save/restore FDs on reset")
+Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
+Signed-off-by: Brett Creeley <brett.creeley@amd.com>
+Link: https://lore.kernel.org/r/20240308182149.22036-2-brett.creeley@amd.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vfio/pci/pds/lm.c | 13 +++++++++++++
+ drivers/vfio/pci/pds/lm.h |  1 +
+ 2 files changed, 14 insertions(+)
+
+diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c
+index 79fe2e66bb498..6b94cc0bf45b4 100644
+--- a/drivers/vfio/pci/pds/lm.c
++++ b/drivers/vfio/pci/pds/lm.c
+@@ -92,8 +92,10 @@ static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file)
+ {
+       mutex_lock(&lm_file->lock);
+
++      lm_file->disabled = true;
+       lm_file->size = 0;
+       lm_file->alloc_size = 0;
++      lm_file->filep->f_pos = 0;
+
+       /* Free scatter list of file pages */
+       sg_free_table(&lm_file->sg_table);
+@@ -183,6 +185,12 @@ static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf,
+       pos = &filp->f_pos;
+
+       mutex_lock(&lm_file->lock);
++
++      if (lm_file->disabled) {
++              done = -ENODEV;
++              goto out_unlock;
++      }
++
+       if (*pos > lm_file->size) {
+               done = -EINVAL;
+               goto out_unlock;
+@@ -283,6 +291,11 @@ static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf,
+
+       mutex_lock(&lm_file->lock);
+
++      if (lm_file->disabled) {
++              done = -ENODEV;
++              goto out_unlock;
++      }
++
+       while (len) {
+               size_t page_offset;
+               struct page *page;
+diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h
+index 13be893198b74..9511b1afc6a11 100644
+--- a/drivers/vfio/pci/pds/lm.h
++++ b/drivers/vfio/pci/pds/lm.h
+@@ -27,6 +27,7 @@ struct pds_vfio_lm_file {
+       struct scatterlist *last_offset_sg;     /* Iterator */
+       unsigned int sg_last_entry;
+       unsigned long last_offset;
++      bool disabled;
+ };
+
+ struct pds_vfio_pci_device;
+-- 
+2.43.0
+
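The fix follows a common file-lifetime guard: teardown flips a flag under the object's lock, and every file operation takes the same lock and checks the flag before touching state teardown may have freed. A condensed userspace sketch of the pattern with hypothetical names (not the pds_vfio_pci code):

    /* lmguard.c - sketch of the disabled-flag guard */
    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <sys/types.h>

    struct lm_file {                        /* stand-in for struct pds_vfio_lm_file */
            pthread_mutex_t lock;
            bool disabled;                  /* false after zeroed allocation: "enabled" */
            size_t size;
            char *pages;                    /* stand-in for the page/sg state */
    };

    static void lm_file_cleanup(struct lm_file *f) /* what reset handling does */
    {
            pthread_mutex_lock(&f->lock);
            f->disabled = true;             /* set before the state is torn down */
            f->size = 0;
            free(f->pages);
            f->pages = NULL;
            pthread_mutex_unlock(&f->lock);
    }

    static ssize_t lm_file_read(struct lm_file *f, char *buf, size_t len)
    {
            ssize_t done = 0;

            (void)buf;                      /* copy elided in this sketch */
            pthread_mutex_lock(&f->lock);
            if (f->disabled) {              /* reset already ran: refuse access */
                    done = -ENODEV;
                    goto out_unlock;
            }
            if (len > f->size)
                    len = f->size;
            /* ... copy len bytes from f->pages into buf ... */
            done = (ssize_t)len;
    out_unlock:
            pthread_mutex_unlock(&f->lock);
            return done;
    }

Because cleanup and the accessors serialize on the same lock, a read racing with a reset either completes against valid state or observes disabled and returns -ENODEV; there is no window in which it can dereference the freed page structures.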