From: Sasha Levin
Date: Sun, 31 Mar 2024 13:26:23 +0000 (-0400)
Subject: Fixes for 6.8
X-Git-Tag: v6.7.12~90
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1c3a6fd1635e69a543f8ae7fb0484d75f233d7ae;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 6.8

Signed-off-by: Sasha Levin
---

diff --git a/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch b/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
new file mode 100644
index 00000000000..e720147307b
--- /dev/null
+++ b/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
@@ -0,0 +1,45 @@
+From 8d619ded0caaaca65facceea8842dfc41e3370b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 13 Mar 2024 11:37:31 +0000
+Subject: btrfs: fix extent map leak in unexpected scenario at
+ unpin_extent_cache()
+
+From: Filipe Manana
+
+[ Upstream commit 8a565ec04d6c43f330e7401e5af3458431b29bc6 ]
+
+At unpin_extent_cache() if we happen to find an extent map with an
+unexpected start offset, we jump to the 'out' label and never release the
+reference we added to the extent map through the call to
+lookup_extent_mapping(), therefore resulting in a leak. So fix this by
+moving the free_extent_map() under the 'out' label.
+
+Fixes: c03c89f821e5 ("btrfs: handle errors returned from unpin_extent_cache()")
+Reviewed-by: Qu Wenruo
+Reviewed-by: Anand Jain
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Sasha Levin
+---
+ fs/btrfs/extent_map.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
+index c02039db5d247..76378382dd8c4 100644
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -342,9 +342,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
+         em->mod_len = em->len;
+     }
+
+-    free_extent_map(em);
+ out:
+     write_unlock(&tree->lock);
++    free_extent_map(em);
+     return ret;
+
+ }
+-- 
+2.43.0
+
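The bug class fixed above is a common one: a lookup helper hands back a referenced object, and an early "goto out" on an unexpected value bypasses the reference drop. Below is a minimal user-space sketch of the pattern and the shape of the fix, with the drop living under the common exit label. All names here (obj, lookup, obj_put, process) are illustrative stand-ins, not the btrfs API.

/* Sketch only: a lookup takes a reference, and every exit path,
 * including the unexpected-value one, must drop it. */
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refcount;
        unsigned long start;
};

static struct obj *lookup(unsigned long start)
{
        struct obj *o = malloc(sizeof(*o));

        if (!o)
                return NULL;
        o->refcount = 1;        /* lookup hands back a reference */
        o->start = start;
        return o;
}

static void obj_put(struct obj *o)
{
        if (o && --o->refcount == 0)
                free(o);
}

static int process(unsigned long start, unsigned long expected)
{
        struct obj *o = lookup(start);
        int ret = 0;

        if (!o)
                return -1;

        if (o->start != expected) {
                ret = -1;
                goto out;       /* before the fix, this path leaked the ref */
        }

        /* ... work on o ... */
out:
        obj_put(o);             /* the drop lives under 'out', as in the fix */
        return ret;
}

int main(void)
{
        printf("ret=%d\n", process(4096, 8192)); /* mismatch path, no leak */
        return 0;
}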
diff --git a/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch b/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
new file mode 100644
index 00000000000..35b6808cc2f
--- /dev/null
+++ b/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
@@ -0,0 +1,133 @@
+From 6d42c58b93ecfefaad182e663d984429ad079d18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 15 Mar 2024 06:31:15 -0400
+Subject: ring-buffer: Make wake once of ring_buffer_wait() more robust
+
+From: Steven Rostedt (Google)
+
+[ Upstream commit b70f2938242a028f8e9473781ede175486a59dc8 ]
+
+The default behavior of ring_buffer_wait() when passed a NULL "cond"
+parameter is to exit the function the first time it is woken up. The
+current implementation uses a counter that starts at zero and when it is
+greater than one it exits the wait_event_interruptible().
+
+But this relies on the internal working of wait_event_interruptible() as
+that code basically has:
+
+  if (cond)
+          return;
+  prepare_to_wait();
+  if (!cond)
+          schedule();
+  finish_wait();
+
+That is, cond is called twice before it sleeps. The default cond of
+ring_buffer_wait() needs to account for that and wait for its counter to
+increment twice before exiting.
+
+Instead, use the seq/atomic_inc logic that is used by the tracing code
+that calls this function. Add an atomic_t seq to rb_irq_work and when cond
+is NULL, have the default callback take a descriptor as its data that
+holds the rbwork and the value of the seq when it started.
+
+The wakeups will now increment the rbwork->seq and the cond callback will
+simply check if that number is different, and no longer have to rely on
+the implementation of wait_event_interruptible().
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240315063115.6cb5d205@gandalf.local.home
+
+Cc: Masami Hiramatsu
+Cc: Mathieu Desnoyers
+Fixes: 7af9ded0c2ca ("ring-buffer: Use wait_event_interruptible() in ring_buffer_wait()")
+Signed-off-by: Steven Rostedt (Google)
+Signed-off-by: Sasha Levin
+---
+ kernel/trace/ring_buffer.c | 34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index ad0d475d1f570..43060a7ae15e7 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -384,6 +384,7 @@ struct rb_irq_work {
+     struct irq_work work;
+     wait_queue_head_t waiters;
+     wait_queue_head_t full_waiters;
++    atomic_t seq;
+     bool waiters_pending;
+     bool full_waiters_pending;
+     bool wakeup_full;
+@@ -753,6 +754,9 @@ static void rb_wake_up_waiters(struct irq_work *work)
+ {
+     struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
++    /* For waiters waiting for the first wake up */
++    (void)atomic_fetch_inc_release(&rbwork->seq);
++
+     wake_up_all(&rbwork->waiters);
+     if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+         /* Only cpu_buffer sets the above flags */
+@@ -881,20 +885,21 @@ rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+     return false;
+ }
+
++struct rb_wait_data {
++    struct rb_irq_work *irq_work;
++    int seq;
++};
++
+ /*
+  * The default wait condition for ring_buffer_wait() is to just to exit the
+  * wait loop the first time it is woken up.
+  */
+ static bool rb_wait_once(void *data)
+ {
+-    long *once = data;
++    struct rb_wait_data *rdata = data;
++    struct rb_irq_work *rbwork = rdata->irq_work;
+
+-    /* wait_event() actually calls this twice before scheduling*/
+-    if (*once > 1)
+-        return true;
+-
+-    (*once)++;
+-    return false;
++    return atomic_read_acquire(&rbwork->seq) != rdata->seq;
+ }
+
+ /**
+@@ -915,14 +920,9 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+     struct ring_buffer_per_cpu *cpu_buffer;
+     struct wait_queue_head *waitq;
+     struct rb_irq_work *rbwork;
+-    long once = 0;
++    struct rb_wait_data rdata;
+     int ret = 0;
+
+-    if (!cond) {
+-        cond = rb_wait_once;
+-        data = &once;
+-    }
+-
+     /*
+      * Depending on what the caller is waiting for, either any
+      * data in any cpu buffer, or a specific buffer, put the
+@@ -944,6 +944,14 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+     else
+         waitq = &rbwork->waiters;
+
++    /* Set up to exit loop as soon as it is woken */
++    if (!cond) {
++        cond = rb_wait_once;
++        rdata.irq_work = rbwork;
++        rdata.seq = atomic_read_acquire(&rbwork->seq);
++        data = &rdata;
++    }
++
+     ret = wait_event_interruptible((*waitq),
+                 rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
+
+-- 
+2.43.0
+
diff --git a/queue-6.8/series b/queue-6.8/series
index cf466576f17..c195e5697bb 100644
--- a/queue-6.8/series
+++ b/queue-6.8/series
@@ -343,3 +343,6 @@ revert-usb-phy-generic-get-the-vbus-supply.patch
 usb-cdc-wdm-close-race-between-read-and-workqueue.patch
 usb-misc-ljca-fix-double-free-in-error-handling-path.patch
 usb-uas-return-enodev-when-submit-urbs-fail-with-device-not-attached.patch
+vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
+ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
+btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
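The wake-once scheme adopted by the ring-buffer patch above (snapshot a sequence counter before sleeping, have the waker increment it, make the wait condition simply "the counter moved") can be sketched in user space. This is a sketch of the idea only, assuming pthread primitives in place of the kernel's wait_event_interruptible() and irq_work wakeups; it is not the kernel code.

/* Sketch only: the waiter snapshots the sequence counter, like
 * rdata.seq in the patch, and the condition holds once the counter
 * differs, no matter how many times the condition is polled. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int seq;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void *waker(void *arg)
{
        (void)arg;
        usleep(100 * 1000);
        /* mirrors atomic_fetch_inc_release(&rbwork->seq) */
        atomic_fetch_add_explicit(&seq, 1, memory_order_release);
        pthread_mutex_lock(&lock);
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;
        /* snapshot before waiting, like rdata.seq in the patch */
        int start = atomic_load_explicit(&seq, memory_order_acquire);

        pthread_create(&t, NULL, waker, NULL);

        pthread_mutex_lock(&lock);
        /* The condition may be evaluated any number of times before or
         * after sleeping; it only becomes true once seq has moved. */
        while (atomic_load_explicit(&seq, memory_order_acquire) == start)
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("woken once, seq moved from %d\n", start);
        return 0;
}

The acquire/release pairing mirrors the patch's atomic_read_acquire()/atomic_fetch_inc_release(): once the waiter sees the new sequence number, it also sees whatever the waker published before bumping it. That indifference to how often the condition is polled is exactly the property the old twice-incremented counter lacked.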
diff --git a/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch b/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
new file mode 100644
index 00000000000..135633fcd7d
--- /dev/null
+++ b/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
@@ -0,0 +1,110 @@
+From 239660ef1c64bb092520887e11b868aa9e300ef5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 8 Mar 2024 10:21:48 -0800
+Subject: vfio/pds: Make sure migration file isn't accessed after reset
+
+From: Brett Creeley
+
+[ Upstream commit 457f7308254756b6e4b8fc3876cb770dcf0e7cc7 ]
+
+It's possible the migration file is accessed after reset when it has
+been cleaned up, especially when the reset is initiated by the device.
+This is because the driver doesn't rip out the filep when cleaning up;
+it only frees the related page structures and sets its local struct
+pds_vfio_lm_file pointer to NULL. This can cause a NULL pointer
+dereference, which is shown in the example below during a restore after
+a device-initiated reset:
+
+BUG: kernel NULL pointer dereference, address: 000000000000000c
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+RIP: 0010:pds_vfio_get_file_page+0x5d/0xf0 [pds_vfio_pci]
+[...]
+Call Trace:
+ <TASK>
+ pds_vfio_restore_write+0xf6/0x160 [pds_vfio_pci]
+ vfs_write+0xc9/0x3f0
+ ? __fget_light+0xc9/0x110
+ ksys_write+0xb5/0xf0
+ __x64_sys_write+0x1a/0x20
+ do_syscall_64+0x38/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+[...]
+
+Add a disabled flag to the driver's struct pds_vfio_lm_file that gets
+set during cleanup. Then make sure to check the flag when the migration
+file is accessed via its file_operations. By default this flag will be
+false as the memory for struct pds_vfio_lm_file is kzalloc'd, which means
+the struct pds_vfio_lm_file is enabled and accessible. Also, since the
+file_operations and driver's migration file cleanup happen under the
+protection of the same pds_vfio_lm_file.lock, using this flag is thread
+safe.
+
+Fixes: 8512ed256334 ("vfio/pds: Always clear the save/restore FDs on reset")
+Reviewed-by: Shannon Nelson
+Signed-off-by: Brett Creeley
+Link: https://lore.kernel.org/r/20240308182149.22036-2-brett.creeley@amd.com
+Signed-off-by: Alex Williamson
+Signed-off-by: Sasha Levin
+---
+ drivers/vfio/pci/pds/lm.c | 13 +++++++++++++
+ drivers/vfio/pci/pds/lm.h |  1 +
+ 2 files changed, 14 insertions(+)
+
+diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c
+index 79fe2e66bb498..6b94cc0bf45b4 100644
+--- a/drivers/vfio/pci/pds/lm.c
++++ b/drivers/vfio/pci/pds/lm.c
+@@ -92,8 +92,10 @@ static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file)
+ {
+     mutex_lock(&lm_file->lock);
+
++    lm_file->disabled = true;
+     lm_file->size = 0;
+     lm_file->alloc_size = 0;
++    lm_file->filep->f_pos = 0;
+
+     /* Free scatter list of file pages */
+     sg_free_table(&lm_file->sg_table);
+@@ -183,6 +185,12 @@ static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf,
+     pos = &filp->f_pos;
+
+     mutex_lock(&lm_file->lock);
++
++    if (lm_file->disabled) {
++        done = -ENODEV;
++        goto out_unlock;
++    }
++
+     if (*pos > lm_file->size) {
+         done = -EINVAL;
+         goto out_unlock;
+@@ -283,6 +291,11 @@ static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf,
+
+     mutex_lock(&lm_file->lock);
+
++    if (lm_file->disabled) {
++        done = -ENODEV;
++        goto out_unlock;
++    }
++
+     while (len) {
+         size_t page_offset;
+         struct page *page;
+diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h
+index 13be893198b74..9511b1afc6a11 100644
+--- a/drivers/vfio/pci/pds/lm.h
++++ b/drivers/vfio/pci/pds/lm.h
+@@ -27,6 +27,7 @@ struct pds_vfio_lm_file {
+     struct scatterlist *last_offset_sg; /* Iterator */
+     unsigned int sg_last_entry;
+     unsigned long last_offset;
++    bool disabled;
+ };
+
+ struct pds_vfio_pci_device;
+-- 
+2.43.0
+
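The vfio/pds fix above is an instance of a general teardown guard: cleanup flips a disabled flag under the same lock the file operations take, so a late access fails with -ENODEV instead of touching freed state. Below is a minimal user-space sketch of that pattern; the types and names (lm_file, lm_read, lm_cleanup) are illustrative stand-ins, not the driver's own.

/* Sketch only: flag and data are protected by one lock, so a reader
 * that takes the lock after cleanup sees disabled == true and bails. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct lm_file {
        pthread_mutex_t lock;
        bool disabled;
        size_t size;
};

static int lm_read(struct lm_file *f)
{
        int ret = 0;

        pthread_mutex_lock(&f->lock);
        if (f->disabled) {              /* same check the patch adds */
                ret = -ENODEV;
                goto out_unlock;
        }
        /* ... copy out up to f->size bytes ... */
out_unlock:
        pthread_mutex_unlock(&f->lock);
        return ret;
}

static void lm_cleanup(struct lm_file *f)
{
        pthread_mutex_lock(&f->lock);
        f->disabled = true;             /* set during reset/cleanup */
        f->size = 0;
        pthread_mutex_unlock(&f->lock);
}

int main(void)
{
        struct lm_file f = { .lock = PTHREAD_MUTEX_INITIALIZER };

        lm_cleanup(&f);
        printf("read after cleanup: %d\n", lm_read(&f)); /* -19 on Linux */
        return 0;
}

Because the flag starts false in zeroed memory (kzalloc in the driver, static/zero initialization here), no extra setup is needed on the create path; only teardown has to act.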