From: Sasha Levin
Date: Sun, 31 Mar 2024 13:26:23 +0000 (-0400)
Subject: Fixes for 6.8
X-Git-Tag: v6.7.12~90
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1c3a6fd1635e69a543f8ae7fb0484d75f233d7ae;p=thirdparty%2Fkernel%2Fstable-queue.git

Fixes for 6.8

Signed-off-by: Sasha Levin
---

diff --git a/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch b/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
new file mode 100644
index 00000000000..e720147307b
--- /dev/null
+++ b/queue-6.8/btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
@@ -0,0 +1,45 @@
+From 8d619ded0caaaca65facceea8842dfc41e3370b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 13 Mar 2024 11:37:31 +0000
+Subject: btrfs: fix extent map leak in unexpected scenario at
+ unpin_extent_cache()
+
+From: Filipe Manana
+
+[ Upstream commit 8a565ec04d6c43f330e7401e5af3458431b29bc6 ]
+
+At unpin_extent_cache() if we happen to find an extent map with an
+unexpected start offset, we jump to the 'out' label and never release the
+reference we added to the extent map through the call to
+lookup_extent_mapping(), therefore resulting in a leak. So fix this by
+moving the free_extent_map() under the 'out' label.
+
+Fixes: c03c89f821e5 ("btrfs: handle errors returned from unpin_extent_cache()")
+Reviewed-by: Qu Wenruo
+Reviewed-by: Anand Jain
+Signed-off-by: Filipe Manana
+Reviewed-by: David Sterba
+Signed-off-by: David Sterba
+Signed-off-by: Sasha Levin
+---
+ fs/btrfs/extent_map.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
+index c02039db5d247..76378382dd8c4 100644
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -342,9 +342,9 @@ int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen)
+         em->mod_len = em->len;
+     }
+
+-    free_extent_map(em);
+ out:
+     write_unlock(&tree->lock);
++    free_extent_map(em);
+     return ret;
+
+ }
+-- 
+2.43.0
+
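The bug class fixed above is a common one: a lookup helper hands back a referenced object, and an early "goto out" on an unexpected value bypasses the reference drop. Below is a minimal user-space sketch of the pattern and the shape of the fix, with the drop living under the common exit label. All names here (obj, lookup, obj_put, process) are illustrative stand-ins, not the btrfs API.

/* Sketch only: a lookup takes a reference, and every exit path,
 * including the unexpected-value one, must drop it. */
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refcount;
        unsigned long start;
};

static struct obj *lookup(unsigned long start)
{
        struct obj *o = malloc(sizeof(*o));

        if (!o)
                return NULL;
        o->refcount = 1;        /* lookup hands back a reference */
        o->start = start;
        return o;
}

static void obj_put(struct obj *o)
{
        if (o && --o->refcount == 0)
                free(o);
}

static int process(unsigned long start, unsigned long expected)
{
        struct obj *o = lookup(start);
        int ret = 0;

        if (!o)
                return -1;

        if (o->start != expected) {
                ret = -1;
                goto out;       /* before the fix, this path leaked the ref */
        }

        /* ... work on o ... */
out:
        obj_put(o);             /* the drop lives under 'out', as in the fix */
        return ret;
}

int main(void)
{
        printf("ret=%d\n", process(4096, 8192)); /* mismatch path, no leak */
        return 0;
}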
diff --git a/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch b/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
new file mode 100644
index 00000000000..35b6808cc2f
--- /dev/null
+++ b/queue-6.8/ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
@@ -0,0 +1,133 @@
+From 6d42c58b93ecfefaad182e663d984429ad079d18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 15 Mar 2024 06:31:15 -0400
+Subject: ring-buffer: Make wake once of ring_buffer_wait() more robust
+
+From: Steven Rostedt (Google)
+
+[ Upstream commit b70f2938242a028f8e9473781ede175486a59dc8 ]
+
+The default behavior of ring_buffer_wait() when passed a NULL "cond"
+parameter is to exit the function the first time it is woken up. The
+current implementation uses a counter that starts at zero and when it is
+greater than one it exits the wait_event_interruptible().
+
+But this relies on the internal working of wait_event_interruptible() as
+that code basically has:
+
+  if (cond)
+          return;
+  prepare_to_wait();
+  if (!cond)
+          schedule();
+  finish_wait();
+
+That is, cond is called twice before it sleeps. The default cond of
+ring_buffer_wait() needs to account for that and wait for its counter to
+increment twice before exiting.
+
+Instead, use the seq/atomic_inc logic that is used by the tracing code
+that calls this function. Add an atomic_t seq to rb_irq_work and when cond
+is NULL, have the default callback take a descriptor as its data that
+holds the rbwork and the value of the seq when it started.
+
+The wakeups will now increment the rbwork->seq and the cond callback will
+simply check if that number is different, and no longer have to rely on
+the implementation of wait_event_interruptible().
+
+Link: https://lore.kernel.org/linux-trace-kernel/20240315063115.6cb5d205@gandalf.local.home
+
+Cc: Masami Hiramatsu
+Cc: Mathieu Desnoyers
+Fixes: 7af9ded0c2ca ("ring-buffer: Use wait_event_interruptible() in ring_buffer_wait()")
+Signed-off-by: Steven Rostedt (Google)
+Signed-off-by: Sasha Levin
+---
+ kernel/trace/ring_buffer.c | 34 +++++++++++++++++++++-------------
+ 1 file changed, 21 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
+index ad0d475d1f570..43060a7ae15e7 100644
+--- a/kernel/trace/ring_buffer.c
++++ b/kernel/trace/ring_buffer.c
+@@ -384,6 +384,7 @@ struct rb_irq_work {
+     struct irq_work work;
+     wait_queue_head_t waiters;
+     wait_queue_head_t full_waiters;
++    atomic_t seq;
+     bool waiters_pending;
+     bool full_waiters_pending;
+     bool wakeup_full;
+@@ -753,6 +754,9 @@ static void rb_wake_up_waiters(struct irq_work *work)
+ {
+     struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
+
++    /* For waiters waiting for the first wake up */
++    (void)atomic_fetch_inc_release(&rbwork->seq);
++
+     wake_up_all(&rbwork->waiters);
+     if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
+         /* Only cpu_buffer sets the above flags */
+@@ -881,20 +885,21 @@ rb_wait_cond(struct rb_irq_work *rbwork, struct trace_buffer *buffer,
+     return false;
+ }
+
++struct rb_wait_data {
++    struct rb_irq_work *irq_work;
++    int seq;
++};
++
+ /*
+  * The default wait condition for ring_buffer_wait() is to just to exit the
+  * wait loop the first time it is woken up.
+  */
+ static bool rb_wait_once(void *data)
+ {
+-    long *once = data;
++    struct rb_wait_data *rdata = data;
++    struct rb_irq_work *rbwork = rdata->irq_work;
+
+-    /* wait_event() actually calls this twice before scheduling*/
+-    if (*once > 1)
+-        return true;
+-
+-    (*once)++;
+-    return false;
++    return atomic_read_acquire(&rbwork->seq) != rdata->seq;
+ }
+
+ /**
+@@ -915,14 +920,9 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+     struct ring_buffer_per_cpu *cpu_buffer;
+     struct wait_queue_head *waitq;
+     struct rb_irq_work *rbwork;
+-    long once = 0;
++    struct rb_wait_data rdata;
+     int ret = 0;
+
+-    if (!cond) {
+-        cond = rb_wait_once;
+-        data = &once;
+-    }
+-
+     /*
+      * Depending on what the caller is waiting for, either any
+      * data in any cpu buffer, or a specific buffer, put the
+@@ -944,6 +944,14 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
+     else
+         waitq = &rbwork->waiters;
+
++    /* Set up to exit loop as soon as it is woken */
++    if (!cond) {
++        cond = rb_wait_once;
++        rdata.irq_work = rbwork;
++        rdata.seq = atomic_read_acquire(&rbwork->seq);
++        data = &rdata;
++    }
++
+     ret = wait_event_interruptible((*waitq),
+                 rb_wait_cond(rbwork, buffer, cpu, full, cond, data));
+
+-- 
+2.43.0
+
diff --git a/queue-6.8/series b/queue-6.8/series
index cf466576f17..c195e5697bb 100644
--- a/queue-6.8/series
+++ b/queue-6.8/series
@@ -343,3 +343,6 @@ revert-usb-phy-generic-get-the-vbus-supply.patch
 usb-cdc-wdm-close-race-between-read-and-workqueue.patch
 usb-misc-ljca-fix-double-free-in-error-handling-path.patch
 usb-uas-return-enodev-when-submit-urbs-fail-with-device-not-attached.patch
+vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
+ring-buffer-make-wake-once-of-ring_buffer_wait-more-.patch
+btrfs-fix-extent-map-leak-in-unexpected-scenario-at-.patch
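The wake-once scheme adopted by the ring-buffer patch above (snapshot a sequence counter before sleeping, have the waker increment it, make the wait condition simply "the counter moved") can be sketched in user space. This is a sketch of the idea only, assuming pthread primitives in place of the kernel's wait_event_interruptible() and irq_work wakeups; it is not the kernel code.

/* Sketch only: the waiter snapshots the sequence counter, like
 * rdata.seq in the patch, and the condition holds once the counter
 * differs, no matter how many times the condition is polled. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int seq;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void *waker(void *arg)
{
        (void)arg;
        usleep(100 * 1000);
        /* mirrors atomic_fetch_inc_release(&rbwork->seq) */
        atomic_fetch_add_explicit(&seq, 1, memory_order_release);
        pthread_mutex_lock(&lock);
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;
        /* snapshot before waiting, like rdata.seq in the patch */
        int start = atomic_load_explicit(&seq, memory_order_acquire);

        pthread_create(&t, NULL, waker, NULL);

        pthread_mutex_lock(&lock);
        /* The condition may be evaluated any number of times before or
         * after sleeping; it only becomes true once seq has moved. */
        while (atomic_load_explicit(&seq, memory_order_acquire) == start)
                pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("woken once, seq moved from %d\n", start);
        return 0;
}

The acquire/release pairing mirrors the patch's atomic_read_acquire()/atomic_fetch_inc_release(): once the waiter sees the new sequence number, it also sees whatever the waker published before bumping it. That indifference to how often the condition is polled is exactly the property the old twice-incremented counter lacked.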
diff --git a/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch b/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
new file mode 100644
index 00000000000..135633fcd7d
--- /dev/null
+++ b/queue-6.8/vfio-pds-make-sure-migration-file-isn-t-accessed-aft.patch
@@ -0,0 +1,110 @@
+From 239660ef1c64bb092520887e11b868aa9e300ef5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 8 Mar 2024 10:21:48 -0800
+Subject: vfio/pds: Make sure migration file isn't accessed after reset
+
+From: Brett Creeley
+
+[ Upstream commit 457f7308254756b6e4b8fc3876cb770dcf0e7cc7 ]
+
+It's possible the migration file is accessed after reset when it has
+been cleaned up, especially when the reset is initiated by the device.
+This is because the driver doesn't rip out the filep when cleaning up;
+it only frees the related page structures and sets its local struct
+pds_vfio_lm_file pointer to NULL. This can cause a NULL pointer
+dereference, which is shown in the example below during a restore after
+a device-initiated reset:
+
+BUG: kernel NULL pointer dereference, address: 000000000000000c
+#PF: supervisor read access in kernel mode
+#PF: error_code(0x0000) - not-present page
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP NOPTI
+RIP: 0010:pds_vfio_get_file_page+0x5d/0xf0 [pds_vfio_pci]
+[...]
+Call Trace:
+ <TASK>
+ pds_vfio_restore_write+0xf6/0x160 [pds_vfio_pci]
+ vfs_write+0xc9/0x3f0
+ ? __fget_light+0xc9/0x110
+ ksys_write+0xb5/0xf0
+ __x64_sys_write+0x1a/0x20
+ do_syscall_64+0x38/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+[...]
+
+Add a disabled flag to the driver's struct pds_vfio_lm_file that gets
+set during cleanup. Then make sure to check the flag when the migration
+file is accessed via its file_operations. By default this flag will be
+false as the memory for struct pds_vfio_lm_file is kzalloc'd, which means
+the struct pds_vfio_lm_file is enabled and accessible. Also, since the
+file_operations and driver's migration file cleanup happen under the
+protection of the same pds_vfio_lm_file.lock, using this flag is thread
+safe.
+
+Fixes: 8512ed256334 ("vfio/pds: Always clear the save/restore FDs on reset")
+Reviewed-by: Shannon Nelson
+Signed-off-by: Brett Creeley
+Link: https://lore.kernel.org/r/20240308182149.22036-2-brett.creeley@amd.com
+Signed-off-by: Alex Williamson
+Signed-off-by: Sasha Levin
+---
+ drivers/vfio/pci/pds/lm.c | 13 +++++++++++++
+ drivers/vfio/pci/pds/lm.h |  1 +
+ 2 files changed, 14 insertions(+)
+
+diff --git a/drivers/vfio/pci/pds/lm.c b/drivers/vfio/pci/pds/lm.c
+index 79fe2e66bb498..6b94cc0bf45b4 100644
+--- a/drivers/vfio/pci/pds/lm.c
++++ b/drivers/vfio/pci/pds/lm.c
+@@ -92,8 +92,10 @@ static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file)
+ {
+     mutex_lock(&lm_file->lock);
+
++    lm_file->disabled = true;
+     lm_file->size = 0;
+     lm_file->alloc_size = 0;
++    lm_file->filep->f_pos = 0;
+
+     /* Free scatter list of file pages */
+     sg_free_table(&lm_file->sg_table);
+@@ -183,6 +185,12 @@ static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf,
+     pos = &filp->f_pos;
+
+     mutex_lock(&lm_file->lock);
++
++    if (lm_file->disabled) {
++        done = -ENODEV;
++        goto out_unlock;
++    }
++
+     if (*pos > lm_file->size) {
+         done = -EINVAL;
+         goto out_unlock;
+@@ -283,6 +291,11 @@ static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf,
+
+     mutex_lock(&lm_file->lock);
+
++    if (lm_file->disabled) {
++        done = -ENODEV;
++        goto out_unlock;
++    }
++
+     while (len) {
+         size_t page_offset;
+         struct page *page;
+diff --git a/drivers/vfio/pci/pds/lm.h b/drivers/vfio/pci/pds/lm.h
+index 13be893198b74..9511b1afc6a11 100644
+--- a/drivers/vfio/pci/pds/lm.h
++++ b/drivers/vfio/pci/pds/lm.h
+@@ -27,6 +27,7 @@ struct pds_vfio_lm_file {
+     struct scatterlist *last_offset_sg; /* Iterator */
+     unsigned int sg_last_entry;
+     unsigned long last_offset;
++    bool disabled;
+ };
+
+ struct pds_vfio_pci_device;
+-- 
+2.43.0
+
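The vfio/pds fix above is an instance of a general teardown guard: cleanup flips a disabled flag under the same lock the file operations take, so a late access fails with -ENODEV instead of touching freed state. Below is a minimal user-space sketch of that pattern; the types and names (lm_file, lm_read, lm_cleanup) are illustrative stand-ins, not the driver's own.

/* Sketch only: flag and data are protected by one lock, so a reader
 * that takes the lock after cleanup sees disabled == true and bails. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct lm_file {
        pthread_mutex_t lock;
        bool disabled;
        size_t size;
};

static int lm_read(struct lm_file *f)
{
        int ret = 0;

        pthread_mutex_lock(&f->lock);
        if (f->disabled) {              /* same check the patch adds */
                ret = -ENODEV;
                goto out_unlock;
        }
        /* ... copy out up to f->size bytes ... */
out_unlock:
        pthread_mutex_unlock(&f->lock);
        return ret;
}

static void lm_cleanup(struct lm_file *f)
{
        pthread_mutex_lock(&f->lock);
        f->disabled = true;             /* set during reset/cleanup */
        f->size = 0;
        pthread_mutex_unlock(&f->lock);
}

int main(void)
{
        struct lm_file f = { .lock = PTHREAD_MUTEX_INITIALIZER };

        lm_cleanup(&f);
        printf("read after cleanup: %d\n", lm_read(&f)); /* -19 on Linux */
        return 0;
}

Because the flag starts false in zeroed memory (kzalloc in the driver, static/zero initialization here), no extra setup is needed on the create path; only teardown has to act.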