From e5e443f2866d7197efb3314f04efe3702bbae56f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 12 Aug 2024 16:23:12 +0200 Subject: [PATCH] 6.10-stable patches added patches: btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch idpf-fix-memleak-in-vport-interrupt-configuration.patch --- ...ult-in-during-direct-io-append-write.patch | 252 ++++++++++++++++++ ...eak-in-vport-interrupt-configuration.patch | 91 +++++++ queue-6.10/series | 2 + 3 files changed, 345 insertions(+) create mode 100644 queue-6.10/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch create mode 100644 queue-6.10/idpf-fix-memleak-in-vport-interrupt-configuration.patch diff --git a/queue-6.10/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch b/queue-6.10/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch new file mode 100644 index 00000000000..bdeebc2972f --- /dev/null +++ b/queue-6.10/btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch @@ -0,0 +1,252 @@ +From 939b656bc8ab203fdbde26ccac22bcb7f0985be5 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 26 Jul 2024 11:12:52 +0100 +Subject: btrfs: fix corruption after buffer fault in during direct IO append write + +From: Filipe Manana + +commit 939b656bc8ab203fdbde26ccac22bcb7f0985be5 upstream. + +During an append (O_APPEND write flag) direct IO write if the input buffer +was not previously faulted in, we can corrupt the file in a way that the +final size is unexpected and it includes an unexpected hole. + +The problem happens like this: + +1) We have an empty file, with size 0, for example; + +2) We do an O_APPEND direct IO with a length of 4096 bytes and the input + buffer is not currently faulted in; + +3) We enter btrfs_direct_write(), lock the inode and call + generic_write_checks(), which calls generic_write_checks_count(), and + that function sets the iocb position to 0 with the following code: + + if (iocb->ki_flags & IOCB_APPEND) + iocb->ki_pos = i_size_read(inode); + +4) We call btrfs_dio_write() and enter into iomap, which will end up + calling btrfs_dio_iomap_begin() and that calls + btrfs_get_blocks_direct_write(), where we update the i_size of the + inode to 4096 bytes; + +5) After btrfs_dio_iomap_begin() returns, iomap will attempt to access + the page of the write input buffer (at iomap_dio_bio_iter(), with a + call to bio_iov_iter_get_pages()) and fail with -EFAULT, which gets + returned to btrfs at btrfs_direct_write() via btrfs_dio_write(); + +6) At btrfs_direct_write() we get the -EFAULT error, unlock the inode, + fault in the write buffer and then goto to the label 'relock'; + +7) We lock again the inode, do all the necessary checks again and call + again generic_write_checks(), which calls generic_write_checks_count() + again, and there we set the iocb's position to 4K, which is the current + i_size of the inode, with the following code pointed above: + + if (iocb->ki_flags & IOCB_APPEND) + iocb->ki_pos = i_size_read(inode); + +8) Then we go again to btrfs_dio_write() and enter iomap and the write + succeeds, but it wrote to the file range [4K, 8K), leaving a hole in + the [0, 4K) range and an i_size of 8K, which goes against the + expectations of having the data written to the range [0, 4K) and get an + i_size of 4K. + +Fix this by not unlocking the inode before faulting in the input buffer, +in case we get -EFAULT or an incomplete write, and not jumping to the +'relock' label after faulting in the buffer - instead jump to a location +immediately before calling iomap, skipping all the write checks and +relocking. This solves this problem and it's fine even in case the input +buffer is memory mapped to the same file range, since only holding the +range locked in the inode's io tree can cause a deadlock, it's safe to +keep the inode lock (VFS lock), as was fixed and described in commit +51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO +reads and writes"). + +A sample reproducer provided by a reporter is the following: + + $ cat test.c + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif + + #include + #include + #include + #include + #include + + int main(int argc, char *argv[]) + { + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + int fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | + O_APPEND, 0644); + if (fd < 0) { + perror("creating test file"); + return 1; + } + + char *buf = mmap(NULL, 4096, PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ssize_t ret = write(fd, buf, 4096); + if (ret < 0) { + perror("pwritev2"); + return 1; + } + + struct stat stbuf; + ret = fstat(fd, &stbuf); + if (ret < 0) { + perror("stat"); + return 1; + } + + printf("size: %llu\n", (unsigned long long)stbuf.st_size); + return stbuf.st_size == 4096 ? 0 : 1; + } + +A test case for fstests will be sent soon. + +Reported-by: Hanna Czenczek +Link: https://lore.kernel.org/linux-btrfs/0b841d46-12fe-4e64-9abb-871d8d0de271@redhat.com/ +Fixes: 8184620ae212 ("btrfs: fix lost file sync on direct IO write with nowait and dsync iocb") +CC: stable@vger.kernel.org # 6.1+ +Tested-by: Hanna Czenczek +Reviewed-by: Josef Bacik +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 1 + + fs/btrfs/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++------------- + 2 files changed, 43 insertions(+), 13 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -457,6 +457,7 @@ struct btrfs_file_private { + void *filldir_buf; + u64 last_index; + struct extent_state *llseek_cached_state; ++ bool fsync_skip_inode_lock; + }; + + static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -1550,21 +1550,37 @@ relock: + * So here we disable page faults in the iov_iter and then retry if we + * got -EFAULT, faulting in the pages before the retry. + */ ++again: + from->nofault = true; + dio = btrfs_dio_write(iocb, from, written); + from->nofault = false; + +- /* +- * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync +- * iocb, and that needs to lock the inode. So unlock it before calling +- * iomap_dio_complete() to avoid a deadlock. +- */ +- btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); +- +- if (IS_ERR_OR_NULL(dio)) ++ if (IS_ERR_OR_NULL(dio)) { + ret = PTR_ERR_OR_ZERO(dio); +- else ++ } else { ++ struct btrfs_file_private stack_private = { 0 }; ++ struct btrfs_file_private *private; ++ const bool have_private = (file->private_data != NULL); ++ ++ if (!have_private) ++ file->private_data = &stack_private; ++ ++ /* ++ * If we have a synchoronous write, we must make sure the fsync ++ * triggered by the iomap_dio_complete() call below doesn't ++ * deadlock on the inode lock - we are already holding it and we ++ * can't call it after unlocking because we may need to complete ++ * partial writes due to the input buffer (or parts of it) not ++ * being already faulted in. ++ */ ++ private = file->private_data; ++ private->fsync_skip_inode_lock = true; + ret = iomap_dio_complete(dio); ++ private->fsync_skip_inode_lock = false; ++ ++ if (!have_private) ++ file->private_data = NULL; ++ } + + /* No increment (+=) because iomap returns a cumulative value. */ + if (ret > 0) +@@ -1591,10 +1607,12 @@ relock: + } else { + fault_in_iov_iter_readable(from, left); + prev_left = left; +- goto relock; ++ goto again; + } + } + ++ btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); ++ + /* + * If 'ret' is -ENOTBLK or we have not written all data, then it means + * we must fallback to buffered IO. +@@ -1793,6 +1811,7 @@ static inline bool skip_inode_logging(co + */ + int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) + { ++ struct btrfs_file_private *private = file->private_data; + struct dentry *dentry = file_dentry(file); + struct inode *inode = d_inode(dentry); + struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); +@@ -1802,6 +1821,7 @@ int btrfs_sync_file(struct file *file, l + int ret = 0, err; + u64 len; + bool full_sync; ++ const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); + + trace_btrfs_sync_file(file, datasync); + +@@ -1829,7 +1849,10 @@ int btrfs_sync_file(struct file *file, l + if (ret) + goto out; + +- btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ down_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + + atomic_inc(&root->log_batch); + +@@ -1853,7 +1876,10 @@ int btrfs_sync_file(struct file *file, l + */ + ret = start_ordered_ops(inode, start, end); + if (ret) { +- btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ up_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + goto out; + } + +@@ -1982,7 +2008,10 @@ int btrfs_sync_file(struct file *file, l + * file again, but that will end up using the synchronization + * inside btrfs_sync_log to keep things safe. + */ +- btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); ++ if (skip_ilock) ++ up_write(&BTRFS_I(inode)->i_mmap_lock); ++ else ++ btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + + if (ret == BTRFS_NO_LOG_SYNC) { + ret = btrfs_end_transaction(trans); diff --git a/queue-6.10/idpf-fix-memleak-in-vport-interrupt-configuration.patch b/queue-6.10/idpf-fix-memleak-in-vport-interrupt-configuration.patch new file mode 100644 index 00000000000..b10eb03ba12 --- /dev/null +++ b/queue-6.10/idpf-fix-memleak-in-vport-interrupt-configuration.patch @@ -0,0 +1,91 @@ +From 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 Mon Sep 17 00:00:00 2001 +From: Michal Kubiak +Date: Tue, 6 Aug 2024 15:09:21 -0700 +Subject: idpf: fix memleak in vport interrupt configuration + +From: Michal Kubiak + +commit 3cc88e8405b8d55e0ff035e31971aadd6baee2b6 upstream. + +The initialization of vport interrupt consists of two functions: + 1) idpf_vport_intr_init() where a generic configuration is done + 2) idpf_vport_intr_req_irq() where the irq for each q_vector is + requested. + +The first function used to create a base name for each interrupt using +"kasprintf()" call. Unfortunately, although that call allocated memory +for a text buffer, that memory was never released. + +Fix this by removing creating the interrupt base name in 1). +Instead, always create a full interrupt name in the function 2), because +there is no need to create a base name separately, considering that the +function 2) is never called out of idpf_vport_intr_init() context. + +Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport") +Cc: stable@vger.kernel.org # 6.7 +Signed-off-by: Michal Kubiak +Reviewed-by: Pavan Kumar Linga +Signed-off-by: Alexander Lobakin +Reviewed-by: Simon Horman +Tested-by: Krishneil Singh +Signed-off-by: Tony Nguyen +Link: https://patch.msgid.link/20240806220923.3359860-3-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++----------- + 1 file changed, 8 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c ++++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c +@@ -3614,13 +3614,15 @@ void idpf_vport_intr_update_itr_ena_irq( + /** + * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport + * @vport: main vport structure +- * @basename: name for the vector + */ +-static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) ++static int idpf_vport_intr_req_irq(struct idpf_vport *vport) + { + struct idpf_adapter *adapter = vport->adapter; ++ const char *drv_name, *if_name, *vec_name; + int vector, err, irq_num, vidx; +- const char *vec_name; ++ ++ drv_name = dev_driver_string(&adapter->pdev->dev); ++ if_name = netdev_name(vport->netdev); + + for (vector = 0; vector < vport->num_q_vectors; vector++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; +@@ -3637,8 +3639,8 @@ static int idpf_vport_intr_req_irq(struc + else + continue; + +- q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", +- basename, vec_name, vidx); ++ q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, ++ if_name, vec_name, vidx); + + err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, + q_vector->name, q_vector); +@@ -4148,7 +4150,6 @@ error: + */ + int idpf_vport_intr_init(struct idpf_vport *vport) + { +- char *int_name; + int err; + + err = idpf_vport_intr_init_vec_idx(vport); +@@ -4162,11 +4163,7 @@ int idpf_vport_intr_init(struct idpf_vpo + if (err) + goto unroll_vectors_alloc; + +- int_name = kasprintf(GFP_KERNEL, "%s-%s", +- dev_driver_string(&vport->adapter->pdev->dev), +- vport->netdev->name); +- +- err = idpf_vport_intr_req_irq(vport, int_name); ++ err = idpf_vport_intr_req_irq(vport); + if (err) + goto unroll_vectors_alloc; + diff --git a/queue-6.10/series b/queue-6.10/series index ff0e2d1bf8f..6e924f8b68d 100644 --- a/queue-6.10/series +++ b/queue-6.10/series @@ -250,3 +250,5 @@ mptcp-pm-deny-endp-with-signal-subflow-port.patch block-use-the-right-type-for-stub-rq_integrity_vec.patch revert-drm-amd-display-handle-hpd_irq-for-internal-link.patch revert-drm-amd-display-add-null-check-for-afb-before-dereferencing-in-amdgpu_dm_plane_handle_cursor_update.patch +btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch +idpf-fix-memleak-in-vport-interrupt-configuration.patch -- 2.47.3