From: Greg Kroah-Hartman Date: Mon, 7 Nov 2022 12:22:08 +0000 (+0100) Subject: 6.0-stable patches X-Git-Tag: v4.9.333~49 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6bcaddb029092613318ab2d86dbfc0568fcf4cd2;p=thirdparty%2Fkernel%2Fstable-queue.git 6.0-stable patches added patches: btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch --- diff --git a/queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch b/queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch new file mode 100644 index 00000000000..1cad9185a61 --- /dev/null +++ b/queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch @@ -0,0 +1,152 @@ +From 8184620ae21213d51eaf2e0bd4186baacb928172 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Fri, 28 Oct 2022 13:15:35 +0100 +Subject: btrfs: fix lost file sync on direct IO write with nowait and dsync iocb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Filipe Manana + +commit 8184620ae21213d51eaf2e0bd4186baacb928172 upstream. + +When doing a direct IO write using a iocb with nowait and dsync set, we +end up not syncing the file once the write completes. + +This is because we tell iomap to not call generic_write_sync(), which +would result in calling btrfs_sync_file(), in order to avoid a deadlock +since iomap can call it while we are holding the inode's lock and +btrfs_sync_file() needs to acquire the inode's lock. The deadlock happens +only if the write happens synchronously, when iomap_dio_rw() calls +iomap_dio_complete() before it returns. Instead we do the sync ourselves +at btrfs_do_write_iter(). + +For a nowait write however we can end up not doing the sync ourselves at +at btrfs_do_write_iter() because the write could have been queued, and +therefore we get -EIOCBQUEUED returned from iomap in such case. That makes +us skip the sync call at btrfs_do_write_iter(), as we don't do it for +any error returned from btrfs_direct_write(). We can't simply do the call +even if -EIOCBQUEUED is returned, since that would block the task waiting +for IO, both for the data since there are bios still in progress as well +as potentially blocking when joining a log transaction and when syncing +the log (writing log trees, super blocks, etc). + +So let iomap do the sync call itself and in order to avoid deadlocks for +the case of synchronous writes (without nowait), use __iomap_dio_rw() and +have ourselves call iomap_dio_complete() after unlocking the inode. + +A test case will later be sent for fstests, after this is fixed in Linus' +tree. + +Fixes: 51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO reads and writes") +Reported-by: Марк Коренберг +Link: https://lore.kernel.org/linux-btrfs/CAEmTpZGRKbzc16fWPvxbr6AfFsQoLmz-Lcg-7OgJOZDboJ+SGQ@mail.gmail.com/ +CC: stable@vger.kernel.org # 6.0+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 5 ++++- + fs/btrfs/file.c | 22 ++++++++++++++++------ + fs/btrfs/inode.c | 14 +++++++++++--- + 3 files changed, 31 insertions(+), 10 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3407,7 +3407,10 @@ ssize_t btrfs_encoded_read(struct kiocb + ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from, + const struct btrfs_ioctl_encoded_io_args *encoded); + +-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before); ++ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, ++ size_t done_before); ++struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, ++ size_t done_before); + + extern const struct dentry_operations btrfs_dentry_operations; + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -1889,6 +1889,7 @@ static ssize_t btrfs_direct_write(struct + loff_t endbyte; + ssize_t err; + unsigned int ilock_flags = 0; ++ struct iomap_dio *dio; + + if (iocb->ki_flags & IOCB_NOWAIT) + ilock_flags |= BTRFS_ILOCK_TRY; +@@ -1949,11 +1950,22 @@ relock: + * So here we disable page faults in the iov_iter and then retry if we + * got -EFAULT, faulting in the pages before the retry. + */ +-again: + from->nofault = true; +- err = btrfs_dio_rw(iocb, from, written); ++ dio = btrfs_dio_write(iocb, from, written); + from->nofault = false; + ++ /* ++ * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync ++ * iocb, and that needs to lock the inode. So unlock it before calling ++ * iomap_dio_complete() to avoid a deadlock. ++ */ ++ btrfs_inode_unlock(inode, ilock_flags); ++ ++ if (IS_ERR_OR_NULL(dio)) ++ err = PTR_ERR_OR_ZERO(dio); ++ else ++ err = iomap_dio_complete(dio); ++ + /* No increment (+=) because iomap returns a cumulative value. */ + if (err > 0) + written = err; +@@ -1979,12 +1991,10 @@ again: + } else { + fault_in_iov_iter_readable(from, left); + prev_left = left; +- goto again; ++ goto relock; + } + } + +- btrfs_inode_unlock(inode, ilock_flags); +- + /* + * If 'err' is -ENOTBLK or we have not written all data, then it means + * we must fallback to buffered IO. +@@ -3787,7 +3797,7 @@ again: + */ + pagefault_disable(); + to->nofault = true; +- ret = btrfs_dio_rw(iocb, to, read); ++ ret = btrfs_dio_read(iocb, to, read); + to->nofault = false; + pagefault_enable(); + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -8241,13 +8241,21 @@ static const struct iomap_dio_ops btrfs_ + .bio_set = &btrfs_dio_bioset, + }; + +-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) ++ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before) + { + struct btrfs_dio_data data; + + return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, +- IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC, +- &data, done_before); ++ IOMAP_DIO_PARTIAL, &data, done_before); ++} ++ ++struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter, ++ size_t done_before) ++{ ++ struct btrfs_dio_data data; ++ ++ return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops, ++ IOMAP_DIO_PARTIAL, &data, done_before); + } + + static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/queue-6.0/series b/queue-6.0/series index 0ffc31242fc..6ac76c59ad7 100644 --- a/queue-6.0/series +++ b/queue-6.0/series @@ -126,3 +126,4 @@ bluetooth-l2cap-fix-attempting-to-access-uninitialized-memory.patch fscrypt-stop-using-keyrings-subsystem-for-fscrypt_master_key.patch fscrypt-fix-keyring-memory-leak-on-mount-failure.patch clk-renesas-r8a779g0-add-sasyncper-clocks.patch +btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch