]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.0-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 12:22:08 +0000 (13:22 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 12:22:08 +0000 (13:22 +0100)
added patches:
btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch

queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch [new file with mode: 0644]
queue-6.0/series

diff --git a/queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch b/queue-6.0/btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch
new file mode 100644 (file)
index 0000000..1cad918
--- /dev/null
@@ -0,0 +1,152 @@
+From 8184620ae21213d51eaf2e0bd4186baacb928172 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Fri, 28 Oct 2022 13:15:35 +0100
+Subject: btrfs: fix lost file sync on direct IO write with nowait and dsync iocb
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 8184620ae21213d51eaf2e0bd4186baacb928172 upstream.
+
+When doing a direct IO write using a iocb with nowait and dsync set, we
+end up not syncing the file once the write completes.
+
+This is because we tell iomap to not call generic_write_sync(), which
+would result in calling btrfs_sync_file(), in order to avoid a deadlock
+since iomap can call it while we are holding the inode's lock and
+btrfs_sync_file() needs to acquire the inode's lock. The deadlock happens
+only if the write happens synchronously, when iomap_dio_rw() calls
+iomap_dio_complete() before it returns. Instead we do the sync ourselves
+at btrfs_do_write_iter().
+
+For a nowait write however we can end up not doing the sync ourselves at
+at btrfs_do_write_iter() because the write could have been queued, and
+therefore we get -EIOCBQUEUED returned from iomap in such case. That makes
+us skip the sync call at btrfs_do_write_iter(), as we don't do it for
+any error returned from btrfs_direct_write(). We can't simply do the call
+even if -EIOCBQUEUED is returned, since that would block the task waiting
+for IO, both for the data since there are bios still in progress as well
+as potentially blocking when joining a log transaction and when syncing
+the log (writing log trees, super blocks, etc).
+
+So let iomap do the sync call itself and in order to avoid deadlocks for
+the case of synchronous writes (without nowait), use __iomap_dio_rw() and
+have ourselves call iomap_dio_complete() after unlocking the inode.
+
+A test case will later be sent for fstests, after this is fixed in Linus'
+tree.
+
+Fixes: 51bd9563b678 ("btrfs: fix deadlock due to page faults during direct IO reads and writes")
+Reported-by: Марк Коренберг <socketpair@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CAEmTpZGRKbzc16fWPvxbr6AfFsQoLmz-Lcg-7OgJOZDboJ+SGQ@mail.gmail.com/
+CC: stable@vger.kernel.org # 6.0+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h |    5 ++++-
+ fs/btrfs/file.c  |   22 ++++++++++++++++------
+ fs/btrfs/inode.c |   14 +++++++++++---
+ 3 files changed, 31 insertions(+), 10 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3407,7 +3407,10 @@ ssize_t btrfs_encoded_read(struct kiocb
+ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+                            const struct btrfs_ioctl_encoded_io_args *encoded);
+-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before);
++ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
++                     size_t done_before);
++struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
++                                size_t done_before);
+ extern const struct dentry_operations btrfs_dentry_operations;
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -1889,6 +1889,7 @@ static ssize_t btrfs_direct_write(struct
+       loff_t endbyte;
+       ssize_t err;
+       unsigned int ilock_flags = 0;
++      struct iomap_dio *dio;
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               ilock_flags |= BTRFS_ILOCK_TRY;
+@@ -1949,11 +1950,22 @@ relock:
+        * So here we disable page faults in the iov_iter and then retry if we
+        * got -EFAULT, faulting in the pages before the retry.
+        */
+-again:
+       from->nofault = true;
+-      err = btrfs_dio_rw(iocb, from, written);
++      dio = btrfs_dio_write(iocb, from, written);
+       from->nofault = false;
++      /*
++       * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
++       * iocb, and that needs to lock the inode. So unlock it before calling
++       * iomap_dio_complete() to avoid a deadlock.
++       */
++      btrfs_inode_unlock(inode, ilock_flags);
++
++      if (IS_ERR_OR_NULL(dio))
++              err = PTR_ERR_OR_ZERO(dio);
++      else
++              err = iomap_dio_complete(dio);
++
+       /* No increment (+=) because iomap returns a cumulative value. */
+       if (err > 0)
+               written = err;
+@@ -1979,12 +1991,10 @@ again:
+               } else {
+                       fault_in_iov_iter_readable(from, left);
+                       prev_left = left;
+-                      goto again;
++                      goto relock;
+               }
+       }
+-      btrfs_inode_unlock(inode, ilock_flags);
+-
+       /*
+        * If 'err' is -ENOTBLK or we have not written all data, then it means
+        * we must fallback to buffered IO.
+@@ -3787,7 +3797,7 @@ again:
+        */
+       pagefault_disable();
+       to->nofault = true;
+-      ret = btrfs_dio_rw(iocb, to, read);
++      ret = btrfs_dio_read(iocb, to, read);
+       to->nofault = false;
+       pagefault_enable();
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -8241,13 +8241,21 @@ static const struct iomap_dio_ops btrfs_
+       .bio_set                = &btrfs_dio_bioset,
+ };
+-ssize_t btrfs_dio_rw(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
++ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter, size_t done_before)
+ {
+       struct btrfs_dio_data data;
+       return iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+-                          IOMAP_DIO_PARTIAL | IOMAP_DIO_NOSYNC,
+-                          &data, done_before);
++                          IOMAP_DIO_PARTIAL, &data, done_before);
++}
++
++struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
++                                size_t done_before)
++{
++      struct btrfs_dio_data data;
++
++      return __iomap_dio_rw(iocb, iter, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
++                          IOMAP_DIO_PARTIAL, &data, done_before);
+ }
+ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
index 0ffc31242fceb3a67e6e30d5df671a78d0e15a37..6ac76c59ad772c4362d86b914a392523403898bb 100644 (file)
@@ -126,3 +126,4 @@ bluetooth-l2cap-fix-attempting-to-access-uninitialized-memory.patch
 fscrypt-stop-using-keyrings-subsystem-for-fscrypt_master_key.patch
 fscrypt-fix-keyring-memory-leak-on-mount-failure.patch
 clk-renesas-r8a779g0-add-sasyncper-clocks.patch
+btrfs-fix-lost-file-sync-on-direct-io-write-with-nowait-and-dsync-iocb.patch