]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Drop bcachefs-revert-lockless-buffered-io-path.patch
authorSasha Levin <sashal@kernel.org>
Sun, 8 Sep 2024 13:30:44 +0000 (09:30 -0400)
committerSasha Levin <sashal@kernel.org>
Sun, 8 Sep 2024 13:30:44 +0000 (09:30 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch [deleted file]
queue-6.10/series

diff --git a/queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch b/queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch
deleted file mode 100644 (file)
index b3a41e1..0000000
+++ /dev/null
@@ -1,276 +0,0 @@
-From 9b6a94a42a3867dbb7ce7271f1d77958517823fe Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Sat, 31 Aug 2024 17:44:51 -0400
-Subject: bcachefs: Revert lockless buffered IO path
-
-From: Kent Overstreet <kent.overstreet@linux.dev>
-
-[ Upstream commit e3e6940940910c2287fe962bdf72015efd4fee81 ]
-
-We had a report of data corruption on nixos when building installer
-images.
-
-https://github.com/NixOS/nixpkgs/pull/321055#issuecomment-2184131334
-
-It seems that writes are being dropped, but only when issued by QEMU,
-and possibly only in snapshot mode. It's undetermined if it's write
-calls are being dropped or dirty folios.
-
-Further testing, via minimizing the original patch to just the change
-that skips the inode lock on non appends/truncates, reveals that it
-really is just not taking the inode lock that causes the corruption: it
-has nothing to do with the other logic changes for preserving write
-atomicity in corner cases.
-
-It's also kernel config dependent: it doesn't reproduce with the minimal
-kernel config that ktest uses, but it does reproduce with nixos's distro
-config. Bisection the kernel config initially pointer the finger at page
-migration or compaction, but it appears that was erroneous; we haven't
-yet determined what kernel config option actually triggers it.
-
-Sadly it appears this will have to be reverted since we're getting too
-close to release and my plate is full, but we'd _really_ like to fully
-debug it.
-
-My suspicion is that this patch is exposing a preexisting bug - the
-inode lock actually covers very little in IO paths, and we have a
-different lock (the pagecache add lock) that guards against races with
-truncate here.
-
-Fixes: 7e64c86cdc6c ("bcachefs: Buffered write path now can avoid the inode lock")
-Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- fs/bcachefs/errcode.h        |   1 -
- fs/bcachefs/fs-io-buffered.c | 149 ++++++++++-------------------------
- 2 files changed, 40 insertions(+), 110 deletions(-)
-
-diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
-index a268af3e52bf..81236e678866 100644
---- a/fs/bcachefs/errcode.h
-+++ b/fs/bcachefs/errcode.h
-@@ -256,7 +256,6 @@
-       x(BCH_ERR_nopromote,            nopromote_in_flight)                    \
-       x(BCH_ERR_nopromote,            nopromote_no_writes)                    \
-       x(BCH_ERR_nopromote,            nopromote_enomem)                       \
--      x(0,                            need_inode_lock)                        \
-       x(0,                            invalid_snapshot_node)                  \
-       x(0,                            option_needs_open_fs)
-diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
-index 54873ecc635c..98c1e26a313a 100644
---- a/fs/bcachefs/fs-io-buffered.c
-+++ b/fs/bcachefs/fs-io-buffered.c
-@@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
- static int __bch2_buffered_write(struct bch_inode_info *inode,
-                                struct address_space *mapping,
-                                struct iov_iter *iter,
--                               loff_t pos, unsigned len,
--                               bool inode_locked)
-+                               loff_t pos, unsigned len)
- {
-       struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       struct bch2_folio_reservation res;
-@@ -828,15 +827,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
-       BUG_ON(!fs.nr);
--      /*
--       * If we're not using the inode lock, we need to lock all the folios for
--       * atomiticity of writes vs. other writes:
--       */
--      if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
--              ret = -BCH_ERR_need_inode_lock;
--              goto out;
--      }
--
-       f = darray_first(fs);
-       if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
-               ret = bch2_read_single_folio(f, mapping);
-@@ -931,10 +921,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
-       end = pos + copied;
-       spin_lock(&inode->v.i_lock);
--      if (end > inode->v.i_size) {
--              BUG_ON(!inode_locked);
-+      if (end > inode->v.i_size)
-               i_size_write(&inode->v, end);
--      }
-       spin_unlock(&inode->v.i_lock);
-       f_pos = pos;
-@@ -978,68 +966,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
-       struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct bch_inode_info *inode = file_bch_inode(file);
--      loff_t pos;
--      bool inode_locked = false;
--      ssize_t written = 0, written2 = 0, ret = 0;
--
--      /*
--       * We don't take the inode lock unless i_size will be changing. Folio
--       * locks provide exclusion with other writes, and the pagecache add lock
--       * provides exclusion with truncate and hole punching.
--       *
--       * There is one nasty corner case where atomicity would be broken
--       * without great care: when copying data from userspace to the page
--       * cache, we do that with faults disable - a page fault would recurse
--       * back into the filesystem, taking filesystem locks again, and
--       * deadlock; so it's done with faults disabled, and we fault in the user
--       * buffer when we aren't holding locks.
--       *
--       * If we do part of the write, but we then race and in the userspace
--       * buffer have been evicted and are no longer resident, then we have to
--       * drop our folio locks to re-fault them in, breaking write atomicity.
--       *
--       * To fix this, we restart the write from the start, if we weren't
--       * holding the inode lock.
--       *
--       * There is another wrinkle after that; if we restart the write from the
--       * start, and then get an unrecoverable error, we _cannot_ claim to
--       * userspace that we did not write data we actually did - so we must
--       * track (written2) the most we ever wrote.
--       */
--
--      if ((iocb->ki_flags & IOCB_APPEND) ||
--          (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
--              inode_lock(&inode->v);
--              inode_locked = true;
--      }
--
--      ret = generic_write_checks(iocb, iter);
--      if (ret <= 0)
--              goto unlock;
--
--      ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
--      if (ret) {
--              if (!inode_locked) {
--                      inode_lock(&inode->v);
--                      inode_locked = true;
--                      ret = file_remove_privs_flags(file, 0);
--              }
--              if (ret)
--                      goto unlock;
--      }
--
--      ret = file_update_time(file);
--      if (ret)
--              goto unlock;
--
--      pos = iocb->ki_pos;
-+      loff_t pos = iocb->ki_pos;
-+      ssize_t written = 0;
-+      int ret = 0;
-       bch2_pagecache_add_get(inode);
--      if (!inode_locked &&
--          (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
--              goto get_inode_lock;
--
-       do {
-               unsigned offset = pos & (PAGE_SIZE - 1);
-               unsigned bytes = iov_iter_count(iter);
-@@ -1064,17 +996,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
-                       }
-               }
--              if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
--                      goto get_inode_lock;
--
-               if (unlikely(fatal_signal_pending(current))) {
-                       ret = -EINTR;
-                       break;
-               }
--              ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
--              if (ret == -BCH_ERR_need_inode_lock)
--                      goto get_inode_lock;
-+              ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
-               if (unlikely(ret < 0))
-                       break;
-@@ -1095,46 +1022,50 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
-               }
-               pos += ret;
-               written += ret;
--              written2 = max(written, written2);
--
--              if (ret != bytes && !inode_locked)
--                      goto get_inode_lock;
-               ret = 0;
-               balance_dirty_pages_ratelimited(mapping);
--
--              if (0) {
--get_inode_lock:
--                      bch2_pagecache_add_put(inode);
--                      inode_lock(&inode->v);
--                      inode_locked = true;
--                      bch2_pagecache_add_get(inode);
--
--                      iov_iter_revert(iter, written);
--                      pos -= written;
--                      written = 0;
--                      ret = 0;
--              }
-       } while (iov_iter_count(iter));
--      bch2_pagecache_add_put(inode);
--unlock:
--      if (inode_locked)
--              inode_unlock(&inode->v);
--      iocb->ki_pos += written;
-+      bch2_pagecache_add_put(inode);
--      ret = max(written, written2) ?: ret;
--      if (ret > 0)
--              ret = generic_write_sync(iocb, ret);
--      return ret;
-+      return written ? written : ret;
- }
--ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
-+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
- {
--      ssize_t ret = iocb->ki_flags & IOCB_DIRECT
--              ? bch2_direct_write(iocb, iter)
--              : bch2_buffered_write(iocb, iter);
-+      struct file *file = iocb->ki_filp;
-+      struct bch_inode_info *inode = file_bch_inode(file);
-+      ssize_t ret;
-+
-+      if (iocb->ki_flags & IOCB_DIRECT) {
-+              ret = bch2_direct_write(iocb, from);
-+              goto out;
-+      }
-+
-+      inode_lock(&inode->v);
-+
-+      ret = generic_write_checks(iocb, from);
-+      if (ret <= 0)
-+              goto unlock;
-+
-+      ret = file_remove_privs(file);
-+      if (ret)
-+              goto unlock;
-+
-+      ret = file_update_time(file);
-+      if (ret)
-+              goto unlock;
-+
-+      ret = bch2_buffered_write(iocb, from);
-+      if (likely(ret > 0))
-+              iocb->ki_pos += ret;
-+unlock:
-+      inode_unlock(&inode->v);
-+      if (ret > 0)
-+              ret = generic_write_sync(iocb, ret);
-+out:
-       return bch2_err_class(ret);
- }
--- 
-2.43.0
-
index 05075470a298efa0273c25eb9baad0447982e750..46c646fff888dea61b2a3b0a641bb3d8d1d96457 100644 (file)
@@ -172,7 +172,6 @@ bluetooth-hci_sync-introduce-hci_cmd_sync_run-hci_cm.patch
 bluetooth-mgmt-fix-not-generating-command-complete-f.patch
 bcachefs-add-printbuf-arg-to-bch2_parse_mount_opts.patch
 bcachefs-add-error-code-to-defer-option-parsing.patch
-bcachefs-revert-lockless-buffered-io-path.patch
 hwmon-ltc2991-fix-register-bits-defines.patch
 scripts-fix-gfp-translate-after-___gfp_-_bits-conver.patch
 igc-unlock-on-error-in-igc_io_resume.patch