From: Sasha Levin
Date: Sun, 8 Sep 2024 13:30:44 +0000 (-0400)
Subject: Drop bcachefs-revert-lockless-buffered-io-path.patch
X-Git-Tag: v4.19.322~64
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9a46b1b249bf1261c288bece0c5505138ddcd984;p=thirdparty%2Fkernel%2Fstable-queue.git

Drop bcachefs-revert-lockless-buffered-io-path.patch

Signed-off-by: Sasha Levin
---

diff --git a/queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch b/queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch
deleted file mode 100644
index b3a41e155b4..00000000000
--- a/queue-6.10/bcachefs-revert-lockless-buffered-io-path.patch
+++ /dev/null
@@ -1,276 +0,0 @@
-From 9b6a94a42a3867dbb7ce7271f1d77958517823fe Mon Sep 17 00:00:00 2001
-From: Sasha Levin
-Date: Sat, 31 Aug 2024 17:44:51 -0400
-Subject: bcachefs: Revert lockless buffered IO path
-
-From: Kent Overstreet
-
-[ Upstream commit e3e6940940910c2287fe962bdf72015efd4fee81 ]
-
-We had a report of data corruption on nixos when building installer
-images.
-
-https://github.com/NixOS/nixpkgs/pull/321055#issuecomment-2184131334
-
-It seems that writes are being dropped, but only when issued by QEMU,
-and possibly only in snapshot mode. It's undetermined whether it's the
-write calls that are being dropped or the dirty folios.
-
-Further testing, via minimizing the original patch to just the change
-that skips the inode lock on non-appends/truncates, reveals that it
-really is just not taking the inode lock that causes the corruption: it
-has nothing to do with the other logic changes for preserving write
-atomicity in corner cases.
-
-It's also kernel config dependent: it doesn't reproduce with the
-minimal kernel config that ktest uses, but it does reproduce with
-nixos's distro config. Bisecting the kernel config initially pointed
-the finger at page migration or compaction, but it appears that was
-erroneous; we haven't yet determined what kernel config option actually
-triggers it.
-
-Sadly it appears this will have to be reverted since we're getting too
-close to release and my plate is full, but we'd _really_ like to fully
-debug it.
-
-My suspicion is that this patch is exposing a preexisting bug - the
-inode lock actually covers very little in IO paths, and we have a
-different lock (the pagecache add lock) that guards against races with
-truncate here.
-
-Fixes: 7e64c86cdc6c ("bcachefs: Buffered write path now can avoid the inode lock")
-Signed-off-by: Kent Overstreet
-Signed-off-by: Sasha Levin
----
- fs/bcachefs/errcode.h        |   1 -
- fs/bcachefs/fs-io-buffered.c | 149 ++++++++++-------------------------
- 2 files changed, 40 insertions(+), 110 deletions(-)
-
-diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
-index a268af3e52bf..81236e678866 100644
---- a/fs/bcachefs/errcode.h
-+++ b/fs/bcachefs/errcode.h
-@@ -256,7 +256,6 @@
- 	x(BCH_ERR_nopromote,	nopromote_in_flight)	\
- 	x(BCH_ERR_nopromote,	nopromote_no_writes)	\
- 	x(BCH_ERR_nopromote,	nopromote_enomem)	\
--	x(0,			need_inode_lock)	\
- 	x(0,			invalid_snapshot_node)	\
- 	x(0,			option_needs_open_fs)
- 
-diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
-index 54873ecc635c..98c1e26a313a 100644
---- a/fs/bcachefs/fs-io-buffered.c
-+++ b/fs/bcachefs/fs-io-buffered.c
-@@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
- static int __bch2_buffered_write(struct bch_inode_info *inode,
- 				 struct address_space *mapping,
- 				 struct iov_iter *iter,
--				 loff_t pos, unsigned len,
--				 bool inode_locked)
-+				 loff_t pos, unsigned len)
- {
- 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
- 	struct bch2_folio_reservation res;
-@@ -828,15 +827,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
- 
- 	BUG_ON(!fs.nr);
- 
--	/*
--	 * If we're not using the inode lock, we need to lock all the folios for
--	 * atomiticity of writes vs. other writes:
--	 */
--	if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
--		ret = -BCH_ERR_need_inode_lock;
--		goto out;
--	}
--
- 	f = darray_first(fs);
- 	if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
- 		ret = bch2_read_single_folio(f, mapping);
-@@ -931,10 +921,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
- 	end = pos + copied;
- 
- 	spin_lock(&inode->v.i_lock);
--	if (end > inode->v.i_size) {
--		BUG_ON(!inode_locked);
-+	if (end > inode->v.i_size)
- 		i_size_write(&inode->v, end);
--	}
- 	spin_unlock(&inode->v.i_lock);
- 
- 	f_pos = pos;
-@@ -978,68 +966,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
- 	struct file *file = iocb->ki_filp;
- 	struct address_space *mapping = file->f_mapping;
- 	struct bch_inode_info *inode = file_bch_inode(file);
--	loff_t pos;
--	bool inode_locked = false;
--	ssize_t written = 0, written2 = 0, ret = 0;
--
--	/*
--	 * We don't take the inode lock unless i_size will be changing. Folio
--	 * locks provide exclusion with other writes, and the pagecache add lock
--	 * provides exclusion with truncate and hole punching.
--	 *
--	 * There is one nasty corner case where atomicity would be broken
--	 * without great care: when copying data from userspace to the page
--	 * cache, we do that with faults disable - a page fault would recurse
--	 * back into the filesystem, taking filesystem locks again, and
--	 * deadlock; so it's done with faults disabled, and we fault in the user
--	 * buffer when we aren't holding locks.
--	 *
--	 * If we do part of the write, but we then race and in the userspace
--	 * buffer have been evicted and are no longer resident, then we have to
--	 * drop our folio locks to re-fault them in, breaking write atomicity.
--	 *
--	 * To fix this, we restart the write from the start, if we weren't
--	 * holding the inode lock.
--	 *
--	 * There is another wrinkle after that; if we restart the write from the
--	 * start, and then get an unrecoverable error, we _cannot_ claim to
--	 * userspace that we did not write data we actually did - so we must
--	 * track (written2) the most we ever wrote.
--	 */
--
--	if ((iocb->ki_flags & IOCB_APPEND) ||
--	    (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
--		inode_lock(&inode->v);
--		inode_locked = true;
--	}
--
--	ret = generic_write_checks(iocb, iter);
--	if (ret <= 0)
--		goto unlock;
--
--	ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
--	if (ret) {
--		if (!inode_locked) {
--			inode_lock(&inode->v);
--			inode_locked = true;
--			ret = file_remove_privs_flags(file, 0);
--		}
--		if (ret)
--			goto unlock;
--	}
--
--	ret = file_update_time(file);
--	if (ret)
--		goto unlock;
--
--	pos = iocb->ki_pos;
-+	loff_t pos = iocb->ki_pos;
-+	ssize_t written = 0;
-+	int ret = 0;
- 
- 	bch2_pagecache_add_get(inode);
- 
--	if (!inode_locked &&
--	    (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
--		goto get_inode_lock;
--
- 	do {
- 		unsigned offset = pos & (PAGE_SIZE - 1);
- 		unsigned bytes = iov_iter_count(iter);
-@@ -1064,17 +996,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
- 			}
- 		}
- 
--		if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
--			goto get_inode_lock;
--
- 		if (unlikely(fatal_signal_pending(current))) {
- 			ret = -EINTR;
- 			break;
- 		}
- 
--		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
--		if (ret == -BCH_ERR_need_inode_lock)
--			goto get_inode_lock;
-+		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
- 		if (unlikely(ret < 0))
- 			break;
- 
-@@ -1095,46 +1022,50 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
- 		}
- 		pos += ret;
- 		written += ret;
--		written2 = max(written, written2);
--
--		if (ret != bytes && !inode_locked)
--			goto get_inode_lock;
- 		ret = 0;
- 
- 		balance_dirty_pages_ratelimited(mapping);
--
--		if (0) {
--get_inode_lock:
--			bch2_pagecache_add_put(inode);
--			inode_lock(&inode->v);
--			inode_locked = true;
--			bch2_pagecache_add_get(inode);
--
--			iov_iter_revert(iter, written);
--			pos -= written;
--			written = 0;
--			ret = 0;
--		}
- 	} while (iov_iter_count(iter));
--	bch2_pagecache_add_put(inode);
--unlock:
--	if (inode_locked)
--		inode_unlock(&inode->v);
- 
--	iocb->ki_pos += written;
-+	bch2_pagecache_add_put(inode);
- 
--	ret = max(written, written2) ?: ret;
--	if (ret > 0)
--		ret = generic_write_sync(iocb, ret);
--	return ret;
-+	return written ? written : ret;
- }
- 
--ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
-+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
- {
--	ssize_t ret = iocb->ki_flags & IOCB_DIRECT
--		? bch2_direct_write(iocb, iter)
--		: bch2_buffered_write(iocb, iter);
-+	struct file *file = iocb->ki_filp;
-+	struct bch_inode_info *inode = file_bch_inode(file);
-+	ssize_t ret;
-+
-+	if (iocb->ki_flags & IOCB_DIRECT) {
-+		ret = bch2_direct_write(iocb, from);
-+		goto out;
-+	}
-+
-+	inode_lock(&inode->v);
-+
-+	ret = generic_write_checks(iocb, from);
-+	if (ret <= 0)
-+		goto unlock;
-+
-+	ret = file_remove_privs(file);
-+	if (ret)
-+		goto unlock;
-+
-+	ret = file_update_time(file);
-+	if (ret)
-+		goto unlock;
-+
-+	ret = bch2_buffered_write(iocb, from);
-+	if (likely(ret > 0))
-+		iocb->ki_pos += ret;
-+unlock:
-+	inode_unlock(&inode->v);
- 
-+	if (ret > 0)
-+		ret = generic_write_sync(iocb, ret);
-+out:
- 	return bch2_err_class(ret);
- }
- 
--- 
-2.43.0
-
diff --git a/queue-6.10/series b/queue-6.10/series
index 05075470a29..46c646fff88 100644
--- a/queue-6.10/series
+++ b/queue-6.10/series
@@ -172,7 +172,6 @@ bluetooth-hci_sync-introduce-hci_cmd_sync_run-hci_cm.patch
 bluetooth-mgmt-fix-not-generating-command-complete-f.patch
 bcachefs-add-printbuf-arg-to-bch2_parse_mount_opts.patch
 bcachefs-add-error-code-to-defer-option-parsing.patch
-bcachefs-revert-lockless-buffered-io-path.patch
 hwmon-ltc2991-fix-register-bits-defines.patch
 scripts-fix-gfp-translate-after-___gfp_-_bits-conver.patch
 igc-unlock-on-error-in-igc_io_resume.patch