From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 12 Aug 2023 21:31:57 +0000 (+0200)
Subject: 5.15-stable patches
X-Git-Tag: v4.14.323~25
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=5483e9214ce01b043469d863cbf56733d4b8419f;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
	btrfs-don-t-stop-integrity-writeback-too-early.patch
	btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch
	btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch
	btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch
	btrfs-set-cache_block_group_error-if-we-find-an-error.patch
---

diff --git a/queue-5.15/btrfs-don-t-stop-integrity-writeback-too-early.patch b/queue-5.15/btrfs-don-t-stop-integrity-writeback-too-early.patch
new file mode 100644
index 00000000000..1a2a524c612
--- /dev/null
+++ b/queue-5.15/btrfs-don-t-stop-integrity-writeback-too-early.patch
@@ -0,0 +1,50 @@
+From effa24f689ce0948f68c754991a445a8d697d3a8 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 24 Jul 2023 06:26:53 -0700
+Subject: btrfs: don't stop integrity writeback too early
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit effa24f689ce0948f68c754991a445a8d697d3a8 upstream.
+
+extent_write_cache_pages stops writing pages as soon as nr_to_write hits
+zero.  That is the right thing for opportunistic writeback, but incorrect
+for data integrity writeback, which needs to ensure that no dirty pages
+are left in the range.  Thus only stop the writeback for WB_SYNC_NONE
+if nr_to_write hits 0.
+
+This is a port of write_cache_pages changes in commit 05fe478dd04e
+("mm: write_cache_pages integrity fix").
+
+Note that I've only triggered the problem with other changes to the btrfs
+writeback code, but this condition seems worthwhile fixing anyway.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ updated comment ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -4844,11 +4844,12 @@ retry:
+ 			}
+ 
+ 			/*
+-			 * the filesystem may choose to bump up nr_to_write.
++			 * The filesystem may choose to bump up nr_to_write.
+ 			 * We have to make sure to honor the new nr_to_write
+-			 * at any time
++			 * at any time.
+ 			 */
+-			nr_to_write_done = wbc->nr_to_write <= 0;
++			nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
++					    wbc->nr_to_write <= 0);
+ 		}
+ 		pagevec_release(&pvec);
+ 		cond_resched();
diff --git a/queue-5.15/btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch b/queue-5.15/btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch
new file mode 100644
index 00000000000..7906f4ff3db
--- /dev/null
+++ b/queue-5.15/btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch
@@ -0,0 +1,45 @@
+From 5c25699871112853f231e52d51c576d5c759a020 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 24 Jul 2023 06:26:54 -0700
+Subject: btrfs: don't wait for writeback on clean pages in extent_write_cache_pages
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 5c25699871112853f231e52d51c576d5c759a020 upstream.
+
+__extent_writepage could have started on more pages than the one it was
+called for.  This happens regularly for zoned file systems, and in theory
+could happen for compressed I/O if the worker thread was executed very
+quickly. For such pages extent_write_cache_pages waits for writeback
+to complete before moving on to the next page, which is highly inefficient
+as it blocks the flusher thread.
+
+Port over the PageDirty check that was added to write_cache_pages in
+commit 515f4a037fb ("mm: write_cache_pages optimise page cleaning") to
+fix this.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -5013,6 +5013,12 @@ retry:
+ 				continue;
+ 			}
+ 
+			if (!PageDirty(page)) {
+				/* Someone wrote it for us. */
+				unlock_page(page);
+				continue;
+			}
++
+ 			if (wbc->sync_mode != WB_SYNC_NONE) {
+ 				if (PageWriteback(page)) {
+ 					ret = flush_write_bio(epd);
diff --git a/queue-5.15/btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch b/queue-5.15/btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch
new file mode 100644
index 00000000000..2d5a06f118b
--- /dev/null
+++ b/queue-5.15/btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch
@@ -0,0 +1,116 @@
+From 05d7ce504545f7874529701664c90814ca645c5d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 17:20:42 +0800
+Subject: btrfs: exit gracefully if reloc roots don't match
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 05d7ce504545f7874529701664c90814ca645c5d upstream.
+
+[BUG]
+Syzbot reported a crash where an ASSERT() got triggered inside
+prepare_to_merge().
+
+[CAUSE]
+The root cause of the triggered ASSERT() is we can have a race between
+quota tree creation and relocation.
+
+This leads us to create a duplicated quota tree in the
+btrfs_read_fs_root() path, and since it's treated as fs tree, it would
+have ROOT_SHAREABLE flag, causing us to create a reloc tree for it.
+
+The bug itself is fixed by a dedicated patch for it, but this already
+taught us the ASSERT() is not something straightforward for
+developers.
+
+[ENHANCEMENT]
+Instead of using an ASSERT(), let's handle it gracefully and output
+extra info about the mismatched reloc roots to help debugging.
+
+Also with the above ASSERT() removed, we can trigger ASSERT(0)s inside
+merge_reloc_roots() later.
+Also replace those ASSERT(0)s with WARN_ON()s.
+
+CC: stable@vger.kernel.org # 5.15+
+Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c |   45 +++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 37 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1905,7 +1905,39 @@ again:
+ 				err = PTR_ERR(root);
+ 			break;
+ 		}
+-		ASSERT(root->reloc_root == reloc_root);
++
++		if (unlikely(root->reloc_root != reloc_root)) {
++			if (root->reloc_root) {
++				btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
++					  root->root_key.objectid,
++					  root->reloc_root->root_key.objectid,
++					  root->reloc_root->root_key.type,
++					  root->reloc_root->root_key.offset,
++					  btrfs_root_generation(
++						  &root->reloc_root->root_item),
++					  reloc_root->root_key.objectid,
++					  reloc_root->root_key.type,
++					  reloc_root->root_key.offset,
++					  btrfs_root_generation(
++						  &reloc_root->root_item));
++			} else {
++				btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
++					  root->root_key.objectid,
++					  reloc_root->root_key.objectid,
++					  reloc_root->root_key.type,
++					  reloc_root->root_key.offset,
++					  btrfs_root_generation(
++						  &reloc_root->root_item));
++			}
++			list_add(&reloc_root->root_list, &reloc_roots);
++			btrfs_put_root(root);
++			btrfs_abort_transaction(trans, -EUCLEAN);
++			if (!err)
++				err = -EUCLEAN;
++			break;
++		}
+ 
+ 		/*
+ 		 * set reference count to 1, so btrfs_recover_relocation
+@@ -1978,7 +2010,7 @@ again:
+ 		root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ 					 false);
+ 		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+-			if (IS_ERR(root)) {
++			if (WARN_ON(IS_ERR(root))) {
+ 				/*
+ 				 * For recovery we read the fs roots on mount,
+ 				 * and if we didn't find the root then we marked
+@@ -1987,17 +2019,14 @@ again:
+ 				 * memory.  However there's no reason we can't
+ 				 * handle the error properly here just in case.
+ 				 */
+-				ASSERT(0);
+ 				ret = PTR_ERR(root);
+ 				goto out;
+ 			}
+-			if (root->reloc_root != reloc_root) {
++			if (WARN_ON(root->reloc_root != reloc_root)) {
+ 				/*
+-				 * This is actually impossible without something
+-				 * going really wrong (like weird race condition
+-				 * or cosmic rays).
++				 * This can happen if on-disk metadata has some
++				 * corruption, e.g. bad reloc tree key offset.
+ 				 */
+-				ASSERT(0);
+ 				ret = -EINVAL;
+ 				goto out;
+ 			}
diff --git a/queue-5.15/btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch b/queue-5.15/btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch
new file mode 100644
index 00000000000..011d255c009
--- /dev/null
+++ b/queue-5.15/btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch
@@ -0,0 +1,84 @@
+From 6ebcd021c92b8e4b904552e4d87283032100796d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 17:20:43 +0800
+Subject: btrfs: reject invalid reloc tree root keys with stack dump
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 6ebcd021c92b8e4b904552e4d87283032100796d upstream.
+
+[BUG]
+Syzbot reported a crash where an ASSERT() got triggered inside
+prepare_to_merge().
+
+That ASSERT() makes sure the reloc tree is properly pointed back by its
+subvolume tree.
+
+[CAUSE]
+After more debugging output, it turns out we had an invalid reloc tree:
+
+  BTRFS error (device loop1): reloc tree mismatch, root 8 has no reloc root, expect reloc root key (-8, 132, 8) gen 17
+
+Note the above root key is (TREE_RELOC_OBJECTID, ROOT_ITEM,
+QUOTA_TREE_OBJECTID), meaning it's a reloc tree for quota tree.
+
+But reloc trees can only exist for subvolumes, as for non-subvolume
+trees, we just COW the involved tree block, no need to create a reloc
+tree since those tree blocks won't be shared with other trees.
+
+Only subvolume trees can share tree blocks with other trees (thus they
+have BTRFS_ROOT_SHAREABLE flag).
+
+Thus this new debug output proves my previous assumption that corrupted
+on-disk data can trigger that ASSERT().
+
+[FIX]
+Besides the dedicated fix and the graceful exit, also let tree-checker
+check such root keys, to make sure reloc trees can only exist for subvolumes.
+
+CC: stable@vger.kernel.org # 5.15+
+Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c      |    3 ++-
+ fs/btrfs/tree-checker.c |   14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1437,7 +1437,8 @@ static int btrfs_init_fs_root(struct btr
+ 		goto fail;
+ 
+ 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
+-	    !btrfs_is_data_reloc_root(root)) {
++	    !btrfs_is_data_reloc_root(root) &&
++	    is_fstree(root->root_key.objectid)) {
+ 		set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
+ 		btrfs_check_and_init_root_item(&root->root_item);
+ 	}
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -442,6 +442,20 @@ static int check_root_key(struct extent_
+ 	btrfs_item_key_to_cpu(leaf, &item_key, slot);
+ 	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
+ 
++	/*
++	 * Bad rootid for reloc trees.
++	 *
++	 * Reloc trees are only for subvolume trees, other trees only need
++	 * to be COWed to be relocated.
++	 */
++	if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
++		     !is_fstree(key->offset))) {
++		generic_err(leaf, slot,
++		"invalid reloc tree for root %lld, root id is not a subvolume tree",
++			    key->offset);
++		return -EUCLEAN;
++	}
++
+ 	/* No such tree id */
+ 	if (unlikely(key->objectid == 0)) {
+ 		if (is_root_item)
diff --git a/queue-5.15/btrfs-set-cache_block_group_error-if-we-find-an-error.patch b/queue-5.15/btrfs-set-cache_block_group_error-if-we-find-an-error.patch
new file mode 100644
index 00000000000..2458e45ead2
--- /dev/null
+++ b/queue-5.15/btrfs-set-cache_block_group_error-if-we-find-an-error.patch
@@ -0,0 +1,47 @@
+From 92fb94b69c6accf1e49fff699640fa0ce03dc910 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 2 Aug 2023 09:20:24 -0400
+Subject: btrfs: set cache_block_group_error if we find an error
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 92fb94b69c6accf1e49fff699640fa0ce03dc910 upstream.
+
+We set cache_block_group_error if btrfs_cache_block_group() returns an
+error, this is because we could end up not finding space to allocate and
+mistakenly return -ENOSPC, which could then abort the transaction
+with the incorrect errno, and in the case of ENOSPC result in a
+WARN_ON() that will trip up tests like generic/475.
+
+However there's the case where multiple threads can be racing, one
+thread gets the proper error, and the other thread doesn't actually call
+btrfs_cache_block_group(), it instead sees ->cached ==
+BTRFS_CACHE_ERROR.  Again the result is the same, we fail to allocate
+our space and return -ENOSPC.  Instead we need to set
+cache_block_group_error to -EIO in this case to make sure that if we do
+not make our allocation we get the appropriate error returned back to
+the caller.
+
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4334,8 +4334,11 @@ have_block_group:
+ 			ret = 0;
+ 		}
+ 
+-		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
++		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
++			if (!cache_block_group_error)
++				cache_block_group_error = -EIO;
+ 			goto loop;
++		}
+ 
+ 		bg_ret = NULL;
+ 		ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
diff --git a/queue-5.15/series b/queue-5.15/series
index d8ca87dafc9..8901771be32 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -68,3 +68,8 @@ net-mlx5-skip-clock-update-work-when-device-is-in-error-state.patch
 ibmvnic-enforce-stronger-sanity-checks-on-login-response.patch
 ibmvnic-unmap-dma-login-rsp-buffer-on-send-login-fail.patch
 ibmvnic-handle-dma-unmapping-of-login-buffs-in-release-functions.patch
+btrfs-don-t-stop-integrity-writeback-too-early.patch
+btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch
+btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch
+btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch
+btrfs-set-cache_block_group_error-if-we-find-an-error.patch