--- /dev/null
+From effa24f689ce0948f68c754991a445a8d697d3a8 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 24 Jul 2023 06:26:53 -0700
+Subject: btrfs: don't stop integrity writeback too early
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit effa24f689ce0948f68c754991a445a8d697d3a8 upstream.
+
+extent_write_cache_pages stops writing pages as soon as nr_to_write hits
+zero. That is the right thing for opportunistic writeback, but incorrect
+for data integrity writeback, which needs to ensure that no dirty pages
+are left in the range. Thus only stop the writeback for WB_SYNC_NONE
+if nr_to_write hits 0.
+
+This is a port of write_cache_pages changes in commit 05fe478dd04e
+("mm: write_cache_pages integrity fix").
+
+Note that I've only triggered the problem with other changes to the btrfs
+writeback code, but this condition seems worthwhile fixing anyway.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ updated comment ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2181,11 +2181,12 @@ retry:
+ }
+
+ /*
+- * the filesystem may choose to bump up nr_to_write.
++ * The filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+- * at any time
++ * at any time.
+ */
+- nr_to_write_done = wbc->nr_to_write <= 0;
++ nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
++ wbc->nr_to_write <= 0);
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
--- /dev/null
+From 5c25699871112853f231e52d51c576d5c759a020 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 24 Jul 2023 06:26:54 -0700
+Subject: btrfs: don't wait for writeback on clean pages in extent_write_cache_pages
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 5c25699871112853f231e52d51c576d5c759a020 upstream.
+
+__extent_writepage could have started on more pages than the one it was
+called for. This happens regularly for zoned file systems, and in theory
+could happen for compressed I/O if the worker thread was executed very
+quickly. For such pages extent_write_cache_pages waits for writeback
+to complete before moving on to the next page, which is highly inefficient
+as it blocks the flusher thread.
+
+Port over the PageDirty check that was added to write_cache_pages in
+commit 515f4a037fb ("mm: write_cache_pages optimise page cleaning") to
+fix this.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2345,6 +2345,12 @@ retry:
+ continue;
+ }
+
++ if (!folio_test_dirty(folio)) {
++ /* Someone wrote it for us. */
++ folio_unlock(folio);
++ continue;
++ }
++
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ if (folio_test_writeback(folio))
+ submit_write_bio(bio_ctrl, 0);
--- /dev/null
+From 05d7ce504545f7874529701664c90814ca645c5d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 17:20:42 +0800
+Subject: btrfs: exit gracefully if reloc roots don't match
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 05d7ce504545f7874529701664c90814ca645c5d upstream.
+
+[BUG]
+Syzbot reported a crash that an ASSERT() got triggered inside
+prepare_to_merge().
+
+[CAUSE]
+The root cause of the triggered ASSERT() is we can have a race between
+quota tree creation and relocation.
+
+This leads us to create a duplicated quota tree in the
+btrfs_read_fs_root() path, and since it's treated as fs tree, it would
+have ROOT_SHAREABLE flag, causing us to create a reloc tree for it.
+
+The bug itself is fixed by a dedicated patch for it, but this already
+taught us the ASSERT() is not something straightforward for
+developers.
+
+[ENHANCEMENT]
+Instead of using an ASSERT(), let's handle it gracefully and output
+extra info about the mismatched reloc roots to help debugging.
+
+Also with the above ASSERT() removed, we can trigger ASSERT(0)s inside
+merge_reloc_roots() later.
+Also replace those ASSERT(0)s with WARN_ON()s.
+
+CC: stable@vger.kernel.org # 5.15+
+Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c | 45 +++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 37 insertions(+), 8 deletions(-)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1916,7 +1916,39 @@ again:
+ err = PTR_ERR(root);
+ break;
+ }
+- ASSERT(root->reloc_root == reloc_root);
++
++ if (unlikely(root->reloc_root != reloc_root)) {
++ if (root->reloc_root) {
++ btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
++ root->root_key.objectid,
++ root->reloc_root->root_key.objectid,
++ root->reloc_root->root_key.type,
++ root->reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &root->reloc_root->root_item),
++ reloc_root->root_key.objectid,
++ reloc_root->root_key.type,
++ reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &reloc_root->root_item));
++ } else {
++ btrfs_err(fs_info,
++"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
++ root->root_key.objectid,
++ reloc_root->root_key.objectid,
++ reloc_root->root_key.type,
++ reloc_root->root_key.offset,
++ btrfs_root_generation(
++ &reloc_root->root_item));
++ }
++ list_add(&reloc_root->root_list, &reloc_roots);
++ btrfs_put_root(root);
++ btrfs_abort_transaction(trans, -EUCLEAN);
++ if (!err)
++ err = -EUCLEAN;
++ break;
++ }
+
+ /*
+ * set reference count to 1, so btrfs_recover_relocation
+@@ -1989,7 +2021,7 @@ again:
+ root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
+ false);
+ if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+- if (IS_ERR(root)) {
++ if (WARN_ON(IS_ERR(root))) {
+ /*
+ * For recovery we read the fs roots on mount,
+ * and if we didn't find the root then we marked
+@@ -1998,17 +2030,14 @@ again:
+ * memory. However there's no reason we can't
+ * handle the error properly here just in case.
+ */
+- ASSERT(0);
+ ret = PTR_ERR(root);
+ goto out;
+ }
+- if (root->reloc_root != reloc_root) {
++ if (WARN_ON(root->reloc_root != reloc_root)) {
+ /*
+- * This is actually impossible without something
+- * going really wrong (like weird race condition
+- * or cosmic rays).
++ * This can happen if on-disk metadata has some
++ * corruption, e.g. bad reloc tree key offset.
+ */
+- ASSERT(0);
+ ret = -EINVAL;
+ goto out;
+ }
--- /dev/null
+From 12b2d64e591652a2d97dd3afa2b062ca7a4ba352 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Mon, 24 Jul 2023 06:26:55 -0700
+Subject: btrfs: properly clear end of the unreserved range in cow_file_range
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 12b2d64e591652a2d97dd3afa2b062ca7a4ba352 upstream.
+
+When the call to btrfs_reloc_clone_csums in cow_file_range returns an
+error, we jump to the out_unlock label with the extent_reserved variable
+set to false. The cleanup at the label will then call
+extent_clear_unlock_delalloc on the range from start to end. But we've
+already added cur_alloc_size to start before the jump, so there might be
+no range left from the newly incremented start to end. Move the check for
+'start < end' so that it is also reached for the !extent_reserved case.
+
+CC: stable@vger.kernel.org # 6.1+
+Fixes: a315e68f6e8b ("Btrfs: fix invalid attempt to free reserved space on failure to cow range")
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1453,8 +1453,6 @@ out_unlock:
+ clear_bits,
+ page_ops);
+ start += cur_alloc_size;
+- if (start >= end)
+- return ret;
+ }
+
+ /*
+@@ -1463,9 +1461,11 @@ out_unlock:
+ * space_info's bytes_may_use counter, reserved in
+ * btrfs_check_data_free_space().
+ */
+- extent_clear_unlock_delalloc(inode, start, end, locked_page,
+- clear_bits | EXTENT_CLEAR_DATA_RESV,
+- page_ops);
++ if (start < end) {
++ clear_bits |= EXTENT_CLEAR_DATA_RESV;
++ extent_clear_unlock_delalloc(inode, start, end, locked_page,
++ clear_bits, page_ops);
++ }
+ return ret;
+ }
+
--- /dev/null
+From 6ebcd021c92b8e4b904552e4d87283032100796d Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Thu, 3 Aug 2023 17:20:43 +0800
+Subject: btrfs: reject invalid reloc tree root keys with stack dump
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 6ebcd021c92b8e4b904552e4d87283032100796d upstream.
+
+[BUG]
+Syzbot reported a crash that an ASSERT() got triggered inside
+prepare_to_merge().
+
+That ASSERT() makes sure the reloc tree is properly pointed back by its
+subvolume tree.
+
+[CAUSE]
+After more debugging output, it turns out we had an invalid reloc tree:
+
+ BTRFS error (device loop1): reloc tree mismatch, root 8 has no reloc root, expect reloc root key (-8, 132, 8) gen 17
+
+Note the above root key is (TREE_RELOC_OBJECTID, ROOT_ITEM,
+QUOTA_TREE_OBJECTID), meaning it's a reloc tree for quota tree.
+
+But reloc trees can only exist for subvolumes, as for non-subvolume
+trees, we just COW the involved tree block, no need to create a reloc
+tree since those tree blocks won't be shared with other trees.
+
+Only subvolume trees can share tree blocks with other trees (thus they
+have BTRFS_ROOT_SHAREABLE flag).
+
+Thus this new debug output proves my previous assumption that corrupted
+on-disk data can trigger that ASSERT().
+
+[FIX]
+Besides the dedicated fix and the graceful exit, also let tree-checker
+check such root keys, to make sure reloc trees can only exist for subvolumes.
+
+CC: stable@vger.kernel.org # 5.15+
+Reported-by: syzbot+ae97a827ae1c3336bbb4@syzkaller.appspotmail.com
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/disk-io.c | 3 ++-
+ fs/btrfs/tree-checker.c | 14 ++++++++++++++
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -1351,7 +1351,8 @@ static int btrfs_init_fs_root(struct btr
+ btrfs_drew_lock_init(&root->snapshot_lock);
+
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
+- !btrfs_is_data_reloc_root(root)) {
++ !btrfs_is_data_reloc_root(root) &&
++ is_fstree(root->root_key.objectid)) {
+ set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
+ btrfs_check_and_init_root_item(&root->root_item);
+ }
+--- a/fs/btrfs/tree-checker.c
++++ b/fs/btrfs/tree-checker.c
+@@ -446,6 +446,20 @@ static int check_root_key(struct extent_
+ btrfs_item_key_to_cpu(leaf, &item_key, slot);
+ is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
+
++ /*
++ * Bad rootid for reloc trees.
++ *
++ * Reloc trees are only for subvolume trees, other trees only need
++ * to be COWed to be relocated.
++ */
++ if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
++ !is_fstree(key->offset))) {
++ generic_err(leaf, slot,
++ "invalid reloc tree for root %lld, root id is not a subvolume tree",
++ key->offset);
++ return -EUCLEAN;
++ }
++
+ /* No such tree id */
+ if (unlikely(key->objectid == 0)) {
+ if (is_root_item)
--- /dev/null
+From 92fb94b69c6accf1e49fff699640fa0ce03dc910 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 2 Aug 2023 09:20:24 -0400
+Subject: btrfs: set cache_block_group_error if we find an error
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 92fb94b69c6accf1e49fff699640fa0ce03dc910 upstream.
+
+We set cache_block_group_error if btrfs_cache_block_group() returns an
+error. This is because we could end up not finding space to allocate and
+mistakenly return -ENOSPC, which could then abort the transaction
+with the incorrect errno, and in the case of ENOSPC result in a
+WARN_ON() that will trip up tests like generic/475.
+
+However there's the case where multiple threads can be racing, one
+thread gets the proper error, and the other thread doesn't actually call
+btrfs_cache_block_group(), it instead sees ->cached ==
+BTRFS_CACHE_ERROR. Again the result is the same, we fail to allocate
+our space and return -ENOSPC. Instead we need to set
+cache_block_group_error to -EIO in this case to make sure that if we do
+not make our allocation we get the appropriate error returned back to
+the caller.
+
+CC: stable@vger.kernel.org # 4.14+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4318,8 +4318,11 @@ have_block_group:
+ ret = 0;
+ }
+
+- if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
++ if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
++ if (!cache_block_group_error)
++ cache_block_group_error = -EIO;
+ goto loop;
++ }
+
+ if (!find_free_extent_check_size_class(ffe_ctl, block_group))
+ goto loop;
--- /dev/null
+From fc1f91b9231a28fba333f931a031bf776bc6ef0e Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Fri, 21 Jul 2023 16:09:43 -0400
+Subject: btrfs: wait for actual caching progress during allocation
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit fc1f91b9231a28fba333f931a031bf776bc6ef0e upstream.
+
+Recently we've been having mysterious hangs while running generic/475 on
+the CI system. This turned out to be something like this:
+
+ Task 1
+ dmsetup suspend --nolockfs
+ -> __dm_suspend
+ -> dm_wait_for_completion
+ -> dm_wait_for_bios_completion
+ -> Unable to complete because of IO's on a plug in Task 2
+
+ Task 2
+ wb_workfn
+ -> wb_writeback
+ -> blk_start_plug
+ -> writeback_sb_inodes
+ -> Infinite loop unable to make an allocation
+
+ Task 3
+ cache_block_group
+ ->read_extent_buffer_pages
+ ->Waiting for IO to complete that can't be submitted because Task 1
+ suspended the DM device
+
+The problem here is that we need Task 2 to be scheduled completely for
+the blk plug to flush. Normally this would happen, we normally wait for
+the block group caching to finish (Task 3), and this schedule would
+result in the block plug flushing.
+
+However if there's enough free space available from the current caching
+to satisfy the allocation we won't actually wait for the caching to
+complete. This check however just checks that we have enough space, not
+that we can make the allocation. In this particular case we were trying
+to allocate 9MiB, and we had 10MiB of free space, but we didn't have
+9MiB of contiguous space to allocate, and thus the allocation failed and
+we looped.
+
+We specifically don't cycle through the FFE loop until we stop finding
+cached block groups because we don't want to allocate new block groups
+just because we're caching, so we short circuit the normal loop once we
+hit LOOP_CACHING_WAIT and we found a caching block group.
+
+This is normally fine, except in this particular case where the caching
+thread can't make progress because the DM device has been suspended.
+
+Fix this by not only waiting for free space to be >= the amount of space we
+want to allocate, but also that we make some progress in caching from
+the time we start waiting. This will keep us from busy looping when the
+caching is taking a while but still theoretically has enough space for
+us to allocate from, and fixes this particular case by forcing us to
+actually sleep and wait for forward progress, which will flush the plug.
+
+With this fix we're no longer hanging with generic/475.
+
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/block-group.c | 17 +++++++++++++++--
+ fs/btrfs/block-group.h | 2 ++
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progre
+ u64 num_bytes)
+ {
+ struct btrfs_caching_control *caching_ctl;
++ int progress;
+
+ caching_ctl = btrfs_get_caching_control(cache);
+ if (!caching_ctl)
+ return;
+
++ /*
++ * We've already failed to allocate from this block group, so even if
++ * there's enough space in the block group it isn't contiguous enough to
++ * allow for an allocation, so wait for at least the next wakeup tick,
++ * or for the thing to be done.
++ */
++ progress = atomic_read(&caching_ctl->progress);
++
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
+- (cache->free_space_ctl->free_space >= num_bytes));
++ (progress != atomic_read(&caching_ctl->progress) &&
++ (cache->free_space_ctl->free_space >= num_bytes)));
+
+ btrfs_put_caching_control(caching_ctl);
+ }
+@@ -802,8 +812,10 @@ next:
+
+ if (total_found > CACHING_CTL_WAKE_UP) {
+ total_found = 0;
+- if (wakeup)
++ if (wakeup) {
++ atomic_inc(&caching_ctl->progress);
+ wake_up(&caching_ctl->wait);
++ }
+ }
+ }
+ path->slots[0]++;
+@@ -910,6 +922,7 @@ int btrfs_cache_block_group(struct btrfs
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ refcount_set(&caching_ctl->count, 2);
++ atomic_set(&caching_ctl->progress, 0);
+ btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
+
+ spin_lock(&cache->lock);
+--- a/fs/btrfs/block-group.h
++++ b/fs/btrfs/block-group.h
+@@ -85,6 +85,8 @@ struct btrfs_caching_control {
+ wait_queue_head_t wait;
+ struct btrfs_work work;
+ struct btrfs_block_group *block_group;
++ /* Track progress of caching during allocation. */
++ atomic_t progress;
+ refcount_t count;
+ };
+
ibmvnic-ensure-login-failure-recovery-is-safe-from-other-resets.patch
gpio-ws16c48-fix-off-by-one-error-in-ws16c48-resource-region-extent.patch
gpio-sim-mark-the-gpio-chip-as-a-one-that-can-sleep.patch
+btrfs-wait-for-actual-caching-progress-during-allocation.patch
+btrfs-don-t-stop-integrity-writeback-too-early.patch
+btrfs-don-t-wait-for-writeback-on-clean-pages-in-extent_write_cache_pages.patch
+btrfs-properly-clear-end-of-the-unreserved-range-in-cow_file_range.patch
+btrfs-exit-gracefully-if-reloc-roots-don-t-match.patch
+btrfs-reject-invalid-reloc-tree-root-keys-with-stack-dump.patch
+btrfs-set-cache_block_group_error-if-we-find-an-error.patch