From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 23 Jan 2022 17:18:27 +0000 (+0100)
Subject: 5.16-stable patches
X-Git-Tag: v4.4.300~107
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c92e3b62e9d4f80fb4790ebde27822fa6365d392;p=thirdparty%2Fkernel%2Fstable-queue.git

5.16-stable patches

added patches:
	btrfs-add-extent-allocator-hook-to-decide-to-allocate-chunk-or-not.patch
	btrfs-check-the-root-node-for-uptodate-before-returning-it.patch
	btrfs-fix-deadlock-between-quota-enable-and-other-quota-operations.patch
	btrfs-respect-the-max-size-in-the-header-when-activating-swap-file.patch
	btrfs-zoned-cache-reported-zone-during-mount.patch
	btrfs-zoned-fix-chunk-allocation-condition-for-zoned-allocator.patch
	btrfs-zoned-unset-dedicated-block-group-on-allocation-failure.patch
	ext4-fix-a-possible-abba-deadlock-due-to-busy-pa.patch
	ext4-fix-fast-commit-may-miss-tracking-range-for-falloc_fl_zero_range.patch
	ext4-initialize-err_blk-before-calling-__ext4_get_inode_loc.patch
	ext4-make-sure-quota-gets-properly-shutdown-on-error.patch
	ext4-make-sure-to-reset-inode-lockdep-class-when-quota-enabling-fails.patch
---

diff --git a/queue-5.16/btrfs-add-extent-allocator-hook-to-decide-to-allocate-chunk-or-not.patch b/queue-5.16/btrfs-add-extent-allocator-hook-to-decide-to-allocate-chunk-or-not.patch
new file mode 100644
index 00000000000..1d245070255
--- /dev/null
+++ b/queue-5.16/btrfs-add-extent-allocator-hook-to-decide-to-allocate-chunk-or-not.patch
@@ -0,0 +1,59 @@
+From 50475cd57706359d6cc652be88369dace7a4c2eb Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Wed, 8 Dec 2021 00:35:48 +0900
+Subject: btrfs: add extent allocator hook to decide to allocate chunk or not
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 50475cd57706359d6cc652be88369dace7a4c2eb upstream.
+
+Introduce a new hook for an extent allocator policy. With the new
+hook, a policy can decide to allocate a new block group or not. If
+not, it will return -ENOSPC, so btrfs_reserve_extent() will cut the
+allocation size in half and retry the allocation if min_alloc_size is
+large enough.
+
+The hook only has a placeholder body for now; it will be replaced with
+the real implementation in the next patch.
+
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   17 +++++++++++++++++
+ 1 file changed, 17 insertions(+)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3947,6 +3947,19 @@ static void found_extent(struct find_fre
+ 	}
+ }
+ 
++static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
++			       struct find_free_extent_ctl *ffe_ctl)
++{
++	switch (ffe_ctl->policy) {
++	case BTRFS_EXTENT_ALLOC_CLUSTERED:
++		return true;
++	case BTRFS_EXTENT_ALLOC_ZONED:
++		return true;
++	default:
++		BUG();
++	}
++}
++
+ static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
+ {
+ 	switch (ffe_ctl->policy) {
+@@ -4034,6 +4047,10 @@ static int find_free_extent_update_loop(
+ 			struct btrfs_trans_handle *trans;
+ 			int exist = 0;
+ 
++			/* Check if the allocation policy allows creating a new chunk */
++			if (!can_allocate_chunk(fs_info, ffe_ctl))
++				return -ENOSPC;
++
+ 			trans = current->journal_info;
+ 			if (trans)
+ 				exist = 1;
diff --git a/queue-5.16/btrfs-check-the-root-node-for-uptodate-before-returning-it.patch b/queue-5.16/btrfs-check-the-root-node-for-uptodate-before-returning-it.patch
new file mode 100644
index 00000000000..cb978f9bc6c
--- /dev/null
+++ b/queue-5.16/btrfs-check-the-root-node-for-uptodate-before-returning-it.patch
@@ -0,0 +1,68 @@
+From 120de408e4b97504a2d9b5ca534b383de2c73d49 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Wed, 24 Nov 2021 14:14:24 -0500
+Subject: btrfs: check the root node for uptodate before returning it
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 120de408e4b97504a2d9b5ca534b383de2c73d49 upstream.
+
+Now that we clear the extent buffer uptodate if we fail to write it out
+we need to check to see if our root node is uptodate before we search
+down it.  Otherwise we could return stale data (or potentially corrupt
+data that was caught by the write verification step) and think that the
+path is OK to search down.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Nikolay Borisov <nborisov@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.c |   19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -1570,12 +1570,9 @@ static struct extent_buffer *btrfs_searc
+ {
+ 	struct btrfs_fs_info *fs_info = root->fs_info;
+ 	struct extent_buffer *b;
+-	int root_lock;
++	int root_lock = 0;
+ 	int level = 0;
+ 
+-	/* We try very hard to do read locks on the root */
+-	root_lock = BTRFS_READ_LOCK;
+-
+ 	if (p->search_commit_root) {
+ 		/*
+ 		 * The commit roots are read only so we always do read locks,
+@@ -1613,6 +1610,9 @@ static struct extent_buffer *btrfs_searc
+ 		goto out;
+ 	}
+ 
++	/* We try very hard to do read locks on the root */
++	root_lock = BTRFS_READ_LOCK;
++
+ 	/*
+ 	 * If the level is set to maximum, we can skip trying to get the read
+ 	 * lock.
+@@ -1639,6 +1639,17 @@ static struct extent_buffer *btrfs_searc
+ 	level = btrfs_header_level(b);
+ 
+ out:
++	/*
++	 * The root may have failed to write out at some point, and thus is no
++	 * longer valid, return an error in this case.
++	 */
++	if (!extent_buffer_uptodate(b)) {
++		if (root_lock)
++			btrfs_tree_unlock_rw(b, root_lock);
++		free_extent_buffer(b);
++		return ERR_PTR(-EIO);
++	}
++
+ 	p->nodes[level] = b;
+ 	if (!p->skip_locking)
+ 		p->locks[level] = root_lock;
diff --git a/queue-5.16/btrfs-fix-deadlock-between-quota-enable-and-other-quota-operations.patch b/queue-5.16/btrfs-fix-deadlock-between-quota-enable-and-other-quota-operations.patch
new file mode 100644
index 00000000000..ac9b5cfff84
--- /dev/null
+++ b/queue-5.16/btrfs-fix-deadlock-between-quota-enable-and-other-quota-operations.patch
@@ -0,0 +1,128 @@
+From 232796df8c1437c41d308d161007f0715bac0a54 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 27 Oct 2021 18:30:25 +0100
+Subject: btrfs: fix deadlock between quota enable and other quota operations
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 232796df8c1437c41d308d161007f0715bac0a54 upstream.
+
+When enabling quotas, we attempt to commit a transaction while holding the
+mutex fs_info->qgroup_ioctl_lock. This can result in a deadlock with other
+quota operations such as:
+
+- qgroup creation and deletion, ioctl BTRFS_IOC_QGROUP_CREATE;
+
+- adding and removing qgroup relations, ioctl BTRFS_IOC_QGROUP_ASSIGN.
+
+This is because these operations join a transaction and after that they
+attempt to lock the mutex fs_info->qgroup_ioctl_lock. Acquiring that mutex
+after joining or starting a transaction is a pattern followed everywhere
+in qgroups, so the quota enablement operation is the one at fault here,
+and should not commit a transaction while holding that mutex.
+
+Fix this by making the transaction commit while not holding the mutex.
+We are safe from two concurrent tasks trying to enable quotas because
+we are serialized by the rw semaphore fs_info->subvol_sem at
+btrfs_ioctl_quota_ctl(), which is the only call site for enabling
+quotas.
+
+When this deadlock happens, it produces a trace like the following:
+
+  INFO: task syz-executor:25604 blocked for more than 143 seconds.
+  Not tainted 5.15.0-rc6 #4
+  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  task:syz-executor state:D stack:24800 pid:25604 ppid: 24873 flags:0x00004004
+  Call Trace:
+  context_switch kernel/sched/core.c:4940 [inline]
+  __schedule+0xcd9/0x2530 kernel/sched/core.c:6287
+  schedule+0xd3/0x270 kernel/sched/core.c:6366
+  btrfs_commit_transaction+0x994/0x2e90 fs/btrfs/transaction.c:2201
+  btrfs_quota_enable+0x95c/0x1790 fs/btrfs/qgroup.c:1120
+  btrfs_ioctl_quota_ctl fs/btrfs/ioctl.c:4229 [inline]
+  btrfs_ioctl+0x637e/0x7b70 fs/btrfs/ioctl.c:5010
+  vfs_ioctl fs/ioctl.c:51 [inline]
+  __do_sys_ioctl fs/ioctl.c:874 [inline]
+  __se_sys_ioctl fs/ioctl.c:860 [inline]
+  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
+  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+  do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+  entry_SYSCALL_64_after_hwframe+0x44/0xae
+  RIP: 0033:0x7f86920b2c4d
+  RSP: 002b:00007f868f61ac58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+  RAX: ffffffffffffffda RBX: 00007f86921d90a0 RCX: 00007f86920b2c4d
+  RDX: 0000000020005e40 RSI: 00000000c0109428 RDI: 0000000000000008
+  RBP: 00007f869212bd80 R08: 0000000000000000 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000246 R12: 00007f86921d90a0
+  R13: 00007fff6d233e4f R14: 00007fff6d233ff0 R15: 00007f868f61adc0
+  INFO: task syz-executor:25628 blocked for more than 143 seconds.
+  Not tainted 5.15.0-rc6 #4
+  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  task:syz-executor state:D stack:29080 pid:25628 ppid: 24873 flags:0x00004004
+  Call Trace:
+  context_switch kernel/sched/core.c:4940 [inline]
+  __schedule+0xcd9/0x2530 kernel/sched/core.c:6287
+  schedule+0xd3/0x270 kernel/sched/core.c:6366
+  schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:6425
+  __mutex_lock_common kernel/locking/mutex.c:669 [inline]
+  __mutex_lock+0xc96/0x1680 kernel/locking/mutex.c:729
+  btrfs_remove_qgroup+0xb7/0x7d0 fs/btrfs/qgroup.c:1548
+  btrfs_ioctl_qgroup_create fs/btrfs/ioctl.c:4333 [inline]
+  btrfs_ioctl+0x683c/0x7b70 fs/btrfs/ioctl.c:5014
+  vfs_ioctl fs/ioctl.c:51 [inline]
+  __do_sys_ioctl fs/ioctl.c:874 [inline]
+  __se_sys_ioctl fs/ioctl.c:860 [inline]
+  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
+  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+  do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+  entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Reported-by: Hao Sun <sunhao.th@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CACkBjsZQF19bQ1C6=yetF3BvL10OSORpFUcWXTP6HErshDB4dQ@mail.gmail.com/
+Fixes: 340f1aa27f36 ("btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable")
+CC: stable@vger.kernel.org # 4.19
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -940,6 +940,14 @@ int btrfs_quota_enable(struct btrfs_fs_i
+ 	int ret = 0;
+ 	int slot;
+ 
++	/*
++	 * We need to have subvol_sem write locked, to prevent races between
++	 * concurrent tasks trying to enable quotas, because we will unlock
++	 * and relock qgroup_ioctl_lock before setting fs_info->quota_root
++	 * and before setting BTRFS_FS_QUOTA_ENABLED.
++	 */
++	lockdep_assert_held_write(&fs_info->subvol_sem);
++
+ 	mutex_lock(&fs_info->qgroup_ioctl_lock);
+ 	if (fs_info->quota_root)
+ 		goto out;
+@@ -1117,8 +1125,19 @@ out_add_root:
+ 		goto out_free_path;
+ 	}
+ 
++	mutex_unlock(&fs_info->qgroup_ioctl_lock);
++	/*
++	 * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
++	 * a deadlock with tasks concurrently doing other qgroup operations, such as
++	 * adding/removing qgroups or adding/deleting qgroup relations for example,
++	 * because all qgroup operations first start or join a transaction and then
++	 * lock the qgroup_ioctl_lock mutex.
++	 * We are safe from a concurrent task trying to enable quotas, by calling
++	 * this function, since we are serialized by fs_info->subvol_sem.
++	 */
+ 	ret = btrfs_commit_transaction(trans);
+ 	trans = NULL;
++	mutex_lock(&fs_info->qgroup_ioctl_lock);
+ 	if (ret)
+ 		goto out_free_path;
+ 
diff --git a/queue-5.16/btrfs-respect-the-max-size-in-the-header-when-activating-swap-file.patch b/queue-5.16/btrfs-respect-the-max-size-in-the-header-when-activating-swap-file.patch
new file mode 100644
index 00000000000..8d468e83810
--- /dev/null
+++ b/queue-5.16/btrfs-respect-the-max-size-in-the-header-when-activating-swap-file.patch
@@ -0,0 +1,65 @@
+From c2f822635df873c510bda6fb7fd1b10b7c31be2d Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 16 Dec 2021 15:00:32 +0000
+Subject: btrfs: respect the max size in the header when activating swap file
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit c2f822635df873c510bda6fb7fd1b10b7c31be2d upstream.
+
+If we extended the size of a swapfile after its header was created (by the
+mkswap utility) and then try to activate it, we will map the entire file
+when activating the swap file, instead of limiting to the max size defined
+in the swap file's header.
+
+Currently test case generic/643 from fstests fails because we do not
+respect that size limit defined in the swap file's header.
+
+So fix this by not mapping file ranges beyond the max size defined in the
+swap header.
+
+This is the same type of bug that iomap used to have, and was fixed in
+commit 36ca7943ac18ae ("mm/swap: consider max pages in
+iomap_swapfile_add_extent").
+
+Fixes: ed46ff3d423780 ("Btrfs: support swap files")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-and-tested-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -10595,9 +10595,19 @@ static int btrfs_add_swap_extent(struct
+ 				 struct btrfs_swap_info *bsi)
+ {
+ 	unsigned long nr_pages;
++	unsigned long max_pages;
+ 	u64 first_ppage, first_ppage_reported, next_ppage;
+ 	int ret;
+ 
++	/*
++	 * Our swapfile may have had its size extended after the swap header was
++	 * written. In that case activating the swapfile should not go beyond
++	 * the max size set in the swap header.
++	 */
++	if (bsi->nr_pages >= sis->max)
++		return 0;
++
++	max_pages = sis->max - bsi->nr_pages;
+ 	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
+ 	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
+ 				PAGE_SIZE) >> PAGE_SHIFT;
+@@ -10605,6 +10615,7 @@ static int btrfs_add_swap_extent(struct
+ 	if (first_ppage >= next_ppage)
+ 		return 0;
+ 	nr_pages = next_ppage - first_ppage;
++	nr_pages = min(nr_pages, max_pages);
+ 
+ 	first_ppage_reported = first_ppage;
+ 	if (bsi->start == 0)
diff --git a/queue-5.16/btrfs-zoned-cache-reported-zone-during-mount.patch b/queue-5.16/btrfs-zoned-cache-reported-zone-during-mount.patch
new file mode 100644
index 00000000000..c3fdb57ff85
--- /dev/null
+++ b/queue-5.16/btrfs-zoned-cache-reported-zone-during-mount.patch
@@ -0,0 +1,291 @@
+From 16beac87e95e2fb278b552397c8260637f8a63f7 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Thu, 11 Nov 2021 14:14:38 +0900
+Subject: btrfs: zoned: cache reported zone during mount
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 16beac87e95e2fb278b552397c8260637f8a63f7 upstream.
+
+When mounting a device, we are reporting the zones twice: once for
+checking the zone attributes in btrfs_get_dev_zone_info and once for
+loading block groups' zone info in
+btrfs_load_block_group_zone_info(). With a lot of block groups, that
+leads to a lot of REPORT ZONE commands and slows down the mount
+process.
+
+This patch introduces a zone info cache in struct
+btrfs_zoned_device_info. The cache is populated while in
+btrfs_get_dev_zone_info() and used for
+btrfs_load_block_group_zone_info() to reduce the number of REPORT ZONE
+commands. The zone cache is then released after loading the block
+groups, as it would not be very effective at run time.
+
+Benchmark: Mount an HDD with 57,007 block groups
+Before patch: 171.368 seconds
+After patch: 64.064 seconds
+
+While it still takes a minute due to the slowness of loading all the
+block groups, the patch cuts the mount time by more than 60%.
+
+Link: https://lore.kernel.org/linux-btrfs/CAHQ7scUiLtcTqZOMMY5kbWUBOhGRwKo6J6wYPT5WY+C=cD49nQ@mail.gmail.com/
+Fixes: 5b316468983d ("btrfs: get zone information of zoned block devices")
+CC: stable@vger.kernel.org
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/dev-replace.c |    2 -
+ fs/btrfs/disk-io.c     |    2 +
+ fs/btrfs/volumes.c     |    2 -
+ fs/btrfs/zoned.c       |   86 +++++++++++++++++++++++++++++++++++++++++++------
+ fs/btrfs/zoned.h       |    8 +++-
+ 5 files changed, 87 insertions(+), 13 deletions(-)
+
+--- a/fs/btrfs/dev-replace.c
++++ b/fs/btrfs/dev-replace.c
+@@ -322,7 +322,7 @@ static int btrfs_init_dev_replace_tgtdev
+ 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+ 	device->fs_devices = fs_info->fs_devices;
+ 
+-	ret = btrfs_get_dev_zone_info(device);
++	ret = btrfs_get_dev_zone_info(device, false);
+ 	if (ret)
+ 		goto error;
+ 
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3571,6 +3571,8 @@ int __cold open_ctree(struct super_block
+ 		goto fail_sysfs;
+ 	}
+ 
++	btrfs_free_zone_cache(fs_info);
++
+ 	if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
+ 	    !btrfs_check_rw_degradable(fs_info, NULL)) {
+ 		btrfs_warn(fs_info,
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -2643,7 +2643,7 @@ int btrfs_init_new_device(struct btrfs_f
+ 	device->fs_info = fs_info;
+ 	device->bdev = bdev;
+ 
+-	ret = btrfs_get_dev_zone_info(device);
++	ret = btrfs_get_dev_zone_info(device, false);
+ 	if (ret)
+ 		goto error_free_device;
+ 
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -5,6 +5,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/sched/mm.h>
+ #include <linux/atomic.h>
++#include <linux/vmalloc.h>
+ #include "ctree.h"
+ #include "volumes.h"
+ #include "zoned.h"
+@@ -213,6 +214,8 @@ static int emulate_report_zones(struct b
+ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
+ 			       struct blk_zone *zones, unsigned int *nr_zones)
+ {
++	struct btrfs_zoned_device_info *zinfo = device->zone_info;
++	u32 zno;
+ 	int ret;
+ 
+ 	if (!*nr_zones)
+@@ -224,6 +227,34 @@ static int btrfs_get_dev_zones(struct bt
+ 		return 0;
+ 	}
+ 
++	/* Check cache */
++	if (zinfo->zone_cache) {
++		unsigned int i;
++
++		ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
++		zno = pos >> zinfo->zone_size_shift;
++		/*
++		 * We cannot report zones beyond the zone end. So, it is OK to
++		 * cap *nr_zones to at the end.
++		 */
++		*nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno);
++
++		for (i = 0; i < *nr_zones; i++) {
++			struct blk_zone *zone_info;
++
++			zone_info = &zinfo->zone_cache[zno + i];
++			if (!zone_info->len)
++				break;
++		}
++
++		if (i == *nr_zones) {
++			/* Cache hit on all the zones */
++			memcpy(zones, zinfo->zone_cache + zno,
++			       sizeof(*zinfo->zone_cache) * *nr_zones);
++			return 0;
++		}
++	}
++
+ 	ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
+ 				  copy_zone_info_cb, zones);
+ 	if (ret < 0) {
+@@ -237,6 +268,11 @@ static int btrfs_get_dev_zones(struct bt
+ 	if (!ret)
+ 		return -EIO;
+ 
++	/* Populate cache */
++	if (zinfo->zone_cache)
++		memcpy(zinfo->zone_cache + zno, zones,
++		       sizeof(*zinfo->zone_cache) * *nr_zones);
++
+ 	return 0;
+ }
+ 
+@@ -300,7 +336,7 @@ int btrfs_get_dev_zone_info_all_devices(
+ 		if (!device->bdev)
+ 			continue;
+ 
+-		ret = btrfs_get_dev_zone_info(device);
++		ret = btrfs_get_dev_zone_info(device, true);
+ 		if (ret)
+ 			break;
+ 	}
+@@ -309,7 +345,7 @@ int btrfs_get_dev_zone_info_all_devices(
+ 	return ret;
+ }
+ 
+-int btrfs_get_dev_zone_info(struct btrfs_device *device)
++int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
+ {
+ 	struct btrfs_fs_info *fs_info = device->fs_info;
+ 	struct btrfs_zoned_device_info *zone_info = NULL;
+@@ -339,6 +375,8 @@ int btrfs_get_dev_zone_info(struct btrfs
+ 	if (!zone_info)
+ 		return -ENOMEM;
+ 
++	device->zone_info = zone_info;
++
+ 	if (!bdev_is_zoned(bdev)) {
+ 		if (!fs_info->zone_size) {
+ 			ret = calculate_emulated_zone_size(fs_info);
+@@ -407,6 +445,23 @@ int btrfs_get_dev_zone_info(struct btrfs
+ 		goto out;
+ 	}
+ 
++	/*
++	 * Enable zone cache only for a zoned device. On a non-zoned device, we
++	 * fill the zone info with emulated CONVENTIONAL zones, so no need to
++	 * use the cache.
++	 */
++	if (populate_cache && bdev_is_zoned(device->bdev)) {
++		zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
++						zone_info->nr_zones);
++		if (!zone_info->zone_cache) {
++			btrfs_err_in_rcu(device->fs_info,
++				"zoned: failed to allocate zone cache for %s",
++				rcu_str_deref(device->name));
++			ret = -ENOMEM;
++			goto out;
++		}
++	}
++
+ 	/* Get zones type */
+ 	nactive = 0;
+ 	while (sector < nr_sectors) {
+@@ -505,8 +560,6 @@ int btrfs_get_dev_zone_info(struct btrfs
+ 
+ 	kfree(zones);
+ 
+-	device->zone_info = zone_info;
+-
+ 	switch (bdev_zoned_model(bdev)) {
+ 	case BLK_ZONED_HM:
+ 		model = "host-managed zoned";
+@@ -539,11 +592,7 @@ int btrfs_get_dev_zone_info(struct btrfs
+ out:
+ 	kfree(zones);
+ out_free_zone_info:
+-	bitmap_free(zone_info->active_zones);
+-	bitmap_free(zone_info->empty_zones);
+-	bitmap_free(zone_info->seq_zones);
+-	kfree(zone_info);
+-	device->zone_info = NULL;
++	btrfs_destroy_dev_zone_info(device);
+ 
+ 	return ret;
+ }
+@@ -558,6 +607,7 @@ void btrfs_destroy_dev_zone_info(struct
+ 	bitmap_free(zone_info->active_zones);
+ 	bitmap_free(zone_info->seq_zones);
+ 	bitmap_free(zone_info->empty_zones);
++	vfree(zone_info->zone_cache);
+ 	kfree(zone_info);
+ 	device->zone_info = NULL;
+ }
+@@ -1975,3 +2025,21 @@ void btrfs_clear_data_reloc_bg(struct bt
+ 		fs_info->data_reloc_bg = 0;
+ 	spin_unlock(&fs_info->relocation_bg_lock);
+ }
++
++void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
++{
++	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
++	struct btrfs_device *device;
++
++	if (!btrfs_is_zoned(fs_info))
++		return;
++
++	mutex_lock(&fs_devices->device_list_mutex);
++	list_for_each_entry(device, &fs_devices->devices, dev_list) {
++		if (device->zone_info) {
++			vfree(device->zone_info->zone_cache);
++			device->zone_info->zone_cache = NULL;
++		}
++	}
++	mutex_unlock(&fs_devices->device_list_mutex);
++}
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -28,6 +28,7 @@ struct btrfs_zoned_device_info {
+ 	unsigned long *seq_zones;
+ 	unsigned long *empty_zones;
+ 	unsigned long *active_zones;
++	struct blk_zone *zone_cache;
+ 	struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
+ };
+ 
+@@ -35,7 +36,7 @@ struct btrfs_zoned_device_info {
+ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ 		       struct blk_zone *zone);
+ int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
+-int btrfs_get_dev_zone_info(struct btrfs_device *device);
++int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
+ void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
+ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
+ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info);
+@@ -76,6 +77,7 @@ bool btrfs_can_activate_zone(struct btrf
+ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+ 			     u64 length);
+ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
++void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+ 				     struct blk_zone *zone)
+@@ -88,7 +90,8 @@ static inline int btrfs_get_dev_zone_inf
+ 	return 0;
+ }
+ 
+-static inline int btrfs_get_dev_zone_info(struct btrfs_device *device)
++static inline int btrfs_get_dev_zone_info(struct btrfs_device *device,
++					  bool populate_cache)
+ {
+ 	return 0;
+ }
+@@ -232,6 +235,7 @@ static inline void btrfs_zone_finish_end
+ 
+ static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
+ 
++static inline void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info) { }
+ #endif
+ 
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
diff --git a/queue-5.16/btrfs-zoned-fix-chunk-allocation-condition-for-zoned-allocator.patch b/queue-5.16/btrfs-zoned-fix-chunk-allocation-condition-for-zoned-allocator.patch
new file mode 100644
index 00000000000..f4353f770e6
--- /dev/null
+++ b/queue-5.16/btrfs-zoned-fix-chunk-allocation-condition-for-zoned-allocator.patch
@@ -0,0 +1,144 @@
+From 82187d2ecdfb22ab7ee05f388402a39236d31428 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Wed, 8 Dec 2021 00:35:49 +0900
+Subject: btrfs: zoned: fix chunk allocation condition for zoned allocator
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 82187d2ecdfb22ab7ee05f388402a39236d31428 upstream.
+
+The ZNS specification defines a limit on the number of "active"
+zones. That limit forces us to restrict the number of block groups which
+can be used for an allocation at the same time. Not to exceed the
+limit, we reuse the existing active block groups as much as possible
+when we can't activate any other zones without sacrificing an already
+activated block group in commit a85f05e59bc1 ("btrfs: zoned: avoid
+chunk allocation if active block group has enough space").
+
+However, the check is wrong in two ways. First, it checks the
+condition for every raid index (ffe_ctl->index). Even if it reaches
+the condition and "ffe_ctl->max_extent_size >=
+ffe_ctl->min_alloc_size" is met, there can be other block groups
+having enough space to hold ffe_ctl->num_bytes. (Actually, this won't
+happen in the current zoned code as it only supports SINGLE
+profile. But, it can happen once it enables other RAID types.)
+
+Second, it checks the active zone availability depending on the
+raid index. The raid index is just an index for
+space_info->block_groups, so it has nothing to do with chunk allocation.
+
+These mistakes are causing a faulty allocation in a certain
+situation. Consider we are running zoned btrfs on a device whose
+max_active_zone == 0 (no limit). And, suppose no block group has
+room to fit ffe_ctl->num_bytes but some has room to meet
+ffe_ctl->min_alloc_size (i.e. num_bytes > max_extent_size >=
+min_alloc_size).
+
+In this situation, the following occur:
+
+- With SINGLE raid_index, it reaches the chunk allocation checking
+  code
+- The check returns true because we can activate a new zone (no limit)
+- But, before allocating the chunk, it iterates to the next raid index
+  (RAID5)
+- Since there are no RAID5 block groups on zoned mode, it again
+  reaches the check code
+- The check returns false because of btrfs_can_activate_zone()'s "if
+  (raid_index != BTRFS_RAID_SINGLE)" part
+- That results in returning -ENOSPC without allocating a new chunk
+
+As a result, we end up hitting -ENOSPC too early.
+
+Move the check to the right place in the can_allocate_chunk() hook,
+and do the active zone check depending on the allocation flag, not on
+the raid index.
+
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   21 +++++++++------------
+ fs/btrfs/zoned.c       |    5 ++---
+ fs/btrfs/zoned.h       |    5 ++---
+ 3 files changed, 13 insertions(+), 18 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3966,6 +3966,15 @@ static bool can_allocate_chunk(struct bt
+ 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
+ 		return true;
+ 	case BTRFS_EXTENT_ALLOC_ZONED:
++		/*
++		 * If we have enough free space left in an already
++		 * active block group and we can't activate any other
++		 * zone now, do not allow allocating a new chunk and
++		 * let find_free_extent() retry with a smaller size.
++		 */
++		if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
++		    !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
++			return false;
+ 		return true;
+ 	default:
+ 		BUG();
+@@ -4012,18 +4021,6 @@ static int find_free_extent_update_loop(
+ 		return 0;
+ 	}
+ 
+-	if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+-	    !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->index)) {
+-		/*
+-		 * If we have enough free space left in an already active block
+-		 * group and we can't activate any other zone now, retry the
+-		 * active ones with a smaller allocation size.  Returning early
+-		 * from here will tell btrfs_reserve_extent() to haven the
+-		 * size.
+-		 */
+-		return -ENOSPC;
+-	}
+-
+ 	if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
+ 		return 1;
+ 
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1934,7 +1934,7 @@ int btrfs_zone_finish(struct btrfs_block
+ 	return ret;
+ }
+ 
+-bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index)
++bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ {
+ 	struct btrfs_device *device;
+ 	bool ret = false;
+@@ -1943,8 +1943,7 @@ bool btrfs_can_activate_zone(struct btrf
+ 		return true;
+ 
+ 	/* Non-single profiles are not supported yet */
+-	if (raid_index != BTRFS_RAID_SINGLE)
+-		return false;
++	ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
+ 
+ 	/* Check if there is a device with active zones left */
+ 	mutex_lock(&fs_devices->device_list_mutex);
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -72,8 +72,7 @@ struct btrfs_device *btrfs_zoned_get_dev
+ 					    u64 logical, u64 length);
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+ int btrfs_zone_finish(struct btrfs_block_group *block_group);
+-bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+-			     int raid_index);
++bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
+ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+ 			     u64 length);
+ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
+@@ -225,7 +224,7 @@ static inline int btrfs_zone_finish(stru
+ }
+ 
+ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+-					   int raid_index)
++					   u64 flags)
+ {
+ 	return true;
+ }
diff --git a/queue-5.16/btrfs-zoned-unset-dedicated-block-group-on-allocation-failure.patch b/queue-5.16/btrfs-zoned-unset-dedicated-block-group-on-allocation-failure.patch
new file mode 100644
index 00000000000..ed2dc2d4e77
--- /dev/null
+++ b/queue-5.16/btrfs-zoned-unset-dedicated-block-group-on-allocation-failure.patch
@@ -0,0 +1,72 @@
+From 1ada69f61c88abb75a1038ee457633325658a183 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Wed, 8 Dec 2021 00:35:47 +0900
+Subject: btrfs: zoned: unset dedicated block group on allocation failure
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 1ada69f61c88abb75a1038ee457633325658a183 upstream.
+
+Allocating an extent from a block group can fail for various reasons.
+When an allocation from a dedicated block group (for tree-log or
+relocation data) fails, we need to unregister it as a dedicated one so
+that we can allocate a new block group for the dedicated one.
+
+However, we return early when the block group is read-only, fully used,
+or when its zone cannot be activated. As a result, we keep the unusable
+block group registered as a dedicated one, leading to further allocation
+failures. With many block groups, the allocator will loop hopelessly
+trying to find a free extent, resulting in a hung task.
+
+Fix the issue by delaying the return and doing the proper cleanups.
+
+CC: stable@vger.kernel.org # 5.16
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   20 ++++++++++++++++----
+ 1 file changed, 16 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3790,23 +3790,35 @@ static int do_allocation_zoned(struct bt
+ 	spin_unlock(&fs_info->relocation_bg_lock);
+ 	if (skip)
+ 		return 1;
++
+ 	/* Check RO and no space case before trying to activate it */
+ 	spin_lock(&block_group->lock);
+ 	if (block_group->ro ||
+ 	    block_group->alloc_offset == block_group->zone_capacity) {
+-		spin_unlock(&block_group->lock);
+-		return 1;
++		ret = 1;
++		/*
++		 * May need to clear fs_info->{treelog,data_reloc}_bg.
++		 * Return the error after taking the locks.
++		 */
+ 	}
+ 	spin_unlock(&block_group->lock);
+ 
+-	if (!btrfs_zone_activate(block_group))
+-		return 1;
++	if (!ret && !btrfs_zone_activate(block_group)) {
++		ret = 1;
++		/*
++		 * May need to clear fs_info->{treelog,data_reloc}_bg.
++		 * Return the error after taking the locks.
++		 */
++	}
+ 
+ 	spin_lock(&space_info->lock);
+ 	spin_lock(&block_group->lock);
+ 	spin_lock(&fs_info->treelog_bg_lock);
+ 	spin_lock(&fs_info->relocation_bg_lock);
+ 
++	if (ret)
++		goto out;
++
+ 	ASSERT(!ffe_ctl->for_treelog ||
+ 	       block_group->start == fs_info->treelog_bg ||
+ 	       fs_info->treelog_bg == 0);
diff --git a/queue-5.16/ext4-fix-a-possible-abba-deadlock-due-to-busy-pa.patch b/queue-5.16/ext4-fix-a-possible-abba-deadlock-due-to-busy-pa.patch
new file mode 100644
index 00000000000..f347f0d37dd
--- /dev/null
+++ b/queue-5.16/ext4-fix-a-possible-abba-deadlock-due-to-busy-pa.patch
@@ -0,0 +1,154 @@
+From 8c80fb312d7abf8bcd66cca1d843a80318a2c522 Mon Sep 17 00:00:00 2001
+From: Chunguang Xu <brookxu@tencent.com>
+Date: Tue, 23 Nov 2021 09:17:57 +0800
+Subject: ext4: fix a possible ABBA deadlock due to busy PA
+
+From: Chunguang Xu <brookxu@tencent.com>
+
+commit 8c80fb312d7abf8bcd66cca1d843a80318a2c522 upstream.
+
+We found on an older kernel (3.10) that, when disk space is
+insufficient, the system may hit an ABBA deadlock. The problem still
+seems to exist in the latest kernel, so try to fix it here. The
+deadlock arises as follows: task A occupies the PA and waits for the
+jbd2 transaction to finish, the jbd2 transaction waits for the
+completion of task B's IO (plug_list), and task B waits for task A to
+release the PA so that it can finish the discard, which indirectly
+forms an ABBA deadlock. The related calltrace is as follows:
+
+    Task A
+    vfs_write
+    ext4_mb_new_blocks()
+    ext4_mb_mark_diskspace_used()       JBD2
+    jbd2_journal_get_write_access()  -> jbd2_journal_commit_transaction()
+  ->schedule()                          filemap_fdatawait()
+ |                                              |
+ | Task B                                       |
+ | do_unlinkat()                                |
+ | ext4_evict_inode()                           |
+ | jbd2_journal_begin_ordered_truncate()        |
+ | filemap_fdatawrite_range()                   |
+ | ext4_mb_new_blocks()                         |
+  -ext4_mb_discard_group_preallocations() <-----
+
+Here, remove the internal retry on busy PAs from
+ext4_mb_discard_group_preallocations() and instead do a limited number
+of retries inside ext4_mb_discard_preallocations(). This circumvents the
+above problem and also has some advantages:
+
+1. Since the PA is in a busy state, if other groups have free PAs,
+   keeping the current PA may help to reduce fragmentation.
+2. Continue to traverse forward instead of waiting for the current
+   group PA to be released. In most scenarios, the PA discard time
+   can be reduced.
+
+However, in the case of smaller free space, if only a few groups have
+space, then due to multiple traversals of the group, it may increase
+CPU overhead. But in contrast, I feel that the overall benefit is
+better than the cost.
+
+Signed-off-by: Chunguang Xu <brookxu@tencent.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/1637630277-23496-1-git-send-email-brookxu.cn@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c |   40 ++++++++++++++++++----------------------
+ 1 file changed, 18 insertions(+), 22 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4814,7 +4814,7 @@ ext4_mb_release_group_pa(struct ext4_bud
+  */
+ static noinline_for_stack int
+ ext4_mb_discard_group_preallocations(struct super_block *sb,
+-					ext4_group_t group, int needed)
++				     ext4_group_t group, int *busy)
+ {
+ 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ 	struct buffer_head *bitmap_bh = NULL;
+@@ -4822,8 +4822,7 @@ ext4_mb_discard_group_preallocations(str
+ 	struct list_head list;
+ 	struct ext4_buddy e4b;
+ 	int err;
+-	int busy = 0;
+-	int free, free_total = 0;
++	int free = 0;
+ 
+ 	mb_debug(sb, "discard preallocation for group %u\n", group);
+ 	if (list_empty(&grp->bb_prealloc_list))
+@@ -4846,19 +4845,14 @@ ext4_mb_discard_group_preallocations(str
+ 		goto out_dbg;
+ 	}
+ 
+-	if (needed == 0)
+-		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
+-
+ 	INIT_LIST_HEAD(&list);
+-repeat:
+-	free = 0;
+ 	ext4_lock_group(sb, group);
+ 	list_for_each_entry_safe(pa, tmp,
+ 				&grp->bb_prealloc_list, pa_group_list) {
+ 		spin_lock(&pa->pa_lock);
+ 		if (atomic_read(&pa->pa_count)) {
+ 			spin_unlock(&pa->pa_lock);
+-			busy = 1;
++			*busy = 1;
+ 			continue;
+ 		}
+ 		if (pa->pa_deleted) {
+@@ -4898,22 +4892,13 @@ repeat:
+ 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
+ 	}
+ 
+-	free_total += free;
+-
+-	/* if we still need more blocks and some PAs were used, try again */
+-	if (free_total < needed && busy) {
+-		ext4_unlock_group(sb, group);
+-		cond_resched();
+-		busy = 0;
+-		goto repeat;
+-	}
+ 	ext4_unlock_group(sb, group);
+ 	ext4_mb_unload_buddy(&e4b);
+ 	put_bh(bitmap_bh);
+ out_dbg:
+ 	mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
+-		 free_total, group, grp->bb_free);
+-	return free_total;
++		 free, group, grp->bb_free);
++	return free;
+ }
+ 
+ /*
+@@ -5455,13 +5440,24 @@ static int ext4_mb_discard_preallocation
+ {
+ 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+ 	int ret;
+-	int freed = 0;
++	int freed = 0, busy = 0;
++	int retry = 0;
+ 
+ 	trace_ext4_mb_discard_preallocations(sb, needed);
++
++	if (needed == 0)
++		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
++ repeat:
+ 	for (i = 0; i < ngroups && needed > 0; i++) {
+-		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
++		ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
+ 		freed += ret;
+ 		needed -= ret;
++		cond_resched();
++	}
++
++	if (needed > 0 && busy && ++retry < 3) {
++		busy = 0;
++		goto repeat;
+ 	}
+ 
+ 	return freed;
diff --git a/queue-5.16/ext4-fix-fast-commit-may-miss-tracking-range-for-falloc_fl_zero_range.patch b/queue-5.16/ext4-fix-fast-commit-may-miss-tracking-range-for-falloc_fl_zero_range.patch
new file mode 100644
index 00000000000..a8c3facd3aa
--- /dev/null
+++ b/queue-5.16/ext4-fix-fast-commit-may-miss-tracking-range-for-falloc_fl_zero_range.patch
@@ -0,0 +1,54 @@
+From 5e4d0eba1ccaf19f93222abdeda5a368be141785 Mon Sep 17 00:00:00 2001
+From: Xin Yin <yinxin.x@bytedance.com>
+Date: Tue, 21 Dec 2021 10:28:39 +0800
+Subject: ext4: fix fast commit may miss tracking range for FALLOC_FL_ZERO_RANGE
+
+From: Xin Yin <yinxin.x@bytedance.com>
+
+commit 5e4d0eba1ccaf19f93222abdeda5a368be141785 upstream.
+
+When fallocate is called with FALLOC_FL_ZERO_RANGE to convert an already
+initialized range to unwritten, and the range is aligned to the block
+size, fast commit does not track the range for this change.
+
+Fix this by also tracking unwritten ranges in ext4_map_blocks().
+
+Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
+Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Link: https://lore.kernel.org/r/20211221022839.374606-1-yinxin.x@bytedance.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents.c |    2 --
+ fs/ext4/inode.c   |    7 ++++---
+ 2 files changed, 4 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -4647,8 +4647,6 @@ static long ext4_zero_range(struct file
+ 	ret = ext4_mark_inode_dirty(handle, inode);
+ 	if (unlikely(ret))
+ 		goto out_handle;
+-	ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
+-			(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
+ 	/* Zero out partial block at the edges of the range */
+ 	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+ 	if (ret >= 0)
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -741,10 +741,11 @@ out_sem:
+ 			if (ret)
+ 				return ret;
+ 		}
+-		ext4_fc_track_range(handle, inode, map->m_lblk,
+-			    map->m_lblk + map->m_len - 1);
+ 	}
+-
++	if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
++				map->m_flags & EXT4_MAP_MAPPED))
++		ext4_fc_track_range(handle, inode, map->m_lblk,
++					map->m_lblk + map->m_len - 1);
+ 	if (retval < 0)
+ 		ext_debug(inode, "failed with err %d\n", retval);
+ 	return retval;
diff --git a/queue-5.16/ext4-initialize-err_blk-before-calling-__ext4_get_inode_loc.patch b/queue-5.16/ext4-initialize-err_blk-before-calling-__ext4_get_inode_loc.patch
new file mode 100644
index 00000000000..3c19045d2ed
--- /dev/null
+++ b/queue-5.16/ext4-initialize-err_blk-before-calling-__ext4_get_inode_loc.patch
@@ -0,0 +1,43 @@
+From c27c29c6af4f3f4ce925a2111c256733c5a5b430 Mon Sep 17 00:00:00 2001
+From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Date: Wed, 1 Dec 2021 08:34:21 -0800
+Subject: ext4: initialize err_blk before calling __ext4_get_inode_loc
+
+From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+
+commit c27c29c6af4f3f4ce925a2111c256733c5a5b430 upstream.
+
+It is not guaranteed that __ext4_get_inode_loc will definitely set
+err_blk pointer when it returns EIO. To avoid using uninitialized
+variables, let's first set err_blk to 0.
+
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
+Link: https://lore.kernel.org/r/20211201163421.2631661-1-harshads@google.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/inode.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4523,7 +4523,7 @@ has_buffer:
+ static int __ext4_get_inode_loc_noinmem(struct inode *inode,
+ 					struct ext4_iloc *iloc)
+ {
+-	ext4_fsblk_t err_blk;
++	ext4_fsblk_t err_blk = 0;
+ 	int ret;
+ 
+ 	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc,
+@@ -4538,7 +4538,7 @@ static int __ext4_get_inode_loc_noinmem(
+ 
+ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
+ {
+-	ext4_fsblk_t err_blk;
++	ext4_fsblk_t err_blk = 0;
+ 	int ret;
+ 
+ 	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc,
diff --git a/queue-5.16/ext4-make-sure-quota-gets-properly-shutdown-on-error.patch b/queue-5.16/ext4-make-sure-quota-gets-properly-shutdown-on-error.patch
new file mode 100644
index 00000000000..19993a915d5
--- /dev/null
+++ b/queue-5.16/ext4-make-sure-quota-gets-properly-shutdown-on-error.patch
@@ -0,0 +1,51 @@
+From 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 7 Oct 2021 17:53:35 +0200
+Subject: ext4: make sure quota gets properly shutdown on error
+
+From: Jan Kara <jack@suse.cz>
+
+commit 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e upstream.
+
+When we hit an error while enabling quotas and setting inode flags, we
+do not properly shut down the quota subsystem despite returning an error
+from the Q_QUOTAON quotactl. This can lead to odd situations such as the
+kernel using the quota file while it is still writeable by userspace.
+Make sure we properly clean up the quota subsystem in case of error.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20211007155336.12493-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -6275,10 +6275,7 @@ static int ext4_quota_on(struct super_bl
+ 
+ 	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
+ 	err = dquot_quota_on(sb, type, format_id, path);
+-	if (err) {
+-		lockdep_set_quota_inode(path->dentry->d_inode,
+-					     I_DATA_SEM_NORMAL);
+-	} else {
++	if (!err) {
+ 		struct inode *inode = d_inode(path->dentry);
+ 		handle_t *handle;
+ 
+@@ -6298,7 +6295,12 @@ static int ext4_quota_on(struct super_bl
+ 		ext4_journal_stop(handle);
+ 	unlock_inode:
+ 		inode_unlock(inode);
++		if (err)
++			dquot_quota_off(sb, type);
+ 	}
++	if (err)
++		lockdep_set_quota_inode(path->dentry->d_inode,
++					     I_DATA_SEM_NORMAL);
+ 	return err;
+ }
+ 
diff --git a/queue-5.16/ext4-make-sure-to-reset-inode-lockdep-class-when-quota-enabling-fails.patch b/queue-5.16/ext4-make-sure-to-reset-inode-lockdep-class-when-quota-enabling-fails.patch
new file mode 100644
index 00000000000..16123e1e4b7
--- /dev/null
+++ b/queue-5.16/ext4-make-sure-to-reset-inode-lockdep-class-when-quota-enabling-fails.patch
@@ -0,0 +1,49 @@
+From 4013d47a5307fdb5c13370b5392498b00fedd274 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 7 Oct 2021 17:53:36 +0200
+Subject: ext4: make sure to reset inode lockdep class when quota enabling fails
+
+From: Jan Kara <jack@suse.cz>
+
+commit 4013d47a5307fdb5c13370b5392498b00fedd274 upstream.
+
+When we succeed in enabling some quota type but fail to enable another
+one with quota feature, we correctly disable all enabled quota types.
+However we forget to reset i_data_sem lockdep class. When the inode gets
+freed and reused, it will inherit this lockdep class (i_data_sem is
+initialized only when a slab is created) and thus eventually lockdep
+barfs about possible deadlocks.
+
+Reported-and-tested-by: syzbot+3b6f9218b1301ddda3e2@syzkaller.appspotmail.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Cc: stable@kernel.org
+Link: https://lore.kernel.org/r/20211007155336.12493-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/super.c |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -6361,8 +6361,19 @@ int ext4_enable_quotas(struct super_bloc
+ 					"Failed to enable quota tracking "
+ 					"(type=%d, err=%d). Please run "
+ 					"e2fsck to fix.", type, err);
+-				for (type--; type >= 0; type--)
++				for (type--; type >= 0; type--) {
++					struct inode *inode;
++
++					inode = sb_dqopt(sb)->files[type];
++					if (inode)
++						inode = igrab(inode);
+ 					dquot_quota_off(sb, type);
++					if (inode) {
++						lockdep_set_quota_inode(inode,
++							I_DATA_SEM_NORMAL);
++						iput(inode);
++					}
++				}
+ 
+ 				return err;
+ 			}
diff --git a/queue-5.16/series b/queue-5.16/series
index 3eb224c967c..fdcb0a26cad 100644
--- a/queue-5.16/series
+++ b/queue-5.16/series
@@ -879,3 +879,15 @@ pci-pci-bridge-emul-correctly-set-pcie-capabilities.patch
 pci-pci-bridge-emul-set-pci_status_cap_list-for-pcie-device.patch
 xfrm-fix-policy-lookup-for-ipv6-gre-packets.patch
 xfrm-fix-dflt-policy-check-when-there-is-no-policy-configured.patch
+btrfs-fix-deadlock-between-quota-enable-and-other-quota-operations.patch
+btrfs-zoned-cache-reported-zone-during-mount.patch
+btrfs-check-the-root-node-for-uptodate-before-returning-it.patch
+btrfs-add-extent-allocator-hook-to-decide-to-allocate-chunk-or-not.patch
+btrfs-zoned-unset-dedicated-block-group-on-allocation-failure.patch
+btrfs-zoned-fix-chunk-allocation-condition-for-zoned-allocator.patch
+btrfs-respect-the-max-size-in-the-header-when-activating-swap-file.patch
+ext4-make-sure-to-reset-inode-lockdep-class-when-quota-enabling-fails.patch
+ext4-make-sure-quota-gets-properly-shutdown-on-error.patch
+ext4-fix-a-possible-abba-deadlock-due-to-busy-pa.patch
+ext4-initialize-err_blk-before-calling-__ext4_get_inode_loc.patch
+ext4-fix-fast-commit-may-miss-tracking-range-for-falloc_fl_zero_range.patch