From fc70e0b4c30182e30c398fb9aa132838967ceed2 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sashal@kernel.org>
Date: Fri, 9 Feb 2024 13:42:40 -0500
Subject: [PATCH] Fixes for 6.6

Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 ...catherine-as-xfs-maintainer-for-6.6..patch |  32 +++
 queue-6.6/series                              |  21 ++
 ...ent-items-when-recovery-intents-fail.patch | 128 ++++++++++
 ...d-io-and-ficlone-to-run-concurrently.patch | 233 ++++++++++++++++++
 .../xfs-bump-max-fsgeom-struct-version.patch  |  41 +++
 queue-6.6/xfs-clean-up-dqblk-extraction.patch |  92 +++++++
 ...xflag_realtime-handling-in-xfs_ioctl.patch |  67 +++++
 ...ry-does-not-validate-the-recovered-d.patch |  57 +++++
 ...s-factor-out-xfs_defer_pending_abort.patch |  74 ++++++
 ...lect-in-kconfig-xfs_online_scrub_sta.patch |  47 ++++
 ...-internal-error-from-agfl-exhaustion.patch | 111 +++++++++
 ...nversion-error-in-xfs_bmap_del_exten.patch |  43 ++++
 ...s-0-from-xfs_bmapi_write-in-xfs_allo.patch | 100 ++++++++
 ...eing-of-rt-data-fork-extent-mappings.patch | 140 +++++++++++
 ...ry-does-not-validate-the-recovered-i.patch |  77 ++++++
 ...-introduce-protection-for-drop-nlink.patch |  43 ++++
 ...xlen-is-still-congruent-with-prod-wh.patch | 117 +++++++++
 ...he-written-blocks-in-xfs_reflink_end.patch |  53 ++++
 ...vent-rt-growfs-when-quota-is-enabled.patch |  42 ++++
 ...-stable-writes-flag-on-the-rt-device.patch |  84 +++++++
 ...uld-return-negative-errnos-when-rt-d.patch |  67 +++++
 ...c_sema-if-flushing-data-device-fails.patch | 102 ++++++++
 22 files changed, 1771 insertions(+)
 create mode 100644 queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch
 create mode 100644 queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch
 create mode 100644 queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch
 create mode 100644 queue-6.6/xfs-bump-max-fsgeom-struct-version.patch
 create mode 100644 queue-6.6/xfs-clean-up-dqblk-extraction.patch
 create mode 100644 queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch
 create mode 100644 queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch
 create mode 100644 queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch
 create mode 100644 queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch
 create mode 100644 queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch
 create mode 100644 queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch
 create mode 100644 queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch
 create mode 100644 queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch
 create mode 100644 queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch
 create mode 100644 queue-6.6/xfs-introduce-protection-for-drop-nlink.patch
 create mode 100644 queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch
 create mode 100644 queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch
 create mode 100644 queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch
 create mode 100644 queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch
 create mode 100644 queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch
 create mode 100644 queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch

diff --git a/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch b/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch
new file mode 100644
index 00000000000..e556f03924f
--- /dev/null
+++ b/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch
@@ -0,0 +1,32 @@
+From edbd1fc15e333d71ba8a12534f21738eaf617869 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:34 -0800
+Subject: MAINTAINERS: add Catherine as xfs maintainer for 6.6.y
+
+From: Catherine Hoang <catherine.hoang@oracle.com>
+
+This is an attempt to direct the bots and humans that are testing
+LTS 6.6.y towards the maintainer of xfs in the 6.6.y tree.
+
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index dd5de540ec0b..40312bb550f0 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -23630,6 +23630,7 @@ F:	include/xen/arm/swiotlb-xen.h
+ F:	include/xen/swiotlb-xen.h
+ 
+ XFS FILESYSTEM
++M:	Catherine Hoang <catherine.hoang@oracle.com>
+ M:	Chandan Babu R <chandan.babu@oracle.com>
+ R:	Darrick J. Wong <djwong@kernel.org>
+ L:	linux-xfs@vger.kernel.org
+-- 
+2.43.0
+
diff --git a/queue-6.6/series b/queue-6.6/series
index 9f21b1bd48c..2ccd55c87c8 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -14,3 +14,24 @@ rust-upgrade-to-rust-1.72.1.patch
 rust-task-remove-redundant-explicit-link.patch
 rust-print-use-explicit-link-in-documentation.patch
 rust-upgrade-to-rust-1.73.0.patch
+maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch
+xfs-bump-max-fsgeom-struct-version.patch
+xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch
+xfs-prevent-rt-growfs-when-quota-is-enabled.patch
+xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch
+xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch
+xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch
+xfs-introduce-protection-for-drop-nlink.patch
+xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch
+xfs-allow-read-io-and-ficlone-to-run-concurrently.patch
+xfs-factor-out-xfs_defer_pending_abort.patch
+xfs-abort-intent-items-when-recovery-intents-fail.patch
+xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch
+xfs-up-ic_sema-if-flushing-data-device-fails.patch
+xfs-fix-internal-error-from-agfl-exhaustion.patch
+xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch
+xfs-inode-recovery-does-not-validate-the-recovered-i.patch
+xfs-clean-up-dqblk-extraction.patch
+xfs-dquot-recovery-does-not-validate-the-recovered-d.patch
+xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch
+xfs-respect-the-stable-writes-flag-on-the-rt-device.patch
diff --git a/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch b/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch
new file mode 100644
index 00000000000..d27744e26e8
--- /dev/null
+++ b/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch
@@ -0,0 +1,128 @@
+From dedf4b860cb59b0e44f55e778b02dc7196b7ac4e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:45 -0800
+Subject: xfs: abort intent items when recovery intents fail
+
+From: Long Li <leo.lilong@huawei.com>
+
+commit f8f9d952e42dd49ae534f61f2fa7ca0876cb9848 upstream.
+
+When recovering intents, we capture newly created intent items as part of
+committing recovered intent items.  If intent recovery fails at a later
+point, we forget to remove those newly created intent items from the AIL
+and hang:
+
+    [root@localhost ~]# cat /proc/539/stack
+    [<0>] xfs_ail_push_all_sync+0x174/0x230
+    [<0>] xfs_unmount_flush_inodes+0x8d/0xd0
+    [<0>] xfs_mountfs+0x15f7/0x1e70
+    [<0>] xfs_fs_fill_super+0x10ec/0x1b20
+    [<0>] get_tree_bdev+0x3c8/0x730
+    [<0>] vfs_get_tree+0x89/0x2c0
+    [<0>] path_mount+0xecf/0x1800
+    [<0>] do_mount+0xf3/0x110
+    [<0>] __x64_sys_mount+0x154/0x1f0
+    [<0>] do_syscall_64+0x39/0x80
+    [<0>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+When newly created intent items fail to commit via transaction, intent
+recovery hasn't created done items for these newly created intent items,
+so the capture structure is the sole owner of the captured intent items.
+We must release them explicitly or else they leak:
+
+unreferenced object 0xffff888016719108 (size 432):
+  comm "mount", pid 529, jiffies 4294706839 (age 144.463s)
+  hex dump (first 32 bytes):
+    08 91 71 16 80 88 ff ff 08 91 71 16 80 88 ff ff  ..q.......q.....
+    18 91 71 16 80 88 ff ff 18 91 71 16 80 88 ff ff  ..q.......q.....
+  backtrace:
+    [<ffffffff8230c68f>] xfs_efi_init+0x18f/0x1d0
+    [<ffffffff8230c720>] xfs_extent_free_create_intent+0x50/0x150
+    [<ffffffff821b671a>] xfs_defer_create_intents+0x16a/0x340
+    [<ffffffff821bac3e>] xfs_defer_ops_capture_and_commit+0x8e/0xad0
+    [<ffffffff82322bb9>] xfs_cui_item_recover+0x819/0x980
+    [<ffffffff823289b6>] xlog_recover_process_intents+0x246/0xb70
+    [<ffffffff8233249a>] xlog_recover_finish+0x8a/0x9a0
+    [<ffffffff822eeafb>] xfs_log_mount_finish+0x2bb/0x4a0
+    [<ffffffff822c0f4f>] xfs_mountfs+0x14bf/0x1e70
+    [<ffffffff822d1f80>] xfs_fs_fill_super+0x10d0/0x1b20
+    [<ffffffff81a21fa2>] get_tree_bdev+0x3d2/0x6d0
+    [<ffffffff81a1ee09>] vfs_get_tree+0x89/0x2c0
+    [<ffffffff81a9f35f>] path_mount+0xecf/0x1800
+    [<ffffffff81a9fd83>] do_mount+0xf3/0x110
+    [<ffffffff81aa00e4>] __x64_sys_mount+0x154/0x1f0
+    [<ffffffff83968739>] do_syscall_64+0x39/0x80
+
+Fix the problem above by aborting intent items that don't have a done item
+when intent recovery fails.
+
+Fixes: e6fff81e4870 ("xfs: proper replay of deferred ops queued during log recovery")
+Signed-off-by: Long Li <leo.lilong@huawei.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_defer.c | 5 +++--
+ fs/xfs/libxfs/xfs_defer.h | 2 +-
+ fs/xfs/xfs_log_recover.c  | 2 +-
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
+index 88388e12f8e7..f71679ce23b9 100644
+--- a/fs/xfs/libxfs/xfs_defer.c
++++ b/fs/xfs/libxfs/xfs_defer.c
+@@ -763,12 +763,13 @@ xfs_defer_ops_capture(
+ 
+ /* Release all resources that we used to capture deferred ops. */
+ void
+-xfs_defer_ops_capture_free(
++xfs_defer_ops_capture_abort(
+ 	struct xfs_mount		*mp,
+ 	struct xfs_defer_capture	*dfc)
+ {
+ 	unsigned short			i;
+ 
++	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
+ 	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+ 
+ 	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
+@@ -809,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
+ 	/* Commit the transaction and add the capture structure to the list. */
+ 	error = xfs_trans_commit(tp);
+ 	if (error) {
+-		xfs_defer_ops_capture_free(mp, dfc);
++		xfs_defer_ops_capture_abort(mp, dfc);
+ 		return error;
+ 	}
+ 
+diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
+index 114a3a4930a3..8788ad5f6a73 100644
+--- a/fs/xfs/libxfs/xfs_defer.h
++++ b/fs/xfs/libxfs/xfs_defer.h
+@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
+ 		struct list_head *capture_list);
+ void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+ 		struct xfs_defer_resources *dres);
+-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
++void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
+ 		struct xfs_defer_capture *d);
+ void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
+ 
+diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
+index 13b94d2e605b..a1e18b24971a 100644
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
+ 
+ 	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
+ 		list_del_init(&dfc->dfc_list);
+-		xfs_defer_ops_capture_free(mp, dfc);
++		xfs_defer_ops_capture_abort(mp, dfc);
+ 	}
+ }
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch b/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch
new file mode 100644
index 00000000000..704d4ab090b
--- /dev/null
+++ b/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch
@@ -0,0 +1,233 @@
+From f7f5e8c270677efe0b56d4545a82bdacc0363ce4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:43 -0800
+Subject: xfs: allow read IO and FICLONE to run concurrently
+
+From: Catherine Hoang <catherine.hoang@oracle.com>
+
+commit 14a537983b228cb050ceca3a5b743d01315dc4aa upstream.
+
+One of our VM cluster management products needs to snapshot KVM image
+files so that they can be restored in case of failure. Snapshotting is
+done by redirecting VM disk writes to a sidecar file and using reflink
+on the disk image, specifically the FICLONE ioctl as used by
+"cp --reflink". Reflink locks the source and destination files while it
+operates, which means that reads from the main vm disk image are blocked,
+causing the vm to stall. When an image file is heavily fragmented, the
+copy process could take several minutes. Some of the vm image files have
+50-100 million extent records, and duplicating that much metadata locks
+the file for 30 minutes or more. Having activities suspended for such
+a long time in a cluster node could result in node eviction.
+
+Clone operations and read IO do not change any data in the source file,
+so they should be able to run concurrently. Demote the exclusive locks
+taken by FICLONE to shared locks to allow reads while cloning. While a
+clone is in progress, writes will take the IOLOCK_EXCL, so they block
+until the clone completes.
+
+Link: https://lore.kernel.org/linux-xfs/8911B94D-DD29-4D6E-B5BC-32EAF1866245@oracle.com/
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_file.c    | 63 +++++++++++++++++++++++++++++++++++---------
+ fs/xfs/xfs_inode.c   | 17 ++++++++++++
+ fs/xfs/xfs_inode.h   |  9 +++++++
+ fs/xfs/xfs_reflink.c |  4 +++
+ 4 files changed, 80 insertions(+), 13 deletions(-)
+
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 203700278ddb..e33e5e13b95f 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -214,6 +214,43 @@ xfs_ilock_iocb(
+ 	return 0;
+ }
+ 
++static int
++xfs_ilock_iocb_for_write(
++	struct kiocb		*iocb,
++	unsigned int		*lock_mode)
++{
++	ssize_t			ret;
++	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
++
++	ret = xfs_ilock_iocb(iocb, *lock_mode);
++	if (ret)
++		return ret;
++
++	if (*lock_mode == XFS_IOLOCK_EXCL)
++		return 0;
++	if (!xfs_iflags_test(ip, XFS_IREMAPPING))
++		return 0;
++
++	xfs_iunlock(ip, *lock_mode);
++	*lock_mode = XFS_IOLOCK_EXCL;
++	return xfs_ilock_iocb(iocb, *lock_mode);
++}
++
++static unsigned int
++xfs_ilock_for_write_fault(
++	struct xfs_inode	*ip)
++{
++	/* get a shared lock if no remapping in progress */
++	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
++	if (!xfs_iflags_test(ip, XFS_IREMAPPING))
++		return XFS_MMAPLOCK_SHARED;
++
++	/* wait for remapping to complete */
++	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
++	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
++	return XFS_MMAPLOCK_EXCL;
++}
++
+ STATIC ssize_t
+ xfs_file_dio_read(
+ 	struct kiocb		*iocb,
+@@ -551,7 +588,7 @@ xfs_file_dio_write_aligned(
+ 	unsigned int		iolock = XFS_IOLOCK_SHARED;
+ 	ssize_t			ret;
+ 
+-	ret = xfs_ilock_iocb(iocb, iolock);
++	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
+ 	if (ret)
+ 		return ret;
+ 	ret = xfs_file_write_checks(iocb, from, &iolock);
+@@ -618,7 +655,7 @@ xfs_file_dio_write_unaligned(
+ 		flags = IOMAP_DIO_FORCE_WAIT;
+ 	}
+ 
+-	ret = xfs_ilock_iocb(iocb, iolock);
++	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
+ 	if (ret)
+ 		return ret;
+ 
+@@ -1180,7 +1217,7 @@ xfs_file_remap_range(
+ 	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
+ 		xfs_log_force_inode(dest);
+ out_unlock:
+-	xfs_iunlock2_io_mmap(src, dest);
++	xfs_iunlock2_remapping(src, dest);
+ 	if (ret)
+ 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+ 	return remapped > 0 ? remapped : ret;
+@@ -1328,6 +1365,7 @@ __xfs_filemap_fault(
+ 	struct inode		*inode = file_inode(vmf->vma->vm_file);
+ 	struct xfs_inode	*ip = XFS_I(inode);
+ 	vm_fault_t		ret;
++	unsigned int		lock_mode = 0;
+ 
+ 	trace_xfs_filemap_fault(ip, order, write_fault);
+ 
+@@ -1336,25 +1374,24 @@ __xfs_filemap_fault(
+ 		file_update_time(vmf->vma->vm_file);
+ 	}
+ 
++	if (IS_DAX(inode) || write_fault)
++		lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
++
+ 	if (IS_DAX(inode)) {
+ 		pfn_t pfn;
+ 
+-		xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ 		ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
+ 		if (ret & VM_FAULT_NEEDDSYNC)
+ 			ret = dax_finish_sync_fault(vmf, order, pfn);
+-		xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
++	} else if (write_fault) {
++		ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
+ 	} else {
+-		if (write_fault) {
+-			xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+-			ret = iomap_page_mkwrite(vmf,
+-					&xfs_page_mkwrite_iomap_ops);
+-			xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+-		} else {
+-			ret = filemap_fault(vmf);
+-		}
++		ret = filemap_fault(vmf);
+ 	}
+ 
++	if (lock_mode)
++		xfs_iunlock(XFS_I(inode), lock_mode);
++
+ 	if (write_fault)
+ 		sb_end_pagefault(inode->i_sb);
+ 	return ret;
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index fb85c5c81745..f9d29acd72b9 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3628,6 +3628,23 @@ xfs_iunlock2_io_mmap(
+ 		inode_unlock(VFS_I(ip1));
+ }
+ 
++/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */
++void
++xfs_iunlock2_remapping(
++	struct xfs_inode	*ip1,
++	struct xfs_inode	*ip2)
++{
++	xfs_iflags_clear(ip1, XFS_IREMAPPING);
++
++	if (ip1 != ip2)
++		xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED);
++	xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
++
++	if (ip1 != ip2)
++		inode_unlock_shared(VFS_I(ip1));
++	inode_unlock(VFS_I(ip2));
++}
++
+ /*
+  * Reload the incore inode list for this inode.  Caller should ensure that
+  * the link count cannot change, either by taking ILOCK_SHARED or otherwise
+diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
+index 0c5bdb91152e..3dc47937da5d 100644
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
+ /* Quotacheck is running but inode has not been added to quota counts. */
+ #define XFS_IQUOTAUNCHECKED	(1 << 14)
+ 
++/*
++ * Remap in progress. Callers that wish to update file data while
++ * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake
++ * the lock in exclusive mode. Relocking the file will block until
++ * IREMAPPING is cleared.
++ */
++#define XFS_IREMAPPING		(1U << 15)
++
+ /* All inode state flags related to inode reclaim. */
+ #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
+ 				 XFS_IRECLAIM | \
+@@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work);
+ 
+ int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+ void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
++void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2);
+ 
+ static inline bool
+ xfs_inode_unlinked_incomplete(
+diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
+index eb9102453aff..658edee8381d 100644
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep(
+ 	if (ret)
+ 		goto out_unlock;
+ 
++	xfs_iflags_set(src, XFS_IREMAPPING);
++	if (inode_in != inode_out)
++		xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
++
+ 	return 0;
+ out_unlock:
+ 	xfs_iunlock2_io_mmap(src, dest);
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch b/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch
new file mode 100644
index 00000000000..cdd03ce59ad
--- /dev/null
+++ b/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch
@@ -0,0 +1,41 @@
+From a05bc3d408f3a79da838b165951bedb6a380a10c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:35 -0800
+Subject: xfs: bump max fsgeom struct version
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit 9488062805943c2d63350d3ef9e4dc093799789a upstream.
+
+The latest version of the fs geometry structure is v5.  Bump this
+constant so that xfs_db and mkfs calls to libxfs_fs_geometry will fill
+out all the fields.
+
+IOWs, this commit is a no-op for the kernel, but will be useful for
+userspace reporting in later changes.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_sb.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
+index a5e14740ec9a..19134b23c10b 100644
+--- a/fs/xfs/libxfs/xfs_sb.h
++++ b/fs/xfs/libxfs/xfs_sb.h
+@@ -25,7 +25,7 @@ extern uint64_t	xfs_sb_version_to_features(struct xfs_sb *sbp);
+ 
+ extern int	xfs_update_secondary_sbs(struct xfs_mount *mp);
+ 
+-#define XFS_FS_GEOM_MAX_STRUCT_VER	(4)
++#define XFS_FS_GEOM_MAX_STRUCT_VER	(5)
+ extern void	xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo,
+ 				int struct_version);
+ extern int	xfs_sb_read_secondary(struct xfs_mount *mp,
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-clean-up-dqblk-extraction.patch b/queue-6.6/xfs-clean-up-dqblk-extraction.patch
new file mode 100644
index 00000000000..8508f40de24
--- /dev/null
+++ b/queue-6.6/xfs-clean-up-dqblk-extraction.patch
@@ -0,0 +1,92 @@
+From 399cb61dfc6e9cb6b9015cb131eb6b8969011ac2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:51 -0800
+Subject: xfs: clean up dqblk extraction
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit ed17f7da5f0c8b65b7b5f7c98beb0aadbc0546ee upstream.
+
+Since the introduction of xfs_dqblk in V5, xfs really ought to find the
+dqblk pointer from the dquot buffer, then compute the xfs_disk_dquot
+pointer from the dqblk pointer.  Fix the open-coded xfs_buf_offset calls
+and do the type checking in the correct order.
+
+Note that this has made no practical difference since the start of the
+xfs_disk_dquot is coincident with the start of the xfs_dqblk.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_dquot.c              | 5 +++--
+ fs/xfs/xfs_dquot_item_recover.c | 7 ++++---
+ 2 files changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
+index ac6ba646624d..a013b87ab8d5 100644
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -562,7 +562,8 @@ xfs_dquot_from_disk(
+ 	struct xfs_dquot	*dqp,
+ 	struct xfs_buf		*bp)
+ {
+-	struct xfs_disk_dquot	*ddqp = bp->b_addr + dqp->q_bufoffset;
++	struct xfs_dqblk	*dqb = xfs_buf_offset(bp, dqp->q_bufoffset);
++	struct xfs_disk_dquot	*ddqp = &dqb->dd_diskdq;
+ 
+ 	/*
+ 	 * Ensure that we got the type and ID we were looking for.
+@@ -1250,7 +1251,7 @@ xfs_qm_dqflush(
+ 	}
+ 
+ 	/* Flush the incore dquot to the ondisk buffer. */
+-	dqblk = bp->b_addr + dqp->q_bufoffset;
++	dqblk = xfs_buf_offset(bp, dqp->q_bufoffset);
+ 	xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp);
+ 
+ 	/*
+diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
+index 8966ba842395..db2cb5e4197b 100644
+--- a/fs/xfs/xfs_dquot_item_recover.c
++++ b/fs/xfs/xfs_dquot_item_recover.c
+@@ -65,6 +65,7 @@ xlog_recover_dquot_commit_pass2(
+ {
+ 	struct xfs_mount		*mp = log->l_mp;
+ 	struct xfs_buf			*bp;
++	struct xfs_dqblk		*dqb;
+ 	struct xfs_disk_dquot		*ddq, *recddq;
+ 	struct xfs_dq_logformat		*dq_f;
+ 	xfs_failaddr_t			fa;
+@@ -130,14 +131,14 @@ xlog_recover_dquot_commit_pass2(
+ 		return error;
+ 
+ 	ASSERT(bp);
+-	ddq = xfs_buf_offset(bp, dq_f->qlf_boffset);
++	dqb = xfs_buf_offset(bp, dq_f->qlf_boffset);
++	ddq = &dqb->dd_diskdq;
+ 
+ 	/*
+ 	 * If the dquot has an LSN in it, recover the dquot only if it's less
+ 	 * than the lsn of the transaction we are replaying.
+ 	 */
+ 	if (xfs_has_crc(mp)) {
+-		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
+ 		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
+ 
+ 		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+@@ -147,7 +148,7 @@ xlog_recover_dquot_commit_pass2(
+ 
+ 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ 	if (xfs_has_crc(mp)) {
+-		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
++		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
+ 				 XFS_DQUOT_CRC_OFF);
+ 	}
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch b/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch
new file mode 100644
index 00000000000..c2dbfb48cb1
--- /dev/null
+++ b/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch
@@ -0,0 +1,67 @@
+From 1ce001494b6243c87b01e6d68b430c2d84cb2e39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:53 -0800
+Subject: xfs: clean up FS_XFLAG_REALTIME handling in xfs_ioctl_setattr_xflags
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit c421df0b19430417a04f68919fc3d1943d20ac04 upstream.
+
+Introduce a local boolean variable for FS_XFLAG_REALTIME to make the
+checks for it more obvious, and de-densify a few of the conditionals
+using it to make them more readable while at it.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20231025141020.192413-4-hch@lst.de
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_ioctl.c | 22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
+index 55bb01173cde..be69e7be713e 100644
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1120,23 +1120,25 @@ xfs_ioctl_setattr_xflags(
+ 	struct fileattr		*fa)
+ {
+ 	struct xfs_mount	*mp = ip->i_mount;
++	bool			rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME);
+ 	uint64_t		i_flags2;
+ 
+-	/* Can't change realtime flag if any extents are allocated. */
+-	if ((ip->i_df.if_nextents || ip->i_delayed_blks) &&
+-	    XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
+-		return -EINVAL;
++	if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
++		/* Can't change realtime flag if any extents are allocated. */
++		if (ip->i_df.if_nextents || ip->i_delayed_blks)
++			return -EINVAL;
++	}
+ 
+-	/* If realtime flag is set then must have realtime device */
+-	if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
++	if (rtflag) {
++		/* If realtime flag is set then must have realtime device */
+ 		if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
+ 		    (ip->i_extsize % mp->m_sb.sb_rextsize))
+ 			return -EINVAL;
+-	}
+ 
+-	/* Clear reflink if we are actually able to set the rt flag. */
+-	if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
+-		ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
++		/* Clear reflink if we are actually able to set the rt flag. */
++		if (xfs_is_reflink_inode(ip))
++			ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
++	}
+ 
+ 	/* diflags2 only valid for v3 inodes. */
+ 	i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch b/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch
new file mode 100644
index 00000000000..8056793526c
--- /dev/null
+++ b/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch
@@ -0,0 +1,57 @@
+From 53a81473c11e37e0a80e352a91836bb2eb276235 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:52 -0800
+Subject: xfs: dquot recovery does not validate the recovered dquot
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit 9c235dfc3d3f901fe22acb20f2ab37ff39f2ce02 upstream.
+
+When we're recovering ondisk quota records from the log, we need to
+validate the recovered buffer contents before writing them to disk.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_dquot_item_recover.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c
+index db2cb5e4197b..2c2720ce6923 100644
+--- a/fs/xfs/xfs_dquot_item_recover.c
++++ b/fs/xfs/xfs_dquot_item_recover.c
+@@ -19,6 +19,7 @@
+ #include "xfs_log.h"
+ #include "xfs_log_priv.h"
+ #include "xfs_log_recover.h"
++#include "xfs_error.h"
+ 
+ STATIC void
+ xlog_recover_dquot_ra_pass2(
+@@ -152,6 +153,19 @@ xlog_recover_dquot_commit_pass2(
+ 				 XFS_DQUOT_CRC_OFF);
+ 	}
+ 
++	/* Validate the recovered dquot. */
++	fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id);
++	if (fa) {
++		XFS_CORRUPTION_ERROR("Bad dquot after recovery",
++				XFS_ERRLEVEL_LOW, mp, dqb,
++				sizeof(struct xfs_dqblk));
++		xfs_alert(mp,
++ "Metadata corruption detected at %pS, dquot 0x%x",
++				fa, dq_f->qlf_id);
++		error = -EFSCORRUPTED;
++		goto out_release;
++	}
++
+ 	ASSERT(dq_f->qlf_size == 2);
+ 	ASSERT(bp->b_mount == mp);
+ 	bp->b_flags |= _XBF_LOGRECOVERY;
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch b/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch
new file mode 100644
index 00000000000..788145a977b
--- /dev/null
+++ b/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch
@@ -0,0 +1,74 @@
+From a3ae93d0aac98fb0e4dd6bc78511eef095eb8958 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:44 -0800
+Subject: xfs: factor out xfs_defer_pending_abort
+
+From: Long Li <leo.lilong@huawei.com>
+
+commit 2a5db859c6825b5d50377dda9c3cc729c20cad43 upstream.
+
+Factor out xfs_defer_pending_abort() from xfs_defer_trans_abort().  The new
+helper does not take a transaction parameter, so it can be used after the
+transaction life cycle has ended.
+
+Signed-off-by: Long Li <leo.lilong@huawei.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_defer.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
+index bcfb6a4203cd..88388e12f8e7 100644
+--- a/fs/xfs/libxfs/xfs_defer.c
++++ b/fs/xfs/libxfs/xfs_defer.c
+@@ -245,21 +245,18 @@ xfs_defer_create_intents(
+ 	return ret;
+ }
+ 
+-/* Abort all the intents that were committed. */
+ STATIC void
+-xfs_defer_trans_abort(
+-	struct xfs_trans		*tp,
+-	struct list_head		*dop_pending)
++xfs_defer_pending_abort(
++	struct xfs_mount		*mp,
++	struct list_head		*dop_list)
+ {
+ 	struct xfs_defer_pending	*dfp;
+ 	const struct xfs_defer_op_type	*ops;
+ 
+-	trace_xfs_defer_trans_abort(tp, _RET_IP_);
+-
+ 	/* Abort intent items that don't have a done item. */
+-	list_for_each_entry(dfp, dop_pending, dfp_list) {
++	list_for_each_entry(dfp, dop_list, dfp_list) {
+ 		ops = defer_op_types[dfp->dfp_type];
+-		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
++		trace_xfs_defer_pending_abort(mp, dfp);
+ 		if (dfp->dfp_intent && !dfp->dfp_done) {
+ 			ops->abort_intent(dfp->dfp_intent);
+ 			dfp->dfp_intent = NULL;
+@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
+ 	}
+ }
+ 
++/* Abort all the intents that were committed. */
++STATIC void
++xfs_defer_trans_abort(
++	struct xfs_trans		*tp,
++	struct list_head		*dop_pending)
++{
++	trace_xfs_defer_trans_abort(tp, _RET_IP_);
++	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
++}
++
+ /*
+  * Capture resources that the caller said not to release ("held") when the
+  * transaction commits.  Caller is responsible for zero-initializing @dres.
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch b/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch
new file mode 100644
index 00000000000..8e0a9c64be1
--- /dev/null
+++ b/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch
@@ -0,0 +1,47 @@
+From 156bdf447fef047ee91b002cf23c47cb0fadae19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:49 -0800
+Subject: xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Anthony Iliopoulos <ailiop@suse.com>
+
+commit a2e4388adfa44684c7c428a5a5980efe0d75e13e upstream.
+
+Commit 57c0f4a8ea3a attempted to fix the select in the kconfig entry
+XFS_ONLINE_SCRUB_STATS by selecting XFS_DEBUG, but the original
+intention was to select DEBUG_FS, since the feature relies on debugfs to
+export the related scrub statistics.
+
+Fixes: 57c0f4a8ea3a ("xfs: fix select in config XFS_ONLINE_SCRUB_STATS")
+
+Reported-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Signed-off-by: Anthony Iliopoulos <ailiop@suse.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
+index ed0bc8cbc703..567fb37274d3 100644
+--- a/fs/xfs/Kconfig
++++ b/fs/xfs/Kconfig
+@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
+ 	bool "XFS online metadata check usage data collection"
+ 	default y
+ 	depends on XFS_ONLINE_SCRUB
+-	select XFS_DEBUG
++	select DEBUG_FS
+ 	help
+ 	  If you say Y here, the kernel will gather usage data about
+ 	  the online metadata check subsystem.  This includes the number
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch b/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch
new file mode 100644
index 00000000000..4a48d824e74
--- /dev/null
+++ b/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch
@@ -0,0 +1,111 @@
+From 47fde25b235763386afbfdedbabd59953cc38047 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:48 -0800
+Subject: xfs: fix internal error from AGFL exhaustion
+
+From: Omar Sandoval <osandov@fb.com>
+
+commit f63a5b3769ad7659da4c0420751d78958ab97675 upstream.
+
+We've been seeing XFS errors like the following:
+
+XFS: Internal error i != 1 at line 3526 of file fs/xfs/libxfs/xfs_btree.c.  Caller xfs_btree_insert+0x1ec/0x280
+...
+Call Trace:
+ xfs_corruption_error+0x94/0xa0
+ xfs_btree_insert+0x221/0x280
+ xfs_alloc_fixup_trees+0x104/0x3e0
+ xfs_alloc_ag_vextent_size+0x667/0x820
+ xfs_alloc_fix_freelist+0x5d9/0x750
+ xfs_free_extent_fix_freelist+0x65/0xa0
+ __xfs_free_extent+0x57/0x180
+...
+
+This is the XFS_IS_CORRUPT() check in xfs_btree_insert() when
+xfs_btree_insrec() fails.
+
+After converting this into a panic and dissecting the core dump, I found
+that xfs_btree_insrec() is failing because it's trying to split a leaf
+node in the cntbt when the AG free list is empty. In particular, it's
+failing to get a block from the AGFL _while trying to refill the AGFL_.
+
+If a single operation splits every level of the bnobt and the cntbt (and
+the rmapbt if it is enabled) at once, the free list will be empty. Then,
+when the next operation tries to refill the free list, it allocates
+space. If the allocation does not use a full extent, it will need to
+insert records for the remaining space in the bnobt and cntbt. And if
+those new records go in full leaves, the leaves (and potentially more
+nodes up to the old root) need to be split.
+
+Fix it by accounting for the additional splits that may be required to
+refill the free list in the calculation for the minimum free list size.
+
+P.S. As far as I can tell, this bug has existed for a long time -- maybe
+back to xfs-history commit afdf80ae7405 ("Add XFS_AG_MAXLEVELS macros
+...") in April 1994! It requires a very unlucky sequence of events, and
+in fact we didn't hit it until a particular sparse mmap workload updated
+from 5.12 to 5.19. But this bug existed in 5.12, so it must've been
+exposed by some other change in allocation or writeback patterns. It's
+also much less likely to be hit with the rmapbt enabled, since that
+increases the minimum free list size and is unlikely to split at the
+same time as the bnobt and cntbt.
+
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Omar Sandoval <osandov@fb.com>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 27 ++++++++++++++++++++++++---
+ 1 file changed, 24 insertions(+), 3 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
+index 3069194527dd..100ab5931b31 100644
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
+ 
+ 	ASSERT(mp->m_alloc_maxlevels > 0);
+ 
++	/*
++	 * For a btree shorter than the maximum height, the worst case is that
++	 * every level gets split and a new level is added, then while inserting
++	 * another entry to refill the AGFL, every level under the old root gets
++	 * split again. This is:
++	 *
++	 *   (full height split reservation) + (AGFL refill split height)
++	 * = (current height + 1) + (current height - 1)
++	 * = (new height) + (new height - 2)
++	 * = 2 * new height - 2
++	 *
++	 * For a btree of maximum height, the worst case is that every level
++	 * under the root gets split, then while inserting another entry to
++	 * refill the AGFL, every level under the root gets split again. This is
++	 * also:
++	 *
++	 *   2 * (current height - 1)
++	 * = 2 * (new height - 1)
++	 * = 2 * new height - 2
++	 */
++
+ 	/* space needed by-bno freespace btree */
+ 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
+-				       mp->m_alloc_maxlevels);
++				       mp->m_alloc_maxlevels) * 2 - 2;
+ 	/* space needed by-size freespace btree */
+ 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
+-				       mp->m_alloc_maxlevels);
++				       mp->m_alloc_maxlevels) * 2 - 2;
+ 	/* space needed reverse mapping used space btree */
+ 	if (xfs_has_rmapbt(mp))
+ 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
+-						mp->m_rmap_maxlevels);
++						mp->m_rmap_maxlevels) * 2 - 2;
+ 
+ 	return min_free;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch b/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch
new file mode 100644
index 00000000000..d7adc41daa6
--- /dev/null
+++ b/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch
@@ -0,0 +1,43 @@
+From c38e81be2bc8b64c942cba29de80ecc0c44af64d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:39 -0800
+Subject: xfs: fix units conversion error in xfs_bmap_del_extent_delay
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit ddd98076d5c075c8a6c49d9e6e8ee12844137f23 upstream.
+
+The unit conversions in this function do not make sense.  First we
+convert a block count to bytes, then divide that bytes value by
+rextsize, which is in blocks, to get an rt extent count.  You can't
+divide bytes by blocks to get a (possibly multiblock) extent value.
+
+Fortunately nobody uses delalloc on the rt volume so this hasn't
+mattered.
+
+Fixes: fa5c836ca8eb5 ("xfs: refactor xfs_bunmapi_cow")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
+index 26bfa34b4bbf..617cc7e78e38 100644
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4827,7 +4827,7 @@ xfs_bmap_del_extent_delay(
+ 	ASSERT(got_endoff >= del_endoff);
+ 
+ 	if (isrt) {
+-		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
++		uint64_t	rtexts = del->br_blockcount;
+ 
+ 		do_div(rtexts, mp->m_sb.sb_rextsize);
+ 		xfs_mod_frextents(mp, rtexts);
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch b/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch
new file mode 100644
index 00000000000..5cfd6ef895e
--- /dev/null
+++ b/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch
@@ -0,0 +1,100 @@
+From b126c3855bd639237cc1e848c082f3499c425bfa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:42 -0800
+Subject: xfs: handle nimaps=0 from xfs_bmapi_write in xfs_alloc_file_space
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 35dc55b9e80cb9ec4bcb969302000b002b2ed850 upstream.
+
+If xfs_bmapi_write finds a delalloc extent at the requested range, it
+tries to convert the entire delalloc extent to a real allocation.
+
+But if the allocator cannot find a single free extent large enough to
+cover the start block of the requested range, xfs_bmapi_write will
+return 0 but leave *nimaps set to 0.
+
+In that case we simply need to keep looping with the same startoffset_fsb
+so that one of the following allocations will eventually reach the
+requested range.
+
+Note that this could affect any caller of xfs_bmapi_write that covers
+an existing delayed allocation.  As far as I can tell we do not have
+any other such caller, though - the regular writeback path uses
+xfs_bmapi_convert_delalloc to convert delayed allocations to real ones,
+and direct I/O invalidates the page cache first.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_bmap_util.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
+index fcefab687285..ad4aba5002c1 100644
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -780,12 +780,10 @@ xfs_alloc_file_space(
+ {
+ 	xfs_mount_t		*mp = ip->i_mount;
+ 	xfs_off_t		count;
+-	xfs_filblks_t		allocated_fsb;
+ 	xfs_filblks_t		allocatesize_fsb;
+ 	xfs_extlen_t		extsz, temp;
+ 	xfs_fileoff_t		startoffset_fsb;
+ 	xfs_fileoff_t		endoffset_fsb;
+-	int			nimaps;
+ 	int			rt;
+ 	xfs_trans_t		*tp;
+ 	xfs_bmbt_irec_t		imaps[1], *imapp;
+@@ -808,7 +806,6 @@ xfs_alloc_file_space(
+ 
+ 	count = len;
+ 	imapp = &imaps[0];
+-	nimaps = 1;
+ 	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
+ 	endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
+ 	allocatesize_fsb = endoffset_fsb - startoffset_fsb;
+@@ -819,6 +816,7 @@ xfs_alloc_file_space(
+ 	while (allocatesize_fsb && !error) {
+ 		xfs_fileoff_t	s, e;
+ 		unsigned int	dblocks, rblocks, resblks;
++		int		nimaps = 1;
+ 
+ 		/*
+ 		 * Determine space reservations for data/realtime.
+@@ -884,15 +882,19 @@ xfs_alloc_file_space(
+ 		if (error)
+ 			break;
+ 
+-		allocated_fsb = imapp->br_blockcount;
+-
+-		if (nimaps == 0) {
+-			error = -ENOSPC;
+-			break;
++		/*
++		 * If the allocator cannot find a single free extent large
++		 * enough to cover the start block of the requested range,
++		 * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
++		 *
++		 * In that case we simply need to keep looping with the same
++		 * startoffset_fsb so that one of the following allocations
++		 * will eventually reach the requested range.
++		 */
++		if (nimaps) {
++			startoffset_fsb += imapp->br_blockcount;
++			allocatesize_fsb -= imapp->br_blockcount;
+ 		}
+-
+-		startoffset_fsb += allocated_fsb;
+-		allocatesize_fsb -= allocated_fsb;
+ 	}
+ 
+ 	return error;
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch b/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch
new file mode 100644
index 00000000000..e5b276b16a7
--- /dev/null
+++ b/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch
@@ -0,0 +1,140 @@
+From d4aa355a46f147a2f392f925bd6b037dac0f1117 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:36 -0800
+Subject: xfs: hoist freeing of rt data fork extent mappings
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit 6c664484337b37fa0cf6e958f4019623e30d40f7 upstream.
+
+Currently, xfs_bmap_del_extent_real contains a bunch of code to convert
+the physical extent of a data fork mapping for a realtime file into rt
+extents and pass that to the rt extent freeing function.  Since the
+details of this aren't needed when CONFIG_XFS_REALTIME=n, move it to
+xfs_rtbitmap.c to reduce code size when realtime isn't enabled.
+
+This will (one day) enable realtime EFIs to reuse the same
+unit-converting call with less code duplication.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c     | 19 +++----------------
+ fs/xfs/libxfs/xfs_rtbitmap.c | 33 +++++++++++++++++++++++++++++++++
+ fs/xfs/xfs_rtalloc.h         |  5 +++++
+ 3 files changed, 41 insertions(+), 16 deletions(-)
+
+diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
+index 30c931b38853..26bfa34b4bbf 100644
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -5057,33 +5057,20 @@ xfs_bmap_del_extent_real(
+ 
+ 	flags = XFS_ILOG_CORE;
+ 	if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+-		xfs_filblks_t	len;
+-		xfs_extlen_t	mod;
+-
+-		len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
+-				  &mod);
+-		ASSERT(mod == 0);
+-
+ 		if (!(bflags & XFS_BMAPI_REMAP)) {
+-			xfs_fsblock_t	bno;
+-
+-			bno = div_u64_rem(del->br_startblock,
+-					mp->m_sb.sb_rextsize, &mod);
+-			ASSERT(mod == 0);
+-
+-			error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
++			error = xfs_rtfree_blocks(tp, del->br_startblock,
++					del->br_blockcount);
+ 			if (error)
+ 				goto done;
+ 		}
+ 
+ 		do_fx = 0;
+-		nblks = len * mp->m_sb.sb_rextsize;
+ 		qfield = XFS_TRANS_DQ_RTBCOUNT;
+ 	} else {
+ 		do_fx = 1;
+-		nblks = del->br_blockcount;
+ 		qfield = XFS_TRANS_DQ_BCOUNT;
+ 	}
++	nblks = del->br_blockcount;
+ 
+ 	del_endblock = del->br_startblock + del->br_blockcount;
+ 	if (cur) {
+diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
+index fa180ab66b73..655108a4cd05 100644
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -1005,6 +1005,39 @@ xfs_rtfree_extent(
+ 	return 0;
+ }
+ 
++/*
++ * Free some blocks in the realtime subvolume.  rtbno and rtlen are in units of
++ * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen
++ * cannot exceed XFS_MAX_BMBT_EXTLEN.
++ */
++int
++xfs_rtfree_blocks(
++	struct xfs_trans	*tp,
++	xfs_fsblock_t		rtbno,
++	xfs_filblks_t		rtlen)
++{
++	struct xfs_mount	*mp = tp->t_mountp;
++	xfs_rtblock_t		bno;
++	xfs_filblks_t		len;
++	xfs_extlen_t		mod;
++
++	ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
++
++	len = div_u64_rem(rtlen, mp->m_sb.sb_rextsize, &mod);
++	if (mod) {
++		ASSERT(mod == 0);
++		return -EIO;
++	}
++
++	bno = div_u64_rem(rtbno, mp->m_sb.sb_rextsize, &mod);
++	if (mod) {
++		ASSERT(mod == 0);
++		return -EIO;
++	}
++
++	return xfs_rtfree_extent(tp, bno, len);
++}
++
+ /* Find all the free records within a given range. */
+ int
+ xfs_rtalloc_query_range(
+diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
+index 62c7ad79cbb6..3b2f1b499a11 100644
+--- a/fs/xfs/xfs_rtalloc.h
++++ b/fs/xfs/xfs_rtalloc.h
+@@ -58,6 +58,10 @@ xfs_rtfree_extent(
+ 	xfs_rtblock_t		bno,	/* starting block number to free */
+ 	xfs_extlen_t		len);	/* length of extent freed */
+ 
++/* Same as above, but in units of rt blocks. */
++int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
++		xfs_filblks_t rtlen);
++
+ /*
+  * Initialize realtime fields in the mount structure.
+  */
+@@ -139,6 +143,7 @@ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
+ #else
+ # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)    (ENOSYS)
+ # define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
++# define xfs_rtfree_blocks(t,rb,rl)			(ENOSYS)
+ # define xfs_rtpick_extent(m,t,l,rb)                    (ENOSYS)
+ # define xfs_growfs_rt(mp,in)                           (ENOSYS)
+ # define xfs_rtalloc_query_range(t,l,h,f,p)             (ENOSYS)
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch b/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch
new file mode 100644
index 00000000000..570765bed2e
--- /dev/null
+++ b/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch
@@ -0,0 +1,77 @@
+From b10f08cb42bde45627cccd214db68c83b3c7cbf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:50 -0800
+Subject: xfs: inode recovery does not validate the recovered inode
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 038ca189c0d2c1570b4d922f25b524007c85cf94 upstream.
+
+Discovered when trying to track down a weird recovery corruption
+issue that wasn't detected at recovery time.
+
+The specific corruption was a zero extent count field when big
+extent counts are in use, and it turns out the dinode verifier
+doesn't detect that specific corruption case, either. So fix it too.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c   |  3 +++
+ fs/xfs/xfs_inode_item_recover.c | 14 +++++++++++++-
+ 2 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
+index a35781577cad..0f970a0b3382 100644
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -508,6 +508,9 @@ xfs_dinode_verify(
+ 	if (mode && nextents + naextents > nblocks)
+ 		return __this_address;
+ 
++	if (nextents + naextents == 0 && nblocks != 0)
++		return __this_address;
++
+ 	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
+ 		return __this_address;
+ 
+diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
+index e6609067ef26..144198a6b270 100644
+--- a/fs/xfs/xfs_inode_item_recover.c
++++ b/fs/xfs/xfs_inode_item_recover.c
+@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
+ 	struct xfs_log_dinode		*ldip;
+ 	uint				isize;
+ 	int				need_free = 0;
++	xfs_failaddr_t			fa;
+ 
+ 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+ 		in_f = item->ri_buf[0].i_addr;
+@@ -530,8 +531,19 @@ xlog_recover_inode_commit_pass2(
+ 	    (dip->di_mode != 0))
+ 		error = xfs_recover_inode_owner_change(mp, dip, in_f,
+ 						       buffer_list);
+-	/* re-generate the checksum. */
++	/* re-generate the checksum and validate the recovered inode. */
+ 	xfs_dinode_calc_crc(log->l_mp, dip);
++	fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
++	if (fa) {
++		XFS_CORRUPTION_ERROR(
++			"Bad dinode after recovery",
++				XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
++		xfs_alert(mp,
++			"Metadata corruption detected at %pS, inode 0x%llx",
++			fa, in_f->ilf_ino);
++		error = -EFSCORRUPTED;
++		goto out_release;
++	}
+ 
+ 	ASSERT(bp->b_mount == mp);
+ 	bp->b_flags |= _XBF_LOGRECOVERY;
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch b/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch
new file mode 100644
index 00000000000..7c94f56b214
--- /dev/null
+++ b/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch
@@ -0,0 +1,43 @@
+From 0554272f8e9dd7e5a94b88a96446cf4e6c8c83fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:41 -0800
+Subject: xfs: introduce protection for drop nlink
+
+From: Cheng Lin <cheng.lin130@zte.com.cn>
+
+commit 2b99e410b28f5a75ae417e6389e767c7745d6fce upstream.
+
+When an abnormal drop_nlink is detected on the inode,
+return an error to avoid corruption propagation.
+
+Signed-off-by: Cheng Lin <cheng.lin130@zte.com.cn>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_inode.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index 4d55f58d99b7..fb85c5c81745 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -918,6 +918,13 @@ xfs_droplink(
+ 	xfs_trans_t *tp,
+ 	xfs_inode_t *ip)
+ {
++	if (VFS_I(ip)->i_nlink == 0) {
++		xfs_alert(ip->i_mount,
++			  "%s: Attempt to drop inode (%llu) with nlink zero.",
++			  __func__, ip->i_ino);
++		return -EFSCORRUPTED;
++	}
++
+ 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+ 
+ 	drop_nlink(VFS_I(ip));
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch b/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch
new file mode 100644
index 00000000000..a015379eeff
--- /dev/null
+++ b/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch
@@ -0,0 +1,117 @@
+From 8401165f28dd5709d239433e47b26d6bafcbddfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:40 -0800
+Subject: xfs: make sure maxlen is still congruent with prod when rounding down
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit f6a2dae2a1f52ea23f649c02615d073beba4cc35 upstream.
+
+In commit 2a6ca4baed62, we tried to fix an overflow problem in the
+realtime allocator that was caused by an overly large maxlen value
+causing xfs_rtcheck_range to run off the end of the realtime bitmap.
+Unfortunately, there is a subtle bug here -- maxlen (and minlen) both
+have to be aligned with @prod, but @prod can be larger than 1 if the
+user has set an extent size hint on the file, and that extent size hint
+is larger than the realtime extent size.
+
+If the rt free space extents are not aligned to this file's extszhint
+because other files without extent size hints allocated space (or the
+number of rt extents is similarly not aligned), then it's possible that
+maxlen after clamping to sb_rextents will no longer be aligned to prod.
+The allocation will succeed just fine, but we still trip the assertion.
+
+Fix the problem by reducing maxlen by any misalignment with prod.  While
+we're at it, split the assertions into two so that we can tell which
+value had the bad alignment.
+
+Fixes: 2a6ca4baed62 ("xfs: make sure the rt allocator doesn't run off the end")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_rtalloc.c | 31 ++++++++++++++++++++++++++-----
+ 1 file changed, 26 insertions(+), 5 deletions(-)
+
+diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
+index 31fd65b3aaa9..0e4e2df08aed 100644
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -211,6 +211,23 @@ xfs_rtallocate_range(
+ 	return error;
+ }
+ 
++/*
++ * Make sure we don't run off the end of the rt volume.  Be careful that
++ * adjusting maxlen downwards doesn't cause us to fail the alignment checks.
++ */
++static inline xfs_extlen_t
++xfs_rtallocate_clamp_len(
++	struct xfs_mount	*mp,
++	xfs_rtblock_t		startrtx,
++	xfs_extlen_t		rtxlen,
++	xfs_extlen_t		prod)
++{
++	xfs_extlen_t		ret;
++
++	ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx;
++	return rounddown(ret, prod);
++}
++
+ /*
+  * Attempt to allocate an extent minlen<=len<=maxlen starting from
+  * bitmap block bbno.  If we don't get maxlen then use prod to trim
+@@ -248,7 +265,7 @@ xfs_rtallocate_extent_block(
+ 	     i <= end;
+ 	     i++) {
+ 		/* Make sure we don't scan off the end of the rt volume. */
+-		maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
++		maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod);
+ 
+ 		/*
+ 		 * See if there's a free extent of maxlen starting at i.
+@@ -355,7 +372,8 @@ xfs_rtallocate_extent_exact(
+ 	int		isfree;		/* extent is free */
+ 	xfs_rtblock_t	next;		/* next block to try (dummy) */
+ 
+-	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
++	ASSERT(minlen % prod == 0);
++	ASSERT(maxlen % prod == 0);
+ 	/*
+ 	 * Check if the range in question (for maxlen) is free.
+ 	 */
+@@ -438,7 +456,9 @@ xfs_rtallocate_extent_near(
+ 	xfs_rtblock_t	n;		/* next block to try */
+ 	xfs_rtblock_t	r;		/* result block */
+ 
+-	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
++	ASSERT(minlen % prod == 0);
++	ASSERT(maxlen % prod == 0);
++
+ 	/*
+ 	 * If the block number given is off the end, silently set it to
+ 	 * the last block.
+@@ -447,7 +467,7 @@ xfs_rtallocate_extent_near(
+ 		bno = mp->m_sb.sb_rextents - 1;
+ 
+ 	/* Make sure we don't run off the end of the rt volume. */
+-	maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
++	maxlen = xfs_rtallocate_clamp_len(mp, bno, maxlen, prod);
+ 	if (maxlen < minlen) {
+ 		*rtblock = NULLRTBLOCK;
+ 		return 0;
+@@ -638,7 +658,8 @@ xfs_rtallocate_extent_size(
+ 	xfs_rtblock_t	r;		/* result block number */
+ 	xfs_suminfo_t	sum;		/* summary information for extents */
+ 
+-	ASSERT(minlen % prod == 0 && maxlen % prod == 0);
++	ASSERT(minlen % prod == 0);
++	ASSERT(maxlen % prod == 0);
+ 	ASSERT(maxlen != 0);
+ 
+ 	/*
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch b/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch
new file mode 100644
index 00000000000..33b997ab081
--- /dev/null
+++ b/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch
@@ -0,0 +1,53 @@
+From 88759c80d3e673e75ed46cd283b090db16d0564a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:46 -0800
+Subject: xfs: only remap the written blocks in xfs_reflink_end_cow_extent
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 55f669f34184ecb25b8353f29c7f6f1ae5b313d1 upstream.
+
+xfs_reflink_end_cow_extent looks up the COW extent and the data fork
+extent at offset_fsb, and then proceeds to remap the common subset
+between the two.
+
+It does however not limit the remapped extent to the passed in
+[*offset_fsb, end_fsb] range and thus potentially remaps more blocks than
+the one handled by the current I/O completion.  This means that with
+sufficiently large data and COW extents we could be remapping COW fork
+mappings that have not been written to, leading to a stale data exposure
+on a powerfail event.
+
+We used to have a xfs_trim_range to make the remap fit the I/O completion
+range, but that got (apparently accidentally) removed in commit
+df2fd88f8ac7 ("xfs: rewrite xfs_reflink_end_cow to use intents").
+
+Note that I've only found this by code inspection, and a test case would
+probably require very specific delay and error injection.
+
+Fixes: df2fd88f8ac7 ("xfs: rewrite xfs_reflink_end_cow to use intents")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_reflink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
+index 658edee8381d..e5b62dc28466 100644
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
+ 		}
+ 	}
+ 	del = got;
++	xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
+ 
+ 	/* Grab the corresponding mapping in the data fork. */
+ 	nmaps = 1;
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch b/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch
new file mode 100644
index 00000000000..ab85faf0383
--- /dev/null
+++ b/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch
@@ -0,0 +1,42 @@
+From 6c15f39f53a79bf881765aba6bbba165bc2b8318 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:37 -0800
+Subject: xfs: prevent rt growfs when quota is enabled
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit b73494fa9a304ab95b59f07845e8d7d36e4d23e0 upstream.
+
+Quotas aren't (yet) supported with realtime, so we shouldn't allow
+userspace to set up a realtime section when quotas are enabled, even if
+they attached one via mount options.  IOWS, you shouldn't be able to do:
+
+# mkfs.xfs -f /dev/sda
+# mount /dev/sda /mnt -o rtdev=/dev/sdb,usrquota
+# xfs_growfs -r /mnt
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_rtalloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
+index 16534e9873f6..31fd65b3aaa9 100644
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -954,7 +954,7 @@ xfs_growfs_rt(
+ 		return -EINVAL;
+ 
+ 	/* Unsupported realtime features. */
+-	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
++	if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
+ 		return -EOPNOTSUPP;
+ 
+ 	nrblocks = in->newblocks;
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch b/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch
new file mode 100644
index 00000000000..1e3973dff86
--- /dev/null
+++ b/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch
@@ -0,0 +1,84 @@
+From f70972235a9432ab34b98435eb6433ba3a3c2437 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:54 -0800
+Subject: xfs: respect the stable writes flag on the RT device
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 9c04138414c00ae61421f36ada002712c4bac94a upstream.
+
+Update the per-folio stable writes flag depending on which device an
+inode resides on.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20231025141020.192413-5-hch@lst.de
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_inode.h | 8 ++++++++
+ fs/xfs/xfs_ioctl.c | 8 ++++++++
+ fs/xfs/xfs_iops.c  | 7 +++++++
+ 3 files changed, 23 insertions(+)
+
+diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
+index 3dc47937da5d..3beb470f1892 100644
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip);
+ extern void xfs_setup_iops(struct xfs_inode *ip);
+ extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
+ 
++static inline void xfs_update_stable_writes(struct xfs_inode *ip)
++{
++	if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev))
++		mapping_set_stable_writes(VFS_I(ip)->i_mapping);
++	else
++		mapping_clear_stable_writes(VFS_I(ip)->i_mapping);
++}
++
+ /*
+  * When setting up a newly allocated inode, we need to call
+  * xfs_finish_inode_setup() once the inode is fully instantiated at
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
+index be69e7be713e..535f6d38cdb5 100644
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1149,6 +1149,14 @@ xfs_ioctl_setattr_xflags(
+ 	ip->i_diflags2 = i_flags2;
+ 
+ 	xfs_diflags_to_iflags(ip, false);
++
++	/*
++	 * Make the stable writes flag match that of the device the inode
++	 * resides on when flipping the RT flag.
++	 */
++	if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode))
++		xfs_update_stable_writes(ip);
++
+ 	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+ 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ 	XFS_STATS_INC(mp, xs_ig_attrchg);
+diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
+index 2b3b05c28e9e..b8ec045708c3 100644
+--- a/fs/xfs/xfs_iops.c
++++ b/fs/xfs/xfs_iops.c
+@@ -1298,6 +1298,13 @@ xfs_setup_inode(
+ 	gfp_mask = mapping_gfp_mask(inode->i_mapping);
+ 	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));
+ 
++	/*
++	 * For real-time inodes update the stable write flags to that of the RT
++	 * device instead of the data device.
++	 */
++	if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip))
++		xfs_update_stable_writes(ip);
++
+ 	/*
+ 	 * If there is no attribute fork no ACL can exist on this inode,
+ 	 * and it can't have any file capabilities attached to it either.
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch b/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch
new file mode 100644
index 00000000000..686151568e4
--- /dev/null
+++ b/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch
@@ -0,0 +1,67 @@
+From e75c773a18b424a062d93af81ad3c4dd92fd7b78 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:38 -0800
+Subject: xfs: rt stubs should return negative errnos when rt disabled
+
+From: Darrick J. Wong <djwong@kernel.org>
+
+commit c2988eb5cff75c02bc57e02c323154aa08f55b78 upstream.
+
+When realtime support is not compiled into the kernel, these functions
+should return negative errnos, not positive errnos.  While we're at it,
+fix a broken macro declaration.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_rtalloc.h | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
+index 3b2f1b499a11..65c284e9d33e 100644
+--- a/fs/xfs/xfs_rtalloc.h
++++ b/fs/xfs/xfs_rtalloc.h
+@@ -141,17 +141,17 @@ int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+ 			       bool *is_free);
+ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
+ #else
+-# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)    (ENOSYS)
+-# define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
+-# define xfs_rtfree_blocks(t,rb,rl)			(ENOSYS)
+-# define xfs_rtpick_extent(m,t,l,rb)                    (ENOSYS)
+-# define xfs_growfs_rt(mp,in)                           (ENOSYS)
+-# define xfs_rtalloc_query_range(t,l,h,f,p)             (ENOSYS)
+-# define xfs_rtalloc_query_all(m,t,f,p)                 (ENOSYS)
+-# define xfs_rtbuf_get(m,t,b,i,p)                       (ENOSYS)
+-# define xfs_verify_rtbno(m, r)			(false)
+-# define xfs_rtalloc_extent_is_free(m,t,s,l,i)          (ENOSYS)
+-# define xfs_rtalloc_reinit_frextents(m)                (0)
++# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb)	(-ENOSYS)
++# define xfs_rtfree_extent(t,b,l)			(-ENOSYS)
++# define xfs_rtfree_blocks(t,rb,rl)			(-ENOSYS)
++# define xfs_rtpick_extent(m,t,l,rb)			(-ENOSYS)
++# define xfs_growfs_rt(mp,in)				(-ENOSYS)
++# define xfs_rtalloc_query_range(m,t,l,h,f,p)		(-ENOSYS)
++# define xfs_rtalloc_query_all(m,t,f,p)			(-ENOSYS)
++# define xfs_rtbuf_get(m,t,b,i,p)			(-ENOSYS)
++# define xfs_verify_rtbno(m, r)				(false)
++# define xfs_rtalloc_extent_is_free(m,t,s,l,i)		(-ENOSYS)
++# define xfs_rtalloc_reinit_frextents(m)		(0)
+ static inline int		/* error */
+ xfs_rtmount_init(
+ 	xfs_mount_t	*mp)	/* file system mount structure */
+@@ -162,7 +162,7 @@ xfs_rtmount_init(
+ 	xfs_warn(mp, "Not built with CONFIG_XFS_RT");
+ 	return -ENOSYS;
+ }
+-# define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
++# define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (-ENOSYS))
+ # define xfs_rtunmount_inodes(m)
+ #endif	/* CONFIG_XFS_RT */
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch b/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch
new file mode 100644
index 00000000000..60fdbe10281
--- /dev/null
+++ b/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch
@@ -0,0 +1,102 @@
+From 69c011d2a20e37461246216ade0501eff863e090 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Feb 2024 15:20:47 -0800
+Subject: xfs: up(ic_sema) if flushing data device fails
+
+From: Leah Rumancik <leah.rumancik@gmail.com>
+
+commit 471de20303dda0b67981e06d59cc6c4a83fd2a3c upstream.
+
+We flush the data device cache before we issue external log IO. If
+the flush fails, we shut down the log immediately and return. However,
+the iclog->ic_sema is left in a decremented state so let's add an up().
+Prior to this patch, xfs/438 would fail consistently when running with
+an external log device:
+
+sync
+  -> xfs_log_force
+  -> xlog_write_iclog
+      -> down(&iclog->ic_sema)
+      -> blkdev_issue_flush (fail causes us to initiate shutdown)
+          -> xlog_force_shutdown
+          -> return
+
+unmount
+  -> xfs_log_umount
+      -> xlog_wait_iclog_completion
+          -> down(&iclog->ic_sema) --------> HANG
+
+There is a second early return / shutdown. Make sure the up() happens
+for it as well. Also make sure we cleanup the iclog state,
+xlog_state_done_syncing, before dropping the iclog lock.
+
+Fixes: b5d721eaae47 ("xfs: external logs need to flush data device")
+Fixes: 842a42d126b4 ("xfs: shutdown on failure to add page to log bio")
+Fixes: 7d839e325af2 ("xfs: check return codes when flushing block devices")
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_log.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
+index 51c100c86177..ee206facf0dc 100644
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1893,9 +1893,7 @@ xlog_write_iclog(
+ 		 * the buffer manually, the code needs to be kept in sync
+ 		 * with the I/O completion path.
+ 		 */
+-		xlog_state_done_syncing(iclog);
+-		up(&iclog->ic_sema);
+-		return;
++		goto sync;
+ 	}
+ 
+ 	/*
+@@ -1925,20 +1923,17 @@ xlog_write_iclog(
+ 		 * avoid shutdown re-entering this path and erroring out again.
+ 		 */
+ 		if (log->l_targ != log->l_mp->m_ddev_targp &&
+-		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
+-			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+-			return;
+-		}
++		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
++			goto shutdown;
+ 	}
+ 	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
+ 		iclog->ic_bio.bi_opf |= REQ_FUA;
+ 
+ 	iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
+ 
+-	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
+-		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+-		return;
+-	}
++	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
++		goto shutdown;
++
+ 	if (is_vmalloc_addr(iclog->ic_data))
+ 		flush_kernel_vmap_range(iclog->ic_data, count);
+ 
+@@ -1959,6 +1954,12 @@ xlog_write_iclog(
+ 	}
+ 
+ 	submit_bio(&iclog->ic_bio);
++	return;
++shutdown:
++	xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
++sync:
++	xlog_state_done_syncing(iclog);
++	up(&iclog->ic_sema);
+ }
+ 
+ /*
+-- 
+2.43.0
+
-- 
2.47.3