io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch
io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch
io_uring-fix-corner-case-forgetting-to-vunmap.patch
+xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch
+xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch
+xfs-fix-confusing-xfs_extent_item-variable-names.patch
+xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch
+xfs-pass-per-ag-references-to-xfs_free_extent.patch
+xfs-validate-block-number-being-freed-before-adding-to-xefi.patch
+xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch
+xfs-use-deferred-frees-for-btree-block-freeing.patch
+xfs-reserve-less-log-space-when-recovering-log-intent-items.patch
+xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch
+xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch
+xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch
+xfs-don-t-leak-recovered-attri-intent-items.patch
+xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch
+xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch
+xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch
+xfs-make-rextslog-computation-consistent-with-mkfs.patch
+xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch
+xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch
+xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch
+xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch
+xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch
+xfs-initialise-di_crc-in-xfs_log_dinode.patch
+xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch
+xfs-fix-perag-leak-when-growfs-fails.patch
+xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch
+xfs-update-dir3-leaf-block-metadata-after-swap.patch
+xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch
+xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch
--- /dev/null
+From stable+bounces-124381-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:45 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:44 -0700
+Subject: xfs: add lock protection when remove perag from radix tree
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Long Li <leo.lilong@huawei.com>, Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-25-leah.rumancik@gmail.com>
+
+From: Long Li <leo.lilong@huawei.com>
+
+[ Upstream commit 07afd3173d0c6d24a47441839a835955ec6cf0d4 ]
+
+[ 6.1: resolved conflict in xfs_ag.c ]
+
+Take mp->m_perag_lock for deletions from the perag radix tree in
+xfs_initialize_perag to prevent racing with tagging operations.
+Lookups are fine - they are RCU protected so already deal with the
+tree changing shape underneath the lookup - but tagging operations
+require the tree to be stable while the tags are propagated back up
+to the root.
+
+Right now there's nothing stopping radix tree tagging from operating
+while a growfs operation is in progress and adding/removing new entries
+into the radix tree.
+
+Hence we can have traversals that require a stable tree occurring at
+the same time we are removing unused entries from the radix tree which
+causes the shape of the tree to change.
+
+Likely this hasn't caused a problem in the past because we are only
+doing append addition and removal so the active AG part of the tree
+is not changing shape, but that doesn't mean it is safe. Just making
+the radix tree modifications serialise against each other is obviously
+correct.
+
+Signed-off-by: Long Li <leo.lilong@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -345,13 +345,17 @@ xfs_initialize_perag(
+ return 0;
+
+ out_remove_pag:
++ spin_lock(&mp->m_perag_lock);
+ radix_tree_delete(&mp->m_perag_tree, index);
++ spin_unlock(&mp->m_perag_lock);
+ out_free_pag:
+ kmem_free(pag);
+ out_unwind_new_pags:
+ /* unwind any prior newly initialized pags */
+ for (index = first_initialised; index < agcount; index++) {
++ spin_lock(&mp->m_perag_lock);
+ pag = radix_tree_delete(&mp->m_perag_tree, index);
++ spin_unlock(&mp->m_perag_lock);
+ if (!pag)
+ break;
+ xfs_buf_hash_destroy(pag);
--- /dev/null
+From stable+bounces-124369-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:27 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:32 -0700
+Subject: xfs: consider minlen sized extents in xfs_rtallocate_extent_block
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-13-leah.rumancik@gmail.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 944df75958807d56f2db9fdc769eb15dd9f0366a ]
+
+minlen is the lower bound on the extent length that the caller can
+accept, and maxlen is at this point the maximal available length.
+This means a minlen extent is perfectly fine to use, so do it. This
+matches the equivalent logic in xfs_rtallocate_extent_exact that also
+accepts a minlen sized extent.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_rtalloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -318,7 +318,7 @@ xfs_rtallocate_extent_block(
+ /*
+ * Searched the whole thing & didn't find a maxlen free extent.
+ */
+- if (minlen < maxlen && besti != -1) {
++ if (minlen <= maxlen && besti != -1) {
+ xfs_extlen_t p; /* amount to trim length by */
+
+ /*
--- /dev/null
+From stable+bounces-124368-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:27 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:31 -0700
+Subject: xfs: convert rt bitmap extent lengths to xfs_rtbxlen_t
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-12-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit f29c3e745dc253bf9d9d06ddc36af1a534ba1dd0 ]
+
+[ 6.1: excluded changes to trace.h as xchk_rtsum_record_free
+does not exist yet ]
+
+XFS uses xfs_rtblock_t for many different uses, which makes it much more
+difficult to perform a unit analysis on the codebase. One of these
+(ab)uses is when we need to store the length of a free space extent as
+stored in the realtime bitmap. Because there can be up to 2^64 realtime
+extents in a filesystem, we need a new type that is larger than
+xfs_rtxlen_t for callers that are querying the bitmap directly. This
+means scrub and growfs.
+
+Create this type as "xfs_rtbxlen_t" and use it to store 64-bit rtx
+lengths. 'b' stands for 'bitmap' or 'big'; reader's choice.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_format.h | 2 +-
+ fs/xfs/libxfs/xfs_rtbitmap.h | 2 +-
+ fs/xfs/libxfs/xfs_types.h | 1 +
+ 3 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -98,7 +98,7 @@ typedef struct xfs_sb {
+ uint32_t sb_blocksize; /* logical block size, bytes */
+ xfs_rfsblock_t sb_dblocks; /* number of data blocks */
+ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
+- xfs_rtblock_t sb_rextents; /* number of realtime extents */
++ xfs_rtbxlen_t sb_rextents; /* number of realtime extents */
+ uuid_t sb_uuid; /* user-visible file system unique id */
+ xfs_fsblock_t sb_logstart; /* starting block of log if internal */
+ xfs_ino_t sb_rootino; /* root inode number */
+--- a/fs/xfs/libxfs/xfs_rtbitmap.h
++++ b/fs/xfs/libxfs/xfs_rtbitmap.h
+@@ -13,7 +13,7 @@
+ */
+ struct xfs_rtalloc_rec {
+ xfs_rtblock_t ar_startext;
+- xfs_rtblock_t ar_extcount;
++ xfs_rtbxlen_t ar_extcount;
+ };
+
+ typedef int (*xfs_rtalloc_query_range_fn)(
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -31,6 +31,7 @@ typedef uint64_t xfs_rfsblock_t; /* bloc
+ typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
+ typedef uint64_t xfs_fileoff_t; /* block number in a file */
+ typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
++typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */
+
+ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
+
--- /dev/null
+From stable+bounces-124376-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:36 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:39 -0700
+Subject: xfs: don't allow overly small or large realtime volumes
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-20-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit e14293803f4e84eb23a417b462b56251033b5a66 ]
+
+Don't allow realtime volumes that are less than one rt extent long.
+This has been broken across 4 LTS kernels with nobody noticing, so let's
+just disable it.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.h | 13 +++++++++++++
+ fs/xfs/libxfs/xfs_sb.c | 3 ++-
+ fs/xfs/xfs_rtalloc.c | 2 ++
+ 3 files changed, 17 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.h
++++ b/fs/xfs/libxfs/xfs_rtbitmap.h
+@@ -73,6 +73,18 @@ int xfs_rtfree_blocks(struct xfs_trans *
+
+ uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+
++/* Do we support an rt volume having this number of rtextents? */
++static inline bool
++xfs_validate_rtextents(
++ xfs_rtbxlen_t rtextents)
++{
++ /* No runt rt volumes */
++ if (rtextents == 0)
++ return false;
++
++ return true;
++}
++
+ #else /* CONFIG_XFS_RT */
+ # define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+ # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+@@ -81,6 +93,7 @@ uint8_t xfs_compute_rextslog(xfs_rtbxlen
+ # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
+ # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
+ # define xfs_compute_rextslog(rtx) (0)
++# define xfs_validate_rtextents(rtx) (false)
+ #endif /* CONFIG_XFS_RT */
+
+ #endif /* __XFS_RTBITMAP_H__ */
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -502,7 +502,8 @@ xfs_validate_sb_common(
+ rbmblocks = howmany_64(sbp->sb_rextents,
+ NBBY * sbp->sb_blocksize);
+
+- if (sbp->sb_rextents != rexts ||
++ if (!xfs_validate_rtextents(rexts) ||
++ sbp->sb_rextents != rexts ||
+ sbp->sb_rextslog != xfs_compute_rextslog(rexts) ||
+ sbp->sb_rbmblocks != rbmblocks) {
+ xfs_notice(mp,
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -998,6 +998,8 @@ xfs_growfs_rt(
+ */
+ nrextents = nrblocks;
+ do_div(nrextents, in->extsize);
++ if (!xfs_validate_rtextents(nrextents))
++ return -EINVAL;
+ nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
+ nrextslog = xfs_compute_rextslog(nrextents);
+ nrsumlevels = nrextslog + 1;
--- /dev/null
+From stable+bounces-124370-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:28 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:33 -0700
+Subject: xfs: don't leak recovered attri intent items
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-14-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 07bcbdf020c9fd3c14bec51c50225a2a02707b94 ]
+
+If recovery finds an xattr log intent item calling for the removal of an
+attribute and the file doesn't even have an attr fork, we know that the
+removal is trivially complete. However, we can't just exit the recovery
+function without doing something about the recovered log intent item --
+it's still on the AIL, and not logging an attrd item means it stays
+there forever.
+
+This has likely not been seen in practice because few people use LARP
+and the runtime code won't log the attri for a no-attrfork removexattr
+operation. But let's fix this anyway.
+
+Also we shouldn't really be testing the attr fork presence until we've
+taken the ILOCK, though this doesn't matter much in recovery, which is
+single threaded.
+
+Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_attr_item.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -329,6 +329,13 @@ xfs_xattri_finish_update(
+ goto out;
+ }
+
++ /* If an attr removal is trivially complete, we're done. */
++ if (attr->xattri_op_flags == XFS_ATTRI_OP_FLAGS_REMOVE &&
++ !xfs_inode_hasattr(args->dp)) {
++ error = 0;
++ goto out;
++ }
++
+ error = xfs_attr_set_iter(attr);
+ if (!error && attr->xattri_dela_state != XFS_DAS_DONE)
+ error = -EAGAIN;
+@@ -608,8 +615,6 @@ xfs_attri_item_recover(
+ attr->xattri_dela_state = xfs_attr_init_add_state(args);
+ break;
+ case XFS_ATTRI_OP_FLAGS_REMOVE:
+- if (!xfs_inode_hasattr(args->dp))
+- goto out;
+ attr->xattri_dela_state = xfs_attr_init_remove_state(args);
+ break;
+ default:
--- /dev/null
+From stable+bounces-124383-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:47 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:46 -0700
+Subject: xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>, Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-27-leah.rumancik@gmail.com>
+
+From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+
+[ Upstream commit e6af9c98cbf0164a619d95572136bfb54d482dd6 ]
+
+In the case of returning -ENOSPC, ensure logflagsp is initialized to 0.
+Otherwise the caller __xfs_bunmapi will write an uninitialized, illegal
+tmp_logflags value into the xfs log, which might cause unpredictable
+errors in the log recovery procedure.
+
+Also, remove the flags variable and set *logflagsp directly, so that
+the code is more robust in the long run.
+
+Fixes: 1b24b633aafe ("xfs: move some more code into xfs_bmap_del_extent_real")
+Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 73 +++++++++++++++++++----------------------------
+ 1 file changed, 31 insertions(+), 42 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4999,7 +4999,6 @@ xfs_bmap_del_extent_real(
+ xfs_fileoff_t del_endoff; /* first offset past del */
+ int do_fx; /* free extent at end of routine */
+ int error; /* error return value */
+- int flags = 0;/* inode logging flags */
+ struct xfs_bmbt_irec got; /* current extent entry */
+ xfs_fileoff_t got_endoff; /* first offset past got */
+ int i; /* temp state */
+@@ -5012,6 +5011,8 @@ xfs_bmap_del_extent_real(
+ uint32_t state = xfs_bmap_fork_to_state(whichfork);
+ struct xfs_bmbt_irec old;
+
++ *logflagsp = 0;
++
+ mp = ip->i_mount;
+ XFS_STATS_INC(mp, xs_del_exlist);
+
+@@ -5024,7 +5025,6 @@ xfs_bmap_del_extent_real(
+ ASSERT(got_endoff >= del_endoff);
+ ASSERT(!isnullstartblock(got.br_startblock));
+ qfield = 0;
+- error = 0;
+
+ /*
+ * If it's the case where the directory code is running with no block
+@@ -5040,13 +5040,13 @@ xfs_bmap_del_extent_real(
+ del->br_startoff > got.br_startoff && del_endoff < got_endoff)
+ return -ENOSPC;
+
+- flags = XFS_ILOG_CORE;
++ *logflagsp = XFS_ILOG_CORE;
+ if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
+ if (!(bflags & XFS_BMAPI_REMAP)) {
+ error = xfs_rtfree_blocks(tp, del->br_startblock,
+ del->br_blockcount);
+ if (error)
+- goto done;
++ return error;
+ }
+
+ do_fx = 0;
+@@ -5061,11 +5061,9 @@ xfs_bmap_del_extent_real(
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, &got, &i);
+ if (error)
+- goto done;
+- if (XFS_IS_CORRUPT(mp, i != 1)) {
+- error = -EFSCORRUPTED;
+- goto done;
+- }
++ return error;
++ if (XFS_IS_CORRUPT(mp, i != 1))
++ return -EFSCORRUPTED;
+ }
+
+ if (got.br_startoff == del->br_startoff)
+@@ -5082,17 +5080,15 @@ xfs_bmap_del_extent_real(
+ xfs_iext_prev(ifp, icur);
+ ifp->if_nextents--;
+
+- flags |= XFS_ILOG_CORE;
++ *logflagsp |= XFS_ILOG_CORE;
+ if (!cur) {
+- flags |= xfs_ilog_fext(whichfork);
++ *logflagsp |= xfs_ilog_fext(whichfork);
+ break;
+ }
+ if ((error = xfs_btree_delete(cur, &i)))
+- goto done;
+- if (XFS_IS_CORRUPT(mp, i != 1)) {
+- error = -EFSCORRUPTED;
+- goto done;
+- }
++ return error;
++ if (XFS_IS_CORRUPT(mp, i != 1))
++ return -EFSCORRUPTED;
+ break;
+ case BMAP_LEFT_FILLING:
+ /*
+@@ -5103,12 +5099,12 @@ xfs_bmap_del_extent_real(
+ got.br_blockcount -= del->br_blockcount;
+ xfs_iext_update_extent(ip, state, icur, &got);
+ if (!cur) {
+- flags |= xfs_ilog_fext(whichfork);
++ *logflagsp |= xfs_ilog_fext(whichfork);
+ break;
+ }
+ error = xfs_bmbt_update(cur, &got);
+ if (error)
+- goto done;
++ return error;
+ break;
+ case BMAP_RIGHT_FILLING:
+ /*
+@@ -5117,12 +5113,12 @@ xfs_bmap_del_extent_real(
+ got.br_blockcount -= del->br_blockcount;
+ xfs_iext_update_extent(ip, state, icur, &got);
+ if (!cur) {
+- flags |= xfs_ilog_fext(whichfork);
++ *logflagsp |= xfs_ilog_fext(whichfork);
+ break;
+ }
+ error = xfs_bmbt_update(cur, &got);
+ if (error)
+- goto done;
++ return error;
+ break;
+ case 0:
+ /*
+@@ -5139,18 +5135,18 @@ xfs_bmap_del_extent_real(
+ new.br_state = got.br_state;
+ new.br_startblock = del_endblock;
+
+- flags |= XFS_ILOG_CORE;
++ *logflagsp |= XFS_ILOG_CORE;
+ if (cur) {
+ error = xfs_bmbt_update(cur, &got);
+ if (error)
+- goto done;
++ return error;
+ error = xfs_btree_increment(cur, 0, &i);
+ if (error)
+- goto done;
++ return error;
+ cur->bc_rec.b = new;
+ error = xfs_btree_insert(cur, &i);
+ if (error && error != -ENOSPC)
+- goto done;
++ return error;
+ /*
+ * If get no-space back from btree insert, it tried a
+ * split, and we have a zero block reservation. Fix up
+@@ -5163,33 +5159,28 @@ xfs_bmap_del_extent_real(
+ */
+ error = xfs_bmbt_lookup_eq(cur, &got, &i);
+ if (error)
+- goto done;
+- if (XFS_IS_CORRUPT(mp, i != 1)) {
+- error = -EFSCORRUPTED;
+- goto done;
+- }
++ return error;
++ if (XFS_IS_CORRUPT(mp, i != 1))
++ return -EFSCORRUPTED;
+ /*
+ * Update the btree record back
+ * to the original value.
+ */
+ error = xfs_bmbt_update(cur, &old);
+ if (error)
+- goto done;
++ return error;
+ /*
+ * Reset the extent record back
+ * to the original value.
+ */
+ xfs_iext_update_extent(ip, state, icur, &old);
+- flags = 0;
+- error = -ENOSPC;
+- goto done;
+- }
+- if (XFS_IS_CORRUPT(mp, i != 1)) {
+- error = -EFSCORRUPTED;
+- goto done;
++ *logflagsp = 0;
++ return -ENOSPC;
+ }
++ if (XFS_IS_CORRUPT(mp, i != 1))
++ return -EFSCORRUPTED;
+ } else
+- flags |= xfs_ilog_fext(whichfork);
++ *logflagsp |= xfs_ilog_fext(whichfork);
+
+ ifp->if_nextents++;
+ xfs_iext_next(ifp, icur);
+@@ -5213,7 +5204,7 @@ xfs_bmap_del_extent_real(
+ ((bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN));
+ if (error)
+- goto done;
++ return error;
+ }
+ }
+
+@@ -5228,9 +5219,7 @@ xfs_bmap_del_extent_real(
+ if (qfield && !(bflags & XFS_BMAPI_REMAP))
+ xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
+
+-done:
+- *logflagsp = flags;
+- return error;
++ return 0;
+ }
+
+ /*
--- /dev/null
+From stable+bounces-124375-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:35 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:38 -0700
+Subject: xfs: fix 32-bit truncation in xfs_compute_rextslog
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-19-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit cf8f0e6c1429be7652869059ea44696b72d5b726 ]
+
+It's quite reasonable that some customer somewhere will want to
+configure a realtime volume with more than 2^32 extents. If they try to
+do this, the highbit32() call will truncate the upper bits of the
+xfs_rtbxlen_t and produce the wrong value for rextslog. This in turn
+causes the rsumlevels to be wrong, which results in a realtime summary
+file that is the wrong length. Fix that.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -1133,13 +1133,15 @@ xfs_rtalloc_extent_is_free(
+
+ /*
+ * Compute the maximum level number of the realtime summary file, as defined by
+- * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that
+- * prohibits correct use of rt volumes with more than 2^32 extents.
++ * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
++ * use of rt volumes with more than 2^32 extents.
+ */
+ uint8_t
+ xfs_compute_rextslog(
+ xfs_rtbxlen_t rtextents)
+ {
+- return rtextents ? xfs_highbit32(rtextents) : 0;
++ if (!rtextents)
++ return 0;
++ return xfs_highbit64(rtextents);
+ }
+
--- /dev/null
+From stable+bounces-124364-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:20 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:27 -0700
+Subject: xfs: fix bounds check in xfs_defer_agfl_block()
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Dave Chinner <dchinner@redhat.com>, Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-8-leah.rumancik@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+[ Upstream commit 2bed0d82c2f78b91a0a9a5a73da57ee883a0c070 ]
+
+The bounds check must happen before we allocate the xefi, or the xefi
+is leaked on failure. Found by coverity via an xfsprogs libxfs scan.
+
+[djwong: This also fixes the type of the @agbno argument.]
+
+Fixes: 7dfee17b13e5 ("xfs: validate block number being freed before adding to xefi")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2489,24 +2489,25 @@ static int
+ xfs_defer_agfl_block(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+- xfs_fsblock_t agbno,
++ xfs_agblock_t agbno,
+ struct xfs_owner_info *oinfo)
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent_free_item *xefi;
++ xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
+
+ ASSERT(xfs_extfree_item_cache != NULL);
+ ASSERT(oinfo != NULL);
+
++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
++ return -EFSCORRUPTED;
++
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+- xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
++ xefi->xefi_startblock = fsbno;
+ xefi->xefi_blockcount = 1;
+ xefi->xefi_owner = oinfo->oi_owner;
+
+- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
+- return -EFSCORRUPTED;
+-
+ trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
--- /dev/null
+From stable+bounces-124360-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:15 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:23 -0700
+Subject: xfs: fix confusing xfs_extent_item variable names
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-4-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 578c714b215d474c52949e65a914dae67924f0fe ]
+
+Change the name of all pointers to xfs_extent_free_item structures to
+"xefi" to make the name consistent and because the current selections
+("new" and "free") mean other things in C.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 32 ++++++++++-----------
+ fs/xfs/xfs_extfree_item.c | 70 +++++++++++++++++++++++-----------------------
+ 2 files changed, 51 insertions(+), 51 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2493,20 +2493,20 @@ xfs_defer_agfl_block(
+ struct xfs_owner_info *oinfo)
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+- struct xfs_extent_free_item *new; /* new element */
++ struct xfs_extent_free_item *xefi;
+
+ ASSERT(xfs_extfree_item_cache != NULL);
+ ASSERT(oinfo != NULL);
+
+- new = kmem_cache_zalloc(xfs_extfree_item_cache,
++ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+- new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
+- new->xefi_blockcount = 1;
+- new->xefi_owner = oinfo->oi_owner;
++ xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
++ xefi->xefi_blockcount = 1;
++ xefi->xefi_owner = oinfo->oi_owner;
+
+ trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+
+- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
++ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
+ }
+
+ /*
+@@ -2521,7 +2521,7 @@ __xfs_free_extent_later(
+ const struct xfs_owner_info *oinfo,
+ bool skip_discard)
+ {
+- struct xfs_extent_free_item *new; /* new element */
++ struct xfs_extent_free_item *xefi;
+ #ifdef DEBUG
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_agnumber_t agno;
+@@ -2540,27 +2540,27 @@ __xfs_free_extent_later(
+ #endif
+ ASSERT(xfs_extfree_item_cache != NULL);
+
+- new = kmem_cache_zalloc(xfs_extfree_item_cache,
++ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+- new->xefi_startblock = bno;
+- new->xefi_blockcount = (xfs_extlen_t)len;
++ xefi->xefi_startblock = bno;
++ xefi->xefi_blockcount = (xfs_extlen_t)len;
+ if (skip_discard)
+- new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
++ xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (oinfo) {
+ ASSERT(oinfo->oi_offset == 0);
+
+ if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+- new->xefi_flags |= XFS_EFI_ATTR_FORK;
++ xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
+ if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+- new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
+- new->xefi_owner = oinfo->oi_owner;
++ xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
++ xefi->xefi_owner = oinfo->oi_owner;
+ } else {
+- new->xefi_owner = XFS_RMAP_OWN_NULL;
++ xefi->xefi_owner = XFS_RMAP_OWN_NULL;
+ }
+ trace_xfs_bmap_free_defer(tp->t_mountp,
+ XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
+ XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
++ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
+ }
+
+ #ifdef DEBUG
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -345,30 +345,30 @@ static int
+ xfs_trans_free_extent(
+ struct xfs_trans *tp,
+ struct xfs_efd_log_item *efdp,
+- struct xfs_extent_free_item *free)
++ struct xfs_extent_free_item *xefi)
+ {
+ struct xfs_owner_info oinfo = { };
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
+ uint next_extent;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp,
+- free->xefi_startblock);
++ xefi->xefi_startblock);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
+- free->xefi_startblock);
++ xefi->xefi_startblock);
+ int error;
+
+- oinfo.oi_owner = free->xefi_owner;
+- if (free->xefi_flags & XFS_EFI_ATTR_FORK)
++ oinfo.oi_owner = xefi->xefi_owner;
++ if (xefi->xefi_flags & XFS_EFI_ATTR_FORK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
+- if (free->xefi_flags & XFS_EFI_BMBT_BLOCK)
++ if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK)
+ oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+
+ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno,
+- free->xefi_blockcount);
++ xefi->xefi_blockcount);
+
+- error = __xfs_free_extent(tp, free->xefi_startblock,
+- free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
+- free->xefi_flags & XFS_EFI_SKIP_DISCARD);
++ error = __xfs_free_extent(tp, xefi->xefi_startblock,
++ xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
++ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+@@ -382,8 +382,8 @@ xfs_trans_free_extent(
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+- extp->ext_start = free->xefi_startblock;
+- extp->ext_len = free->xefi_blockcount;
++ extp->ext_start = xefi->xefi_startblock;
++ extp->ext_len = xefi->xefi_blockcount;
+ efdp->efd_next_extent++;
+
+ return error;
+@@ -411,7 +411,7 @@ STATIC void
+ xfs_extent_free_log_item(
+ struct xfs_trans *tp,
+ struct xfs_efi_log_item *efip,
+- struct xfs_extent_free_item *free)
++ struct xfs_extent_free_item *xefi)
+ {
+ uint next_extent;
+ struct xfs_extent *extp;
+@@ -427,8 +427,8 @@ xfs_extent_free_log_item(
+ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+ ASSERT(next_extent < efip->efi_format.efi_nextents);
+ extp = &efip->efi_format.efi_extents[next_extent];
+- extp->ext_start = free->xefi_startblock;
+- extp->ext_len = free->xefi_blockcount;
++ extp->ext_start = xefi->xefi_startblock;
++ extp->ext_len = xefi->xefi_blockcount;
+ }
+
+ static struct xfs_log_item *
+@@ -440,15 +440,15 @@ xfs_extent_free_create_intent(
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
+- struct xfs_extent_free_item *free;
++ struct xfs_extent_free_item *xefi;
+
+ ASSERT(count > 0);
+
+ xfs_trans_add_item(tp, &efip->efi_item);
+ if (sort)
+ list_sort(mp, items, xfs_extent_free_diff_items);
+- list_for_each_entry(free, items, xefi_list)
+- xfs_extent_free_log_item(tp, efip, free);
++ list_for_each_entry(xefi, items, xefi_list)
++ xfs_extent_free_log_item(tp, efip, xefi);
+ return &efip->efi_item;
+ }
+
+@@ -470,13 +470,13 @@ xfs_extent_free_finish_item(
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+ {
+- struct xfs_extent_free_item *free;
++ struct xfs_extent_free_item *xefi;
+ int error;
+
+- free = container_of(item, struct xfs_extent_free_item, xefi_list);
++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+- error = xfs_trans_free_extent(tp, EFD_ITEM(done), free);
+- kmem_cache_free(xfs_extfree_item_cache, free);
++ error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
++ kmem_cache_free(xfs_extfree_item_cache, xefi);
+ return error;
+ }
+
+@@ -493,10 +493,10 @@ STATIC void
+ xfs_extent_free_cancel_item(
+ struct list_head *item)
+ {
+- struct xfs_extent_free_item *free;
++ struct xfs_extent_free_item *xefi;
+
+- free = container_of(item, struct xfs_extent_free_item, xefi_list);
+- kmem_cache_free(xfs_extfree_item_cache, free);
++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
++ kmem_cache_free(xfs_extfree_item_cache, xefi);
+ }
+
+ const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+@@ -522,7 +522,7 @@ xfs_agfl_free_finish_item(
+ struct xfs_owner_info oinfo = { };
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_efd_log_item *efdp = EFD_ITEM(done);
+- struct xfs_extent_free_item *free;
++ struct xfs_extent_free_item *xefi;
+ struct xfs_extent *extp;
+ struct xfs_buf *agbp;
+ int error;
+@@ -531,13 +531,13 @@ xfs_agfl_free_finish_item(
+ uint next_extent;
+ struct xfs_perag *pag;
+
+- free = container_of(item, struct xfs_extent_free_item, xefi_list);
+- ASSERT(free->xefi_blockcount == 1);
+- agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+- agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+- oinfo.oi_owner = free->xefi_owner;
++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
++ ASSERT(xefi->xefi_blockcount == 1);
++ agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock);
++ agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
++ oinfo.oi_owner = xefi->xefi_owner;
+
+- trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
++ trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, xefi->xefi_blockcount);
+
+ pag = xfs_perag_get(mp, agno);
+ error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
+@@ -558,11 +558,11 @@ xfs_agfl_free_finish_item(
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+- extp->ext_start = free->xefi_startblock;
+- extp->ext_len = free->xefi_blockcount;
++ extp->ext_start = xefi->xefi_startblock;
++ extp->ext_len = xefi->xefi_blockcount;
+ efdp->efd_next_extent++;
+
+- kmem_cache_free(xfs_extfree_item_cache, free);
++ kmem_cache_free(xfs_extfree_item_cache, xefi);
+ return error;
+ }
+
--- /dev/null
+From stable+bounces-124382-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:44 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:45 -0700
+Subject: xfs: fix perag leak when growfs fails
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Long Li <leo.lilong@huawei.com>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-26-leah.rumancik@gmail.com>
+
+From: Long Li <leo.lilong@huawei.com>
+
+[ Upstream commit 7823921887750b39d02e6b44faafdd1cc617c651 ]
+
+[ 6.1: resolved conflicts in xfs_ag.c and xfs_ag.h ]
+
+During growfs, if the new AGs have been initialized in memory but
+sb_agcount has not yet been updated, an error occurring at this point
+will cause perag leaks as follows. These new AGs will not be freed
+during umount, because they are not visible (that is, they are not
+included in mp->m_sb.sb_agcount).
+
+unreferenced object 0xffff88810be40200 (size 512):
+ comm "xfs_growfs", pid 857, jiffies 4294909093
+ hex dump (first 32 bytes):
+ 00 c0 c1 05 81 88 ff ff 04 00 00 00 00 00 00 00 ................
+ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace (crc 381741e2):
+ [<ffffffff8191aef6>] __kmalloc+0x386/0x4f0
+ [<ffffffff82553e65>] kmem_alloc+0xb5/0x2f0
+ [<ffffffff8238dac5>] xfs_initialize_perag+0xc5/0x810
+ [<ffffffff824f679c>] xfs_growfs_data+0x9bc/0xbc0
+ [<ffffffff8250b90e>] xfs_file_ioctl+0x5fe/0x14d0
+ [<ffffffff81aa5194>] __x64_sys_ioctl+0x144/0x1c0
+ [<ffffffff83c3d81f>] do_syscall_64+0x3f/0xe0
+ [<ffffffff83e00087>] entry_SYSCALL_64_after_hwframe+0x62/0x6a
+unreferenced object 0xffff88810be40800 (size 512):
+ comm "xfs_growfs", pid 857, jiffies 4294909093
+ hex dump (first 32 bytes):
+ 20 00 00 00 00 00 00 00 57 ef be dc 00 00 00 00 .......W.......
+ 10 08 e4 0b 81 88 ff ff 10 08 e4 0b 81 88 ff ff ................
+ backtrace (crc bde50e2d):
+ [<ffffffff8191b43a>] __kmalloc_node+0x3da/0x540
+ [<ffffffff81814489>] kvmalloc_node+0x99/0x160
+ [<ffffffff8286acff>] bucket_table_alloc.isra.0+0x5f/0x400
+ [<ffffffff8286bdc5>] rhashtable_init+0x405/0x760
+ [<ffffffff8238dda3>] xfs_initialize_perag+0x3a3/0x810
+ [<ffffffff824f679c>] xfs_growfs_data+0x9bc/0xbc0
+ [<ffffffff8250b90e>] xfs_file_ioctl+0x5fe/0x14d0
+ [<ffffffff81aa5194>] __x64_sys_ioctl+0x144/0x1c0
+ [<ffffffff83c3d81f>] do_syscall_64+0x3f/0xe0
+ [<ffffffff83e00087>] entry_SYSCALL_64_after_hwframe+0x62/0x6a
+
+Factor out xfs_free_unused_perag_range() from xfs_initialize_perag();
+it frees unused perags within a specified range during error handling,
+and is now also called in the error path of a growfs failure.
+
+Fixes: 1c1c6ebcf528 ("xfs: Replace per-ag array with a radix tree")
+Signed-off-by: Long Li <leo.lilong@huawei.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 34 +++++++++++++++++++++++++---------
+ fs/xfs/libxfs/xfs_ag.h | 3 +++
+ fs/xfs/xfs_fsops.c | 5 ++++-
+ 3 files changed, 32 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -259,6 +259,30 @@ xfs_agino_range(
+ return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
+ }
+
++/*
++ * Free perag within the specified AG range, it is only used to free unused
++ * perags under the error handling path.
++ */
++void
++xfs_free_unused_perag_range(
++ struct xfs_mount *mp,
++ xfs_agnumber_t agstart,
++ xfs_agnumber_t agend)
++{
++ struct xfs_perag *pag;
++ xfs_agnumber_t index;
++
++ for (index = agstart; index < agend; index++) {
++ spin_lock(&mp->m_perag_lock);
++ pag = radix_tree_delete(&mp->m_perag_tree, index);
++ spin_unlock(&mp->m_perag_lock);
++ if (!pag)
++ break;
++ xfs_buf_hash_destroy(pag);
++ kmem_free(pag);
++ }
++}
++
+ int
+ xfs_initialize_perag(
+ struct xfs_mount *mp,
+@@ -352,15 +376,7 @@ out_free_pag:
+ kmem_free(pag);
+ out_unwind_new_pags:
+ /* unwind any prior newly initialized pags */
+- for (index = first_initialised; index < agcount; index++) {
+- spin_lock(&mp->m_perag_lock);
+- pag = radix_tree_delete(&mp->m_perag_tree, index);
+- spin_unlock(&mp->m_perag_lock);
+- if (!pag)
+- break;
+- xfs_buf_hash_destroy(pag);
+- kmem_free(pag);
+- }
++ xfs_free_unused_perag_range(mp, first_initialised, agcount);
+ return error;
+ }
+
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -106,6 +106,9 @@ struct xfs_perag {
+ #endif /* __KERNEL__ */
+ };
+
++
++void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
++ xfs_agnumber_t agend);
+ int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
+ xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
+ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -153,7 +153,7 @@ xfs_growfs_data_private(
+ (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0,
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+- return error;
++ goto out_free_unused_perag;
+
+ last_pag = xfs_perag_get(mp, oagcount - 1);
+ if (delta > 0) {
+@@ -227,6 +227,9 @@ xfs_growfs_data_private(
+
+ out_trans_cancel:
+ xfs_trans_cancel(tp);
++out_free_unused_perag:
++ if (nagcount > oagcount)
++ xfs_free_unused_perag_range(mp, oagcount, nagcount);
+ return error;
+ }
+
--- /dev/null
+From stable+bounces-124380-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:40 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:42 -0700
+Subject: xfs: force all buffers to be written during btree bulk load
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-23-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 13ae04d8d45227c2ba51e188daf9fc13d08a1b12 ]
+
+While stress-testing online repair of btrees, I noticed periodic
+assertion failures from the buffer cache about buffers with incorrect
+DELWRI_Q state. Looking further, I observed this race between the AIL
+trying to write out a btree block and repair zapping a btree block after
+the fact:
+
+AIL: Repair0:
+
+pin buffer X
+delwri_queue:
+set DELWRI_Q
+add to delwri list
+
+ stale buf X:
+ clear DELWRI_Q
+ does not clear b_list
+ free space X
+ commit
+
+delwri_submit # oops
+
+Worse yet, I discovered that running the same repair over and over in a
+tight loop can result in a second race that causes data integrity
+problems with the repair:
+
+AIL: Repair0: Repair1:
+
+pin buffer X
+delwri_queue:
+set DELWRI_Q
+add to delwri list
+
+ stale buf X:
+ clear DELWRI_Q
+ does not clear b_list
+ free space X
+ commit
+
+ find free space X
+ get buffer
+ rewrite buffer
+ delwri_queue:
+ set DELWRI_Q
+ already on a list, do not add
+ commit
+
+ BAD: committed tree root before all blocks written
+
+delwri_submit # too late now
+
+I traced this to my own misunderstanding of how the delwri lists work,
+particularly with regards to the AIL's buffer list. If a buffer is
+logged and committed, the buffer can end up on that AIL buffer list. If
+btree repairs are run twice in rapid succession, it's possible that the
+first repair will invalidate the buffer and free it before the next time
+the AIL wakes up. Marking the buffer stale clears DELWRI_Q from the
+buffer state without removing the buffer from its delwri list. The
+buffer doesn't know which list it's on, so it cannot know which lock to
+take to protect the list for a removal.
+
+If the second repair allocates the same block, it will then recycle the
+buffer to start writing the new btree block. Meanwhile, if the AIL
+wakes up and walks the buffer list, it will ignore the buffer because it
+can't lock it, and go back to sleep.
+
+When the second repair calls delwri_queue to put the buffer on the
+list of buffers to write before committing the new btree, it will set
+DELWRI_Q again, but since the buffer hasn't been removed from the AIL's
+buffer list, it won't add it to the bulkload buffer's list.
+
+This is incorrect, because the bulkload caller relies on delwri_submit
+to ensure that all the buffers have been sent to disk /before/
+committing the new btree root pointer. This ordering requirement is
+required for data consistency.
+
+Worse, the AIL won't clear DELWRI_Q from the buffer when it does finally
+drop it, so the next thread to walk through the btree will trip over a
+debug assertion on that flag.
+
+To fix this, create a new function that waits for the buffer to be
+removed from any other delwri lists before adding the buffer to the
+caller's delwri list. By waiting for the buffer to clear both the
+delwri list and any potential delwri wait list, we can be sure that
+repair will initiate writes of all buffers and report all write errors
+back to userspace instead of committing the new structure.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree_staging.c | 4 ---
+ fs/xfs/xfs_buf.c | 44 ++++++++++++++++++++++++++++++++++----
+ fs/xfs/xfs_buf.h | 1
+ 3 files changed, 42 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_btree_staging.c
++++ b/fs/xfs/libxfs/xfs_btree_staging.c
+@@ -342,9 +342,7 @@ xfs_btree_bload_drop_buf(
+ if (*bpp == NULL)
+ return;
+
+- if (!xfs_buf_delwri_queue(*bpp, buffers_list))
+- ASSERT(0);
+-
++ xfs_buf_delwri_queue_here(*bpp, buffers_list);
+ xfs_buf_relse(*bpp);
+ *bpp = NULL;
+ }
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -2040,6 +2040,14 @@ error_free:
+ return NULL;
+ }
+
++static inline void
++xfs_buf_list_del(
++ struct xfs_buf *bp)
++{
++ list_del_init(&bp->b_list);
++ wake_up_var(&bp->b_list);
++}
++
+ /*
+ * Cancel a delayed write list.
+ *
+@@ -2057,7 +2065,7 @@ xfs_buf_delwri_cancel(
+
+ xfs_buf_lock(bp);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
+- list_del_init(&bp->b_list);
++ xfs_buf_list_del(bp);
+ xfs_buf_relse(bp);
+ }
+ }
+@@ -2111,6 +2119,34 @@ xfs_buf_delwri_queue(
+ }
+
+ /*
++ * Queue a buffer to this delwri list as part of a data integrity operation.
++ * If the buffer is on any other delwri list, we'll wait for that to clear
++ * so that the caller can submit the buffer for IO and wait for the result.
++ * Callers must ensure the buffer is not already on the list.
++ */
++void
++xfs_buf_delwri_queue_here(
++ struct xfs_buf *bp,
++ struct list_head *buffer_list)
++{
++ /*
++ * We need this buffer to end up on the /caller's/ delwri list, not any
++ * old list. This can happen if the buffer is marked stale (which
++ * clears DELWRI_Q) after the AIL queues the buffer to its list but
++ * before the AIL has a chance to submit the list.
++ */
++ while (!list_empty(&bp->b_list)) {
++ xfs_buf_unlock(bp);
++ wait_var_event(&bp->b_list, list_empty(&bp->b_list));
++ xfs_buf_lock(bp);
++ }
++
++ ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
++
++ xfs_buf_delwri_queue(bp, buffer_list);
++}
++
++/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+@@ -2172,7 +2208,7 @@ xfs_buf_delwri_submit_buffers(
+ * reference and remove it from the list here.
+ */
+ if (!(bp->b_flags & _XBF_DELWRI_Q)) {
+- list_del_init(&bp->b_list);
++ xfs_buf_list_del(bp);
+ xfs_buf_relse(bp);
+ continue;
+ }
+@@ -2192,7 +2228,7 @@ xfs_buf_delwri_submit_buffers(
+ list_move_tail(&bp->b_list, wait_list);
+ } else {
+ bp->b_flags |= XBF_ASYNC;
+- list_del_init(&bp->b_list);
++ xfs_buf_list_del(bp);
+ }
+ __xfs_buf_submit(bp, false);
+ }
+@@ -2246,7 +2282,7 @@ xfs_buf_delwri_submit(
+ while (!list_empty(&wait_list)) {
+ bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
+
+- list_del_init(&bp->b_list);
++ xfs_buf_list_del(bp);
+
+ /*
+ * Wait on the locked buffer, check for errors and unlock and
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -305,6 +305,7 @@ extern void xfs_buf_stale(struct xfs_buf
+ /* Delayed Write Buffer Routines */
+ extern void xfs_buf_delwri_cancel(struct list_head *);
+ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
++void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
+ extern int xfs_buf_delwri_submit(struct list_head *);
+ extern int xfs_buf_delwri_submit_nowait(struct list_head *);
+ extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
--- /dev/null
+From stable+bounces-124379-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:39 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:43 -0700
+Subject: xfs: initialise di_crc in xfs_log_dinode
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Dave Chinner <dchinner@redhat.com>, Alexander Potapenko <glider@google.com>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-24-leah.rumancik@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+[ Upstream commit 0573676fdde7ce3829ee6a42a8e5a56355234712 ]
+
+Alexander Potapenko reported that KMSAN was issuing these warnings:
+
+kmalloc-ed xlog buffer of size 512 : ffff88802fc26200
+kmalloc-ed xlog buffer of size 368 : ffff88802fc24a00
+kmalloc-ed xlog buffer of size 648 : ffff88802b631000
+kmalloc-ed xlog buffer of size 648 : ffff88802b632800
+kmalloc-ed xlog buffer of size 648 : ffff88802b631c00
+xlog_write_iovec: copying 12 bytes from ffff888017ddbbd8 to ffff88802c300400
+xlog_write_iovec: copying 28 bytes from ffff888017ddbbe4 to ffff88802c30040c
+xlog_write_iovec: copying 68 bytes from ffff88802fc26274 to ffff88802c300428
+xlog_write_iovec: copying 188 bytes from ffff88802fc262bc to ffff88802c30046c
+=====================================================
+BUG: KMSAN: uninit-value in xlog_write_iovec fs/xfs/xfs_log.c:2227
+BUG: KMSAN: uninit-value in xlog_write_full fs/xfs/xfs_log.c:2263
+BUG: KMSAN: uninit-value in xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532
+ xlog_write_iovec fs/xfs/xfs_log.c:2227
+ xlog_write_full fs/xfs/xfs_log.c:2263
+ xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532
+ xlog_cil_write_chain fs/xfs/xfs_log_cil.c:918
+ xlog_cil_push_work+0x30f2/0x44e0 fs/xfs/xfs_log_cil.c:1263
+ process_one_work kernel/workqueue.c:2630
+ process_scheduled_works+0x1188/0x1e30 kernel/workqueue.c:2703
+ worker_thread+0xee5/0x14f0 kernel/workqueue.c:2784
+ kthread+0x391/0x500 kernel/kthread.c:388
+ ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147
+ ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242
+
+Uninit was created at:
+ slab_post_alloc_hook+0x101/0xac0 mm/slab.h:768
+ slab_alloc_node mm/slub.c:3482
+ __kmem_cache_alloc_node+0x612/0xae0 mm/slub.c:3521
+ __do_kmalloc_node mm/slab_common.c:1006
+ __kmalloc+0x11a/0x410 mm/slab_common.c:1020
+ kmalloc ./include/linux/slab.h:604
+ xlog_kvmalloc fs/xfs/xfs_log_priv.h:704
+ xlog_cil_alloc_shadow_bufs fs/xfs/xfs_log_cil.c:343
+ xlog_cil_commit+0x487/0x4dc0 fs/xfs/xfs_log_cil.c:1574
+ __xfs_trans_commit+0x8df/0x1930 fs/xfs/xfs_trans.c:1017
+ xfs_trans_commit+0x30/0x40 fs/xfs/xfs_trans.c:1061
+ xfs_create+0x15af/0x2150 fs/xfs/xfs_inode.c:1076
+ xfs_generic_create+0x4cd/0x1550 fs/xfs/xfs_iops.c:199
+ xfs_vn_create+0x4a/0x60 fs/xfs/xfs_iops.c:275
+ lookup_open fs/namei.c:3477
+ open_last_lookups fs/namei.c:3546
+ path_openat+0x29ac/0x6180 fs/namei.c:3776
+ do_filp_open+0x24d/0x680 fs/namei.c:3809
+ do_sys_openat2+0x1bc/0x330 fs/open.c:1440
+ do_sys_open fs/open.c:1455
+ __do_sys_openat fs/open.c:1471
+ __se_sys_openat fs/open.c:1466
+ __x64_sys_openat+0x253/0x330 fs/open.c:1466
+ do_syscall_x64 arch/x86/entry/common.c:51
+ do_syscall_64+0x4f/0x140 arch/x86/entry/common.c:82
+ entry_SYSCALL_64_after_hwframe+0x63/0x6b arch/x86/entry/entry_64.S:120
+
+Bytes 112-115 of 188 are uninitialized
+Memory access of size 188 starts at ffff88802fc262bc
+
+This is caused by the struct xfs_log_dinode not having the di_crc
+field initialised. Log recovery never uses this field (it is only
+present these days for on-disk format compatibility reasons) and so
+its value is never checked so nothing in XFS has caught this.
+
+Further, none of the uninitialised memory access warning tools have
+caught this (despite catching other uninit memory accesses in the
+struct xfs_log_dinode back in 2017!) until recently. Alexander
+annotated the XFS code to get the dump of the actual bytes that were
+detected as uninitialised, and from that report it took me about 30s
+to realise what the issue was.
+
+The issue was introduced back in 2016 and every inode that is logged
+fails to initialise this field. There is no actual bad behaviour
+caused by this issue - I find it hard to even classify it as a
+bug...
+
+Reported-and-tested-by: Alexander Potapenko <glider@google.com>
+Fixes: f8d55aa0523a ("xfs: introduce inode log format object")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode_item.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -556,6 +556,9 @@ xfs_inode_to_log_dinode(
+ memset(to->di_pad2, 0, sizeof(to->di_pad2));
+ uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+ to->di_v3_pad = 0;
++
++ /* dummy value for initialisation */
++ to->di_crc = 0;
+ } else {
+ to->di_version = 2;
+ to->di_flushiter = ip->i_flushiter;
--- /dev/null
+From stable+bounces-124374-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:33 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:37 -0700
+Subject: xfs: make rextslog computation consistent with mkfs
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-18-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit a6a38f309afc4a7ede01242b603f36c433997780 ]
+
+There's a weird discrepancy in xfsprogs dating back to the creation of
+the Linux port -- if there are zero rt extents, mkfs will set
+sb_rextents and sb_rextslog both to zero:
+
+ sbp->sb_rextslog =
+ (uint8_t)(rtextents ?
+ libxfs_highbit32((unsigned int)rtextents) : 0);
+
+However, that's not the check that xfs_repair uses for nonzero rtblocks:
+
+ if (sb->sb_rextslog !=
+ libxfs_highbit32((unsigned int)sb->sb_rextents))
+
+The difference here is that xfs_highbit32 returns -1 if its argument is
+zero. Unfortunately, this means that in the weird corner case of a
+realtime volume shorter than 1 rt extent, xfs_repair will immediately
+flag a freshly formatted filesystem as corrupt. Because mkfs has been
+writing ondisk artifacts like this for decades, we have to accept that
+as "correct". TBH, zero rextslog for zero rtextents makes more sense to
+me anyway.
+
+Regrettably, the superblock verifier checks created in commit f8e566c0f5e1f
+copied xfs_repair even though mkfs has been writing out such filesystems for
+ages. Fix the superblock verifier to accept what mkfs spits out; the
+userspace version of this patch will have to fix xfs_repair as well.
+
+Note that the new helper leaves the zeroday bug where the upper 32 bits
+of sb_rextents are ripped off before it is fed to highbit32. This leads to a
+seriously undersized rt summary file, which immediately breaks mkfs:
+
+$ hugedisk.sh foo /dev/sdc $(( 0x100000080 * 4096))B
+$ /sbin/mkfs.xfs -f /dev/sda -m rmapbt=0,reflink=0 -r rtdev=/dev/mapper/foo
+meta-data=/dev/sda isize=512 agcount=4, agsize=1298176 blks
+ = sectsz=512 attr=2, projid32bit=1
+ = crc=1 finobt=1, sparse=1, rmapbt=0
+ = reflink=0 bigtime=1 inobtcount=1 nrext64=1
+data = bsize=4096 blocks=5192704, imaxpct=25
+ = sunit=0 swidth=0 blks
+naming =version 2 bsize=4096 ascii-ci=0, ftype=1
+log =internal log bsize=4096 blocks=16384, version=2
+ = sectsz=512 sunit=0 blks, lazy-count=1
+realtime =/dev/mapper/foo extsz=4096 blocks=4294967424, rtextents=4294967424
+Discarding blocks...Done.
+mkfs.xfs: Error initializing the realtime space [117 - Structure needs cleaning]
+
+The next patch will drop support for rt volumes with fewer than 1 or
+more than 2^32-1 rt extents, since they've clearly been broken forever.
+
+Fixes: f8e566c0f5e1f ("xfs: validate the realtime geometry in xfs_validate_sb_common")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.c | 13 +++++++++++++
+ fs/xfs/libxfs/xfs_rtbitmap.h | 4 ++++
+ fs/xfs/libxfs/xfs_sb.c | 3 ++-
+ fs/xfs/xfs_rtalloc.c | 4 ++--
+ 4 files changed, 21 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -1130,3 +1130,16 @@ xfs_rtalloc_extent_is_free(
+ *is_free = matches;
+ return 0;
+ }
++
++/*
++ * Compute the maximum level number of the realtime summary file, as defined by
++ * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that
++ * prohibits correct use of rt volumes with more than 2^32 extents.
++ */
++uint8_t
++xfs_compute_rextslog(
++ xfs_rtbxlen_t rtextents)
++{
++ return rtextents ? xfs_highbit32(rtextents) : 0;
++}
++
+--- a/fs/xfs/libxfs/xfs_rtbitmap.h
++++ b/fs/xfs/libxfs/xfs_rtbitmap.h
+@@ -70,6 +70,9 @@ xfs_rtfree_extent(
+ /* Same as above, but in units of rt blocks. */
+ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen);
++
++uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
++
+ #else /* CONFIG_XFS_RT */
+ # define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+ # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+@@ -77,6 +80,7 @@ int xfs_rtfree_blocks(struct xfs_trans *
+ # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
+ # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
+ # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
++# define xfs_compute_rextslog(rtx) (0)
+ #endif /* CONFIG_XFS_RT */
+
+ #endif /* __XFS_RTBITMAP_H__ */
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -25,6 +25,7 @@
+ #include "xfs_da_format.h"
+ #include "xfs_health.h"
+ #include "xfs_ag.h"
++#include "xfs_rtbitmap.h"
+
+ /*
+ * Physical superblock buffer manipulations. Shared with libxfs in userspace.
+@@ -502,7 +503,7 @@ xfs_validate_sb_common(
+ NBBY * sbp->sb_blocksize);
+
+ if (sbp->sb_rextents != rexts ||
+- sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
++ sbp->sb_rextslog != xfs_compute_rextslog(rexts) ||
+ sbp->sb_rbmblocks != rbmblocks) {
+ xfs_notice(mp,
+ "realtime geometry sanity check failed");
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -999,7 +999,7 @@ xfs_growfs_rt(
+ nrextents = nrblocks;
+ do_div(nrextents, in->extsize);
+ nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
+- nrextslog = xfs_highbit32(nrextents);
++ nrextslog = xfs_compute_rextslog(nrextents);
+ nrsumlevels = nrextslog + 1;
+ nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
+ nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
+@@ -1061,7 +1061,7 @@ xfs_growfs_rt(
+ nsbp->sb_rextents = nsbp->sb_rblocks;
+ do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
+ ASSERT(nsbp->sb_rextents != 0);
+- nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
++ nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents);
+ nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
+ nrsumsize =
+ (uint)sizeof(xfs_suminfo_t) * nrsumlevels *
--- /dev/null
+From stable+bounces-124367-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:24 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:30 -0700
+Subject: xfs: move the xfs_rtbitmap.c declarations to xfs_rtbitmap.h
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-11-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 13928113fc5b5e79c91796290a99ed991ac0efe2 ]
+
+[6.1: resolved conflicts with fscounters.c and rtsummary.c ]
+
+Move all the declarations for functionality in xfs_rtbitmap.c into a
+separate xfs_rtbitmap.h header file.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 2 -
+ fs/xfs/libxfs/xfs_rtbitmap.c | 1
+ fs/xfs/libxfs/xfs_rtbitmap.h | 82 +++++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/scrub/rtbitmap.c | 2 -
+ fs/xfs/xfs_fsmap.c | 2 -
+ fs/xfs/xfs_rtalloc.c | 1
+ fs/xfs/xfs_rtalloc.h | 73 --------------------------------------
+ 7 files changed, 87 insertions(+), 76 deletions(-)
+ create mode 100644 fs/xfs/libxfs/xfs_rtbitmap.h
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -21,7 +21,7 @@
+ #include "xfs_bmap.h"
+ #include "xfs_bmap_util.h"
+ #include "xfs_bmap_btree.h"
+-#include "xfs_rtalloc.h"
++#include "xfs_rtbitmap.h"
+ #include "xfs_errortag.h"
+ #include "xfs_error.h"
+ #include "xfs_quota.h"
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -16,6 +16,7 @@
+ #include "xfs_trans.h"
+ #include "xfs_rtalloc.h"
+ #include "xfs_error.h"
++#include "xfs_rtbitmap.h"
+
+ /*
+ * Realtime allocator bitmap functions shared with userspace.
+--- /dev/null
++++ b/fs/xfs/libxfs/xfs_rtbitmap.h
+@@ -0,0 +1,82 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
++ * All Rights Reserved.
++ */
++#ifndef __XFS_RTBITMAP_H__
++#define __XFS_RTBITMAP_H__
++
++/*
++ * XXX: Most of the realtime allocation functions deal in units of realtime
++ * extents, not realtime blocks. This looks funny when paired with the type
++ * name and screams for a larger cleanup.
++ */
++struct xfs_rtalloc_rec {
++ xfs_rtblock_t ar_startext;
++ xfs_rtblock_t ar_extcount;
++};
++
++typedef int (*xfs_rtalloc_query_range_fn)(
++ struct xfs_mount *mp,
++ struct xfs_trans *tp,
++ const struct xfs_rtalloc_rec *rec,
++ void *priv);
++
++#ifdef CONFIG_XFS_RT
++int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
++int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_extlen_t len, int val,
++ xfs_rtblock_t *new, int *stat);
++int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_rtblock_t limit,
++ xfs_rtblock_t *rtblock);
++int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_rtblock_t limit,
++ xfs_rtblock_t *rtblock);
++int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_extlen_t len, int val);
++int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
++ int log, xfs_rtblock_t bbno, int delta,
++ struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
++ xfs_suminfo_t *sum);
++int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
++ xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
++ xfs_fsblock_t *rsb);
++int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_extlen_t len,
++ struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
++int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
++ const struct xfs_rtalloc_rec *low_rec,
++ const struct xfs_rtalloc_rec *high_rec,
++ xfs_rtalloc_query_range_fn fn, void *priv);
++int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtalloc_query_range_fn fn,
++ void *priv);
++bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
++int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
++ xfs_rtblock_t start, xfs_extlen_t len,
++ bool *is_free);
++/*
++ * Free an extent in the realtime subvolume. Length is expressed in
++ * realtime extents, as is the block number.
++ */
++int /* error */
++xfs_rtfree_extent(
++ struct xfs_trans *tp, /* transaction pointer */
++ xfs_rtblock_t bno, /* starting block number to free */
++ xfs_extlen_t len); /* length of extent freed */
++
++/* Same as above, but in units of rt blocks. */
++int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
++ xfs_filblks_t rtlen);
++#else /* CONFIG_XFS_RT */
++# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
++# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
++# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
++# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
++# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
++# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
++#endif /* CONFIG_XFS_RT */
++
++#endif /* __XFS_RTBITMAP_H__ */
+--- a/fs/xfs/scrub/rtbitmap.c
++++ b/fs/xfs/scrub/rtbitmap.c
+@@ -11,7 +11,7 @@
+ #include "xfs_mount.h"
+ #include "xfs_log_format.h"
+ #include "xfs_trans.h"
+-#include "xfs_rtalloc.h"
++#include "xfs_rtbitmap.h"
+ #include "xfs_inode.h"
+ #include "xfs_bmap.h"
+ #include "scrub/scrub.h"
+--- a/fs/xfs/xfs_fsmap.c
++++ b/fs/xfs/xfs_fsmap.c
+@@ -23,7 +23,7 @@
+ #include "xfs_refcount.h"
+ #include "xfs_refcount_btree.h"
+ #include "xfs_alloc_btree.h"
+-#include "xfs_rtalloc.h"
++#include "xfs_rtbitmap.h"
+ #include "xfs_ag.h"
+
+ /* Convert an xfs_fsmap to an fsmap. */
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -19,6 +19,7 @@
+ #include "xfs_icache.h"
+ #include "xfs_rtalloc.h"
+ #include "xfs_sb.h"
++#include "xfs_rtbitmap.h"
+
+ /*
+ * Read and return the summary information for a given extent size,
+--- a/fs/xfs/xfs_rtalloc.h
++++ b/fs/xfs/xfs_rtalloc.h
+@@ -11,22 +11,6 @@
+ struct xfs_mount;
+ struct xfs_trans;
+
+-/*
+- * XXX: Most of the realtime allocation functions deal in units of realtime
+- * extents, not realtime blocks. This looks funny when paired with the type
+- * name and screams for a larger cleanup.
+- */
+-struct xfs_rtalloc_rec {
+- xfs_rtblock_t ar_startext;
+- xfs_rtblock_t ar_extcount;
+-};
+-
+-typedef int (*xfs_rtalloc_query_range_fn)(
+- struct xfs_mount *mp,
+- struct xfs_trans *tp,
+- const struct xfs_rtalloc_rec *rec,
+- void *priv);
+-
+ #ifdef CONFIG_XFS_RT
+ /*
+ * Function prototypes for exported functions.
+@@ -48,19 +32,6 @@ xfs_rtallocate_extent(
+ xfs_extlen_t prod, /* extent product factor */
+ xfs_rtblock_t *rtblock); /* out: start block allocated */
+
+-/*
+- * Free an extent in the realtime subvolume. Length is expressed in
+- * realtime extents, as is the block number.
+- */
+-int /* error */
+-xfs_rtfree_extent(
+- struct xfs_trans *tp, /* transaction pointer */
+- xfs_rtblock_t bno, /* starting block number to free */
+- xfs_extlen_t len); /* length of extent freed */
+-
+-/* Same as above, but in units of rt blocks. */
+-int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
+- xfs_filblks_t rtlen);
+
+ /*
+ * Initialize realtime fields in the mount structure.
+@@ -102,55 +73,11 @@ xfs_growfs_rt(
+ struct xfs_mount *mp, /* file system mount structure */
+ xfs_growfs_rt_t *in); /* user supplied growfs struct */
+
+-/*
+- * From xfs_rtbitmap.c
+- */
+-int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
+-int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_extlen_t len, int val,
+- xfs_rtblock_t *new, int *stat);
+-int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_rtblock_t limit,
+- xfs_rtblock_t *rtblock);
+-int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_rtblock_t limit,
+- xfs_rtblock_t *rtblock);
+-int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_extlen_t len, int val);
+-int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
+- int log, xfs_rtblock_t bbno, int delta,
+- struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
+- xfs_suminfo_t *sum);
+-int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
+- xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
+- xfs_fsblock_t *rsb);
+-int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_extlen_t len,
+- struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
+-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
+- const struct xfs_rtalloc_rec *low_rec,
+- const struct xfs_rtalloc_rec *high_rec,
+- xfs_rtalloc_query_range_fn fn, void *priv);
+-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtalloc_query_range_fn fn,
+- void *priv);
+-bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+- xfs_rtblock_t start, xfs_extlen_t len,
+- bool *is_free);
+ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
+ #else
+ # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS)
+-# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+-# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+ # define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS)
+ # define xfs_growfs_rt(mp,in) (-ENOSYS)
+-# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
+-# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
+-# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
+-# define xfs_verify_rtbno(m, r) (false)
+-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
+ # define xfs_rtalloc_reinit_frextents(m) (0)
+ static inline int /* error */
+ xfs_rtmount_init(
--- /dev/null
+From stable+bounces-124362-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:17 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:25 -0700
+Subject: xfs: pass per-ag references to xfs_free_extent
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Dave Chinner <dchinner@redhat.com>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-6-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit b2ccab3199aa7cea9154d80ea2585312c5f6eba0 ]
+
+Pass a reference to the per-AG structure to xfs_free_extent. Most
+callers already have one, so we can eliminate unnecessary lookups. The
+one exception to this is the EFI code, which the next patch will fix.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 6 ++----
+ fs/xfs/libxfs/xfs_alloc.c | 15 +++++----------
+ fs/xfs/libxfs/xfs_alloc.h | 8 +++++---
+ fs/xfs/libxfs/xfs_ialloc_btree.c | 7 +++++--
+ fs/xfs/libxfs/xfs_refcount_btree.c | 5 +++--
+ fs/xfs/scrub/repair.c | 3 ++-
+ fs/xfs/xfs_extfree_item.c | 8 ++++++--
+ 7 files changed, 28 insertions(+), 24 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -981,10 +981,8 @@ xfs_ag_extend_space(
+ if (error)
+ return error;
+
+- error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno,
+- be32_to_cpu(agf->agf_length) - len),
+- len, &XFS_RMAP_OINFO_SKIP_UPDATE,
+- XFS_AG_RESV_NONE);
++ error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len,
++ len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -3447,7 +3447,8 @@ xfs_free_extent_fix_freelist(
+ int
+ __xfs_free_extent(
+ struct xfs_trans *tp,
+- xfs_fsblock_t bno,
++ struct xfs_perag *pag,
++ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type,
+@@ -3455,12 +3456,9 @@ __xfs_free_extent(
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *agbp;
+- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno);
+- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno);
+ struct xfs_agf *agf;
+ int error;
+ unsigned int busy_flags = 0;
+- struct xfs_perag *pag;
+
+ ASSERT(len != 0);
+ ASSERT(type != XFS_AG_RESV_AGFL);
+@@ -3469,10 +3467,9 @@ __xfs_free_extent(
+ XFS_ERRTAG_FREE_EXTENT))
+ return -EIO;
+
+- pag = xfs_perag_get(mp, agno);
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
+ if (error)
+- goto err;
++ return error;
+ agf = agbp->b_addr;
+
+ if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
+@@ -3486,20 +3483,18 @@ __xfs_free_extent(
+ goto err_release;
+ }
+
+- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
++ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
++ type);
+ if (error)
+ goto err_release;
+
+ if (skip_discard)
+ busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
+ xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
+- xfs_perag_put(pag);
+ return 0;
+
+ err_release:
+ xfs_trans_brelse(tp, agbp);
+-err:
+- xfs_perag_put(pag);
+ return error;
+ }
+
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -130,7 +130,8 @@ xfs_alloc_vextent(
+ int /* error */
+ __xfs_free_extent(
+ struct xfs_trans *tp, /* transaction pointer */
+- xfs_fsblock_t bno, /* starting block number of extent */
++ struct xfs_perag *pag,
++ xfs_agblock_t agbno,
+ xfs_extlen_t len, /* length of extent */
+ const struct xfs_owner_info *oinfo, /* extent owner */
+ enum xfs_ag_resv_type type, /* block reservation type */
+@@ -139,12 +140,13 @@ __xfs_free_extent(
+ static inline int
+ xfs_free_extent(
+ struct xfs_trans *tp,
+- xfs_fsblock_t bno,
++ struct xfs_perag *pag,
++ xfs_agblock_t agbno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ enum xfs_ag_resv_type type)
+ {
+- return __xfs_free_extent(tp, bno, len, oinfo, type, false);
++ return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false);
+ }
+
+ int /* error */
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -156,9 +156,12 @@ __xfs_inobt_free_block(
+ struct xfs_buf *bp,
+ enum xfs_ag_resv_type resv)
+ {
++ xfs_fsblock_t fsbno;
++
+ xfs_inobt_mod_blockcount(cur, -1);
+- return xfs_free_extent(cur->bc_tp,
+- XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1,
++ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
++ return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
++ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
+ &XFS_RMAP_OINFO_INOBT, resv);
+ }
+
+--- a/fs/xfs/libxfs/xfs_refcount_btree.c
++++ b/fs/xfs/libxfs/xfs_refcount_btree.c
+@@ -112,8 +112,9 @@ xfs_refcountbt_free_block(
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
+ be32_add_cpu(&agf->agf_refcount_blocks, -1);
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
+- error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC,
+- XFS_AG_RESV_METADATA);
++ error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
++ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
++ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
+ if (error)
+ return error;
+
+--- a/fs/xfs/scrub/repair.c
++++ b/fs/xfs/scrub/repair.c
+@@ -582,7 +582,8 @@ xrep_reap_block(
+ else if (resv == XFS_AG_RESV_AGFL)
+ error = xrep_put_freelist(sc, agbno);
+ else
+- error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
++ error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, oinfo,
++ resv);
+ if (agf_bp != sc->sa.agf_bp)
+ xfs_trans_brelse(sc->tp, agf_bp);
+ if (error)
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -350,6 +350,7 @@ xfs_trans_free_extent(
+ struct xfs_owner_info oinfo = { };
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
++ struct xfs_perag *pag;
+ uint next_extent;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp,
+ xefi->xefi_startblock);
+@@ -366,9 +367,12 @@ xfs_trans_free_extent(
+ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno,
+ xefi->xefi_blockcount);
+
+- error = __xfs_free_extent(tp, xefi->xefi_startblock,
+- xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
++ pag = xfs_perag_get(mp, agno);
++ error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount,
++ &oinfo, XFS_AG_RESV_NONE,
+ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
++ xfs_perag_put(pag);
++
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
--- /dev/null
+From stable+bounces-124358-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:13 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:21 -0700
+Subject: xfs: pass refcount intent directly through the log intent code
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-2-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 0b11553ec54a6d88907e60d0595dbcef98539747 ]
+
+Pass the incore refcount intent through the CUI logging code instead of
+repeatedly boxing and unboxing parameters.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_refcount.c | 96 +++++++++++++++++++------------------------
+ fs/xfs/libxfs/xfs_refcount.h | 4 -
+ fs/xfs/xfs_refcount_item.c | 62 +++++++++++----------------
+ fs/xfs/xfs_trace.h | 15 +-----
+ 4 files changed, 74 insertions(+), 103 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -1213,37 +1213,33 @@ out_error:
+ STATIC int
+ xfs_refcount_adjust(
+ struct xfs_btree_cur *cur,
+- xfs_agblock_t agbno,
+- xfs_extlen_t aglen,
+- xfs_agblock_t *new_agbno,
+- xfs_extlen_t *new_aglen,
++ xfs_agblock_t *agbno,
++ xfs_extlen_t *aglen,
+ enum xfs_refc_adjust_op adj)
+ {
+ bool shape_changed;
+ int shape_changes = 0;
+ int error;
+
+- *new_agbno = agbno;
+- *new_aglen = aglen;
+ if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
+- trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+- agbno, aglen);
++ trace_xfs_refcount_increase(cur->bc_mp,
++ cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+ else
+- trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+- agbno, aglen);
++ trace_xfs_refcount_decrease(cur->bc_mp,
++ cur->bc_ag.pag->pag_agno, *agbno, *aglen);
+
+ /*
+ * Ensure that no rcextents cross the boundary of the adjustment range.
+ */
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+- agbno, &shape_changed);
++ *agbno, &shape_changed);
+ if (error)
+ goto out_error;
+ if (shape_changed)
+ shape_changes++;
+
+ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
+- agbno + aglen, &shape_changed);
++ *agbno + *aglen, &shape_changed);
+ if (error)
+ goto out_error;
+ if (shape_changed)
+@@ -1253,7 +1249,7 @@ xfs_refcount_adjust(
+ * Try to merge with the left or right extents of the range.
+ */
+ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
+- new_agbno, new_aglen, adj, &shape_changed);
++ agbno, aglen, adj, &shape_changed);
+ if (error)
+ goto out_error;
+ if (shape_changed)
+@@ -1262,7 +1258,7 @@ xfs_refcount_adjust(
+ cur->bc_ag.refc.shape_changes++;
+
+ /* Now that we've taken care of the ends, adjust the middle extents */
+- error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
++ error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
+ if (error)
+ goto out_error;
+
+@@ -1298,21 +1294,20 @@ xfs_refcount_finish_one_cleanup(
+ static inline int
+ xfs_refcount_continue_op(
+ struct xfs_btree_cur *cur,
+- xfs_fsblock_t startblock,
+- xfs_agblock_t new_agbno,
+- xfs_extlen_t new_len,
+- xfs_fsblock_t *new_fsbno)
++ struct xfs_refcount_intent *ri,
++ xfs_agblock_t new_agbno)
+ {
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_perag *pag = cur->bc_ag.pag;
+
+- if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
++ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
++ ri->ri_blockcount)))
+ return -EFSCORRUPTED;
+
+- *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
++ ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+
+- ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
+- ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));
++ ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
++ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
+
+ return 0;
+ }
+@@ -1327,11 +1322,7 @@ xfs_refcount_continue_op(
+ int
+ xfs_refcount_finish_one(
+ struct xfs_trans *tp,
+- enum xfs_refcount_intent_type type,
+- xfs_fsblock_t startblock,
+- xfs_extlen_t blockcount,
+- xfs_fsblock_t *new_fsb,
+- xfs_extlen_t *new_len,
++ struct xfs_refcount_intent *ri,
+ struct xfs_btree_cur **pcur)
+ {
+ struct xfs_mount *mp = tp->t_mountp;
+@@ -1339,17 +1330,16 @@ xfs_refcount_finish_one(
+ struct xfs_buf *agbp = NULL;
+ int error = 0;
+ xfs_agblock_t bno;
+- xfs_agblock_t new_agbno;
+ unsigned long nr_ops = 0;
+ int shape_changes = 0;
+ struct xfs_perag *pag;
+
+- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
+- bno = XFS_FSB_TO_AGBNO(mp, startblock);
++ pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
++ bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
+
+- trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
+- type, XFS_FSB_TO_AGBNO(mp, startblock),
+- blockcount);
++ trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
++ ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
++ ri->ri_blockcount);
+
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
+ error = -EIO;
+@@ -1380,42 +1370,42 @@ xfs_refcount_finish_one(
+ }
+ *pcur = rcur;
+
+- switch (type) {
++ switch (ri->ri_type) {
+ case XFS_REFCOUNT_INCREASE:
+- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
+- new_len, XFS_REFCOUNT_ADJUST_INCREASE);
++ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
++ XFS_REFCOUNT_ADJUST_INCREASE);
+ if (error)
+ goto out_drop;
+- if (*new_len > 0)
+- error = xfs_refcount_continue_op(rcur, startblock,
+- new_agbno, *new_len, new_fsb);
++ if (ri->ri_blockcount > 0)
++ error = xfs_refcount_continue_op(rcur, ri, bno);
+ break;
+ case XFS_REFCOUNT_DECREASE:
+- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
+- new_len, XFS_REFCOUNT_ADJUST_DECREASE);
++ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
++ XFS_REFCOUNT_ADJUST_DECREASE);
+ if (error)
+ goto out_drop;
+- if (*new_len > 0)
+- error = xfs_refcount_continue_op(rcur, startblock,
+- new_agbno, *new_len, new_fsb);
++ if (ri->ri_blockcount > 0)
++ error = xfs_refcount_continue_op(rcur, ri, bno);
+ break;
+ case XFS_REFCOUNT_ALLOC_COW:
+- *new_fsb = startblock + blockcount;
+- *new_len = 0;
+- error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
++ error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
++ if (error)
++ goto out_drop;
++ ri->ri_blockcount = 0;
+ break;
+ case XFS_REFCOUNT_FREE_COW:
+- *new_fsb = startblock + blockcount;
+- *new_len = 0;
+- error = __xfs_refcount_cow_free(rcur, bno, blockcount);
++ error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
++ if (error)
++ goto out_drop;
++ ri->ri_blockcount = 0;
+ break;
+ default:
+ ASSERT(0);
+ error = -EFSCORRUPTED;
+ }
+- if (!error && *new_len > 0)
+- trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
+- bno, blockcount, new_agbno, *new_len);
++ if (!error && ri->ri_blockcount > 0)
++ trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno,
++ ri->ri_type, bno, ri->ri_blockcount);
+ out_drop:
+ xfs_perag_put(pag);
+ return error;
+--- a/fs/xfs/libxfs/xfs_refcount.h
++++ b/fs/xfs/libxfs/xfs_refcount.h
+@@ -75,9 +75,7 @@ void xfs_refcount_decrease_extent(struct
+ extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
+ struct xfs_btree_cur *rcur, int error);
+ extern int xfs_refcount_finish_one(struct xfs_trans *tp,
+- enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
+- xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
+- xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
++ struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur);
+
+ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
+ xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -252,17 +252,12 @@ static int
+ xfs_trans_log_finish_refcount_update(
+ struct xfs_trans *tp,
+ struct xfs_cud_log_item *cudp,
+- enum xfs_refcount_intent_type type,
+- xfs_fsblock_t startblock,
+- xfs_extlen_t blockcount,
+- xfs_fsblock_t *new_fsb,
+- xfs_extlen_t *new_len,
++ struct xfs_refcount_intent *ri,
+ struct xfs_btree_cur **pcur)
+ {
+ int error;
+
+- error = xfs_refcount_finish_one(tp, type, startblock,
+- blockcount, new_fsb, new_len, pcur);
++ error = xfs_refcount_finish_one(tp, ri, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+@@ -378,25 +373,20 @@ xfs_refcount_update_finish_item(
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+ {
+- struct xfs_refcount_intent *refc;
+- xfs_fsblock_t new_fsb;
+- xfs_extlen_t new_aglen;
++ struct xfs_refcount_intent *ri;
+ int error;
+
+- refc = container_of(item, struct xfs_refcount_intent, ri_list);
+- error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done),
+- refc->ri_type, refc->ri_startblock, refc->ri_blockcount,
+- &new_fsb, &new_aglen, state);
++ ri = container_of(item, struct xfs_refcount_intent, ri_list);
++ error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri,
++ state);
+
+ /* Did we run out of reservation? Requeue what we didn't finish. */
+- if (!error && new_aglen > 0) {
+- ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
+- refc->ri_type == XFS_REFCOUNT_DECREASE);
+- refc->ri_startblock = new_fsb;
+- refc->ri_blockcount = new_aglen;
++ if (!error && ri->ri_blockcount > 0) {
++ ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE ||
++ ri->ri_type == XFS_REFCOUNT_DECREASE);
+ return -EAGAIN;
+ }
+- kmem_cache_free(xfs_refcount_intent_cache, refc);
++ kmem_cache_free(xfs_refcount_intent_cache, ri);
+ return error;
+ }
+
+@@ -463,18 +453,13 @@ xfs_cui_item_recover(
+ struct xfs_log_item *lip,
+ struct list_head *capture_list)
+ {
+- struct xfs_bmbt_irec irec;
+ struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
+- struct xfs_phys_extent *refc;
+ struct xfs_cud_log_item *cudp;
+ struct xfs_trans *tp;
+ struct xfs_btree_cur *rcur = NULL;
+ struct xfs_mount *mp = lip->li_log->l_mp;
+- xfs_fsblock_t new_fsb;
+- xfs_extlen_t new_len;
+ unsigned int refc_type;
+ bool requeue_only = false;
+- enum xfs_refcount_intent_type type;
+ int i;
+ int error = 0;
+
+@@ -513,6 +498,9 @@ xfs_cui_item_recover(
+ cudp = xfs_trans_get_cud(tp, cuip);
+
+ for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
++ struct xfs_refcount_intent fake = { };
++ struct xfs_phys_extent *refc;
++
+ refc = &cuip->cui_format.cui_extents[i];
+ refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
+ switch (refc_type) {
+@@ -520,7 +508,7 @@ xfs_cui_item_recover(
+ case XFS_REFCOUNT_DECREASE:
+ case XFS_REFCOUNT_ALLOC_COW:
+ case XFS_REFCOUNT_FREE_COW:
+- type = refc_type;
++ fake.ri_type = refc_type;
+ break;
+ default:
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+@@ -529,13 +517,12 @@ xfs_cui_item_recover(
+ error = -EFSCORRUPTED;
+ goto abort_error;
+ }
+- if (requeue_only) {
+- new_fsb = refc->pe_startblock;
+- new_len = refc->pe_len;
+- } else
++
++ fake.ri_startblock = refc->pe_startblock;
++ fake.ri_blockcount = refc->pe_len;
++ if (!requeue_only)
+ error = xfs_trans_log_finish_refcount_update(tp, cudp,
+- type, refc->pe_startblock, refc->pe_len,
+- &new_fsb, &new_len, &rcur);
++ &fake, &rcur);
+ if (error == -EFSCORRUPTED)
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &cuip->cui_format,
+@@ -544,10 +531,13 @@ xfs_cui_item_recover(
+ goto abort_error;
+
+ /* Requeue what we didn't finish. */
+- if (new_len > 0) {
+- irec.br_startblock = new_fsb;
+- irec.br_blockcount = new_len;
+- switch (type) {
++ if (fake.ri_blockcount > 0) {
++ struct xfs_bmbt_irec irec = {
++ .br_startblock = fake.ri_startblock,
++ .br_blockcount = fake.ri_blockcount,
++ };
++
++ switch (fake.ri_type) {
+ case XFS_REFCOUNT_INCREASE:
+ xfs_refcount_increase_extent(tp, &irec);
+ break;
+--- a/fs/xfs/xfs_trace.h
++++ b/fs/xfs/xfs_trace.h
+@@ -3208,17 +3208,14 @@ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refco
+
+ TRACE_EVENT(xfs_refcount_finish_one_leftover,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+- int type, xfs_agblock_t agbno, xfs_extlen_t len,
+- xfs_agblock_t new_agbno, xfs_extlen_t new_len),
+- TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len),
++ int type, xfs_agblock_t agbno, xfs_extlen_t len),
++ TP_ARGS(mp, agno, type, agbno, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, type)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+- __field(xfs_agblock_t, new_agbno)
+- __field(xfs_extlen_t, new_len)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+@@ -3226,17 +3223,13 @@ TRACE_EVENT(xfs_refcount_finish_one_left
+ __entry->type = type;
+ __entry->agbno = agbno;
+ __entry->len = len;
+- __entry->new_agbno = new_agbno;
+- __entry->new_len = new_len;
+ ),
+- TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x",
++ TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->agno,
+ __entry->agbno,
+- __entry->len,
+- __entry->new_agbno,
+- __entry->new_len)
++ __entry->len)
+ );
+
+ /* simple inode-based error/%ip tracepoint class */
--- /dev/null
+From stable+bounces-124361-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:16 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:24 -0700
+Subject: xfs: pass the xfs_bmbt_irec directly through the log intent code
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-5-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit ddccb81b26ec021ae1f3366aa996cc4c68dd75ce ]
+
+Instead of repeatedly boxing and unboxing the incore extent mapping
+structure as it passes through the BUI code, pass the pointer directly
+through.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 32 ++++++++----------
+ fs/xfs/libxfs/xfs_bmap.h | 5 --
+ fs/xfs/xfs_bmap_item.c | 81 +++++++++++++++++------------------------------
+ 3 files changed, 46 insertions(+), 72 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -6119,39 +6119,37 @@ xfs_bmap_unmap_extent(
+ int
+ xfs_bmap_finish_one(
+ struct xfs_trans *tp,
+- struct xfs_inode *ip,
+- enum xfs_bmap_intent_type type,
+- int whichfork,
+- xfs_fileoff_t startoff,
+- xfs_fsblock_t startblock,
+- xfs_filblks_t *blockcount,
+- xfs_exntst_t state)
++ struct xfs_bmap_intent *bi)
+ {
++ struct xfs_bmbt_irec *bmap = &bi->bi_bmap;
+ int error = 0;
+
+ ASSERT(tp->t_firstblock == NULLFSBLOCK);
+
+ trace_xfs_bmap_deferred(tp->t_mountp,
+- XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
+- XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
+- ip->i_ino, whichfork, startoff, *blockcount, state);
++ XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
++ bi->bi_type,
++ XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
++ bi->bi_owner->i_ino, bi->bi_whichfork,
++ bmap->br_startoff, bmap->br_blockcount,
++ bmap->br_state);
+
+- if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
++ if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK))
+ return -EFSCORRUPTED;
+
+ if (XFS_TEST_ERROR(false, tp->t_mountp,
+ XFS_ERRTAG_BMAP_FINISH_ONE))
+ return -EIO;
+
+- switch (type) {
++ switch (bi->bi_type) {
+ case XFS_BMAP_MAP:
+- error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
+- startblock, 0);
+- *blockcount = 0;
++ error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
++ bmap->br_blockcount, bmap->br_startblock, 0);
++ bmap->br_blockcount = 0;
+ break;
+ case XFS_BMAP_UNMAP:
+- error = __xfs_bunmapi(tp, ip, startoff, blockcount,
+- XFS_BMAPI_REMAP, 1);
++ error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
++ &bmap->br_blockcount, XFS_BMAPI_REMAP, 1);
+ break;
+ default:
+ ASSERT(0);
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -236,10 +236,7 @@ struct xfs_bmap_intent {
+ struct xfs_bmbt_irec bi_bmap;
+ };
+
+-int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip,
+- enum xfs_bmap_intent_type type, int whichfork,
+- xfs_fileoff_t startoff, xfs_fsblock_t startblock,
+- xfs_filblks_t *blockcount, xfs_exntst_t state);
++int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
+ void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap);
+ void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -246,18 +246,11 @@ static int
+ xfs_trans_log_finish_bmap_update(
+ struct xfs_trans *tp,
+ struct xfs_bud_log_item *budp,
+- enum xfs_bmap_intent_type type,
+- struct xfs_inode *ip,
+- int whichfork,
+- xfs_fileoff_t startoff,
+- xfs_fsblock_t startblock,
+- xfs_filblks_t *blockcount,
+- xfs_exntst_t state)
++ struct xfs_bmap_intent *bi)
+ {
+ int error;
+
+- error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
+- startblock, blockcount, state);
++ error = xfs_bmap_finish_one(tp, bi);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+@@ -378,25 +371,17 @@ xfs_bmap_update_finish_item(
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+ {
+- struct xfs_bmap_intent *bmap;
+- xfs_filblks_t count;
++ struct xfs_bmap_intent *bi;
+ int error;
+
+- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+- count = bmap->bi_bmap.br_blockcount;
+- error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done),
+- bmap->bi_type,
+- bmap->bi_owner, bmap->bi_whichfork,
+- bmap->bi_bmap.br_startoff,
+- bmap->bi_bmap.br_startblock,
+- &count,
+- bmap->bi_bmap.br_state);
+- if (!error && count > 0) {
+- ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+- bmap->bi_bmap.br_blockcount = count;
++ bi = container_of(item, struct xfs_bmap_intent, bi_list);
++
++ error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi);
++ if (!error && bi->bi_bmap.br_blockcount > 0) {
++ ASSERT(bi->bi_type == XFS_BMAP_UNMAP);
+ return -EAGAIN;
+ }
+- kmem_cache_free(xfs_bmap_intent_cache, bmap);
++ kmem_cache_free(xfs_bmap_intent_cache, bi);
+ return error;
+ }
+
+@@ -471,17 +456,13 @@ xfs_bui_item_recover(
+ struct xfs_log_item *lip,
+ struct list_head *capture_list)
+ {
+- struct xfs_bmbt_irec irec;
++ struct xfs_bmap_intent fake = { };
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+ struct xfs_trans *tp;
+ struct xfs_inode *ip = NULL;
+ struct xfs_mount *mp = lip->li_log->l_mp;
+- struct xfs_map_extent *bmap;
++ struct xfs_map_extent *map;
+ struct xfs_bud_log_item *budp;
+- xfs_filblks_t count;
+- xfs_exntst_t state;
+- unsigned int bui_type;
+- int whichfork;
+ int iext_delta;
+ int error = 0;
+
+@@ -491,14 +472,12 @@ xfs_bui_item_recover(
+ return -EFSCORRUPTED;
+ }
+
+- bmap = &buip->bui_format.bui_extents[0];
+- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
+- XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
+- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
++ map = &buip->bui_format.bui_extents[0];
++ fake.bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
++ fake.bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
+
+- error = xlog_recover_iget(mp, bmap->me_owner, &ip);
++ error = xlog_recover_iget(mp, map->me_owner, &ip);
+ if (error)
+ return error;
+
+@@ -512,34 +491,34 @@ xfs_bui_item_recover(
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+- if (bui_type == XFS_BMAP_MAP)
++ if (fake.bi_type == XFS_BMAP_MAP)
+ iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT;
+ else
+ iext_delta = XFS_IEXT_PUNCH_HOLE_CNT;
+
+- error = xfs_iext_count_may_overflow(ip, whichfork, iext_delta);
++ error = xfs_iext_count_may_overflow(ip, fake.bi_whichfork, iext_delta);
+ if (error == -EFBIG)
+ error = xfs_iext_count_upgrade(tp, ip, iext_delta);
+ if (error)
+ goto err_cancel;
+
+- count = bmap->me_len;
+- error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip,
+- whichfork, bmap->me_startoff, bmap->me_startblock,
+- &count, state);
++ fake.bi_owner = ip;
++ fake.bi_bmap.br_startblock = map->me_startblock;
++ fake.bi_bmap.br_startoff = map->me_startoff;
++ fake.bi_bmap.br_blockcount = map->me_len;
++ fake.bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
++ XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
++
++ error = xfs_trans_log_finish_bmap_update(tp, budp, &fake);
+ if (error == -EFSCORRUPTED)
+- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap,
+- sizeof(*bmap));
++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, map,
++ sizeof(*map));
+ if (error)
+ goto err_cancel;
+
+- if (count > 0) {
+- ASSERT(bui_type == XFS_BMAP_UNMAP);
+- irec.br_startblock = bmap->me_startblock;
+- irec.br_blockcount = count;
+- irec.br_startoff = bmap->me_startoff;
+- irec.br_state = state;
+- xfs_bmap_unmap_extent(tp, ip, &irec);
++ if (fake.bi_bmap.br_blockcount > 0) {
++ ASSERT(fake.bi_type == XFS_BMAP_UNMAP);
++ xfs_bmap_unmap_extent(tp, ip, &fake.bi_bmap);
+ }
+
+ /*
--- /dev/null
+From stable+bounces-124372-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:31 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:35 -0700
+Subject: xfs: pass the xfs_defer_pending object to iop_recover
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-16-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit a050acdfa8003a44eae4558fddafc7afb1aef458 ]
+
+Now that log intent item recovery recreates the xfs_defer_pending state,
+we should pass that into the ->iop_recover routines so that the intent
+item can finish the recreation work.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_attr_item.c | 3 ++-
+ fs/xfs/xfs_bmap_item.c | 3 ++-
+ fs/xfs/xfs_extfree_item.c | 3 ++-
+ fs/xfs/xfs_log_recover.c | 2 +-
+ fs/xfs/xfs_refcount_item.c | 3 ++-
+ fs/xfs/xfs_rmap_item.c | 3 ++-
+ fs/xfs/xfs_trans.h | 4 +++-
+ 7 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -545,9 +545,10 @@ xfs_attri_validate(
+ */
+ STATIC int
+ xfs_attri_item_recover(
+- struct xfs_log_item *lip,
++ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+ {
++ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
+ struct xfs_attr_intent *attr;
+ struct xfs_mount *mp = lip->li_log->l_mp;
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -453,11 +453,12 @@ xfs_bui_validate(
+ */
+ STATIC int
+ xfs_bui_item_recover(
+- struct xfs_log_item *lip,
++ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+ {
+ struct xfs_bmap_intent fake = { };
+ struct xfs_trans_res resv;
++ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+ struct xfs_trans *tp;
+ struct xfs_inode *ip = NULL;
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -595,10 +595,11 @@ xfs_efi_validate_ext(
+ */
+ STATIC int
+ xfs_efi_item_recover(
+- struct xfs_log_item *lip,
++ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+ {
+ struct xfs_trans_res resv;
++ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ struct xfs_mount *mp = lip->li_log->l_mp;
+ struct xfs_efd_log_item *efdp;
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2586,7 +2586,7 @@ xlog_recover_process_intents(
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
+ */
+- error = ops->iop_recover(lip, &capture_list);
++ error = ops->iop_recover(dfp, &capture_list);
+ if (error) {
+ trace_xlog_intent_recovery_failed(log->l_mp, error,
+ ops->iop_recover);
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -450,10 +450,11 @@ xfs_cui_validate_phys(
+ */
+ STATIC int
+ xfs_cui_item_recover(
+- struct xfs_log_item *lip,
++ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+ {
+ struct xfs_trans_res resv;
++ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
+ struct xfs_cud_log_item *cudp;
+ struct xfs_trans *tp;
+--- a/fs/xfs/xfs_rmap_item.c
++++ b/fs/xfs/xfs_rmap_item.c
+@@ -489,10 +489,11 @@ xfs_rui_validate_map(
+ */
+ STATIC int
+ xfs_rui_item_recover(
+- struct xfs_log_item *lip,
++ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+ {
+ struct xfs_trans_res resv;
++ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
+ struct xfs_map_extent *rmap;
+ struct xfs_rud_log_item *rudp;
+--- a/fs/xfs/xfs_trans.h
++++ b/fs/xfs/xfs_trans.h
+@@ -66,6 +66,8 @@ struct xfs_log_item {
+ { (1u << XFS_LI_DIRTY), "DIRTY" }, \
+ { (1u << XFS_LI_WHITEOUT), "WHITEOUT" }
+
++struct xfs_defer_pending;
++
+ struct xfs_item_ops {
+ unsigned flags;
+ void (*iop_size)(struct xfs_log_item *, int *, int *);
+@@ -78,7 +80,7 @@ struct xfs_item_ops {
+ xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+ uint (*iop_push)(struct xfs_log_item *, struct list_head *);
+ void (*iop_release)(struct xfs_log_item *);
+- int (*iop_recover)(struct xfs_log_item *lip,
++ int (*iop_recover)(struct xfs_defer_pending *dfp,
+ struct list_head *capture_list);
+ bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
+ struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
--- /dev/null
+From stable+bounces-124359-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:15 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:22 -0700
+Subject: xfs: pass xfs_extent_free_item directly through the log intent code
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-3-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 72ba455599ad13d08c29dafa22a32360e07b1961 ]
+
+Pass the incore xfs_extent_free_item through the EFI logging code
+instead of repeatedly boxing and unboxing parameters.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_extfree_item.c | 55 +++++++++++++++++++++++++---------------------
+ 1 file changed, 30 insertions(+), 25 deletions(-)
+
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -345,23 +345,30 @@ static int
+ xfs_trans_free_extent(
+ struct xfs_trans *tp,
+ struct xfs_efd_log_item *efdp,
+- xfs_fsblock_t start_block,
+- xfs_extlen_t ext_len,
+- const struct xfs_owner_info *oinfo,
+- bool skip_discard)
++ struct xfs_extent_free_item *free)
+ {
++ struct xfs_owner_info oinfo = { };
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
+ uint next_extent;
+- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
++ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp,
++ free->xefi_startblock);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
+- start_block);
++ free->xefi_startblock);
+ int error;
+
+- trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
++ oinfo.oi_owner = free->xefi_owner;
++ if (free->xefi_flags & XFS_EFI_ATTR_FORK)
++ oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
++ if (free->xefi_flags & XFS_EFI_BMBT_BLOCK)
++ oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
++
++ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno,
++ free->xefi_blockcount);
+
+- error = __xfs_free_extent(tp, start_block, ext_len,
+- oinfo, XFS_AG_RESV_NONE, skip_discard);
++ error = __xfs_free_extent(tp, free->xefi_startblock,
++ free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
++ free->xefi_flags & XFS_EFI_SKIP_DISCARD);
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+@@ -375,8 +382,8 @@ xfs_trans_free_extent(
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+- extp->ext_start = start_block;
+- extp->ext_len = ext_len;
++ extp->ext_start = free->xefi_startblock;
++ extp->ext_len = free->xefi_blockcount;
+ efdp->efd_next_extent++;
+
+ return error;
+@@ -463,20 +470,12 @@ xfs_extent_free_finish_item(
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+ {
+- struct xfs_owner_info oinfo = { };
+ struct xfs_extent_free_item *free;
+ int error;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+- oinfo.oi_owner = free->xefi_owner;
+- if (free->xefi_flags & XFS_EFI_ATTR_FORK)
+- oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
+- if (free->xefi_flags & XFS_EFI_BMBT_BLOCK)
+- oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+- error = xfs_trans_free_extent(tp, EFD_ITEM(done),
+- free->xefi_startblock,
+- free->xefi_blockcount,
+- &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD);
++
++ error = xfs_trans_free_extent(tp, EFD_ITEM(done), free);
+ kmem_cache_free(xfs_extfree_item_cache, free);
+ return error;
+ }
+@@ -599,7 +598,6 @@ xfs_efi_item_recover(
+ struct xfs_mount *mp = lip->li_log->l_mp;
+ struct xfs_efd_log_item *efdp;
+ struct xfs_trans *tp;
+- struct xfs_extent *extp;
+ int i;
+ int error = 0;
+
+@@ -624,10 +622,17 @@ xfs_efi_item_recover(
+ efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+
+ for (i = 0; i < efip->efi_format.efi_nextents; i++) {
++ struct xfs_extent_free_item fake = {
++ .xefi_owner = XFS_RMAP_OWN_UNKNOWN,
++ };
++ struct xfs_extent *extp;
++
+ extp = &efip->efi_format.efi_extents[i];
+- error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+- extp->ext_len,
+- &XFS_RMAP_OINFO_ANY_OWNER, false);
++
++ fake.xefi_startblock = extp->ext_start;
++ fake.xefi_blockcount = extp->ext_len;
++
++ error = xfs_trans_free_extent(tp, efdp, &fake);
+ if (error == -EFSCORRUPTED)
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ extp, sizeof(*extp));
--- /dev/null
+From stable+bounces-124378-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:38 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:41 -0700
+Subject: xfs: recompute growfsrtfree transaction reservation while growing rt volume
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-22-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 578bd4ce7100ae34f98c6b0147fe75cfa0dadbac ]
+
+While playing with growfs to create a 20TB realtime section on a
+filesystem that didn't previously have an rt section, I noticed that
+growfs would occasionally shut down the log due to a transaction
+reservation overflow.
+
+xfs_calc_growrtfree_reservation uses the current size of the realtime
+summary file (m_rsumsize) to compute the transaction reservation for a
+growrtfree transaction. The reservations are computed at mount time,
+which means that m_rsumsize is zero when growfs starts "freeing" the new
+realtime extents into the rt volume. As a result, the transaction is
+undersized and fails.
+
+Fix this by recomputing the transaction reservations every time we
+change m_rsumsize.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_rtalloc.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/xfs/xfs_rtalloc.c
++++ b/fs/xfs/xfs_rtalloc.c
+@@ -1070,6 +1070,9 @@ xfs_growfs_rt(
+ nsbp->sb_rbmblocks;
+ nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
+ nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
++ /* recompute growfsrt reservation from new rsumsize */
++ xfs_trans_resv_calc(nmp, &nmp->m_resv);
++
+ /*
+ * Start a transaction, get the log reservation.
+ */
+@@ -1153,6 +1156,8 @@ error_cancel:
+ */
+ mp->m_rsumlevels = nrsumlevels;
+ mp->m_rsumsize = nrsumsize;
++ /* recompute growfsrt reservation from new rsumsize */
++ xfs_trans_resv_calc(mp, &mp->m_resv);
+
+ error = xfs_trans_commit(tp);
+ if (error)
--- /dev/null
+From stable+bounces-124386-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:50 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:49 -0700
+Subject: xfs: remove conditional building of rt geometry validator functions
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Chandan Babu R <chandanbabu@kernel.org>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-30-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 881f78f472556ed05588172d5b5676b48dc48240 ]
+
+[ 6.1: used 6.6 backport to minimize conflicts ]
+
+[backport: resolve merge conflicts due to refactoring rtbitmap/summary
+macros and accessors]
+
+I mistakenly turned off CONFIG_XFS_RT in the Kconfig file for arm64
+variant of the djwong-wtf git branch. Unfortunately, it took me a good
+hour to figure out that RT wasn't built because this is what got printed
+to dmesg:
+
+XFS (sda2): realtime geometry sanity check failed
+XFS (sda2): Metadata corruption detected at xfs_sb_read_verify+0x170/0x190 [xfs], xfs_sb block 0x0
+
+Whereas I would have expected:
+
+XFS (sda2): Not built with CONFIG_XFS_RT
+XFS (sda2): RT mount failed
+
+The root cause of these problems is the conditional compilation of the
+new functions xfs_validate_rtextents and xfs_compute_rextslog that I
+introduced in the two commits listed below. The !RT versions of these
+functions return false and 0, respectively, which causes primary
+superblock validation to fail, which explains the first message.
+
+Move the two functions to other parts of libxfs that are not
+conditionally defined by CONFIG_XFS_RT and remove the broken stubs so
+that validation works again.
+
+Fixes: e14293803f4e ("xfs: don't allow overly small or large realtime volumes")
+Fixes: a6a38f309afc ("xfs: make rextslog computation consistent with mkfs")
+Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.c | 14 --------------
+ fs/xfs/libxfs/xfs_rtbitmap.h | 16 ----------------
+ fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++
+ fs/xfs/libxfs/xfs_sb.h | 2 ++
+ fs/xfs/libxfs/xfs_types.h | 12 ++++++++++++
+ fs/xfs/scrub/rtbitmap.c | 1 +
+ 6 files changed, 29 insertions(+), 30 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -1131,17 +1131,3 @@ xfs_rtalloc_extent_is_free(
+ return 0;
+ }
+
+-/*
+- * Compute the maximum level number of the realtime summary file, as defined by
+- * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
+- * use of rt volumes with more than 2^32 extents.
+- */
+-uint8_t
+-xfs_compute_rextslog(
+- xfs_rtbxlen_t rtextents)
+-{
+- if (!rtextents)
+- return 0;
+- return xfs_highbit64(rtextents);
+-}
+-
+--- a/fs/xfs/libxfs/xfs_rtbitmap.h
++++ b/fs/xfs/libxfs/xfs_rtbitmap.h
+@@ -71,20 +71,6 @@ xfs_rtfree_extent(
+ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen);
+
+-uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+-
+-/* Do we support an rt volume having this number of rtextents? */
+-static inline bool
+-xfs_validate_rtextents(
+- xfs_rtbxlen_t rtextents)
+-{
+- /* No runt rt volumes */
+- if (rtextents == 0)
+- return false;
+-
+- return true;
+-}
+-
+ #else /* CONFIG_XFS_RT */
+ # define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+ # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+@@ -92,8 +78,6 @@ xfs_validate_rtextents(
+ # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
+ # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
+ # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
+-# define xfs_compute_rextslog(rtx) (0)
+-# define xfs_validate_rtextents(rtx) (false)
+ #endif /* CONFIG_XFS_RT */
+
+ #endif /* __XFS_RTBITMAP_H__ */
+--- a/fs/xfs/libxfs/xfs_sb.c
++++ b/fs/xfs/libxfs/xfs_sb.c
+@@ -1367,3 +1367,17 @@ xfs_validate_stripe_geometry(
+ }
+ return true;
+ }
++
++/*
++ * Compute the maximum level number of the realtime summary file, as defined by
++ * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct
++ * use of rt volumes with more than 2^32 extents.
++ */
++uint8_t
++xfs_compute_rextslog(
++ xfs_rtbxlen_t rtextents)
++{
++ if (!rtextents)
++ return 0;
++ return xfs_highbit64(rtextents);
++}
+--- a/fs/xfs/libxfs/xfs_sb.h
++++ b/fs/xfs/libxfs/xfs_sb.h
+@@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct x
+ extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
+ __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+
++uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
++
+ #endif /* __XFS_SB_H__ */
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -228,4 +228,16 @@ bool xfs_verify_fileoff(struct xfs_mount
+ bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
+ xfs_fileoff_t len);
+
++/* Do we support an rt volume having this number of rtextents? */
++static inline bool
++xfs_validate_rtextents(
++ xfs_rtbxlen_t rtextents)
++{
++ /* No runt rt volumes */
++ if (rtextents == 0)
++ return false;
++
++ return true;
++}
++
+ #endif /* __XFS_TYPES_H__ */
+--- a/fs/xfs/scrub/rtbitmap.c
++++ b/fs/xfs/scrub/rtbitmap.c
+@@ -14,6 +14,7 @@
+ #include "xfs_rtbitmap.h"
+ #include "xfs_inode.h"
+ #include "xfs_bmap.h"
++#include "xfs_sb.h"
+ #include "scrub/scrub.h"
+ #include "scrub/common.h"
+
--- /dev/null
+From stable+bounces-124377-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:37 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:40 -0700
+Subject: xfs: remove unused fields from struct xbtree_ifakeroot
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Dave Chinner <dchinner@redhat.com>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-21-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 4c8ecd1cfdd01fb727121035014d9f654a30bdf2 ]
+
+Remove these unused fields since nobody uses them. They should have
+been removed years ago in a different cleanup series from Christoph
+Hellwig.
+
+Fixes: daf83964a3681 ("xfs: move the per-fork nextents fields into struct xfs_ifork")
+Fixes: f7e67b20ecbbc ("xfs: move the fork format fields into struct xfs_ifork")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree_staging.h | 6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_btree_staging.h
++++ b/fs/xfs/libxfs/xfs_btree_staging.h
+@@ -37,12 +37,6 @@ struct xbtree_ifakeroot {
+
+ /* Number of bytes available for this fork in the inode. */
+ unsigned int if_fork_size;
+-
+- /* Fork format. */
+- unsigned int if_format;
+-
+- /* Number of records. */
+- unsigned int if_extents;
+ };
+
+ /* Cursor interactions with fake roots for inode-rooted btrees. */
--- /dev/null
+From stable+bounces-124366-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:23 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:29 -0700
+Subject: xfs: reserve less log space when recovering log intent items
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Wengang Wang <wen.gang.wang@oracle.com>, Srikanth C S <srikanth.c.s@oracle.com>, Dave Chinner <dchinner@redhat.com>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-10-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 3c919b0910906cc69d76dea214776f0eac73358b ]
+
+Wengang Wang reports that a customer's system was running a number of
+truncate operations on a filesystem with a very small log. Contention
+on the reserve heads led to other threads stalling on smaller updates
+(e.g. mtime updates) long enough to result in the node being rebooted
+on account of the lack of responsiveness. The node failed to recover
+because log recovery of an EFI became stuck waiting for a grant of
+reserve space. From Wengang's report:
+
+"For the file deletion, log bytes are reserved basing on
+xfs_mount->tr_itruncate which is:
+
+ tr_logres = 175488,
+ tr_logcount = 2,
+ tr_logflags = XFS_TRANS_PERM_LOG_RES,
+
+"You see it's a permanent log reservation with two log operations (two
+transactions in rolling mode). After calculation (xlog_calc_unit_res()
+adds space for various log headers), the final log space needed per
+transaction changes from 175488 to 180208 bytes. So the total log
+space needed is 360416 bytes (180208 * 2). [That quantity] of log space
+(360416 bytes) needs to be reserved for both run time inode removing
+(xfs_inactive_truncate()) and EFI recover (xfs_efi_item_recover())."
+
+In other words, runtime pre-reserves 360K of space in anticipation of
+running a chain of two transactions in which each transaction gets a
+180K reservation.
+
+Now that we've allocated the transaction, we delete the bmap mapping,
+log an EFI to free the space, and roll the transaction as part of
+finishing the deferops chain. Rolling creates a new xfs_trans which
+shares its ticket with the old transaction. Next, xfs_trans_roll calls
+__xfs_trans_commit with regrant == true, which calls xlog_cil_commit
+with the same regrant parameter.
+
+xlog_cil_commit calls xfs_log_ticket_regrant, which decrements t_cnt and
+subtracts t_curr_res from the reservation and write heads.
+
+If the filesystem is fresh and the first transaction only used (say)
+20K, then t_curr_res will be 160K, and we give that much reservation
+back to the reservation head. Or if the file is really fragmented and
+the first transaction actually uses 170K, then t_curr_res will be 10K,
+and that's what we give back to the reservation.
+
+Having done that, we're now headed into the second transaction with an
+EFI and 180K of reservation. Other threads apparently consumed all the
+reservation for smaller transactions, such as timestamp updates.
+
+Now let's say the first transaction gets written to disk and we crash
+without ever completing the second transaction. Now we remount the fs,
+log recovery finds the unfinished EFI, and calls xfs_efi_recover to
+finish the EFI. However, xfs_efi_recover starts a new tr_itruncate
+transaction, which asks for 360K log reservation. This is a lot more
+than the 180K that we had reserved at the time of the crash. If the
+first EFI to be recovered is also pinning the tail of the log, we will
+be unable to free any space in the log, and recovery livelocks.
+
+Wengang confirmed this:
+
+"Now we have the second transaction which has 180208 log bytes reserved
+too. The second transaction is supposed to process intents including
+extent freeing. With my hacking patch, I blocked the extent freeing 5
+hours. So in that 5 hours, 180208 (NOT 360416) log bytes are reserved.
+
+"With my test case, other transactions (update timestamps) then happen.
+As my hacking patch pins the journal tail, those timestamp-updating
+transactions finally use up (almost) all the left available log space
+(in memory and on disk). And finally the on disk (and in memory)
+available log space goes down near to 180208 bytes. Those 180208 bytes
+are reserved by [the] second (extent-free) transaction [in the chain]."
+
+Wengang and I noticed that EFI recovery starts a transaction, completes
+one step of the chain, and commits the transaction without completing
+any other steps of the chain. Those subsequent steps are completed by
+xlog_finish_defer_ops, which allocates yet another transaction to
+finish the rest of the chain. That transaction gets the same tr_logres
+as the head transaction, but with tr_logcount = 1 to force regranting
+with every roll to avoid livelocks.
+
+In other words, we already figured this out in commit 929b92f64048d
+("xfs: xfs_defer_capture should absorb remaining transaction
+reservation"), but should have applied that logic to each intent item's
+recovery function. For Wengang's case, the xfs_trans_alloc call in the
+EFI recovery function should only be asking for a single transaction's
+worth of log reservation -- 180K, not 360K.
+
+Quoting Wengang again:
+
+"With log recovery, during EFI recovery, we use tr_itruncate again to
+reserve two transactions that needs 360416 log bytes. Reserving 360416
+bytes fails [stalls] because we now only have about 180208 available.
+
+"Actually during the EFI recover, we only need one transaction to free
+the extents just like the 2nd transaction at RUNTIME. So it only needs
+to reserve 180208 rather than 360416 bytes. We have (a bit) more than
+180208 available log bytes on disk, so [if we decrease the reservation
+to 180K] the reservation goes and the recovery [finishes]. That is to
+say: we can fix the log recover part to fix the issue. We can introduce
+a new xfs_trans_res xfs_mount->tr_ext_free
+
+{
+ tr_logres = 175488,
+ tr_logcount = 0,
+ tr_logflags = 0,
+}
+
+"and use tr_ext_free instead of tr_itruncate in EFI recover."
+
+However, I don't think it quite makes sense to create an entirely new
+transaction reservation type to handle single-stepping during log
+recovery. Instead, we should copy the transaction reservation
+information in the xfs_mount, change tr_logcount to 1, and pass that
+into xfs_trans_alloc. We know this won't risk changing the min log size
+computation since we always ask for a fraction of the reservation for
+all known transaction types.
+
+This looks like it's been lurking in the codebase since commit
+3d3c8b5222b92, which changed the xfs_trans_reserve call in
+xlog_recover_process_efi to use the tr_logcount in tr_itruncate.
+That changed the EFI recovery transaction from making a
+non-XFS_TRANS_PERM_LOG_RES request for one transaction's worth of log
+space to a XFS_TRANS_PERM_LOG_RES request for two transactions worth.
+
+Fixes: 3d3c8b5222b92 ("xfs: refactor xfs_trans_reserve() interface")
+Complements: 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation")
+Suggested-by: Wengang Wang <wen.gang.wang@oracle.com>
+Cc: Srikanth C S <srikanth.c.s@oracle.com>
+[djwong: apply the same transformation to all log intent recovery]
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_log_recover.h | 22 ++++++++++++++++++++++
+ fs/xfs/xfs_attr_item.c | 7 ++++---
+ fs/xfs/xfs_bmap_item.c | 4 +++-
+ fs/xfs/xfs_extfree_item.c | 4 +++-
+ fs/xfs/xfs_refcount_item.c | 6 ++++--
+ fs/xfs/xfs_rmap_item.c | 6 ++++--
+ 6 files changed, 40 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_recover.h
++++ b/fs/xfs/libxfs/xfs_log_recover.h
+@@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct
+ #define xlog_check_buf_cancel_table(log) do { } while (0)
+ #endif
+
++/*
++ * Transform a regular reservation into one suitable for recovery of a log
++ * intent item.
++ *
++ * Intent recovery only runs a single step of the transaction chain and defers
++ * the rest to a separate transaction. Therefore, we reduce logcount to 1 here
++ * to avoid livelocks if the log grant space is nearly exhausted due to the
++ * recovered intent pinning the tail. Keep the same logflags to avoid tripping
++ * asserts elsewhere. Struct copies abound below.
++ */
++static inline struct xfs_trans_res
++xlog_recover_resv(const struct xfs_trans_res *r)
++{
++ struct xfs_trans_res ret = {
++ .tr_logres = r->tr_logres,
++ .tr_logcount = 1,
++ .tr_logflags = r->tr_logflags,
++ };
++
++ return ret;
++}
++
+ #endif /* __XFS_LOG_RECOVER_H__ */
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -547,7 +547,7 @@ xfs_attri_item_recover(
+ struct xfs_inode *ip;
+ struct xfs_da_args *args;
+ struct xfs_trans *tp;
+- struct xfs_trans_res tres;
++ struct xfs_trans_res resv;
+ struct xfs_attri_log_format *attrp;
+ struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
+ int error;
+@@ -618,8 +618,9 @@ xfs_attri_item_recover(
+ goto out;
+ }
+
+- xfs_init_attr_trans(args, &tres, &total);
+- error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp);
++ xfs_init_attr_trans(args, &resv, &total);
++ resv = xlog_recover_resv(&resv);
++ error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp);
+ if (error)
+ goto out;
+
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -457,6 +457,7 @@ xfs_bui_item_recover(
+ struct list_head *capture_list)
+ {
+ struct xfs_bmap_intent fake = { };
++ struct xfs_trans_res resv;
+ struct xfs_bui_log_item *buip = BUI_ITEM(lip);
+ struct xfs_trans *tp;
+ struct xfs_inode *ip = NULL;
+@@ -482,7 +483,8 @@ xfs_bui_item_recover(
+ return error;
+
+ /* Allocate transaction and do the work. */
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
++ error = xfs_trans_alloc(mp, &resv,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
+ if (error)
+ goto err_rele;
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -598,6 +598,7 @@ xfs_efi_item_recover(
+ struct xfs_log_item *lip,
+ struct list_head *capture_list)
+ {
++ struct xfs_trans_res resv;
+ struct xfs_efi_log_item *efip = EFI_ITEM(lip);
+ struct xfs_mount *mp = lip->li_log->l_mp;
+ struct xfs_efd_log_item *efdp;
+@@ -620,7 +621,8 @@ xfs_efi_item_recover(
+ }
+ }
+
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
++ error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp);
+ if (error)
+ return error;
+ efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -453,6 +453,7 @@ xfs_cui_item_recover(
+ struct xfs_log_item *lip,
+ struct list_head *capture_list)
+ {
++ struct xfs_trans_res resv;
+ struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
+ struct xfs_cud_log_item *cudp;
+ struct xfs_trans *tp;
+@@ -490,8 +491,9 @@ xfs_cui_item_recover(
+ * doesn't fit. We need to reserve enough blocks to handle a
+ * full btree split on either end of the refcount range.
+ */
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+- mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
++ error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0,
++ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+--- a/fs/xfs/xfs_rmap_item.c
++++ b/fs/xfs/xfs_rmap_item.c
+@@ -492,6 +492,7 @@ xfs_rui_item_recover(
+ struct xfs_log_item *lip,
+ struct list_head *capture_list)
+ {
++ struct xfs_trans_res resv;
+ struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
+ struct xfs_map_extent *rmap;
+ struct xfs_rud_log_item *rudp;
+@@ -519,8 +520,9 @@ xfs_rui_item_recover(
+ }
+ }
+
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
+- mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
++ error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0,
++ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+ rudp = xfs_trans_get_rud(tp, ruip);
--- /dev/null
+From stable+bounces-124385-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:47 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:48 -0700
+Subject: xfs: reset XFS_ATTR_INCOMPLETE filter on node removal
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Andrey Albershteyn <aalbersh@redhat.com>, Christoph Hellwig <hch@lst.de>, Chandan Babu R <chandanbabu@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, "Darrick J. Wong" <djwong@kernel.org>
+Message-ID: <20250313202550.2257219-29-leah.rumancik@gmail.com>
+
+From: Andrey Albershteyn <aalbersh@redhat.com>
+
+[ Upstream commit 82ef1a5356572219f41f9123ca047259a77bd67b ]
+
+In the XFS_DAS_NODE_REMOVE_ATTR case, xfs_attr_node_remove_attr() sets
+filter to XFS_ATTR_INCOMPLETE. The filter is then reset in
+xfs_attr_complete_op() if XFS_DA_OP_REPLACE operation is performed.
+
+The filter is not reset though if XFS just removes the attribute
+(args->value == NULL) with xfs_attr_defer_remove(). attr code goes
+to XFS_DAS_DONE state.
+
+Fix this by always resetting the XFS_ATTR_INCOMPLETE filter. The replace
+operation already resets this filter anyway, and the other operations
+are completed at this step and hence don't need it.
+
+Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework")
+Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -421,10 +421,10 @@ xfs_attr_complete_op(
+ bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
+
+ args->op_flags &= ~XFS_DA_OP_REPLACE;
+- if (do_replace) {
+- args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
++ args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
++ if (do_replace)
+ return replace_state;
+- }
++
+ return XFS_DAS_DONE;
+ }
+
--- /dev/null
+From stable+bounces-124373-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:34 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:36 -0700
+Subject: xfs: transfer recovered intent item ownership in ->iop_recover
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-17-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit deb4cd8ba87f17b12c72b3827820d9c703e9fd95 ]
+
+Now that we pass the xfs_defer_pending object into the intent item
+recovery functions, we know exactly when ownership of the sole refcount
+passes from the recovery context to the intent done item. At that
+point, we need to null out dfp_intent so that the recovery mechanism
+won't release it. This should fix the UAF problem reported by Long Li.
+
+Note that we still want to recreate the full deferred work state. That
+will be addressed in the next patches.
+
+Fixes: 2e76f188fd90 ("xfs: cancel intents immediately if process_intents fails")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_log_recover.h | 2 ++
+ fs/xfs/xfs_attr_item.c | 1 +
+ fs/xfs/xfs_bmap_item.c | 2 ++
+ fs/xfs/xfs_extfree_item.c | 2 ++
+ fs/xfs/xfs_log_recover.c | 19 ++++++++++++-------
+ fs/xfs/xfs_refcount_item.c | 1 +
+ fs/xfs/xfs_rmap_item.c | 2 ++
+ 7 files changed, 22 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_log_recover.h
++++ b/fs/xfs/libxfs/xfs_log_recover.h
+@@ -155,5 +155,7 @@ xlog_recover_resv(const struct xfs_trans
+
+ void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip,
+ xfs_lsn_t lsn, unsigned int dfp_type);
++void xlog_recover_transfer_intent(struct xfs_trans *tp,
++ struct xfs_defer_pending *dfp);
+
+ #endif /* __XFS_LOG_RECOVER_H__ */
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -632,6 +632,7 @@ xfs_attri_item_recover(
+
+ args->trans = tp;
+ done_item = xfs_trans_get_attrd(tp, attrip);
++ xlog_recover_transfer_intent(tp, dfp);
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -491,6 +491,8 @@ xfs_bui_item_recover(
+ goto err_rele;
+
+ budp = xfs_trans_get_bud(tp, buip);
++ xlog_recover_transfer_intent(tp, dfp);
++
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -626,7 +626,9 @@ xfs_efi_item_recover(
+ error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp);
+ if (error)
+ return error;
++
+ efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
++ xlog_recover_transfer_intent(tp, dfp);
+
+ for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+ struct xfs_extent_free_item fake = {
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2593,13 +2593,6 @@ xlog_recover_process_intents(
+ break;
+ }
+
+- /*
+- * XXX: @lip could have been freed, so detach the log item from
+- * the pending item before freeing the pending item. This does
+- * not fix the existing UAF bug that occurs if ->iop_recover
+- * fails after creating the intent done item.
+- */
+- dfp->dfp_intent = NULL;
+ xfs_defer_cancel_recovery(log->l_mp, dfp);
+ }
+ if (error)
+@@ -2634,6 +2627,18 @@ xlog_recover_cancel_intents(
+ }
+
+ /*
++ * Transfer ownership of the recovered log intent item to the recovery
++ * transaction.
++ */
++void
++xlog_recover_transfer_intent(
++ struct xfs_trans *tp,
++ struct xfs_defer_pending *dfp)
++{
++ dfp->dfp_intent = NULL;
++}
++
++/*
+ * This routine performs a transaction to null out a bad inode pointer
+ * in an agi unlinked inode hash bucket.
+ */
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -499,6 +499,7 @@ xfs_cui_item_recover(
+ return error;
+
+ cudp = xfs_trans_get_cud(tp, cuip);
++ xlog_recover_transfer_intent(tp, dfp);
+
+ for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
+ struct xfs_refcount_intent fake = { };
+--- a/fs/xfs/xfs_rmap_item.c
++++ b/fs/xfs/xfs_rmap_item.c
+@@ -526,7 +526,9 @@ xfs_rui_item_recover(
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
++
+ rudp = xfs_trans_get_rud(tp, ruip);
++ xlog_recover_transfer_intent(tp, dfp);
+
+ for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
+ rmap = &ruip->rui_format.rui_extents[i];
--- /dev/null
+From stable+bounces-124384-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:48 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:47 -0700
+Subject: xfs: update dir3 leaf block metadata after swap
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Zhang Tianci <zhangtianci.1997@bytedance.com>, Dave Chinner <david@fromorbit.com>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandanbabu@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-28-leah.rumancik@gmail.com>
+
+From: Zhang Tianci <zhangtianci.1997@bytedance.com>
+
+[ Upstream commit 5759aa4f956034b289b0ae2c99daddfc775442e1 ]
+
+xfs_da3_swap_lastblock() copies the last block's content to the dead block,
+but does not update the metadata in it. Some block types need their
+metadata updated: for example, a dir3 leafn block records its own blkno,
+which must be updated to the dead block's blkno. Otherwise, before the
+xfs_buf is written to disk, verify_write() will fail on
+blk_hdr->blkno != xfs_buf->b_bn, and xfs will be shut down.
+
+We will get this warning:
+
+ XFS (dm-0): Metadata corruption detected at xfs_dir3_leaf_verify+0xa8/0xe0 [xfs], xfs_dir3_leafn block 0x178
+ XFS (dm-0): Unmount and run xfs_repair
+ XFS (dm-0): First 128 bytes of corrupted metadata buffer:
+ 00000000e80f1917: 00 80 00 0b 00 80 00 07 3d ff 00 00 00 00 00 00 ........=.......
+ 000000009604c005: 00 00 00 00 00 00 01 a0 00 00 00 00 00 00 00 00 ................
+ 000000006b6fb2bf: e4 44 e3 97 b5 64 44 41 8b 84 60 0e 50 43 d9 bf .D...dDA..`.PC..
+ 00000000678978a2: 00 00 00 00 00 00 00 83 01 73 00 93 00 00 00 00 .........s......
+ 00000000b28b247c: 99 29 1d 38 00 00 00 00 99 29 1d 40 00 00 00 00 .).8.....).@....
+ 000000002b2a662c: 99 29 1d 48 00 00 00 00 99 49 11 00 00 00 00 00 .).H.....I......
+ 00000000ea2ffbb8: 99 49 11 08 00 00 45 25 99 49 11 10 00 00 48 fe .I....E%.I....H.
+ 0000000069e86440: 99 49 11 18 00 00 4c 6b 99 49 11 20 00 00 4d 97 .I....Lk.I. ..M.
+ XFS (dm-0): xfs_do_force_shutdown(0x8) called from line 1423 of file fs/xfs/xfs_buf.c. Return address = 00000000c0ff63c1
+ XFS (dm-0): Corruption of in-memory data detected. Shutting down filesystem
+ XFS (dm-0): Please umount the filesystem and rectify the problem(s)
+
+>>From the log above, we know xfs_buf->b_no is 0x178, but the block's hdr record
+its blkno is 0x1a0.
+
+Fixes: 24df33b45ecf ("xfs: add CRC checking to dir2 leaf blocks")
+Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
+Suggested-by: Dave Chinner <david@fromorbit.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_da_btree.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -2316,10 +2316,17 @@ xfs_da3_swap_lastblock(
+ return error;
+ /*
+ * Copy the last block into the dead buffer and log it.
++ * On CRC-enabled file systems, also update the stamped in blkno.
+ */
+ memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
++ if (xfs_has_crc(mp)) {
++ struct xfs_da3_blkinfo *da3 = dead_buf->b_addr;
++
++ da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf));
++ }
+ xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
+ dead_info = dead_buf->b_addr;
++
+ /*
+ * Get values from the moved block.
+ */
--- /dev/null
+From stable+bounces-124365-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:21 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:28 -0700
+Subject: xfs: use deferred frees for btree block freeing
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Dave Chinner <dchinner@redhat.com>, "Darrick J. Wong" <djwong@kernel.org>, Chandan Babu R <chandan.babu@oracle.com>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-9-leah.rumancik@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+[ Upstream commit b742d7b4f0e03df25c2a772adcded35044b625ca ]
+
+[ 6.1: resolved conflict in xfs_extfree_item.c ]
+
+Btrees that aren't freespace management trees use the normal extent
+allocation and freeing routines for their blocks. Hence when a btree
+block is freed, a direct call to xfs_free_extent() is made and the
+extent is immediately freed. This puts the entire free space
+management btrees under this path, so we are stacking btrees on
+btrees in the call stack. The inobt, finobt and refcount btrees
+all do this.
+
+However, the bmap btree does not do this - it calls
+xfs_free_extent_later() to defer the extent free operation via an
+XEFI and hence it gets processed in deferred operation processing
+during the commit of the primary transaction (i.e. via intent
+chaining).
+
+We need to change xfs_free_extent() to behave in a non-blocking
+manner so that we can avoid deadlocks with busy extents near ENOSPC
+in transactions that free multiple extents. Inserting or removing a
+record from a btree can cause a multi-level tree merge operation and
+that will free multiple blocks from the btree in a single
+transaction. i.e. we can call xfs_free_extent() multiple times, and
+hence the btree manipulation transaction is vulnerable to this busy
+extent deadlock vector.
+
+To fix this, convert all the remaining callers of xfs_free_extent()
+to use xfs_free_extent_later() to queue XEFIs and hence defer
+processing of the extent frees to a context that can be safely
+restarted if a deadlock condition is detected.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 2 +-
+ fs/xfs/libxfs/xfs_alloc.c | 4 ++++
+ fs/xfs/libxfs/xfs_alloc.h | 8 +++++---
+ fs/xfs/libxfs/xfs_bmap.c | 8 +++++---
+ fs/xfs/libxfs/xfs_bmap_btree.c | 3 ++-
+ fs/xfs/libxfs/xfs_ialloc.c | 8 ++++----
+ fs/xfs/libxfs/xfs_ialloc_btree.c | 3 +--
+ fs/xfs/libxfs/xfs_refcount.c | 9 ++++++---
+ fs/xfs/libxfs/xfs_refcount_btree.c | 8 +-------
+ fs/xfs/xfs_extfree_item.c | 3 ++-
+ fs/xfs/xfs_reflink.c | 3 ++-
+ 11 files changed, 33 insertions(+), 26 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -907,7 +907,7 @@ xfs_ag_shrink_space(
+ goto resv_err;
+
+ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
+- true);
++ XFS_AG_RESV_NONE, true);
+ if (err2)
+ goto resv_err;
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2507,6 +2507,7 @@ xfs_defer_agfl_block(
+ xefi->xefi_startblock = fsbno;
+ xefi->xefi_blockcount = 1;
+ xefi->xefi_owner = oinfo->oi_owner;
++ xefi->xefi_agresv = XFS_AG_RESV_AGFL;
+
+ trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+
+@@ -2524,6 +2525,7 @@ __xfs_free_extent_later(
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ const struct xfs_owner_info *oinfo,
++ enum xfs_ag_resv_type type,
+ bool skip_discard)
+ {
+ struct xfs_extent_free_item *xefi;
+@@ -2544,6 +2546,7 @@ __xfs_free_extent_later(
+ ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+ #endif
+ ASSERT(xfs_extfree_item_cache != NULL);
++ ASSERT(type != XFS_AG_RESV_AGFL);
+
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+@@ -2552,6 +2555,7 @@ __xfs_free_extent_later(
+ GFP_KERNEL | __GFP_NOFAIL);
+ xefi->xefi_startblock = bno;
+ xefi->xefi_blockcount = (xfs_extlen_t)len;
++ xefi->xefi_agresv = type;
+ if (skip_discard)
+ xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (oinfo) {
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -215,7 +215,7 @@ xfs_buf_to_agfl_bno(
+
+ int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+ xfs_filblks_t len, const struct xfs_owner_info *oinfo,
+- bool skip_discard);
++ enum xfs_ag_resv_type type, bool skip_discard);
+
+ /*
+ * List of extents to be free "later".
+@@ -227,6 +227,7 @@ struct xfs_extent_free_item {
+ xfs_fsblock_t xefi_startblock;/* starting fs block number */
+ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ unsigned int xefi_flags;
++ enum xfs_ag_resv_type xefi_agresv;
+ };
+
+ #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
+@@ -238,9 +239,10 @@ xfs_free_extent_later(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+- const struct xfs_owner_info *oinfo)
++ const struct xfs_owner_info *oinfo,
++ enum xfs_ag_resv_type type)
+ {
+- return __xfs_free_extent_later(tp, bno, len, oinfo, false);
++ return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
+ }
+
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
+ return error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+- error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
++ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
++ XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+
+@@ -5208,8 +5209,9 @@ xfs_bmap_del_extent_real(
+ } else {
+ error = __xfs_free_extent_later(tp, del->br_startblock,
+ del->br_blockcount, NULL,
+- (bflags & XFS_BMAPI_NODISCARD) ||
+- del->br_state == XFS_EXT_UNWRITTEN);
++ XFS_AG_RESV_NONE,
++ ((bflags & XFS_BMAPI_NODISCARD) ||
++ del->br_state == XFS_EXT_UNWRITTEN));
+ if (error)
+ goto done;
+ }
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -288,7 +288,8 @@ xfs_bmbt_free_block(
+ int error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
+- error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
++ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
++ XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -1846,8 +1846,8 @@ xfs_difree_inode_chunk(
+ /* not sparse, calculate extent info directly */
+ return xfs_free_extent_later(tp,
+ XFS_AGB_TO_FSB(mp, agno, sagbno),
+- M_IGEO(mp)->ialloc_blks,
+- &XFS_RMAP_OINFO_INODES);
++ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
++ XFS_AG_RESV_NONE);
+ }
+
+ /* holemask is only 16-bits (fits in an unsigned long) */
+@@ -1892,8 +1892,8 @@ xfs_difree_inode_chunk(
+ ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
+ ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
+ error = xfs_free_extent_later(tp,
+- XFS_AGB_TO_FSB(mp, agno, agbno),
+- contigblk, &XFS_RMAP_OINFO_INODES);
++ XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
++ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
+ if (error)
+ return error;
+
+--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
+@@ -160,8 +160,7 @@ __xfs_inobt_free_block(
+
+ xfs_inobt_mod_blockcount(cur, -1);
+ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
+- return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
+- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
++ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
+ &XFS_RMAP_OINFO_INOBT, resv);
+ }
+
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -1130,7 +1130,8 @@ xfs_refcount_adjust_extents(
+ cur->bc_ag.pag->pag_agno,
+ tmp.rc_startblock);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+- tmp.rc_blockcount, NULL);
++ tmp.rc_blockcount, NULL,
++ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_error;
+ }
+@@ -1191,7 +1192,8 @@ xfs_refcount_adjust_extents(
+ cur->bc_ag.pag->pag_agno,
+ ext.rc_startblock);
+ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+- ext.rc_blockcount, NULL);
++ ext.rc_blockcount, NULL,
++ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_error;
+ }
+@@ -1963,7 +1965,8 @@ xfs_refcount_recover_cow_leftovers(
+
+ /* Free the block. */
+ error = xfs_free_extent_later(tp, fsb,
+- rr->rr_rrec.rc_blockcount, NULL);
++ rr->rr_rrec.rc_blockcount, NULL,
++ XFS_AG_RESV_NONE);
+ if (error)
+ goto out_trans;
+
+--- a/fs/xfs/libxfs/xfs_refcount_btree.c
++++ b/fs/xfs/libxfs/xfs_refcount_btree.c
+@@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agf *agf = agbp->b_addr;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+- int error;
+
+ trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
+ be32_add_cpu(&agf->agf_refcount_blocks, -1);
+ xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
+- error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
+- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
++ return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
+ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
+- if (error)
+- return error;
+-
+- return error;
+ }
+
+ STATIC int
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -369,7 +369,7 @@ xfs_trans_free_extent(
+
+ pag = xfs_perag_get(mp, agno);
+ error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount,
+- &oinfo, XFS_AG_RESV_NONE,
++ &oinfo, xefi->xefi_agresv,
+ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
+ xfs_perag_put(pag);
+
+@@ -628,6 +628,7 @@ xfs_efi_item_recover(
+ for (i = 0; i < efip->efi_format.efi_nextents; i++) {
+ struct xfs_extent_free_item fake = {
+ .xefi_owner = XFS_RMAP_OWN_UNKNOWN,
++ .xefi_agresv = XFS_AG_RESV_NONE,
+ };
+ struct xfs_extent *extp;
+
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -619,7 +619,8 @@ xfs_reflink_cancel_cow_blocks(
+ del.br_blockcount);
+
+ error = xfs_free_extent_later(*tpp, del.br_startblock,
+- del.br_blockcount, NULL);
++ del.br_blockcount, NULL,
++ XFS_AG_RESV_NONE);
+ if (error)
+ break;
+
--- /dev/null
+From stable+bounces-124371-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:31 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:34 -0700
+Subject: xfs: use xfs_defer_pending objects to recover intent items
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Catherine Hoang <catherine.hoang@oracle.com>, Greg Kroah-Hartman <gregkh@linuxfoundation.org>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-15-leah.rumancik@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+[ Upstream commit 03f7767c9f6120ac933378fdec3bfd78bf07bc11 ]
+
+[ 6.1: resolved conflict in xfs_defer.c ]
+
+One thing I never quite got around to doing is porting the log intent
+item recovery code to reconstruct the deferred pending work state. As a
+result, each intent item open codes xfs_defer_finish_one in its recovery
+method, because that's what the EFI code did before xfs_defer.c even
+existed.
+
+This is a gross thing to have left unfixed -- if an EFI cannot proceed
+due to busy extents, we end up creating separate new EFIs for each
+unfinished work item, which is a change in behavior from what runtime
+would have done.
+
+Worse yet, Long Li pointed out that there's a UAF in the recovery code.
+The ->commit_pass2 function adds the intent item to the AIL and drops
+the refcount. The one remaining refcount is now owned by the recovery
+mechanism (aka the log intent items in the AIL) with the intent of
+giving the refcount to the intent done item in the ->iop_recover
+function.
+
+However, if something fails later in recovery, xlog_recover_finish will
+walk the recovered intent items in the AIL and release them. If the CIL
+hasn't been pushed before that point (which is possible since we don't
+force the log until later) then the intent done release will try to free
+its associated intent, which has already been freed.
+
+This patch starts to address this mess by having the ->commit_pass2
+functions recreate the xfs_defer_pending state. The next few patches
+will fix the recovery functions.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_defer.c | 103 ++++++++++++++++++++++++++---------
+ fs/xfs/libxfs/xfs_defer.h | 5 +
+ fs/xfs/libxfs/xfs_log_recover.h | 3 +
+ fs/xfs/xfs_attr_item.c | 10 ---
+ fs/xfs/xfs_bmap_item.c | 9 +--
+ fs/xfs/xfs_extfree_item.c | 9 +--
+ fs/xfs/xfs_log.c | 1
+ fs/xfs/xfs_log_priv.h | 1
+ fs/xfs/xfs_log_recover.c | 115 ++++++++++++++++++++--------------------
+ fs/xfs/xfs_refcount_item.c | 9 +--
+ fs/xfs/xfs_rmap_item.c | 9 +--
+ 11 files changed, 158 insertions(+), 116 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_defer.c
++++ b/fs/xfs/libxfs/xfs_defer.c
+@@ -245,23 +245,52 @@ xfs_defer_create_intents(
+ return ret;
+ }
+
+-STATIC void
++static inline void
+ xfs_defer_pending_abort(
+ struct xfs_mount *mp,
++ struct xfs_defer_pending *dfp)
++{
++ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
++
++ trace_xfs_defer_pending_abort(mp, dfp);
++
++ if (dfp->dfp_intent && !dfp->dfp_done) {
++ ops->abort_intent(dfp->dfp_intent);
++ dfp->dfp_intent = NULL;
++ }
++}
++
++static inline void
++xfs_defer_pending_cancel_work(
++ struct xfs_mount *mp,
++ struct xfs_defer_pending *dfp)
++{
++ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
++ struct list_head *pwi;
++ struct list_head *n;
++
++ trace_xfs_defer_cancel_list(mp, dfp);
++
++ list_del(&dfp->dfp_list);
++ list_for_each_safe(pwi, n, &dfp->dfp_work) {
++ list_del(pwi);
++ dfp->dfp_count--;
++ ops->cancel_item(pwi);
++ }
++ ASSERT(dfp->dfp_count == 0);
++ kmem_cache_free(xfs_defer_pending_cache, dfp);
++}
++
++STATIC void
++xfs_defer_pending_abort_list(
++ struct xfs_mount *mp,
+ struct list_head *dop_list)
+ {
+ struct xfs_defer_pending *dfp;
+- const struct xfs_defer_op_type *ops;
+
+ /* Abort intent items that don't have a done item. */
+- list_for_each_entry(dfp, dop_list, dfp_list) {
+- ops = defer_op_types[dfp->dfp_type];
+- trace_xfs_defer_pending_abort(mp, dfp);
+- if (dfp->dfp_intent && !dfp->dfp_done) {
+- ops->abort_intent(dfp->dfp_intent);
+- dfp->dfp_intent = NULL;
+- }
+- }
++ list_for_each_entry(dfp, dop_list, dfp_list)
++ xfs_defer_pending_abort(mp, dfp);
+ }
+
+ /* Abort all the intents that were committed. */
+@@ -271,7 +300,7 @@ xfs_defer_trans_abort(
+ struct list_head *dop_pending)
+ {
+ trace_xfs_defer_trans_abort(tp, _RET_IP_);
+- xfs_defer_pending_abort(tp->t_mountp, dop_pending);
++ xfs_defer_pending_abort_list(tp->t_mountp, dop_pending);
+ }
+
+ /*
+@@ -389,26 +418,13 @@ xfs_defer_cancel_list(
+ {
+ struct xfs_defer_pending *dfp;
+ struct xfs_defer_pending *pli;
+- struct list_head *pwi;
+- struct list_head *n;
+- const struct xfs_defer_op_type *ops;
+
+ /*
+ * Free the pending items. Caller should already have arranged
+ * for the intent items to be released.
+ */
+- list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
+- ops = defer_op_types[dfp->dfp_type];
+- trace_xfs_defer_cancel_list(mp, dfp);
+- list_del(&dfp->dfp_list);
+- list_for_each_safe(pwi, n, &dfp->dfp_work) {
+- list_del(pwi);
+- dfp->dfp_count--;
+- ops->cancel_item(pwi);
+- }
+- ASSERT(dfp->dfp_count == 0);
+- kmem_cache_free(xfs_defer_pending_cache, dfp);
+- }
++ list_for_each_entry_safe(dfp, pli, dop_list, dfp_list)
++ xfs_defer_pending_cancel_work(mp, dfp);
+ }
+
+ /*
+@@ -664,6 +680,39 @@ xfs_defer_add(
+ }
+
+ /*
++ * Create a pending deferred work item to replay the recovered intent item
++ * and add it to the list.
++ */
++void
++xfs_defer_start_recovery(
++ struct xfs_log_item *lip,
++ enum xfs_defer_ops_type dfp_type,
++ struct list_head *r_dfops)
++{
++ struct xfs_defer_pending *dfp;
++
++ dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
++ GFP_NOFS | __GFP_NOFAIL);
++ dfp->dfp_type = dfp_type;
++ dfp->dfp_intent = lip;
++ INIT_LIST_HEAD(&dfp->dfp_work);
++ list_add_tail(&dfp->dfp_list, r_dfops);
++}
++
++/*
++ * Cancel a deferred work item created to recover a log intent item. @dfp
++ * will be freed after this function returns.
++ */
++void
++xfs_defer_cancel_recovery(
++ struct xfs_mount *mp,
++ struct xfs_defer_pending *dfp)
++{
++ xfs_defer_pending_abort(mp, dfp);
++ xfs_defer_pending_cancel_work(mp, dfp);
++}
++
++/*
+ * Move deferred ops from one transaction to another and reset the source to
+ * initial state. This is primarily used to carry state forward across
+ * transaction rolls with pending dfops.
+@@ -767,7 +816,7 @@ xfs_defer_ops_capture_abort(
+ {
+ unsigned short i;
+
+- xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
++ xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops);
+ xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+
+ for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
+--- a/fs/xfs/libxfs/xfs_defer.h
++++ b/fs/xfs/libxfs/xfs_defer.h
+@@ -125,6 +125,11 @@ void xfs_defer_ops_capture_abort(struct
+ struct xfs_defer_capture *d);
+ void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
+
++void xfs_defer_start_recovery(struct xfs_log_item *lip,
++ enum xfs_defer_ops_type dfp_type, struct list_head *r_dfops);
++void xfs_defer_cancel_recovery(struct xfs_mount *mp,
++ struct xfs_defer_pending *dfp);
++
+ int __init xfs_defer_init_item_caches(void);
+ void xfs_defer_destroy_item_caches(void);
+
+--- a/fs/xfs/libxfs/xfs_log_recover.h
++++ b/fs/xfs/libxfs/xfs_log_recover.h
+@@ -153,4 +153,7 @@ xlog_recover_resv(const struct xfs_trans
+ return ret;
+ }
+
++void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip,
++ xfs_lsn_t lsn, unsigned int dfp_type);
++
+ #endif /* __XFS_LOG_RECOVER_H__ */
+--- a/fs/xfs/xfs_attr_item.c
++++ b/fs/xfs/xfs_attr_item.c
+@@ -772,14 +772,8 @@ xlog_recover_attri_commit_pass2(
+ attrip = xfs_attri_init(mp, nv);
+ memcpy(&attrip->attri_format, attri_formatp, len);
+
+- /*
+- * The ATTRI has two references. One for the ATTRD and one for ATTRI to
+- * ensure it makes it into the AIL. Insert the ATTRI into the AIL
+- * directly and drop the ATTRI reference. Note that
+- * xfs_trans_ail_update() drops the AIL lock.
+- */
+- xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
+- xfs_attri_release(attrip);
++ xlog_recover_intent_item(log, &attrip->attri_item, lsn,
++ XFS_DEFER_OPS_TYPE_ATTR);
+ xfs_attri_log_nameval_put(nv);
+ return 0;
+ }
+--- a/fs/xfs/xfs_bmap_item.c
++++ b/fs/xfs/xfs_bmap_item.c
+@@ -646,12 +646,9 @@ xlog_recover_bui_commit_pass2(
+ buip = xfs_bui_init(mp);
+ xfs_bui_copy_format(&buip->bui_format, bui_formatp);
+ atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
+- /*
+- * Insert the intent into the AIL directly and drop one reference so
+- * that finishing or canceling the work will drop the other.
+- */
+- xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn);
+- xfs_bui_release(buip);
++
++ xlog_recover_intent_item(log, &buip->bui_item, lsn,
++ XFS_DEFER_OPS_TYPE_BMAP);
+ return 0;
+ }
+
+--- a/fs/xfs/xfs_extfree_item.c
++++ b/fs/xfs/xfs_extfree_item.c
+@@ -736,12 +736,9 @@ xlog_recover_efi_commit_pass2(
+ return error;
+ }
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+- /*
+- * Insert the intent into the AIL directly and drop one reference so
+- * that finishing or canceling the work will drop the other.
+- */
+- xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn);
+- xfs_efi_release(efip);
++
++ xlog_recover_intent_item(log, &efip->efi_item, lsn,
++ XFS_DEFER_OPS_TYPE_FREE);
+ return 0;
+ }
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -1540,6 +1540,7 @@ xlog_alloc_log(
+ log->l_covered_state = XLOG_STATE_COVER_IDLE;
+ set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
+ INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
++ INIT_LIST_HEAD(&log->r_dfops);
+
+ log->l_prev_block = -1;
+ /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -403,6 +403,7 @@ struct xlog {
+ long l_opstate; /* operational state */
+ uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
+ struct list_head *l_buf_cancel_table;
++ struct list_head r_dfops; /* recovered log intent items */
+ int l_iclog_hsize; /* size of iclog header */
+ int l_iclog_heads; /* # of iclog header sectors */
+ uint l_sectBBsize; /* sector size in BBs (2^n) */
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -1723,30 +1723,24 @@ xlog_clear_stale_blocks(
+ */
+ void
+ xlog_recover_release_intent(
+- struct xlog *log,
+- unsigned short intent_type,
+- uint64_t intent_id)
++ struct xlog *log,
++ unsigned short intent_type,
++ uint64_t intent_id)
+ {
+- struct xfs_ail_cursor cur;
+- struct xfs_log_item *lip;
+- struct xfs_ail *ailp = log->l_ailp;
+-
+- spin_lock(&ailp->ail_lock);
+- for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
+- lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
++ struct xfs_defer_pending *dfp, *n;
++
++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
++ struct xfs_log_item *lip = dfp->dfp_intent;
++
+ if (lip->li_type != intent_type)
+ continue;
+ if (!lip->li_ops->iop_match(lip, intent_id))
+ continue;
+
+- spin_unlock(&ailp->ail_lock);
+- lip->li_ops->iop_release(lip);
+- spin_lock(&ailp->ail_lock);
+- break;
+- }
++ ASSERT(xlog_item_is_intent(lip));
+
+- xfs_trans_ail_cursor_done(&cur);
+- spin_unlock(&ailp->ail_lock);
++ xfs_defer_cancel_recovery(log->l_mp, dfp);
++ }
+ }
+
+ int
+@@ -1939,6 +1933,29 @@ xlog_buf_readahead(
+ xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
+ }
+
++/*
++ * Create a deferred work structure for resuming and tracking the progress of a
++ * log intent item that was found during recovery.
++ */
++void
++xlog_recover_intent_item(
++ struct xlog *log,
++ struct xfs_log_item *lip,
++ xfs_lsn_t lsn,
++ unsigned int dfp_type)
++{
++ ASSERT(xlog_item_is_intent(lip));
++
++ xfs_defer_start_recovery(lip, dfp_type, &log->r_dfops);
++
++ /*
++ * Insert the intent into the AIL directly and drop one reference so
++ * that finishing or canceling the work will drop the other.
++ */
++ xfs_trans_ail_insert(log->l_ailp, lip, lsn);
++ lip->li_ops->iop_unpin(lip, 0);
++}
++
+ STATIC int
+ xlog_recover_items_pass2(
+ struct xlog *log,
+@@ -2536,29 +2553,22 @@ xlog_abort_defer_ops(
+ */
+ STATIC int
+ xlog_recover_process_intents(
+- struct xlog *log)
++ struct xlog *log)
+ {
+ LIST_HEAD(capture_list);
+- struct xfs_ail_cursor cur;
+- struct xfs_log_item *lip;
+- struct xfs_ail *ailp;
+- int error = 0;
++ struct xfs_defer_pending *dfp, *n;
++ int error = 0;
+ #if defined(DEBUG) || defined(XFS_WARN)
+- xfs_lsn_t last_lsn;
+-#endif
++ xfs_lsn_t last_lsn;
+
+- ailp = log->l_ailp;
+- spin_lock(&ailp->ail_lock);
+-#if defined(DEBUG) || defined(XFS_WARN)
+ last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
+ #endif
+- for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+- lip != NULL;
+- lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
+- const struct xfs_item_ops *ops;
+
+- if (!xlog_item_is_intent(lip))
+- break;
++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
++ struct xfs_log_item *lip = dfp->dfp_intent;
++ const struct xfs_item_ops *ops = lip->li_ops;
++
++ ASSERT(xlog_item_is_intent(lip));
+
+ /*
+ * We should never see a redo item with a LSN higher than
+@@ -2576,19 +2586,22 @@ xlog_recover_process_intents(
+ * The recovery function can free the log item, so we must not
+ * access lip after it returns.
+ */
+- spin_unlock(&ailp->ail_lock);
+- ops = lip->li_ops;
+ error = ops->iop_recover(lip, &capture_list);
+- spin_lock(&ailp->ail_lock);
+ if (error) {
+ trace_xlog_intent_recovery_failed(log->l_mp, error,
+ ops->iop_recover);
+ break;
+ }
+- }
+
+- xfs_trans_ail_cursor_done(&cur);
+- spin_unlock(&ailp->ail_lock);
++ /*
++ * XXX: @lip could have been freed, so detach the log item from
++ * the pending item before freeing the pending item. This does
++ * not fix the existing UAF bug that occurs if ->iop_recover
++ * fails after creating the intent done item.
++ */
++ dfp->dfp_intent = NULL;
++ xfs_defer_cancel_recovery(log->l_mp, dfp);
++ }
+ if (error)
+ goto err;
+
+@@ -2609,27 +2622,15 @@ err:
+ */
+ STATIC void
+ xlog_recover_cancel_intents(
+- struct xlog *log)
++ struct xlog *log)
+ {
+- struct xfs_log_item *lip;
+- struct xfs_ail_cursor cur;
+- struct xfs_ail *ailp;
+-
+- ailp = log->l_ailp;
+- spin_lock(&ailp->ail_lock);
+- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+- while (lip != NULL) {
+- if (!xlog_item_is_intent(lip))
+- break;
++ struct xfs_defer_pending *dfp, *n;
+
+- spin_unlock(&ailp->ail_lock);
+- lip->li_ops->iop_release(lip);
+- spin_lock(&ailp->ail_lock);
+- lip = xfs_trans_ail_cursor_next(ailp, &cur);
+- }
++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
++ ASSERT(xlog_item_is_intent(dfp->dfp_intent));
+
+- xfs_trans_ail_cursor_done(&cur);
+- spin_unlock(&ailp->ail_lock);
++ xfs_defer_cancel_recovery(log->l_mp, dfp);
++ }
+ }
+
+ /*
+--- a/fs/xfs/xfs_refcount_item.c
++++ b/fs/xfs/xfs_refcount_item.c
+@@ -668,12 +668,9 @@ xlog_recover_cui_commit_pass2(
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
+ xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
+ atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+- /*
+- * Insert the intent into the AIL directly and drop one reference so
+- * that finishing or canceling the work will drop the other.
+- */
+- xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn);
+- xfs_cui_release(cuip);
++
++ xlog_recover_intent_item(log, &cuip->cui_item, lsn,
++ XFS_DEFER_OPS_TYPE_REFCOUNT);
+ return 0;
+ }
+
+--- a/fs/xfs/xfs_rmap_item.c
++++ b/fs/xfs/xfs_rmap_item.c
+@@ -682,12 +682,9 @@ xlog_recover_rui_commit_pass2(
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
+ atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+- /*
+- * Insert the intent into the AIL directly and drop one reference so
+- * that finishing or canceling the work will drop the other.
+- */
+- xfs_trans_ail_insert(log->l_ailp, &ruip->rui_item, lsn);
+- xfs_rui_release(ruip);
++
++ xlog_recover_intent_item(log, &ruip->rui_item, lsn,
++ XFS_DEFER_OPS_TYPE_RMAP);
+ return 0;
+ }
+
--- /dev/null
+From stable+bounces-124363-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:20 2025
+From: Leah Rumancik <leah.rumancik@gmail.com>
+Date: Thu, 13 Mar 2025 13:25:26 -0700
+Subject: xfs: validate block number being freed before adding to xefi
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev, Dave Chinner <dchinner@redhat.com>, Christoph Hellwig <hch@lst.de>, "Darrick J. Wong" <djwong@kernel.org>, Dave Chinner <david@fromorbit.com>, Leah Rumancik <leah.rumancik@gmail.com>
+Message-ID: <20250313202550.2257219-7-leah.rumancik@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+[ Upstream commit 7dfee17b13e5024c5c0ab1911859ded4182de3e5 ]
+
+Bad things happen in deferred extent freeing operations if it is
+passed a bad block number in the xefi. This can come from a bogus
+agno/agbno pair from deferred agfl freeing, or just a bad fsbno
+being passed to __xfs_free_extent_later(). Either way, it's very
+difficult to diagnose where a null perag oops in EFI creation
+is coming from when the operation that queued the xefi has already
+been completed and there's no longer any trace of it around....
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
+Acked-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 5 ++++-
+ fs/xfs/libxfs/xfs_alloc.c | 16 +++++++++++++---
+ fs/xfs/libxfs/xfs_alloc.h | 6 +++---
+ fs/xfs/libxfs/xfs_bmap.c | 10 ++++++++--
+ fs/xfs/libxfs/xfs_bmap_btree.c | 7 +++++--
+ fs/xfs/libxfs/xfs_ialloc.c | 24 ++++++++++++++++--------
+ fs/xfs/libxfs/xfs_refcount.c | 13 ++++++++++---
+ fs/xfs/xfs_reflink.c | 4 +++-
+ 8 files changed, 62 insertions(+), 23 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -906,7 +906,10 @@ xfs_ag_shrink_space(
+ if (err2 != -ENOSPC)
+ goto resv_err;
+
+- __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
++ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
++ true);
++ if (err2)
++ goto resv_err;
+
+ /*
+ * Roll the transaction before trying to re-init the per-ag
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2485,7 +2485,7 @@ xfs_agfl_reset(
+ * the real allocation can proceed. Deferring the free disconnects freeing up
+ * the AGFL slot from freeing the block.
+ */
+-STATIC void
++static int
+ xfs_defer_agfl_block(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+@@ -2504,16 +2504,20 @@ xfs_defer_agfl_block(
+ xefi->xefi_blockcount = 1;
+ xefi->xefi_owner = oinfo->oi_owner;
+
++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
++ return -EFSCORRUPTED;
++
+ trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
++ return 0;
+ }
+
+ /*
+ * Add the extent to the list of extents to be free at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+-void
++int
+ __xfs_free_extent_later(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno,
+@@ -2540,6 +2544,9 @@ __xfs_free_extent_later(
+ #endif
+ ASSERT(xfs_extfree_item_cache != NULL);
+
++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
++ return -EFSCORRUPTED;
++
+ xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
+ GFP_KERNEL | __GFP_NOFAIL);
+ xefi->xefi_startblock = bno;
+@@ -2561,6 +2568,7 @@ __xfs_free_extent_later(
+ XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
+ XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
++ return 0;
+ }
+
+ #ifdef DEBUG
+@@ -2720,7 +2728,9 @@ xfs_alloc_fix_freelist(
+ goto out_agbp_relse;
+
+ /* defer agfl frees */
+- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
++ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
++ if (error)
++ goto out_agbp_relse;
+ }
+
+ targs.tp = tp;
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -213,7 +213,7 @@ xfs_buf_to_agfl_bno(
+ return bp->b_addr;
+ }
+
+-void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
++int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
+ xfs_filblks_t len, const struct xfs_owner_info *oinfo,
+ bool skip_discard);
+
+@@ -233,14 +233,14 @@ struct xfs_extent_free_item {
+ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
+ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
+
+-static inline void
++static inline int
+ xfs_free_extent_later(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno,
+ xfs_filblks_t len,
+ const struct xfs_owner_info *oinfo)
+ {
+- __xfs_free_extent_later(tp, bno, len, oinfo, false);
++ return __xfs_free_extent_later(tp, bno, len, oinfo, false);
+ }
+
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents(
+ cblock = XFS_BUF_TO_BLOCK(cbp);
+ if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
+ return error;
++
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
+- xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
++ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
++ if (error)
++ return error;
++
+ ip->i_nblocks--;
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_binval(tp, cbp);
+@@ -5202,10 +5206,12 @@ xfs_bmap_del_extent_real(
+ if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
+ xfs_refcount_decrease_extent(tp, del);
+ } else {
+- __xfs_free_extent_later(tp, del->br_startblock,
++ error = __xfs_free_extent_later(tp, del->br_startblock,
+ del->br_blockcount, NULL,
+ (bflags & XFS_BMAPI_NODISCARD) ||
+ del->br_state == XFS_EXT_UNWRITTEN);
++ if (error)
++ goto done;
+ }
+ }
+
+--- a/fs/xfs/libxfs/xfs_bmap_btree.c
++++ b/fs/xfs/libxfs/xfs_bmap_btree.c
+@@ -285,11 +285,14 @@ xfs_bmbt_free_block(
+ struct xfs_trans *tp = cur->bc_tp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ struct xfs_owner_info oinfo;
++ int error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
+- xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
+- ip->i_nblocks--;
++ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
++ if (error)
++ return error;
+
++ ip->i_nblocks--;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ return 0;
+--- a/fs/xfs/libxfs/xfs_ialloc.c
++++ b/fs/xfs/libxfs/xfs_ialloc.c
+@@ -1827,7 +1827,7 @@ xfs_dialloc(
+ * might be sparse and only free the regions that are allocated as part of the
+ * chunk.
+ */
+-STATIC void
++static int
+ xfs_difree_inode_chunk(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+@@ -1844,10 +1844,10 @@ xfs_difree_inode_chunk(
+
+ if (!xfs_inobt_issparse(rec->ir_holemask)) {
+ /* not sparse, calculate extent info directly */
+- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
+- M_IGEO(mp)->ialloc_blks,
+- &XFS_RMAP_OINFO_INODES);
+- return;
++ return xfs_free_extent_later(tp,
++ XFS_AGB_TO_FSB(mp, agno, sagbno),
++ M_IGEO(mp)->ialloc_blks,
++ &XFS_RMAP_OINFO_INODES);
+ }
+
+ /* holemask is only 16-bits (fits in an unsigned long) */
+@@ -1864,6 +1864,8 @@ xfs_difree_inode_chunk(
+ XFS_INOBT_HOLEMASK_BITS);
+ nextbit = startidx + 1;
+ while (startidx < XFS_INOBT_HOLEMASK_BITS) {
++ int error;
++
+ nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
+ nextbit);
+ /*
+@@ -1889,8 +1891,11 @@ xfs_difree_inode_chunk(
+
+ ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
+ ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
+- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno),
+- contigblk, &XFS_RMAP_OINFO_INODES);
++ error = xfs_free_extent_later(tp,
++ XFS_AGB_TO_FSB(mp, agno, agbno),
++ contigblk, &XFS_RMAP_OINFO_INODES);
++ if (error)
++ return error;
+
+ /* reset range to current bit and carry on... */
+ startidx = endidx = nextbit;
+@@ -1898,6 +1903,7 @@ xfs_difree_inode_chunk(
+ next:
+ nextbit++;
+ }
++ return 0;
+ }
+
+ STATIC int
+@@ -1998,7 +2004,9 @@ xfs_difree_inobt(
+ goto error0;
+ }
+
+- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
++ error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
++ if (error)
++ goto error0;
+ } else {
+ xic->deleted = false;
+
+--- a/fs/xfs/libxfs/xfs_refcount.c
++++ b/fs/xfs/libxfs/xfs_refcount.c
+@@ -1129,8 +1129,10 @@ xfs_refcount_adjust_extents(
+ fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno,
+ tmp.rc_startblock);
+- xfs_free_extent_later(cur->bc_tp, fsbno,
++ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+ tmp.rc_blockcount, NULL);
++ if (error)
++ goto out_error;
+ }
+
+ (*agbno) += tmp.rc_blockcount;
+@@ -1188,8 +1190,10 @@ xfs_refcount_adjust_extents(
+ fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
+ cur->bc_ag.pag->pag_agno,
+ ext.rc_startblock);
+- xfs_free_extent_later(cur->bc_tp, fsbno,
++ error = xfs_free_extent_later(cur->bc_tp, fsbno,
+ ext.rc_blockcount, NULL);
++ if (error)
++ goto out_error;
+ }
+
+ skip:
+@@ -1958,7 +1962,10 @@ xfs_refcount_recover_cow_leftovers(
+ rr->rr_rrec.rc_blockcount);
+
+ /* Free the block. */
+- xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
++ error = xfs_free_extent_later(tp, fsb,
++ rr->rr_rrec.rc_blockcount, NULL);
++ if (error)
++ goto out_trans;
+
+ error = xfs_trans_commit(tp);
+ if (error)
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -618,8 +618,10 @@ xfs_reflink_cancel_cow_blocks(
+ xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
+ del.br_blockcount);
+
+- xfs_free_extent_later(*tpp, del.br_startblock,
++ error = xfs_free_extent_later(*tpp, del.br_startblock,
+ del.br_blockcount, NULL);
++ if (error)
++ break;
+
+ /* Roll the transaction */
+ error = xfs_defer_finish(tpp);