From: Greg Kroah-Hartman Date: Sun, 16 Mar 2025 06:17:04 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.6.84~38 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4a01f2d7e86b46e2baa86a84ec3fa82b7ac8a2a7;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch xfs-don-t-leak-recovered-attri-intent-items.patch xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch xfs-fix-confusing-xfs_extent_item-variable-names.patch xfs-fix-perag-leak-when-growfs-fails.patch xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch xfs-initialise-di_crc-in-xfs_log_dinode.patch xfs-make-rextslog-computation-consistent-with-mkfs.patch xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch xfs-pass-per-ag-references-to-xfs_free_extent.patch xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch xfs-reserve-less-log-space-when-recovering-log-intent-items.patch xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch xfs-update-dir3-leaf-block-metadata-after-swap.patch xfs-use-deferred-frees-for-btree-block-freeing.patch 
xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch xfs-validate-block-number-being-freed-before-adding-to-xefi.patch --- diff --git a/queue-6.1/series b/queue-6.1/series index 0c37d02824..a7184c9329 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -75,3 +75,32 @@ mm-add-nommu-variant-of-vm_insert_pages.patch io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch io_uring-fix-corner-case-forgetting-to-vunmap.patch +xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch +xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch +xfs-fix-confusing-xfs_extent_item-variable-names.patch +xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch +xfs-pass-per-ag-references-to-xfs_free_extent.patch +xfs-validate-block-number-being-freed-before-adding-to-xefi.patch +xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch +xfs-use-deferred-frees-for-btree-block-freeing.patch +xfs-reserve-less-log-space-when-recovering-log-intent-items.patch +xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch +xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch +xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch +xfs-don-t-leak-recovered-attri-intent-items.patch +xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch +xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch +xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch +xfs-make-rextslog-computation-consistent-with-mkfs.patch +xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch +xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch +xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch +xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch +xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch +xfs-initialise-di_crc-in-xfs_log_dinode.patch 
+xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch +xfs-fix-perag-leak-when-growfs-fails.patch +xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch +xfs-update-dir3-leaf-block-metadata-after-swap.patch +xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch +xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch diff --git a/queue-6.1/xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch b/queue-6.1/xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch new file mode 100644 index 0000000000..229841a454 --- /dev/null +++ b/queue-6.1/xfs-add-lock-protection-when-remove-perag-from-radix-tree.patch @@ -0,0 +1,69 @@ +From stable+bounces-124381-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:45 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:44 -0700 +Subject: xfs: add lock protection when remove perag from radix tree +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Long Li , Christoph Hellwig , "Darrick J. Wong" , Chandan Babu R , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-25-leah.rumancik@gmail.com> + +From: Long Li + +[ Upstream commit 07afd3173d0c6d24a47441839a835955ec6cf0d4 ] + +[ 6.1: resolved conflict in xfs_ag.c ] + +Take mp->m_perag_lock for deletions from the perag radix tree in +xfs_initialize_perag to prevent racing with tagging operations. +Lookups are fine - they are RCU protected so already deal with the +tree changing shape underneath the lookup - but tagging operations +require the tree to be stable while the tags are propagated back up +to the root. + +Right now there's nothing stopping radix tree tagging from operating +while a growfs operation is in progress and adding/removing new entries +into the radix tree. + +Hence we can have traversals that require a stable tree occurring at +the same time we are removing unused entries from the radix tree which +causes the shape of the tree to change.
+ +Likely this hasn't caused a problem in the past because we are only +doing append addition and removal so the active AG part of the tree +is not changing shape, but that doesn't mean it is safe. Just making +the radix tree modifications serialise against each other is obviously +correct. + +Signed-off-by: Long Li +Reviewed-by: Christoph Hellwig +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -345,13 +345,17 @@ xfs_initialize_perag( + return 0; + + out_remove_pag: ++ spin_lock(&mp->m_perag_lock); + radix_tree_delete(&mp->m_perag_tree, index); ++ spin_unlock(&mp->m_perag_lock); + out_free_pag: + kmem_free(pag); + out_unwind_new_pags: + /* unwind any prior newly initialized pags */ + for (index = first_initialised; index < agcount; index++) { ++ spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, index); ++ spin_unlock(&mp->m_perag_lock); + if (!pag) + break; + xfs_buf_hash_destroy(pag); diff --git a/queue-6.1/xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch b/queue-6.1/xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch new file mode 100644 index 0000000000..aaeb7976e5 --- /dev/null +++ b/queue-6.1/xfs-consider-minlen-sized-extents-in-xfs_rtallocate_extent_block.patch @@ -0,0 +1,39 @@ +From stable+bounces-124369-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:27 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:32 -0700 +Subject: xfs: consider minlen sized extents in xfs_rtallocate_extent_block +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Christoph Hellwig , "Darrick J. 
Wong" , Chandan Babu R , Leah Rumancik +Message-ID: <20250313202550.2257219-13-leah.rumancik@gmail.com> + +From: Christoph Hellwig + +[ Upstream commit 944df75958807d56f2db9fdc769eb15dd9f0366a ] + +minlen is the lower bound on the extent length that the caller can +accept, and maxlen is at this point the maximal available length. +This means a minlen extent is perfectly fine to use, so do it. This +matches the equivalent logic in xfs_rtallocate_extent_exact that also +accepts a minlen sized extent. + +Signed-off-by: Christoph Hellwig +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_rtalloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -318,7 +318,7 @@ xfs_rtallocate_extent_block( + /* + * Searched the whole thing & didn't find a maxlen free extent. + */ +- if (minlen < maxlen && besti != -1) { ++ if (minlen <= maxlen && besti != -1) { + xfs_extlen_t p; /* amount to trim length by */ + + /* diff --git a/queue-6.1/xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch b/queue-6.1/xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch new file mode 100644 index 0000000000..32e1eac7fb --- /dev/null +++ b/queue-6.1/xfs-convert-rt-bitmap-extent-lengths-to-xfs_rtbxlen_t.patch @@ -0,0 +1,72 @@ +From stable+bounces-124368-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:27 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:31 -0700 +Subject: xfs: convert rt bitmap extent lengths to xfs_rtbxlen_t +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-12-leah.rumancik@gmail.com> + +From: "Darrick J. 
Wong" + +[ Upstream commit f29c3e745dc253bf9d9d06ddc36af1a534ba1dd0 ] + +[ 6.1: excluded changes to trace.h as xchk_rtsum_record_free +does not exist yet ] + +XFS uses xfs_rtblock_t for many different uses, which makes it much more +difficult to perform a unit analysis on the codebase. One of these +(ab)uses is when we need to store the length of a free space extent as +stored in the realtime bitmap. Because there can be up to 2^64 realtime +extents in a filesystem, we need a new type that is larger than +xfs_rtxlen_t for callers that are querying the bitmap directly. This +means scrub and growfs. + +Create this type as "xfs_rtbxlen_t" and use it to store 64-bit rtx +lengths. 'b' stands for 'bitmap' or 'big'; reader's choice. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_format.h | 2 +- + fs/xfs/libxfs/xfs_rtbitmap.h | 2 +- + fs/xfs/libxfs/xfs_types.h | 1 + + 3 files changed, 3 insertions(+), 2 deletions(-) + +--- a/fs/xfs/libxfs/xfs_format.h ++++ b/fs/xfs/libxfs/xfs_format.h +@@ -98,7 +98,7 @@ typedef struct xfs_sb { + uint32_t sb_blocksize; /* logical block size, bytes */ + xfs_rfsblock_t sb_dblocks; /* number of data blocks */ + xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ +- xfs_rtblock_t sb_rextents; /* number of realtime extents */ ++ xfs_rtbxlen_t sb_rextents; /* number of realtime extents */ + uuid_t sb_uuid; /* user-visible file system unique id */ + xfs_fsblock_t sb_logstart; /* starting block of log if internal */ + xfs_ino_t sb_rootino; /* root inode number */ +--- a/fs/xfs/libxfs/xfs_rtbitmap.h ++++ b/fs/xfs/libxfs/xfs_rtbitmap.h +@@ -13,7 +13,7 @@ + */ + struct xfs_rtalloc_rec { + xfs_rtblock_t ar_startext; +- xfs_rtblock_t ar_extcount; ++ xfs_rtbxlen_t ar_extcount; + }; + + typedef int 
(*xfs_rtalloc_query_range_fn)( +--- a/fs/xfs/libxfs/xfs_types.h ++++ b/fs/xfs/libxfs/xfs_types.h +@@ -31,6 +31,7 @@ typedef uint64_t xfs_rfsblock_t; /* bloc + typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ + typedef uint64_t xfs_fileoff_t; /* block number in a file */ + typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ ++typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */ + + typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ + diff --git a/queue-6.1/xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch b/queue-6.1/xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch new file mode 100644 index 0000000000..51774eeffd --- /dev/null +++ b/queue-6.1/xfs-don-t-allow-overly-small-or-large-realtime-volumes.patch @@ -0,0 +1,79 @@ +From stable+bounces-124376-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:36 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:39 -0700 +Subject: xfs: don't allow overly small or large realtime volumes +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-20-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit e14293803f4e84eb23a417b462b56251033b5a66 ] + +Don't allow realtime volumes that are less than one rt extent long. +This has been broken across 4 LTS kernels with nobody noticing, so let's +just disable it. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_rtbitmap.h | 13 +++++++++++++ + fs/xfs/libxfs/xfs_sb.c | 3 ++- + fs/xfs/xfs_rtalloc.c | 2 ++ + 3 files changed, 17 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_rtbitmap.h ++++ b/fs/xfs/libxfs/xfs_rtbitmap.h +@@ -73,6 +73,18 @@ int xfs_rtfree_blocks(struct xfs_trans * + + uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + ++/* Do we support an rt volume having this number of rtextents? */ ++static inline bool ++xfs_validate_rtextents( ++ xfs_rtbxlen_t rtextents) ++{ ++ /* No runt rt volumes */ ++ if (rtextents == 0) ++ return false; ++ ++ return true; ++} ++ + #else /* CONFIG_XFS_RT */ + # define xfs_rtfree_extent(t,b,l) (-ENOSYS) + # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +@@ -81,6 +93,7 @@ uint8_t xfs_compute_rextslog(xfs_rtbxlen + # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) + # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) + # define xfs_compute_rextslog(rtx) (0) ++# define xfs_validate_rtextents(rtx) (false) + #endif /* CONFIG_XFS_RT */ + + #endif /* __XFS_RTBITMAP_H__ */ +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -502,7 +502,8 @@ xfs_validate_sb_common( + rbmblocks = howmany_64(sbp->sb_rextents, + NBBY * sbp->sb_blocksize); + +- if (sbp->sb_rextents != rexts || ++ if (!xfs_validate_rtextents(rexts) || ++ sbp->sb_rextents != rexts || + sbp->sb_rextslog != xfs_compute_rextslog(rexts) || + sbp->sb_rbmblocks != rbmblocks) { + xfs_notice(mp, +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -998,6 +998,8 @@ xfs_growfs_rt( + */ + nrextents = nrblocks; + do_div(nrextents, in->extsize); ++ if (!xfs_validate_rtextents(nrextents)) ++ return -EINVAL; + nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); + nrextslog = xfs_compute_rextslog(nrextents); + nrsumlevels = nrextslog + 1; diff --git a/queue-6.1/xfs-don-t-leak-recovered-attri-intent-items.patch b/queue-6.1/xfs-don-t-leak-recovered-attri-intent-items.patch new file mode 100644 
index 0000000000..4de81623d3 --- /dev/null +++ b/queue-6.1/xfs-don-t-leak-recovered-attri-intent-items.patch @@ -0,0 +1,62 @@ +From stable+bounces-124370-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:28 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:33 -0700 +Subject: xfs: don't leak recovered attri intent items +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-14-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 07bcbdf020c9fd3c14bec51c50225a2a02707b94 ] + +If recovery finds an xattr log intent item calling for the removal of an +attribute and the file doesn't even have an attr fork, we know that the +removal is trivially complete. However, we can't just exit the recovery +function without doing something about the recovered log intent item -- +it's still on the AIL, and not logging an attrd item means it stays +there forever. + +This has likely not been seen in practice because few people use LARP +and the runtime code won't log the attri for a no-attrfork removexattr +operation. But let's fix this anyway. + +Also we shouldn't really be testing the attr fork presence until we've +taken the ILOCK, though this doesn't matter much in recovery, which is +single threaded. + +Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_attr_item.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -329,6 +329,13 @@ xfs_xattri_finish_update( + goto out; + } + ++ /* If an attr removal is trivially complete, we're done. 
*/ ++ if (attr->xattri_op_flags == XFS_ATTRI_OP_FLAGS_REMOVE && ++ !xfs_inode_hasattr(args->dp)) { ++ error = 0; ++ goto out; ++ } ++ + error = xfs_attr_set_iter(attr); + if (!error && attr->xattri_dela_state != XFS_DAS_DONE) + error = -EAGAIN; +@@ -608,8 +615,6 @@ xfs_attri_item_recover( + attr->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTRI_OP_FLAGS_REMOVE: +- if (!xfs_inode_hasattr(args->dp)) +- goto out; + attr->xattri_dela_state = xfs_attr_init_remove_state(args); + break; + default: diff --git a/queue-6.1/xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch b/queue-6.1/xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch new file mode 100644 index 0000000000..01d804bc0c --- /dev/null +++ b/queue-6.1/xfs-ensure-logflagsp-is-initialized-in-xfs_bmap_del_extent_real.patch @@ -0,0 +1,229 @@ +From stable+bounces-124383-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:47 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:46 -0700 +Subject: xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Jiachen Zhang , Christoph Hellwig , "Darrick J. Wong" , Chandan Babu R , Leah Rumancik +Message-ID: <20250313202550.2257219-27-leah.rumancik@gmail.com> + +From: Jiachen Zhang + +[ Upstream commit e6af9c98cbf0164a619d95572136bfb54d482dd6 ] + +In the case of returning -ENOSPC, ensure logflagsp is initialized to 0. +Otherwise the caller __xfs_bunmapi will write an uninitialized, illegal +tmp_logflags value into the xfs log, which might cause unpredictable errors +in the log recovery procedure. + +Also, remove the flags variable and set the *logflagsp directly, so that +the code is more robust in the long run. + +Fixes: 1b24b633aafe ("xfs: move some more code into xfs_bmap_del_extent_real") +Signed-off-by: Jiachen Zhang +Reviewed-by: Christoph Hellwig +Reviewed-by: "Darrick J.
Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 73 +++++++++++++++++++---------------------------- + 1 file changed, 31 insertions(+), 42 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -4999,7 +4999,6 @@ xfs_bmap_del_extent_real( + xfs_fileoff_t del_endoff; /* first offset past del */ + int do_fx; /* free extent at end of routine */ + int error; /* error return value */ +- int flags = 0;/* inode logging flags */ + struct xfs_bmbt_irec got; /* current extent entry */ + xfs_fileoff_t got_endoff; /* first offset past got */ + int i; /* temp state */ +@@ -5012,6 +5011,8 @@ xfs_bmap_del_extent_real( + uint32_t state = xfs_bmap_fork_to_state(whichfork); + struct xfs_bmbt_irec old; + ++ *logflagsp = 0; ++ + mp = ip->i_mount; + XFS_STATS_INC(mp, xs_del_exlist); + +@@ -5024,7 +5025,6 @@ xfs_bmap_del_extent_real( + ASSERT(got_endoff >= del_endoff); + ASSERT(!isnullstartblock(got.br_startblock)); + qfield = 0; +- error = 0; + + /* + * If it's the case where the directory code is running with no block +@@ -5040,13 +5040,13 @@ xfs_bmap_del_extent_real( + del->br_startoff > got.br_startoff && del_endoff < got_endoff) + return -ENOSPC; + +- flags = XFS_ILOG_CORE; ++ *logflagsp = XFS_ILOG_CORE; + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { + if (!(bflags & XFS_BMAPI_REMAP)) { + error = xfs_rtfree_blocks(tp, del->br_startblock, + del->br_blockcount); + if (error) +- goto done; ++ return error; + } + + do_fx = 0; +@@ -5061,11 +5061,9 @@ xfs_bmap_del_extent_real( + if (cur) { + error = xfs_bmbt_lookup_eq(cur, &got, &i); + if (error) +- goto done; +- if (XFS_IS_CORRUPT(mp, i != 1)) { +- error = -EFSCORRUPTED; +- goto done; +- } ++ return error; ++ if (XFS_IS_CORRUPT(mp, i != 1)) ++ return -EFSCORRUPTED; + } + + if (got.br_startoff == del->br_startoff) +@@ -5082,17 +5080,15 @@ xfs_bmap_del_extent_real( + 
xfs_iext_prev(ifp, icur); + ifp->if_nextents--; + +- flags |= XFS_ILOG_CORE; ++ *logflagsp |= XFS_ILOG_CORE; + if (!cur) { +- flags |= xfs_ilog_fext(whichfork); ++ *logflagsp |= xfs_ilog_fext(whichfork); + break; + } + if ((error = xfs_btree_delete(cur, &i))) +- goto done; +- if (XFS_IS_CORRUPT(mp, i != 1)) { +- error = -EFSCORRUPTED; +- goto done; +- } ++ return error; ++ if (XFS_IS_CORRUPT(mp, i != 1)) ++ return -EFSCORRUPTED; + break; + case BMAP_LEFT_FILLING: + /* +@@ -5103,12 +5099,12 @@ xfs_bmap_del_extent_real( + got.br_blockcount -= del->br_blockcount; + xfs_iext_update_extent(ip, state, icur, &got); + if (!cur) { +- flags |= xfs_ilog_fext(whichfork); ++ *logflagsp |= xfs_ilog_fext(whichfork); + break; + } + error = xfs_bmbt_update(cur, &got); + if (error) +- goto done; ++ return error; + break; + case BMAP_RIGHT_FILLING: + /* +@@ -5117,12 +5113,12 @@ xfs_bmap_del_extent_real( + got.br_blockcount -= del->br_blockcount; + xfs_iext_update_extent(ip, state, icur, &got); + if (!cur) { +- flags |= xfs_ilog_fext(whichfork); ++ *logflagsp |= xfs_ilog_fext(whichfork); + break; + } + error = xfs_bmbt_update(cur, &got); + if (error) +- goto done; ++ return error; + break; + case 0: + /* +@@ -5139,18 +5135,18 @@ xfs_bmap_del_extent_real( + new.br_state = got.br_state; + new.br_startblock = del_endblock; + +- flags |= XFS_ILOG_CORE; ++ *logflagsp |= XFS_ILOG_CORE; + if (cur) { + error = xfs_bmbt_update(cur, &got); + if (error) +- goto done; ++ return error; + error = xfs_btree_increment(cur, 0, &i); + if (error) +- goto done; ++ return error; + cur->bc_rec.b = new; + error = xfs_btree_insert(cur, &i); + if (error && error != -ENOSPC) +- goto done; ++ return error; + /* + * If get no-space back from btree insert, it tried a + * split, and we have a zero block reservation. 
Fix up +@@ -5163,33 +5159,28 @@ xfs_bmap_del_extent_real( + */ + error = xfs_bmbt_lookup_eq(cur, &got, &i); + if (error) +- goto done; +- if (XFS_IS_CORRUPT(mp, i != 1)) { +- error = -EFSCORRUPTED; +- goto done; +- } ++ return error; ++ if (XFS_IS_CORRUPT(mp, i != 1)) ++ return -EFSCORRUPTED; + /* + * Update the btree record back + * to the original value. + */ + error = xfs_bmbt_update(cur, &old); + if (error) +- goto done; ++ return error; + /* + * Reset the extent record back + * to the original value. + */ + xfs_iext_update_extent(ip, state, icur, &old); +- flags = 0; +- error = -ENOSPC; +- goto done; +- } +- if (XFS_IS_CORRUPT(mp, i != 1)) { +- error = -EFSCORRUPTED; +- goto done; ++ *logflagsp = 0; ++ return -ENOSPC; + } ++ if (XFS_IS_CORRUPT(mp, i != 1)) ++ return -EFSCORRUPTED; + } else +- flags |= xfs_ilog_fext(whichfork); ++ *logflagsp |= xfs_ilog_fext(whichfork); + + ifp->if_nextents++; + xfs_iext_next(ifp, icur); +@@ -5213,7 +5204,7 @@ xfs_bmap_del_extent_real( + ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN)); + if (error) +- goto done; ++ return error; + } + } + +@@ -5228,9 +5219,7 @@ xfs_bmap_del_extent_real( + if (qfield && !(bflags & XFS_BMAPI_REMAP)) + xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); + +-done: +- *logflagsp = flags; +- return error; ++ return 0; + } + + /* diff --git a/queue-6.1/xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch b/queue-6.1/xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch new file mode 100644 index 0000000000..3375421e65 --- /dev/null +++ b/queue-6.1/xfs-fix-32-bit-truncation-in-xfs_compute_rextslog.patch @@ -0,0 +1,49 @@ +From stable+bounces-124375-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:35 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:38 -0700 +Subject: xfs: fix 32-bit truncation in xfs_compute_rextslog +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. 
Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-19-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit cf8f0e6c1429be7652869059ea44696b72d5b726 ] + +It's quite reasonable that some customer somewhere will want to +configure a realtime volume with more than 2^32 extents. If they try to +do this, the highbit32() call will truncate the upper bits of the +xfs_rtbxlen_t and produce the wrong value for rextslog. This in turn +causes the rsumlevels to be wrong, which results in a realtime summary +file that is the wrong length. Fix that. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_rtbitmap.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -1133,13 +1133,15 @@ xfs_rtalloc_extent_is_free( + + /* + * Compute the maximum level number of the realtime summary file, as defined by +- * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that +- * prohibits correct use of rt volumes with more than 2^32 extents. ++ * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct ++ * use of rt volumes with more than 2^32 extents. + */ + uint8_t + xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) + { +- return rtextents ? 
xfs_highbit32(rtextents) : 0; ++ if (!rtextents) ++ return 0; ++ return xfs_highbit64(rtextents); + } + diff --git a/queue-6.1/xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch b/queue-6.1/xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch new file mode 100644 index 0000000000..bd567b271e --- /dev/null +++ b/queue-6.1/xfs-fix-bounds-check-in-xfs_defer_agfl_block.patch @@ -0,0 +1,62 @@ +From stable+bounces-124364-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:20 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:27 -0700 +Subject: xfs: fix bounds check in xfs_defer_agfl_block() +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Dave Chinner , Christoph Hellwig , "Darrick J. Wong" , Leah Rumancik +Message-ID: <20250313202550.2257219-8-leah.rumancik@gmail.com> + +From: Dave Chinner + +[ Upstream commit 2bed0d82c2f78b91a0a9a5a73da57ee883a0c070 ] + +Need to happen before we allocate and then leak the xefi. Found by +coverity via an xfsprogs libxfs scan. + +[djwong: This also fixes the type of the @agbno argument.] + +Fixes: 7dfee17b13e5 ("xfs: validate block number being freed before adding to xefi") +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2489,24 +2489,25 @@ static int + xfs_defer_agfl_block( + struct xfs_trans *tp, + xfs_agnumber_t agno, +- xfs_fsblock_t agbno, ++ xfs_agblock_t agbno, + struct xfs_owner_info *oinfo) + { + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent_free_item *xefi; ++ xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); + + ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(oinfo != NULL); + ++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) ++ return -EFSCORRUPTED; ++ + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); +- xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); ++ xefi->xefi_startblock = fsbno; + xefi->xefi_blockcount = 1; + xefi->xefi_owner = oinfo->oi_owner; + +- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) +- return -EFSCORRUPTED; +- + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); + + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); diff --git a/queue-6.1/xfs-fix-confusing-xfs_extent_item-variable-names.patch b/queue-6.1/xfs-fix-confusing-xfs_extent_item-variable-names.patch new file mode 100644 index 0000000000..3c3e10fd0a --- /dev/null +++ b/queue-6.1/xfs-fix-confusing-xfs_extent_item-variable-names.patch @@ -0,0 +1,269 @@ +From stable+bounces-124360-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:15 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:23 -0700 +Subject: xfs: fix confusing xfs_extent_item variable names +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Leah Rumancik +Message-ID: <20250313202550.2257219-4-leah.rumancik@gmail.com> + +From: "Darrick J. 
Wong" + +[ Upstream commit 578c714b215d474c52949e65a914dae67924f0fe ] + +Change the name of all pointers to xfs_extent_item structures to "xefi" +to make the name consistent and because the current selections ("new" +and "free") mean other things in C. + +Signed-off-by: Darrick J. Wong +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 32 ++++++++++----------- + fs/xfs/xfs_extfree_item.c | 70 +++++++++++++++++++++++----------------------- + 2 files changed, 51 insertions(+), 51 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2493,20 +2493,20 @@ xfs_defer_agfl_block( + struct xfs_owner_info *oinfo) + { + struct xfs_mount *mp = tp->t_mountp; +- struct xfs_extent_free_item *new; /* new element */ ++ struct xfs_extent_free_item *xefi; + + ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(oinfo != NULL); + +- new = kmem_cache_zalloc(xfs_extfree_item_cache, ++ xefi = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); +- new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); +- new->xefi_blockcount = 1; +- new->xefi_owner = oinfo->oi_owner; ++ xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); ++ xefi->xefi_blockcount = 1; ++ xefi->xefi_owner = oinfo->oi_owner; + + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); + +- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); ++ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); + } + + /* +@@ -2521,7 +2521,7 @@ __xfs_free_extent_later( + const struct xfs_owner_info *oinfo, + bool skip_discard) + { +- struct xfs_extent_free_item *new; /* new element */ ++ struct xfs_extent_free_item *xefi; + #ifdef DEBUG + struct xfs_mount *mp = tp->t_mountp; + xfs_agnumber_t agno; +@@ -2540,27 +2540,27 @@ __xfs_free_extent_later( + #endif + ASSERT(xfs_extfree_item_cache != NULL); + +- new = kmem_cache_zalloc(xfs_extfree_item_cache, ++ xefi = 
kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); +- new->xefi_startblock = bno; +- new->xefi_blockcount = (xfs_extlen_t)len; ++ xefi->xefi_startblock = bno; ++ xefi->xefi_blockcount = (xfs_extlen_t)len; + if (skip_discard) +- new->xefi_flags |= XFS_EFI_SKIP_DISCARD; ++ xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { + ASSERT(oinfo->oi_offset == 0); + + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) +- new->xefi_flags |= XFS_EFI_ATTR_FORK; ++ xefi->xefi_flags |= XFS_EFI_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) +- new->xefi_flags |= XFS_EFI_BMBT_BLOCK; +- new->xefi_owner = oinfo->oi_owner; ++ xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK; ++ xefi->xefi_owner = oinfo->oi_owner; + } else { +- new->xefi_owner = XFS_RMAP_OWN_NULL; ++ xefi->xefi_owner = XFS_RMAP_OWN_NULL; + } + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); +- xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); ++ xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); + } + + #ifdef DEBUG +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -345,30 +345,30 @@ static int + xfs_trans_free_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, +- struct xfs_extent_free_item *free) ++ struct xfs_extent_free_item *xefi) + { + struct xfs_owner_info oinfo = { }; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent *extp; + uint next_extent; + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, +- free->xefi_startblock); ++ xefi->xefi_startblock); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, +- free->xefi_startblock); ++ xefi->xefi_startblock); + int error; + +- oinfo.oi_owner = free->xefi_owner; +- if (free->xefi_flags & XFS_EFI_ATTR_FORK) ++ oinfo.oi_owner = xefi->xefi_owner; ++ if (xefi->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; +- if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) ++ if (xefi->xefi_flags & 
XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; + + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, +- free->xefi_blockcount); ++ xefi->xefi_blockcount); + +- error = __xfs_free_extent(tp, free->xefi_startblock, +- free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, +- free->xefi_flags & XFS_EFI_SKIP_DISCARD); ++ error = __xfs_free_extent(tp, xefi->xefi_startblock, ++ xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, ++ xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: +@@ -382,8 +382,8 @@ xfs_trans_free_extent( + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); +- extp->ext_start = free->xefi_startblock; +- extp->ext_len = free->xefi_blockcount; ++ extp->ext_start = xefi->xefi_startblock; ++ extp->ext_len = xefi->xefi_blockcount; + efdp->efd_next_extent++; + + return error; +@@ -411,7 +411,7 @@ STATIC void + xfs_extent_free_log_item( + struct xfs_trans *tp, + struct xfs_efi_log_item *efip, +- struct xfs_extent_free_item *free) ++ struct xfs_extent_free_item *xefi) + { + uint next_extent; + struct xfs_extent *extp; +@@ -427,8 +427,8 @@ xfs_extent_free_log_item( + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; + ASSERT(next_extent < efip->efi_format.efi_nextents); + extp = &efip->efi_format.efi_extents[next_extent]; +- extp->ext_start = free->xefi_startblock; +- extp->ext_len = free->xefi_blockcount; ++ extp->ext_start = xefi->xefi_startblock; ++ extp->ext_len = xefi->xefi_blockcount; + } + + static struct xfs_log_item * +@@ -440,15 +440,15 @@ xfs_extent_free_create_intent( + { + struct xfs_mount *mp = tp->t_mountp; + struct xfs_efi_log_item *efip = xfs_efi_init(mp, count); +- struct xfs_extent_free_item *free; ++ struct xfs_extent_free_item *xefi; + + ASSERT(count > 0); + + xfs_trans_add_item(tp, &efip->efi_item); + if (sort) + 
list_sort(mp, items, xfs_extent_free_diff_items); +- list_for_each_entry(free, items, xefi_list) +- xfs_extent_free_log_item(tp, efip, free); ++ list_for_each_entry(xefi, items, xefi_list) ++ xfs_extent_free_log_item(tp, efip, xefi); + return &efip->efi_item; + } + +@@ -470,13 +470,13 @@ xfs_extent_free_finish_item( + struct list_head *item, + struct xfs_btree_cur **state) + { +- struct xfs_extent_free_item *free; ++ struct xfs_extent_free_item *xefi; + int error; + +- free = container_of(item, struct xfs_extent_free_item, xefi_list); ++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + +- error = xfs_trans_free_extent(tp, EFD_ITEM(done), free); +- kmem_cache_free(xfs_extfree_item_cache, free); ++ error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); ++ kmem_cache_free(xfs_extfree_item_cache, xefi); + return error; + } + +@@ -493,10 +493,10 @@ STATIC void + xfs_extent_free_cancel_item( + struct list_head *item) + { +- struct xfs_extent_free_item *free; ++ struct xfs_extent_free_item *xefi; + +- free = container_of(item, struct xfs_extent_free_item, xefi_list); +- kmem_cache_free(xfs_extfree_item_cache, free); ++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list); ++ kmem_cache_free(xfs_extfree_item_cache, xefi); + } + + const struct xfs_defer_op_type xfs_extent_free_defer_type = { +@@ -522,7 +522,7 @@ xfs_agfl_free_finish_item( + struct xfs_owner_info oinfo = { }; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_efd_log_item *efdp = EFD_ITEM(done); +- struct xfs_extent_free_item *free; ++ struct xfs_extent_free_item *xefi; + struct xfs_extent *extp; + struct xfs_buf *agbp; + int error; +@@ -531,13 +531,13 @@ xfs_agfl_free_finish_item( + uint next_extent; + struct xfs_perag *pag; + +- free = container_of(item, struct xfs_extent_free_item, xefi_list); +- ASSERT(free->xefi_blockcount == 1); +- agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); +- agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); +- oinfo.oi_owner = 
free->xefi_owner; ++ xefi = container_of(item, struct xfs_extent_free_item, xefi_list); ++ ASSERT(xefi->xefi_blockcount == 1); ++ agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); ++ agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); ++ oinfo.oi_owner = xefi->xefi_owner; + +- trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); ++ trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, xefi->xefi_blockcount); + + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); +@@ -558,11 +558,11 @@ xfs_agfl_free_finish_item( + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); +- extp->ext_start = free->xefi_startblock; +- extp->ext_len = free->xefi_blockcount; ++ extp->ext_start = xefi->xefi_startblock; ++ extp->ext_len = xefi->xefi_blockcount; + efdp->efd_next_extent++; + +- kmem_cache_free(xfs_extfree_item_cache, free); ++ kmem_cache_free(xfs_extfree_item_cache, xefi); + return error; + } + diff --git a/queue-6.1/xfs-fix-perag-leak-when-growfs-fails.patch b/queue-6.1/xfs-fix-perag-leak-when-growfs-fails.patch new file mode 100644 index 0000000000..d7bba51e58 --- /dev/null +++ b/queue-6.1/xfs-fix-perag-leak-when-growfs-fails.patch @@ -0,0 +1,154 @@ +From stable+bounces-124382-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:44 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:45 -0700 +Subject: xfs: fix perag leak when growfs fails +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Long Li , "Darrick J. 
Wong" , Chandan Babu R , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-26-leah.rumancik@gmail.com> + +From: Long Li + +[ Upstream commit 7823921887750b39d02e6b44faafdd1cc617c651 ] + +[ 6.1: resolved conflicts in xfs_ag.c and xfs_ag.h ] + +During growfs, if new ag in memory has been initialized, however +sb_agcount has not been updated, if an error occurs at this time it +will cause perag leaks as follows, these new AGs will not be freed +during umount, because these new AGs are not visible (that is, not +included in mp->m_sb.sb_agcount). + +unreferenced object 0xffff88810be40200 (size 512): + comm "xfs_growfs", pid 857, jiffies 4294909093 + hex dump (first 32 bytes): + 00 c0 c1 05 81 88 ff ff 04 00 00 00 00 00 00 00 ................ + 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace (crc 381741e2): + [] __kmalloc+0x386/0x4f0 + [] kmem_alloc+0xb5/0x2f0 + [] xfs_initialize_perag+0xc5/0x810 + [] xfs_growfs_data+0x9bc/0xbc0 + [] xfs_file_ioctl+0x5fe/0x14d0 + [] __x64_sys_ioctl+0x144/0x1c0 + [] do_syscall_64+0x3f/0xe0 + [] entry_SYSCALL_64_after_hwframe+0x62/0x6a +unreferenced object 0xffff88810be40800 (size 512): + comm "xfs_growfs", pid 857, jiffies 4294909093 + hex dump (first 32 bytes): + 20 00 00 00 00 00 00 00 57 ef be dc 00 00 00 00 .......W....... + 10 08 e4 0b 81 88 ff ff 10 08 e4 0b 81 88 ff ff ................ + backtrace (crc bde50e2d): + [] __kmalloc_node+0x3da/0x540 + [] kvmalloc_node+0x99/0x160 + [] bucket_table_alloc.isra.0+0x5f/0x400 + [] rhashtable_init+0x405/0x760 + [] xfs_initialize_perag+0x3a3/0x810 + [] xfs_growfs_data+0x9bc/0xbc0 + [] xfs_file_ioctl+0x5fe/0x14d0 + [] __x64_sys_ioctl+0x144/0x1c0 + [] do_syscall_64+0x3f/0xe0 + [] entry_SYSCALL_64_after_hwframe+0x62/0x6a + +Factor out xfs_free_unused_perag_range() from xfs_initialize_perag(), +used for freeing unused perag within a specified range in error handling, +included in the error path of the growfs failure. 
+ +Fixes: 1c1c6ebcf528 ("xfs: Replace per-ag array with a radix tree") +Signed-off-by: Long Li +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag.c | 34 +++++++++++++++++++++++++--------- + fs/xfs/libxfs/xfs_ag.h | 3 +++ + fs/xfs/xfs_fsops.c | 5 ++++- + 3 files changed, 32 insertions(+), 10 deletions(-) + +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -259,6 +259,30 @@ xfs_agino_range( + return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); + } + ++/* ++ * Free perag within the specified AG range, it is only used to free unused ++ * perags under the error handling path. ++ */ ++void ++xfs_free_unused_perag_range( ++ struct xfs_mount *mp, ++ xfs_agnumber_t agstart, ++ xfs_agnumber_t agend) ++{ ++ struct xfs_perag *pag; ++ xfs_agnumber_t index; ++ ++ for (index = agstart; index < agend; index++) { ++ spin_lock(&mp->m_perag_lock); ++ pag = radix_tree_delete(&mp->m_perag_tree, index); ++ spin_unlock(&mp->m_perag_lock); ++ if (!pag) ++ break; ++ xfs_buf_hash_destroy(pag); ++ kmem_free(pag); ++ } ++} ++ + int + xfs_initialize_perag( + struct xfs_mount *mp, +@@ -352,15 +376,7 @@ out_free_pag: + kmem_free(pag); + out_unwind_new_pags: + /* unwind any prior newly initialized pags */ +- for (index = first_initialised; index < agcount; index++) { +- spin_lock(&mp->m_perag_lock); +- pag = radix_tree_delete(&mp->m_perag_tree, index); +- spin_unlock(&mp->m_perag_lock); +- if (!pag) +- break; +- xfs_buf_hash_destroy(pag); +- kmem_free(pag); +- } ++ xfs_free_unused_perag_range(mp, first_initialised, agcount); + return error; + } + +--- a/fs/xfs/libxfs/xfs_ag.h ++++ b/fs/xfs/libxfs/xfs_ag.h +@@ -106,6 +106,9 @@ struct xfs_perag { + #endif /* __KERNEL__ */ + }; + ++ ++void xfs_free_unused_perag_range(struct xfs_mount *mp, 
xfs_agnumber_t agstart, ++ xfs_agnumber_t agend); + int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, + xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); + int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); +--- a/fs/xfs/xfs_fsops.c ++++ b/fs/xfs/xfs_fsops.c +@@ -153,7 +153,7 @@ xfs_growfs_data_private( + (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, + XFS_TRANS_RESERVE, &tp); + if (error) +- return error; ++ goto out_free_unused_perag; + + last_pag = xfs_perag_get(mp, oagcount - 1); + if (delta > 0) { +@@ -227,6 +227,9 @@ xfs_growfs_data_private( + + out_trans_cancel: + xfs_trans_cancel(tp); ++out_free_unused_perag: ++ if (nagcount > oagcount) ++ xfs_free_unused_perag_range(mp, oagcount, nagcount); + return error; + } + diff --git a/queue-6.1/xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch b/queue-6.1/xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch new file mode 100644 index 0000000000..9f3065edbf --- /dev/null +++ b/queue-6.1/xfs-force-all-buffers-to-be-written-during-btree-bulk-load.patch @@ -0,0 +1,220 @@ +From stable+bounces-124380-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:40 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:42 -0700 +Subject: xfs: force all buffers to be written during btree bulk load +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-23-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 13ae04d8d45227c2ba51e188daf9fc13d08a1b12 ] + +While stress-testing online repair of btrees, I noticed periodic +assertion failures from the buffer cache about buffers with incorrect +DELWRI_Q state. 
Looking further, I observed this race between the AIL +trying to write out a btree block and repair zapping a btree block after +the fact: + +AIL: Repair0: + +pin buffer X +delwri_queue: +set DELWRI_Q +add to delwri list + + stale buf X: + clear DELWRI_Q + does not clear b_list + free space X + commit + +delwri_submit # oops + +Worse yet, I discovered that running the same repair over and over in a +tight loop can result in a second race that causes data integrity +problems with the repair: + +AIL: Repair0: Repair1: + +pin buffer X +delwri_queue: +set DELWRI_Q +add to delwri list + + stale buf X: + clear DELWRI_Q + does not clear b_list + free space X + commit + + find free space X + get buffer + rewrite buffer + delwri_queue: + set DELWRI_Q + already on a list, do not add + commit + + BAD: committed tree root before all blocks written + +delwri_submit # too late now + +I traced this to my own misunderstanding of how the delwri lists work, +particularly with regard to the AIL's buffer list. If a buffer is +logged and committed, the buffer can end up on that AIL buffer list. If +btree repairs are run twice in rapid succession, it's possible that the +first repair will invalidate the buffer and free it before the next time +the AIL wakes up. Marking the buffer stale clears DELWRI_Q from the +buffer state without removing the buffer from its delwri list. The +buffer doesn't know which list it's on, so it cannot know which lock to +take to protect the list for a removal. + +If the second repair allocates the same block, it will then recycle the +buffer to start writing the new btree block. Meanwhile, if the AIL +wakes up and walks the buffer list, it will ignore the buffer because it +can't lock it, and go back to sleep. 
+ +When the second repair calls delwri_queue to put the buffer on the +list of buffers to write before committing the new btree, it will set +DELWRI_Q again, but since the buffer hasn't been removed from the AIL's +buffer list, it won't add it to the bulkload buffer's list. + +This is incorrect, because the bulkload caller relies on delwri_submit +to ensure that all the buffers have been sent to disk /before/ +committing the new btree root pointer. This ordering requirement is +required for data consistency. + +Worse, the AIL won't clear DELWRI_Q from the buffer when it does finally +drop it, so the next thread to walk through the btree will trip over a +debug assertion on that flag. + +To fix this, create a new function that waits for the buffer to be +removed from any other delwri lists before adding the buffer to the +caller's delwri list. By waiting for the buffer to clear both the +delwri list and any potential delwri wait list, we can be sure that +repair will initiate writes of all buffers and report all write errors +back to userspace instead of committing the new structure. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_btree_staging.c | 4 --- + fs/xfs/xfs_buf.c | 44 ++++++++++++++++++++++++++++++++++---- + fs/xfs/xfs_buf.h | 1 + 3 files changed, 42 insertions(+), 7 deletions(-) + +--- a/fs/xfs/libxfs/xfs_btree_staging.c ++++ b/fs/xfs/libxfs/xfs_btree_staging.c +@@ -342,9 +342,7 @@ xfs_btree_bload_drop_buf( + if (*bpp == NULL) + return; + +- if (!xfs_buf_delwri_queue(*bpp, buffers_list)) +- ASSERT(0); +- ++ xfs_buf_delwri_queue_here(*bpp, buffers_list); + xfs_buf_relse(*bpp); + *bpp = NULL; + } +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -2040,6 +2040,14 @@ error_free: + return NULL; + } + ++static inline void ++xfs_buf_list_del( ++ struct xfs_buf *bp) ++{ ++ list_del_init(&bp->b_list); ++ wake_up_var(&bp->b_list); ++} ++ + /* + * Cancel a delayed write list. + * +@@ -2057,7 +2065,7 @@ xfs_buf_delwri_cancel( + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; +- list_del_init(&bp->b_list); ++ xfs_buf_list_del(bp); + xfs_buf_relse(bp); + } + } +@@ -2111,6 +2119,34 @@ xfs_buf_delwri_queue( + } + + /* ++ * Queue a buffer to this delwri list as part of a data integrity operation. ++ * If the buffer is on any other delwri list, we'll wait for that to clear ++ * so that the caller can submit the buffer for IO and wait for the result. ++ * Callers must ensure the buffer is not already on the list. ++ */ ++void ++xfs_buf_delwri_queue_here( ++ struct xfs_buf *bp, ++ struct list_head *buffer_list) ++{ ++ /* ++ * We need this buffer to end up on the /caller's/ delwri list, not any ++ * old list. This can happen if the buffer is marked stale (which ++ * clears DELWRI_Q) after the AIL queues the buffer to its list but ++ * before the AIL has a chance to submit the list. 
++ */ ++ while (!list_empty(&bp->b_list)) { ++ xfs_buf_unlock(bp); ++ wait_var_event(&bp->b_list, list_empty(&bp->b_list)); ++ xfs_buf_lock(bp); ++ } ++ ++ ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); ++ ++ xfs_buf_delwri_queue(bp, buffer_list); ++} ++ ++/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values +@@ -2172,7 +2208,7 @@ xfs_buf_delwri_submit_buffers( + * reference and remove it from the list here. + */ + if (!(bp->b_flags & _XBF_DELWRI_Q)) { +- list_del_init(&bp->b_list); ++ xfs_buf_list_del(bp); + xfs_buf_relse(bp); + continue; + } +@@ -2192,7 +2228,7 @@ xfs_buf_delwri_submit_buffers( + list_move_tail(&bp->b_list, wait_list); + } else { + bp->b_flags |= XBF_ASYNC; +- list_del_init(&bp->b_list); ++ xfs_buf_list_del(bp); + } + __xfs_buf_submit(bp, false); + } +@@ -2246,7 +2282,7 @@ xfs_buf_delwri_submit( + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); + +- list_del_init(&bp->b_list); ++ xfs_buf_list_del(bp); + + /* + * Wait on the locked buffer, check for errors and unlock and +--- a/fs/xfs/xfs_buf.h ++++ b/fs/xfs/xfs_buf.h +@@ -305,6 +305,7 @@ extern void xfs_buf_stale(struct xfs_buf + /* Delayed Write Buffer Routines */ + extern void xfs_buf_delwri_cancel(struct list_head *); + extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); ++void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); + extern int xfs_buf_delwri_submit(struct list_head *); + extern int xfs_buf_delwri_submit_nowait(struct list_head *); + extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); diff --git a/queue-6.1/xfs-initialise-di_crc-in-xfs_log_dinode.patch b/queue-6.1/xfs-initialise-di_crc-in-xfs_log_dinode.patch new file mode 100644 index 0000000000..d3ba25c71d --- /dev/null +++ b/queue-6.1/xfs-initialise-di_crc-in-xfs_log_dinode.patch @@ -0,0 +1,111 @@ +From 
stable+bounces-124379-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:39 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:43 -0700 +Subject: xfs: initialise di_crc in xfs_log_dinode +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Dave Chinner , Alexander Potapenko , "Darrick J. Wong" , Chandan Babu R , Leah Rumancik +Message-ID: <20250313202550.2257219-24-leah.rumancik@gmail.com> + +From: Dave Chinner + +[ Upstream commit 0573676fdde7ce3829ee6a42a8e5a56355234712 ] + +Alexander Potapenko reported that KMSAN was issuing these warnings: + +kmalloc-ed xlog buffer of size 512 : ffff88802fc26200 +kmalloc-ed xlog buffer of size 368 : ffff88802fc24a00 +kmalloc-ed xlog buffer of size 648 : ffff88802b631000 +kmalloc-ed xlog buffer of size 648 : ffff88802b632800 +kmalloc-ed xlog buffer of size 648 : ffff88802b631c00 +xlog_write_iovec: copying 12 bytes from ffff888017ddbbd8 to ffff88802c300400 +xlog_write_iovec: copying 28 bytes from ffff888017ddbbe4 to ffff88802c30040c +xlog_write_iovec: copying 68 bytes from ffff88802fc26274 to ffff88802c300428 +xlog_write_iovec: copying 188 bytes from ffff88802fc262bc to ffff88802c30046c +===================================================== +BUG: KMSAN: uninit-value in xlog_write_iovec fs/xfs/xfs_log.c:2227 +BUG: KMSAN: uninit-value in xlog_write_full fs/xfs/xfs_log.c:2263 +BUG: KMSAN: uninit-value in xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532 + xlog_write_iovec fs/xfs/xfs_log.c:2227 + xlog_write_full fs/xfs/xfs_log.c:2263 + xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532 + xlog_cil_write_chain fs/xfs/xfs_log_cil.c:918 + xlog_cil_push_work+0x30f2/0x44e0 fs/xfs/xfs_log_cil.c:1263 + process_one_work kernel/workqueue.c:2630 + process_scheduled_works+0x1188/0x1e30 kernel/workqueue.c:2703 + worker_thread+0xee5/0x14f0 kernel/workqueue.c:2784 + kthread+0x391/0x500 kernel/kthread.c:388 + ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 + +Uninit was created at: + 
slab_post_alloc_hook+0x101/0xac0 mm/slab.h:768 + slab_alloc_node mm/slub.c:3482 + __kmem_cache_alloc_node+0x612/0xae0 mm/slub.c:3521 + __do_kmalloc_node mm/slab_common.c:1006 + __kmalloc+0x11a/0x410 mm/slab_common.c:1020 + kmalloc ./include/linux/slab.h:604 + xlog_kvmalloc fs/xfs/xfs_log_priv.h:704 + xlog_cil_alloc_shadow_bufs fs/xfs/xfs_log_cil.c:343 + xlog_cil_commit+0x487/0x4dc0 fs/xfs/xfs_log_cil.c:1574 + __xfs_trans_commit+0x8df/0x1930 fs/xfs/xfs_trans.c:1017 + xfs_trans_commit+0x30/0x40 fs/xfs/xfs_trans.c:1061 + xfs_create+0x15af/0x2150 fs/xfs/xfs_inode.c:1076 + xfs_generic_create+0x4cd/0x1550 fs/xfs/xfs_iops.c:199 + xfs_vn_create+0x4a/0x60 fs/xfs/xfs_iops.c:275 + lookup_open fs/namei.c:3477 + open_last_lookups fs/namei.c:3546 + path_openat+0x29ac/0x6180 fs/namei.c:3776 + do_filp_open+0x24d/0x680 fs/namei.c:3809 + do_sys_openat2+0x1bc/0x330 fs/open.c:1440 + do_sys_open fs/open.c:1455 + __do_sys_openat fs/open.c:1471 + __se_sys_openat fs/open.c:1466 + __x64_sys_openat+0x253/0x330 fs/open.c:1466 + do_syscall_x64 arch/x86/entry/common.c:51 + do_syscall_64+0x4f/0x140 arch/x86/entry/common.c:82 + entry_SYSCALL_64_after_hwframe+0x63/0x6b arch/x86/entry/entry_64.S:120 + +Bytes 112-115 of 188 are uninitialized +Memory access of size 188 starts at ffff88802fc262bc + +This is caused by the struct xfs_log_dinode not having the di_crc +field initialised. Log recovery never uses this field (it is only +present these days for on-disk format compatibility reasons) and so +its value is never checked so nothing in XFS has caught this. + +Further, none of the uninitialised memory access warning tools have +caught this (despite catching other uninit memory accesses in the +struct xfs_log_dinode back in 2017!) until recently. Alexander +annotated the XFS code to get the dump of the actual bytes that were +detected as uninitialised, and from that report it took me about 30s +to realise what the issue was. 
+ +The issue was introduced back in 2016 and every inode that is logged +fails to initialise this field. There is no actual bad behaviour +caused by this issue - I find it hard to even classify it as a +bug... + +Reported-and-tested-by: Alexander Potapenko +Fixes: f8d55aa0523a ("xfs: introduce inode log format object") +Signed-off-by: Dave Chinner +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_inode_item.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -556,6 +556,9 @@ xfs_inode_to_log_dinode( + memset(to->di_pad2, 0, sizeof(to->di_pad2)); + uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); + to->di_v3_pad = 0; ++ ++ /* dummy value for initialisation */ ++ to->di_crc = 0; + } else { + to->di_version = 2; + to->di_flushiter = ip->i_flushiter; diff --git a/queue-6.1/xfs-make-rextslog-computation-consistent-with-mkfs.patch b/queue-6.1/xfs-make-rextslog-computation-consistent-with-mkfs.patch new file mode 100644 index 0000000000..8bf478066d --- /dev/null +++ b/queue-6.1/xfs-make-rextslog-computation-consistent-with-mkfs.patch @@ -0,0 +1,151 @@ +From stable+bounces-124374-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:33 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:37 -0700 +Subject: xfs: make rextslog computation consistent with mkfs +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-18-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit a6a38f309afc4a7ede01242b603f36c433997780 ] + +There's a weird discrepancy in xfsprogs dating back to the creation of +the Linux port -- if there are zero rt extents, mkfs will set +sb_rextents and sb_rextslog both to zero: + + sbp->sb_rextslog = + (uint8_t)(rtextents ? 
+ libxfs_highbit32((unsigned int)rtextents) : 0); + +However, that's not the check that xfs_repair uses for nonzero rtblocks: + + if (sb->sb_rextslog != + libxfs_highbit32((unsigned int)sb->sb_rextents)) + +The difference here is that xfs_highbit32 returns -1 if its argument is +zero. Unfortunately, this means that in the weird corner case of a +realtime volume shorter than 1 rt extent, xfs_repair will immediately +flag a freshly formatted filesystem as corrupt. Because mkfs has been +writing ondisk artifacts like this for decades, we have to accept that +as "correct". TBH, zero rextslog for zero rtextents makes more sense to +me anyway. + +Regrettably, the superblock verifier checks created in commit copied +xfs_repair even though mkfs has been writing out such filesystems for +ages. Fix the superblock verifier to accept what mkfs spits out; the +userspace version of this patch will have to fix xfs_repair as well. + +Note that the new helper leaves the zeroday bug where the upper 32 bits +of sb_rextents is ripped off and fed to highbit32. This leads to a +seriously undersized rt summary file, which immediately breaks mkfs: + +$ hugedisk.sh foo /dev/sdc $(( 0x100000080 * 4096))B +$ /sbin/mkfs.xfs -f /dev/sda -m rmapbt=0,reflink=0 -r rtdev=/dev/mapper/foo +meta-data=/dev/sda isize=512 agcount=4, agsize=1298176 blks + = sectsz=512 attr=2, projid32bit=1 + = crc=1 finobt=1, sparse=1, rmapbt=0 + = reflink=0 bigtime=1 inobtcount=1 nrext64=1 +data = bsize=4096 blocks=5192704, imaxpct=25 + = sunit=0 swidth=0 blks +naming =version 2 bsize=4096 ascii-ci=0, ftype=1 +log =internal log bsize=4096 blocks=16384, version=2 + = sectsz=512 sunit=0 blks, lazy-count=1 +realtime =/dev/mapper/foo extsz=4096 blocks=4294967424, rtextents=4294967424 +Discarding blocks...Done. 
+mkfs.xfs: Error initializing the realtime space [117 - Structure needs cleaning] + +The next patch will drop support for rt volumes with fewer than 1 or +more than 2^32-1 rt extents, since they've clearly been broken forever. + +Fixes: f8e566c0f5e1f ("xfs: validate the realtime geometry in xfs_validate_sb_common") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_rtbitmap.c | 13 +++++++++++++ + fs/xfs/libxfs/xfs_rtbitmap.h | 4 ++++ + fs/xfs/libxfs/xfs_sb.c | 3 ++- + fs/xfs/xfs_rtalloc.c | 4 ++-- + 4 files changed, 21 insertions(+), 3 deletions(-) + +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -1130,3 +1130,16 @@ xfs_rtalloc_extent_is_free( + *is_free = matches; + return 0; + } ++ ++/* ++ * Compute the maximum level number of the realtime summary file, as defined by ++ * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that ++ * prohibits correct use of rt volumes with more than 2^32 extents. ++ */ ++uint8_t ++xfs_compute_rextslog( ++ xfs_rtbxlen_t rtextents) ++{ ++ return rtextents ? xfs_highbit32(rtextents) : 0; ++} ++ +--- a/fs/xfs/libxfs/xfs_rtbitmap.h ++++ b/fs/xfs/libxfs/xfs_rtbitmap.h +@@ -70,6 +70,9 @@ xfs_rtfree_extent( + /* Same as above, but in units of rt blocks. 
*/ + int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen); ++ ++uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); ++ + #else /* CONFIG_XFS_RT */ + # define xfs_rtfree_extent(t,b,l) (-ENOSYS) + # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +@@ -77,6 +80,7 @@ int xfs_rtfree_blocks(struct xfs_trans * + # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) + # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) + # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) ++# define xfs_compute_rextslog(rtx) (0) + #endif /* CONFIG_XFS_RT */ + + #endif /* __XFS_RTBITMAP_H__ */ +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -25,6 +25,7 @@ + #include "xfs_da_format.h" + #include "xfs_health.h" + #include "xfs_ag.h" ++#include "xfs_rtbitmap.h" + + /* + * Physical superblock buffer manipulations. Shared with libxfs in userspace. +@@ -502,7 +503,7 @@ xfs_validate_sb_common( + NBBY * sbp->sb_blocksize); + + if (sbp->sb_rextents != rexts || +- sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) || ++ sbp->sb_rextslog != xfs_compute_rextslog(rexts) || + sbp->sb_rbmblocks != rbmblocks) { + xfs_notice(mp, + "realtime geometry sanity check failed"); +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -999,7 +999,7 @@ xfs_growfs_rt( + nrextents = nrblocks; + do_div(nrextents, in->extsize); + nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); +- nrextslog = xfs_highbit32(nrextents); ++ nrextslog = xfs_compute_rextslog(nrextents); + nrsumlevels = nrextslog + 1; + nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; + nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); +@@ -1061,7 +1061,7 @@ xfs_growfs_rt( + nsbp->sb_rextents = nsbp->sb_rblocks; + do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + ASSERT(nsbp->sb_rextents != 0); +- nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); ++ nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents); + nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; + nrsumsize = + 
(uint)sizeof(xfs_suminfo_t) * nrsumlevels * diff --git a/queue-6.1/xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch b/queue-6.1/xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch new file mode 100644 index 0000000000..3f42d1385c --- /dev/null +++ b/queue-6.1/xfs-move-the-xfs_rtbitmap.c-declarations-to-xfs_rtbitmap.h.patch @@ -0,0 +1,275 @@ +From stable+bounces-124367-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:24 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:30 -0700 +Subject: xfs: move the xfs_rtbitmap.c declarations to xfs_rtbitmap.h +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-11-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 13928113fc5b5e79c91796290a99ed991ac0efe2 ] + +[6.1: resolved conflicts with fscounters.c and rtsummary.c ] + +Move all the declarations for functionality in xfs_rtbitmap.c into a +separate xfs_rtbitmap.h header file. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 2 - + fs/xfs/libxfs/xfs_rtbitmap.c | 1 + fs/xfs/libxfs/xfs_rtbitmap.h | 82 +++++++++++++++++++++++++++++++++++++++++++ + fs/xfs/scrub/rtbitmap.c | 2 - + fs/xfs/xfs_fsmap.c | 2 - + fs/xfs/xfs_rtalloc.c | 1 + fs/xfs/xfs_rtalloc.h | 73 -------------------------------------- + 7 files changed, 87 insertions(+), 76 deletions(-) + create mode 100644 fs/xfs/libxfs/xfs_rtbitmap.h + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -21,7 +21,7 @@ + #include "xfs_bmap.h" + #include "xfs_bmap_util.h" + #include "xfs_bmap_btree.h" +-#include "xfs_rtalloc.h" ++#include "xfs_rtbitmap.h" + #include "xfs_errortag.h" + #include "xfs_error.h" + #include "xfs_quota.h" +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -16,6 +16,7 @@ + #include "xfs_trans.h" + #include "xfs_rtalloc.h" + #include "xfs_error.h" ++#include "xfs_rtbitmap.h" + + /* + * Realtime allocator bitmap functions shared with userspace. +--- /dev/null ++++ b/fs/xfs/libxfs/xfs_rtbitmap.h +@@ -0,0 +1,82 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. ++ * All Rights Reserved. ++ */ ++#ifndef __XFS_RTBITMAP_H__ ++#define __XFS_RTBITMAP_H__ ++ ++/* ++ * XXX: Most of the realtime allocation functions deal in units of realtime ++ * extents, not realtime blocks. This looks funny when paired with the type ++ * name and screams for a larger cleanup. 
++ */ ++struct xfs_rtalloc_rec { ++ xfs_rtblock_t ar_startext; ++ xfs_rtblock_t ar_extcount; ++}; ++ ++typedef int (*xfs_rtalloc_query_range_fn)( ++ struct xfs_mount *mp, ++ struct xfs_trans *tp, ++ const struct xfs_rtalloc_rec *rec, ++ void *priv); ++ ++#ifdef CONFIG_XFS_RT ++int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t block, int issum, struct xfs_buf **bpp); ++int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_extlen_t len, int val, ++ xfs_rtblock_t *new, int *stat); ++int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_rtblock_t limit, ++ xfs_rtblock_t *rtblock); ++int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_rtblock_t limit, ++ xfs_rtblock_t *rtblock); ++int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_extlen_t len, int val); ++int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, ++ int log, xfs_rtblock_t bbno, int delta, ++ struct xfs_buf **rbpp, xfs_fsblock_t *rsb, ++ xfs_suminfo_t *sum); ++int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, ++ xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, ++ xfs_fsblock_t *rsb); ++int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_extlen_t len, ++ struct xfs_buf **rbpp, xfs_fsblock_t *rsb); ++int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, ++ const struct xfs_rtalloc_rec *low_rec, ++ const struct xfs_rtalloc_rec *high_rec, ++ xfs_rtalloc_query_range_fn fn, void *priv); ++int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtalloc_query_range_fn fn, ++ void *priv); ++bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); ++int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, ++ xfs_rtblock_t start, xfs_extlen_t len, ++ bool *is_free); ++/* ++ * 
Free an extent in the realtime subvolume. Length is expressed in ++ * realtime extents, as is the block number. ++ */ ++int /* error */ ++xfs_rtfree_extent( ++ struct xfs_trans *tp, /* transaction pointer */ ++ xfs_rtblock_t bno, /* starting block number to free */ ++ xfs_extlen_t len); /* length of extent freed */ ++ ++/* Same as above, but in units of rt blocks. */ ++int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, ++ xfs_filblks_t rtlen); ++#else /* CONFIG_XFS_RT */ ++# define xfs_rtfree_extent(t,b,l) (-ENOSYS) ++# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) ++# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) ++# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) ++# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) ++# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) ++#endif /* CONFIG_XFS_RT */ ++ ++#endif /* __XFS_RTBITMAP_H__ */ +--- a/fs/xfs/scrub/rtbitmap.c ++++ b/fs/xfs/scrub/rtbitmap.c +@@ -11,7 +11,7 @@ + #include "xfs_mount.h" + #include "xfs_log_format.h" + #include "xfs_trans.h" +-#include "xfs_rtalloc.h" ++#include "xfs_rtbitmap.h" + #include "xfs_inode.h" + #include "xfs_bmap.h" + #include "scrub/scrub.h" +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -23,7 +23,7 @@ + #include "xfs_refcount.h" + #include "xfs_refcount_btree.h" + #include "xfs_alloc_btree.h" +-#include "xfs_rtalloc.h" ++#include "xfs_rtbitmap.h" + #include "xfs_ag.h" + + /* Convert an xfs_fsmap to an fsmap. */ +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -19,6 +19,7 @@ + #include "xfs_icache.h" + #include "xfs_rtalloc.h" + #include "xfs_sb.h" ++#include "xfs_rtbitmap.h" + + /* + * Read and return the summary information for a given extent size, +--- a/fs/xfs/xfs_rtalloc.h ++++ b/fs/xfs/xfs_rtalloc.h +@@ -11,22 +11,6 @@ + struct xfs_mount; + struct xfs_trans; + +-/* +- * XXX: Most of the realtime allocation functions deal in units of realtime +- * extents, not realtime blocks. 
This looks funny when paired with the type +- * name and screams for a larger cleanup. +- */ +-struct xfs_rtalloc_rec { +- xfs_rtblock_t ar_startext; +- xfs_rtblock_t ar_extcount; +-}; +- +-typedef int (*xfs_rtalloc_query_range_fn)( +- struct xfs_mount *mp, +- struct xfs_trans *tp, +- const struct xfs_rtalloc_rec *rec, +- void *priv); +- + #ifdef CONFIG_XFS_RT + /* + * Function prototypes for exported functions. +@@ -48,19 +32,6 @@ xfs_rtallocate_extent( + xfs_extlen_t prod, /* extent product factor */ + xfs_rtblock_t *rtblock); /* out: start block allocated */ + +-/* +- * Free an extent in the realtime subvolume. Length is expressed in +- * realtime extents, as is the block number. +- */ +-int /* error */ +-xfs_rtfree_extent( +- struct xfs_trans *tp, /* transaction pointer */ +- xfs_rtblock_t bno, /* starting block number to free */ +- xfs_extlen_t len); /* length of extent freed */ +- +-/* Same as above, but in units of rt blocks. */ +-int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, +- xfs_filblks_t rtlen); + + /* + * Initialize realtime fields in the mount structure. 
+@@ -102,55 +73,11 @@ xfs_growfs_rt( + struct xfs_mount *mp, /* file system mount structure */ + xfs_growfs_rt_t *in); /* user supplied growfs struct */ + +-/* +- * From xfs_rtbitmap.c +- */ +-int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t block, int issum, struct xfs_buf **bpp); +-int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_extlen_t len, int val, +- xfs_rtblock_t *new, int *stat); +-int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_rtblock_t limit, +- xfs_rtblock_t *rtblock); +-int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_rtblock_t limit, +- xfs_rtblock_t *rtblock); +-int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_extlen_t len, int val); +-int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, +- int log, xfs_rtblock_t bbno, int delta, +- struct xfs_buf **rbpp, xfs_fsblock_t *rsb, +- xfs_suminfo_t *sum); +-int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, +- xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, +- xfs_fsblock_t *rsb); +-int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_extlen_t len, +- struct xfs_buf **rbpp, xfs_fsblock_t *rsb); +-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, +- const struct xfs_rtalloc_rec *low_rec, +- const struct xfs_rtalloc_rec *high_rec, +- xfs_rtalloc_query_range_fn fn, void *priv); +-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtalloc_query_range_fn fn, +- void *priv); +-bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_rtblock_t start, xfs_extlen_t len, +- bool *is_free); + int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); + #else + # define 
xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) +-# define xfs_rtfree_extent(t,b,l) (-ENOSYS) +-# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) + # define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) + # define xfs_growfs_rt(mp,in) (-ENOSYS) +-# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) +-# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) +-# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) +-# define xfs_verify_rtbno(m, r) (false) +-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) + # define xfs_rtalloc_reinit_frextents(m) (0) + static inline int /* error */ + xfs_rtmount_init( diff --git a/queue-6.1/xfs-pass-per-ag-references-to-xfs_free_extent.patch b/queue-6.1/xfs-pass-per-ag-references-to-xfs_free_extent.patch new file mode 100644 index 0000000000..658cceff73 --- /dev/null +++ b/queue-6.1/xfs-pass-per-ag-references-to-xfs_free_extent.patch @@ -0,0 +1,202 @@ +From stable+bounces-124362-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:17 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:25 -0700 +Subject: xfs: pass per-ag references to xfs_free_extent +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Dave Chinner , Leah Rumancik +Message-ID: <20250313202550.2257219-6-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit b2ccab3199aa7cea9154d80ea2585312c5f6eba0 ] + +Pass a reference to the per-AG structure to xfs_free_extent. Most +callers already have one, so we can eliminate unnecessary lookups. The +one exception to this is the EFI code, which the next patch will fix. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag.c | 6 ++---- + fs/xfs/libxfs/xfs_alloc.c | 15 +++++---------- + fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- + fs/xfs/libxfs/xfs_ialloc_btree.c | 7 +++++-- + fs/xfs/libxfs/xfs_refcount_btree.c | 5 +++-- + fs/xfs/scrub/repair.c | 3 ++- + fs/xfs/xfs_extfree_item.c | 8 ++++++-- + 7 files changed, 28 insertions(+), 24 deletions(-) + +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -981,10 +981,8 @@ xfs_ag_extend_space( + if (error) + return error; + +- error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno, +- be32_to_cpu(agf->agf_length) - len), +- len, &XFS_RMAP_OINFO_SKIP_UPDATE, +- XFS_AG_RESV_NONE); ++ error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, ++ len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); + if (error) + return error; + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -3447,7 +3447,8 @@ xfs_free_extent_fix_freelist( + int + __xfs_free_extent( + struct xfs_trans *tp, +- xfs_fsblock_t bno, ++ struct xfs_perag *pag, ++ xfs_agblock_t agbno, + xfs_extlen_t len, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, +@@ -3455,12 +3456,9 @@ __xfs_free_extent( + { + struct xfs_mount *mp = tp->t_mountp; + struct xfs_buf *agbp; +- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); +- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); + struct xfs_agf *agf; + int error; + unsigned int busy_flags = 0; +- struct xfs_perag *pag; + + ASSERT(len != 0); + ASSERT(type != XFS_AG_RESV_AGFL); +@@ -3469,10 +3467,9 @@ __xfs_free_extent( + XFS_ERRTAG_FREE_EXTENT)) + return -EIO; + +- pag = xfs_perag_get(mp, agno); + error = xfs_free_extent_fix_freelist(tp, pag, &agbp); + if (error) +- goto err; ++ return error; + agf = agbp->b_addr; + + if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { +@@ -3486,20 +3483,18 @@ __xfs_free_extent( + goto err_release; + } + +- error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 
type); ++ error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo, ++ type); + if (error) + goto err_release; + + if (skip_discard) + busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; + xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags); +- xfs_perag_put(pag); + return 0; + + err_release: + xfs_trans_brelse(tp, agbp); +-err: +- xfs_perag_put(pag); + return error; + } + +--- a/fs/xfs/libxfs/xfs_alloc.h ++++ b/fs/xfs/libxfs/xfs_alloc.h +@@ -130,7 +130,8 @@ xfs_alloc_vextent( + int /* error */ + __xfs_free_extent( + struct xfs_trans *tp, /* transaction pointer */ +- xfs_fsblock_t bno, /* starting block number of extent */ ++ struct xfs_perag *pag, ++ xfs_agblock_t agbno, + xfs_extlen_t len, /* length of extent */ + const struct xfs_owner_info *oinfo, /* extent owner */ + enum xfs_ag_resv_type type, /* block reservation type */ +@@ -139,12 +140,13 @@ __xfs_free_extent( + static inline int + xfs_free_extent( + struct xfs_trans *tp, +- xfs_fsblock_t bno, ++ struct xfs_perag *pag, ++ xfs_agblock_t agbno, + xfs_extlen_t len, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) + { +- return __xfs_free_extent(tp, bno, len, oinfo, type, false); ++ return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false); + } + + int /* error */ +--- a/fs/xfs/libxfs/xfs_ialloc_btree.c ++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c +@@ -156,9 +156,12 @@ __xfs_inobt_free_block( + struct xfs_buf *bp, + enum xfs_ag_resv_type resv) + { ++ xfs_fsblock_t fsbno; ++ + xfs_inobt_mod_blockcount(cur, -1); +- return xfs_free_extent(cur->bc_tp, +- XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, ++ fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); ++ return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, ++ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + &XFS_RMAP_OINFO_INOBT, resv); + } + +--- a/fs/xfs/libxfs/xfs_refcount_btree.c ++++ b/fs/xfs/libxfs/xfs_refcount_btree.c +@@ -112,8 +112,9 @@ xfs_refcountbt_free_block( + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); + 
be32_add_cpu(&agf->agf_refcount_blocks, -1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); +- error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, +- XFS_AG_RESV_METADATA); ++ error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, ++ XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, ++ &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); + if (error) + return error; + +--- a/fs/xfs/scrub/repair.c ++++ b/fs/xfs/scrub/repair.c +@@ -582,7 +582,8 @@ xrep_reap_block( + else if (resv == XFS_AG_RESV_AGFL) + error = xrep_put_freelist(sc, agbno); + else +- error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv); ++ error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, oinfo, ++ resv); + if (agf_bp != sc->sa.agf_bp) + xfs_trans_brelse(sc->tp, agf_bp); + if (error) +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -350,6 +350,7 @@ xfs_trans_free_extent( + struct xfs_owner_info oinfo = { }; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent *extp; ++ struct xfs_perag *pag; + uint next_extent; + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, + xefi->xefi_startblock); +@@ -366,9 +367,12 @@ xfs_trans_free_extent( + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, + xefi->xefi_blockcount); + +- error = __xfs_free_extent(tp, xefi->xefi_startblock, +- xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, ++ pag = xfs_perag_get(mp, agno); ++ error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount, ++ &oinfo, XFS_AG_RESV_NONE, + xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); ++ xfs_perag_put(pag); ++ + /* + * Mark the transaction dirty, even on error. 
This ensures the + * transaction is aborted, which: diff --git a/queue-6.1/xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch b/queue-6.1/xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch new file mode 100644 index 0000000000..02a4d3f768 --- /dev/null +++ b/queue-6.1/xfs-pass-refcount-intent-directly-through-the-log-intent-code.patch @@ -0,0 +1,406 @@ +From stable+bounces-124358-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:13 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:21 -0700 +Subject: xfs: pass refcount intent directly through the log intent code +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Leah Rumancik +Message-ID: <20250313202550.2257219-2-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 0b11553ec54a6d88907e60d0595dbcef98539747 ] + +Pass the incore refcount intent through the CUI logging code instead of +repeatedly boxing and unboxing parameters. + +Signed-off-by: Darrick J. Wong +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_refcount.c | 96 +++++++++++++++++++------------------------ + fs/xfs/libxfs/xfs_refcount.h | 4 - + fs/xfs/xfs_refcount_item.c | 62 +++++++++++---------------- + fs/xfs/xfs_trace.h | 15 +----- + 4 files changed, 74 insertions(+), 103 deletions(-) + +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1213,37 +1213,33 @@ out_error: + STATIC int + xfs_refcount_adjust( + struct xfs_btree_cur *cur, +- xfs_agblock_t agbno, +- xfs_extlen_t aglen, +- xfs_agblock_t *new_agbno, +- xfs_extlen_t *new_aglen, ++ xfs_agblock_t *agbno, ++ xfs_extlen_t *aglen, + enum xfs_refc_adjust_op adj) + { + bool shape_changed; + int shape_changes = 0; + int error; + +- *new_agbno = agbno; +- *new_aglen = aglen; + if (adj == XFS_REFCOUNT_ADJUST_INCREASE) +- trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, +- agbno, aglen); ++ trace_xfs_refcount_increase(cur->bc_mp, ++ cur->bc_ag.pag->pag_agno, *agbno, *aglen); + else +- trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, +- agbno, aglen); ++ trace_xfs_refcount_decrease(cur->bc_mp, ++ cur->bc_ag.pag->pag_agno, *agbno, *aglen); + + /* + * Ensure that no rcextents cross the boundary of the adjustment range. + */ + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, +- agbno, &shape_changed); ++ *agbno, &shape_changed); + if (error) + goto out_error; + if (shape_changed) + shape_changes++; + + error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, +- agbno + aglen, &shape_changed); ++ *agbno + *aglen, &shape_changed); + if (error) + goto out_error; + if (shape_changed) +@@ -1253,7 +1249,7 @@ xfs_refcount_adjust( + * Try to merge with the left or right extents of the range. 
+ */ + error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, +- new_agbno, new_aglen, adj, &shape_changed); ++ agbno, aglen, adj, &shape_changed); + if (error) + goto out_error; + if (shape_changed) +@@ -1262,7 +1258,7 @@ xfs_refcount_adjust( + cur->bc_ag.refc.shape_changes++; + + /* Now that we've taken care of the ends, adjust the middle extents */ +- error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); ++ error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); + if (error) + goto out_error; + +@@ -1298,21 +1294,20 @@ xfs_refcount_finish_one_cleanup( + static inline int + xfs_refcount_continue_op( + struct xfs_btree_cur *cur, +- xfs_fsblock_t startblock, +- xfs_agblock_t new_agbno, +- xfs_extlen_t new_len, +- xfs_fsblock_t *new_fsbno) ++ struct xfs_refcount_intent *ri, ++ xfs_agblock_t new_agbno) + { + struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; + +- if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) ++ if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, ++ ri->ri_blockcount))) + return -EFSCORRUPTED; + +- *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); ++ ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + +- ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); +- ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); ++ ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); ++ ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); + + return 0; + } +@@ -1327,11 +1322,7 @@ xfs_refcount_continue_op( + int + xfs_refcount_finish_one( + struct xfs_trans *tp, +- enum xfs_refcount_intent_type type, +- xfs_fsblock_t startblock, +- xfs_extlen_t blockcount, +- xfs_fsblock_t *new_fsb, +- xfs_extlen_t *new_len, ++ struct xfs_refcount_intent *ri, + struct xfs_btree_cur **pcur) + { + struct xfs_mount *mp = tp->t_mountp; +@@ -1339,17 +1330,16 @@ xfs_refcount_finish_one( + struct xfs_buf *agbp = NULL; + int error = 0; + xfs_agblock_t bno; 
+- xfs_agblock_t new_agbno; + unsigned long nr_ops = 0; + int shape_changes = 0; + struct xfs_perag *pag; + +- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); +- bno = XFS_FSB_TO_AGBNO(mp, startblock); ++ pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); ++ bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); + +- trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), +- type, XFS_FSB_TO_AGBNO(mp, startblock), +- blockcount); ++ trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), ++ ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), ++ ri->ri_blockcount); + + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { + error = -EIO; +@@ -1380,42 +1370,42 @@ xfs_refcount_finish_one( + } + *pcur = rcur; + +- switch (type) { ++ switch (ri->ri_type) { + case XFS_REFCOUNT_INCREASE: +- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, +- new_len, XFS_REFCOUNT_ADJUST_INCREASE); ++ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, ++ XFS_REFCOUNT_ADJUST_INCREASE); + if (error) + goto out_drop; +- if (*new_len > 0) +- error = xfs_refcount_continue_op(rcur, startblock, +- new_agbno, *new_len, new_fsb); ++ if (ri->ri_blockcount > 0) ++ error = xfs_refcount_continue_op(rcur, ri, bno); + break; + case XFS_REFCOUNT_DECREASE: +- error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, +- new_len, XFS_REFCOUNT_ADJUST_DECREASE); ++ error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, ++ XFS_REFCOUNT_ADJUST_DECREASE); + if (error) + goto out_drop; +- if (*new_len > 0) +- error = xfs_refcount_continue_op(rcur, startblock, +- new_agbno, *new_len, new_fsb); ++ if (ri->ri_blockcount > 0) ++ error = xfs_refcount_continue_op(rcur, ri, bno); + break; + case XFS_REFCOUNT_ALLOC_COW: +- *new_fsb = startblock + blockcount; +- *new_len = 0; +- error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); ++ error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); ++ if (error) ++ goto out_drop; ++ 
ri->ri_blockcount = 0; + break; + case XFS_REFCOUNT_FREE_COW: +- *new_fsb = startblock + blockcount; +- *new_len = 0; +- error = __xfs_refcount_cow_free(rcur, bno, blockcount); ++ error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); ++ if (error) ++ goto out_drop; ++ ri->ri_blockcount = 0; + break; + default: + ASSERT(0); + error = -EFSCORRUPTED; + } +- if (!error && *new_len > 0) +- trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, +- bno, blockcount, new_agbno, *new_len); ++ if (!error && ri->ri_blockcount > 0) ++ trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, ++ ri->ri_type, bno, ri->ri_blockcount); + out_drop: + xfs_perag_put(pag); + return error; +--- a/fs/xfs/libxfs/xfs_refcount.h ++++ b/fs/xfs/libxfs/xfs_refcount.h +@@ -75,9 +75,7 @@ void xfs_refcount_decrease_extent(struct + extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, + struct xfs_btree_cur *rcur, int error); + extern int xfs_refcount_finish_one(struct xfs_trans *tp, +- enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, +- xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, +- xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); ++ struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur); + + extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, + xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -252,17 +252,12 @@ static int + xfs_trans_log_finish_refcount_update( + struct xfs_trans *tp, + struct xfs_cud_log_item *cudp, +- enum xfs_refcount_intent_type type, +- xfs_fsblock_t startblock, +- xfs_extlen_t blockcount, +- xfs_fsblock_t *new_fsb, +- xfs_extlen_t *new_len, ++ struct xfs_refcount_intent *ri, + struct xfs_btree_cur **pcur) + { + int error; + +- error = xfs_refcount_finish_one(tp, type, startblock, +- blockcount, new_fsb, new_len, pcur); ++ error = xfs_refcount_finish_one(tp, ri, pcur); + + /* + * Mark the transaction dirty, even on error. 
This ensures the +@@ -378,25 +373,20 @@ xfs_refcount_update_finish_item( + struct list_head *item, + struct xfs_btree_cur **state) + { +- struct xfs_refcount_intent *refc; +- xfs_fsblock_t new_fsb; +- xfs_extlen_t new_aglen; ++ struct xfs_refcount_intent *ri; + int error; + +- refc = container_of(item, struct xfs_refcount_intent, ri_list); +- error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), +- refc->ri_type, refc->ri_startblock, refc->ri_blockcount, +- &new_fsb, &new_aglen, state); ++ ri = container_of(item, struct xfs_refcount_intent, ri_list); ++ error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri, ++ state); + + /* Did we run out of reservation? Requeue what we didn't finish. */ +- if (!error && new_aglen > 0) { +- ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || +- refc->ri_type == XFS_REFCOUNT_DECREASE); +- refc->ri_startblock = new_fsb; +- refc->ri_blockcount = new_aglen; ++ if (!error && ri->ri_blockcount > 0) { ++ ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || ++ ri->ri_type == XFS_REFCOUNT_DECREASE); + return -EAGAIN; + } +- kmem_cache_free(xfs_refcount_intent_cache, refc); ++ kmem_cache_free(xfs_refcount_intent_cache, ri); + return error; + } + +@@ -463,18 +453,13 @@ xfs_cui_item_recover( + struct xfs_log_item *lip, + struct list_head *capture_list) + { +- struct xfs_bmbt_irec irec; + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); +- struct xfs_phys_extent *refc; + struct xfs_cud_log_item *cudp; + struct xfs_trans *tp; + struct xfs_btree_cur *rcur = NULL; + struct xfs_mount *mp = lip->li_log->l_mp; +- xfs_fsblock_t new_fsb; +- xfs_extlen_t new_len; + unsigned int refc_type; + bool requeue_only = false; +- enum xfs_refcount_intent_type type; + int i; + int error = 0; + +@@ -513,6 +498,9 @@ xfs_cui_item_recover( + cudp = xfs_trans_get_cud(tp, cuip); + + for (i = 0; i < cuip->cui_format.cui_nextents; i++) { ++ struct xfs_refcount_intent fake = { }; ++ struct xfs_phys_extent *refc; ++ + refc = &cuip->cui_format.cui_extents[i]; 
+ refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; + switch (refc_type) { +@@ -520,7 +508,7 @@ xfs_cui_item_recover( + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: +- type = refc_type; ++ fake.ri_type = refc_type; + break; + default: + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, +@@ -529,13 +517,12 @@ xfs_cui_item_recover( + error = -EFSCORRUPTED; + goto abort_error; + } +- if (requeue_only) { +- new_fsb = refc->pe_startblock; +- new_len = refc->pe_len; +- } else ++ ++ fake.ri_startblock = refc->pe_startblock; ++ fake.ri_blockcount = refc->pe_len; ++ if (!requeue_only) + error = xfs_trans_log_finish_refcount_update(tp, cudp, +- type, refc->pe_startblock, refc->pe_len, +- &new_fsb, &new_len, &rcur); ++ &fake, &rcur); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, +@@ -544,10 +531,13 @@ xfs_cui_item_recover( + goto abort_error; + + /* Requeue what we didn't finish. */ +- if (new_len > 0) { +- irec.br_startblock = new_fsb; +- irec.br_blockcount = new_len; +- switch (type) { ++ if (fake.ri_blockcount > 0) { ++ struct xfs_bmbt_irec irec = { ++ .br_startblock = fake.ri_startblock, ++ .br_blockcount = fake.ri_blockcount, ++ }; ++ ++ switch (fake.ri_type) { + case XFS_REFCOUNT_INCREASE: + xfs_refcount_increase_extent(tp, &irec); + break; +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3208,17 +3208,14 @@ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refco + + TRACE_EVENT(xfs_refcount_finish_one_leftover, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, +- int type, xfs_agblock_t agbno, xfs_extlen_t len, +- xfs_agblock_t new_agbno, xfs_extlen_t new_len), +- TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len), ++ int type, xfs_agblock_t agbno, xfs_extlen_t len), ++ TP_ARGS(mp, agno, type, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, type) + __field(xfs_agblock_t, agbno) + 
__field(xfs_extlen_t, len) +- __field(xfs_agblock_t, new_agbno) +- __field(xfs_extlen_t, new_len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; +@@ -3226,17 +3223,13 @@ TRACE_EVENT(xfs_refcount_finish_one_left + __entry->type = type; + __entry->agbno = agbno; + __entry->len = len; +- __entry->new_agbno = new_agbno; +- __entry->new_len = new_len; + ), +- TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x", ++ TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->type, + __entry->agno, + __entry->agbno, +- __entry->len, +- __entry->new_agbno, +- __entry->new_len) ++ __entry->len) + ); + + /* simple inode-based error/%ip tracepoint class */ diff --git a/queue-6.1/xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch b/queue-6.1/xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch new file mode 100644 index 0000000000..6808cee97a --- /dev/null +++ b/queue-6.1/xfs-pass-the-xfs_bmbt_irec-directly-through-the-log-intent-code.patch @@ -0,0 +1,242 @@ +From stable+bounces-124361-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:16 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:24 -0700 +Subject: xfs: pass the xfs_bmbt_irec directly through the log intent code +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Leah Rumancik +Message-ID: <20250313202550.2257219-5-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit ddccb81b26ec021ae1f3366aa996cc4c68dd75ce ] + +Instead of repeatedly boxing and unboxing the incore extent mapping +structure as it passes through the BUI code, pass the pointer directly +through. + +Signed-off-by: Darrick J. Wong +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 32 ++++++++---------- + fs/xfs/libxfs/xfs_bmap.h | 5 -- + fs/xfs/xfs_bmap_item.c | 81 +++++++++++++++++------------------------------ + 3 files changed, 46 insertions(+), 72 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -6119,39 +6119,37 @@ xfs_bmap_unmap_extent( + int + xfs_bmap_finish_one( + struct xfs_trans *tp, +- struct xfs_inode *ip, +- enum xfs_bmap_intent_type type, +- int whichfork, +- xfs_fileoff_t startoff, +- xfs_fsblock_t startblock, +- xfs_filblks_t *blockcount, +- xfs_exntst_t state) ++ struct xfs_bmap_intent *bi) + { ++ struct xfs_bmbt_irec *bmap = &bi->bi_bmap; + int error = 0; + + ASSERT(tp->t_firstblock == NULLFSBLOCK); + + trace_xfs_bmap_deferred(tp->t_mountp, +- XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, +- XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), +- ip->i_ino, whichfork, startoff, *blockcount, state); ++ XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), ++ bi->bi_type, ++ XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), ++ bi->bi_owner->i_ino, bi->bi_whichfork, ++ bmap->br_startoff, bmap->br_blockcount, ++ bmap->br_state); + +- if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) ++ if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK)) + return -EFSCORRUPTED; + + if (XFS_TEST_ERROR(false, tp->t_mountp, + XFS_ERRTAG_BMAP_FINISH_ONE)) + return -EIO; + +- switch (type) { ++ switch (bi->bi_type) { + case XFS_BMAP_MAP: +- error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, +- startblock, 0); +- *blockcount = 0; ++ error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff, ++ bmap->br_blockcount, bmap->br_startblock, 0); ++ bmap->br_blockcount = 0; + break; + case XFS_BMAP_UNMAP: +- error = __xfs_bunmapi(tp, ip, startoff, blockcount, +- XFS_BMAPI_REMAP, 1); ++ error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff, ++ &bmap->br_blockcount, XFS_BMAPI_REMAP, 1); + break; + default: + ASSERT(0); +--- a/fs/xfs/libxfs/xfs_bmap.h 
++++ b/fs/xfs/libxfs/xfs_bmap.h +@@ -236,10 +236,7 @@ struct xfs_bmap_intent { + struct xfs_bmbt_irec bi_bmap; + }; + +-int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, +- enum xfs_bmap_intent_type type, int whichfork, +- xfs_fileoff_t startoff, xfs_fsblock_t startblock, +- xfs_filblks_t *blockcount, xfs_exntst_t state); ++int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); + void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, + struct xfs_bmbt_irec *imap); + void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -246,18 +246,11 @@ static int + xfs_trans_log_finish_bmap_update( + struct xfs_trans *tp, + struct xfs_bud_log_item *budp, +- enum xfs_bmap_intent_type type, +- struct xfs_inode *ip, +- int whichfork, +- xfs_fileoff_t startoff, +- xfs_fsblock_t startblock, +- xfs_filblks_t *blockcount, +- xfs_exntst_t state) ++ struct xfs_bmap_intent *bi) + { + int error; + +- error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, +- startblock, blockcount, state); ++ error = xfs_bmap_finish_one(tp, bi); + + /* + * Mark the transaction dirty, even on error. 
This ensures the +@@ -378,25 +371,17 @@ xfs_bmap_update_finish_item( + struct list_head *item, + struct xfs_btree_cur **state) + { +- struct xfs_bmap_intent *bmap; +- xfs_filblks_t count; ++ struct xfs_bmap_intent *bi; + int error; + +- bmap = container_of(item, struct xfs_bmap_intent, bi_list); +- count = bmap->bi_bmap.br_blockcount; +- error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), +- bmap->bi_type, +- bmap->bi_owner, bmap->bi_whichfork, +- bmap->bi_bmap.br_startoff, +- bmap->bi_bmap.br_startblock, +- &count, +- bmap->bi_bmap.br_state); +- if (!error && count > 0) { +- ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); +- bmap->bi_bmap.br_blockcount = count; ++ bi = container_of(item, struct xfs_bmap_intent, bi_list); ++ ++ error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi); ++ if (!error && bi->bi_bmap.br_blockcount > 0) { ++ ASSERT(bi->bi_type == XFS_BMAP_UNMAP); + return -EAGAIN; + } +- kmem_cache_free(xfs_bmap_intent_cache, bmap); ++ kmem_cache_free(xfs_bmap_intent_cache, bi); + return error; + } + +@@ -471,17 +456,13 @@ xfs_bui_item_recover( + struct xfs_log_item *lip, + struct list_head *capture_list) + { +- struct xfs_bmbt_irec irec; ++ struct xfs_bmap_intent fake = { }; + struct xfs_bui_log_item *buip = BUI_ITEM(lip); + struct xfs_trans *tp; + struct xfs_inode *ip = NULL; + struct xfs_mount *mp = lip->li_log->l_mp; +- struct xfs_map_extent *bmap; ++ struct xfs_map_extent *map; + struct xfs_bud_log_item *budp; +- xfs_filblks_t count; +- xfs_exntst_t state; +- unsigned int bui_type; +- int whichfork; + int iext_delta; + int error = 0; + +@@ -491,14 +472,12 @@ xfs_bui_item_recover( + return -EFSCORRUPTED; + } + +- bmap = &buip->bui_format.bui_extents[0]; +- state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? +- XFS_EXT_UNWRITTEN : XFS_EXT_NORM; +- whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? ++ map = &buip->bui_format.bui_extents[0]; ++ fake.bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? 
+ XFS_ATTR_FORK : XFS_DATA_FORK; +- bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; ++ fake.bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; + +- error = xlog_recover_iget(mp, bmap->me_owner, &ip); ++ error = xlog_recover_iget(mp, map->me_owner, &ip); + if (error) + return error; + +@@ -512,34 +491,34 @@ xfs_bui_item_recover( + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + +- if (bui_type == XFS_BMAP_MAP) ++ if (fake.bi_type == XFS_BMAP_MAP) + iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT; + else + iext_delta = XFS_IEXT_PUNCH_HOLE_CNT; + +- error = xfs_iext_count_may_overflow(ip, whichfork, iext_delta); ++ error = xfs_iext_count_may_overflow(ip, fake.bi_whichfork, iext_delta); + if (error == -EFBIG) + error = xfs_iext_count_upgrade(tp, ip, iext_delta); + if (error) + goto err_cancel; + +- count = bmap->me_len; +- error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip, +- whichfork, bmap->me_startoff, bmap->me_startblock, +- &count, state); ++ fake.bi_owner = ip; ++ fake.bi_bmap.br_startblock = map->me_startblock; ++ fake.bi_bmap.br_startoff = map->me_startoff; ++ fake.bi_bmap.br_blockcount = map->me_len; ++ fake.bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? 
++ XFS_EXT_UNWRITTEN : XFS_EXT_NORM; ++ ++ error = xfs_trans_log_finish_bmap_update(tp, budp, &fake); + if (error == -EFSCORRUPTED) +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap, +- sizeof(*bmap)); ++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, map, ++ sizeof(*map)); + if (error) + goto err_cancel; + +- if (count > 0) { +- ASSERT(bui_type == XFS_BMAP_UNMAP); +- irec.br_startblock = bmap->me_startblock; +- irec.br_blockcount = count; +- irec.br_startoff = bmap->me_startoff; +- irec.br_state = state; +- xfs_bmap_unmap_extent(tp, ip, &irec); ++ if (fake.bi_bmap.br_blockcount > 0) { ++ ASSERT(fake.bi_type == XFS_BMAP_UNMAP); ++ xfs_bmap_unmap_extent(tp, ip, &fake.bi_bmap); + } + + /* diff --git a/queue-6.1/xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch b/queue-6.1/xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch new file mode 100644 index 0000000000..4f98ef45b3 --- /dev/null +++ b/queue-6.1/xfs-pass-the-xfs_defer_pending-object-to-iop_recover.patch @@ -0,0 +1,137 @@ +From stable+bounces-124372-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:31 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:35 -0700 +Subject: xfs: pass the xfs_defer_pending object to iop_recover +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-16-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit a050acdfa8003a44eae4558fddafc7afb1aef458 ] + +Now that log intent item recovery recreates the xfs_defer_pending state, +we should pass that into the ->iop_recover routines so that the intent +item can finish the recreation work. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_attr_item.c | 3 ++- + fs/xfs/xfs_bmap_item.c | 3 ++- + fs/xfs/xfs_extfree_item.c | 3 ++- + fs/xfs/xfs_log_recover.c | 2 +- + fs/xfs/xfs_refcount_item.c | 3 ++- + fs/xfs/xfs_rmap_item.c | 3 ++- + fs/xfs/xfs_trans.h | 4 +++- + 7 files changed, 14 insertions(+), 7 deletions(-) + +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -545,9 +545,10 @@ xfs_attri_validate( + */ + STATIC int + xfs_attri_item_recover( +- struct xfs_log_item *lip, ++ struct xfs_defer_pending *dfp, + struct list_head *capture_list) + { ++ struct xfs_log_item *lip = dfp->dfp_intent; + struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip); + struct xfs_attr_intent *attr; + struct xfs_mount *mp = lip->li_log->l_mp; +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -453,11 +453,12 @@ xfs_bui_validate( + */ + STATIC int + xfs_bui_item_recover( +- struct xfs_log_item *lip, ++ struct xfs_defer_pending *dfp, + struct list_head *capture_list) + { + struct xfs_bmap_intent fake = { }; + struct xfs_trans_res resv; ++ struct xfs_log_item *lip = dfp->dfp_intent; + struct xfs_bui_log_item *buip = BUI_ITEM(lip); + struct xfs_trans *tp; + struct xfs_inode *ip = NULL; +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -595,10 +595,11 @@ xfs_efi_validate_ext( + */ + STATIC int + xfs_efi_item_recover( +- struct xfs_log_item *lip, ++ struct xfs_defer_pending *dfp, + struct list_head *capture_list) + { + struct xfs_trans_res resv; ++ struct xfs_log_item *lip = dfp->dfp_intent; + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_mount *mp = lip->li_log->l_mp; + struct xfs_efd_log_item *efdp; +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2586,7 +2586,7 @@ xlog_recover_process_intents( + * The recovery function can free the log item, so we must not + * access lip after it returns. 
+ */ +- error = ops->iop_recover(lip, &capture_list); ++ error = ops->iop_recover(dfp, &capture_list); + if (error) { + trace_xlog_intent_recovery_failed(log->l_mp, error, + ops->iop_recover); +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -450,10 +450,11 @@ xfs_cui_validate_phys( + */ + STATIC int + xfs_cui_item_recover( +- struct xfs_log_item *lip, ++ struct xfs_defer_pending *dfp, + struct list_head *capture_list) + { + struct xfs_trans_res resv; ++ struct xfs_log_item *lip = dfp->dfp_intent; + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); + struct xfs_cud_log_item *cudp; + struct xfs_trans *tp; +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -489,10 +489,11 @@ xfs_rui_validate_map( + */ + STATIC int + xfs_rui_item_recover( +- struct xfs_log_item *lip, ++ struct xfs_defer_pending *dfp, + struct list_head *capture_list) + { + struct xfs_trans_res resv; ++ struct xfs_log_item *lip = dfp->dfp_intent; + struct xfs_rui_log_item *ruip = RUI_ITEM(lip); + struct xfs_map_extent *rmap; + struct xfs_rud_log_item *rudp; +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -66,6 +66,8 @@ struct xfs_log_item { + { (1u << XFS_LI_DIRTY), "DIRTY" }, \ + { (1u << XFS_LI_WHITEOUT), "WHITEOUT" } + ++struct xfs_defer_pending; ++ + struct xfs_item_ops { + unsigned flags; + void (*iop_size)(struct xfs_log_item *, int *, int *); +@@ -78,7 +80,7 @@ struct xfs_item_ops { + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); + uint (*iop_push)(struct xfs_log_item *, struct list_head *); + void (*iop_release)(struct xfs_log_item *); +- int (*iop_recover)(struct xfs_log_item *lip, ++ int (*iop_recover)(struct xfs_defer_pending *dfp, + struct list_head *capture_list); + bool (*iop_match)(struct xfs_log_item *item, uint64_t id); + struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent, diff --git a/queue-6.1/xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch 
b/queue-6.1/xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch new file mode 100644 index 0000000000..2c4e92f8a5 --- /dev/null +++ b/queue-6.1/xfs-pass-xfs_extent_free_item-directly-through-the-log-intent-code.patch @@ -0,0 +1,128 @@ +From stable+bounces-124359-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:15 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:22 -0700 +Subject: xfs: pass xfs_extent_free_item directly through the log intent code +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Leah Rumancik +Message-ID: <20250313202550.2257219-3-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 72ba455599ad13d08c29dafa22a32360e07b1961 ] + +Pass the incore xfs_extent_free_item through the EFI logging code +instead of repeatedly boxing and unboxing parameters. + +Signed-off-by: Darrick J. Wong +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_extfree_item.c | 55 +++++++++++++++++++++++++--------------------- + 1 file changed, 30 insertions(+), 25 deletions(-) + +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -345,23 +345,30 @@ static int + xfs_trans_free_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, +- xfs_fsblock_t start_block, +- xfs_extlen_t ext_len, +- const struct xfs_owner_info *oinfo, +- bool skip_discard) ++ struct xfs_extent_free_item *free) + { ++ struct xfs_owner_info oinfo = { }; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent *extp; + uint next_extent; +- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); ++ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, ++ free->xefi_startblock); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, +- start_block); ++ free->xefi_startblock); + int error; + +- trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); ++ oinfo.oi_owner = free->xefi_owner; ++ if (free->xefi_flags & XFS_EFI_ATTR_FORK) ++ oinfo.oi_flags |= 
XFS_OWNER_INFO_ATTR_FORK; ++ if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) ++ oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; ++ ++ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ++ free->xefi_blockcount); + +- error = __xfs_free_extent(tp, start_block, ext_len, +- oinfo, XFS_AG_RESV_NONE, skip_discard); ++ error = __xfs_free_extent(tp, free->xefi_startblock, ++ free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, ++ free->xefi_flags & XFS_EFI_SKIP_DISCARD); + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: +@@ -375,8 +382,8 @@ xfs_trans_free_extent( + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); +- extp->ext_start = start_block; +- extp->ext_len = ext_len; ++ extp->ext_start = free->xefi_startblock; ++ extp->ext_len = free->xefi_blockcount; + efdp->efd_next_extent++; + + return error; +@@ -463,20 +470,12 @@ xfs_extent_free_finish_item( + struct list_head *item, + struct xfs_btree_cur **state) + { +- struct xfs_owner_info oinfo = { }; + struct xfs_extent_free_item *free; + int error; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); +- oinfo.oi_owner = free->xefi_owner; +- if (free->xefi_flags & XFS_EFI_ATTR_FORK) +- oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; +- if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) +- oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; +- error = xfs_trans_free_extent(tp, EFD_ITEM(done), +- free->xefi_startblock, +- free->xefi_blockcount, +- &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); ++ ++ error = xfs_trans_free_extent(tp, EFD_ITEM(done), free); + kmem_cache_free(xfs_extfree_item_cache, free); + return error; + } +@@ -599,7 +598,6 @@ xfs_efi_item_recover( + struct xfs_mount *mp = lip->li_log->l_mp; + struct xfs_efd_log_item *efdp; + struct xfs_trans *tp; +- struct xfs_extent *extp; + int i; + int error = 0; + +@@ -624,10 +622,17 @@ xfs_efi_item_recover( + efdp = 
xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); + + for (i = 0; i < efip->efi_format.efi_nextents; i++) { ++ struct xfs_extent_free_item fake = { ++ .xefi_owner = XFS_RMAP_OWN_UNKNOWN, ++ }; ++ struct xfs_extent *extp; ++ + extp = &efip->efi_format.efi_extents[i]; +- error = xfs_trans_free_extent(tp, efdp, extp->ext_start, +- extp->ext_len, +- &XFS_RMAP_OINFO_ANY_OWNER, false); ++ ++ fake.xefi_startblock = extp->ext_start; ++ fake.xefi_blockcount = extp->ext_len; ++ ++ error = xfs_trans_free_extent(tp, efdp, &fake); + if (error == -EFSCORRUPTED) + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + extp, sizeof(*extp)); diff --git a/queue-6.1/xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch b/queue-6.1/xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch new file mode 100644 index 0000000000..5b3ab8fc8c --- /dev/null +++ b/queue-6.1/xfs-recompute-growfsrtfree-transaction-reservation-while-growing-rt-volume.patch @@ -0,0 +1,57 @@ +From stable+bounces-124378-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:38 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:41 -0700 +Subject: xfs: recompute growfsrtfree transaction reservation while growing rt volume +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-22-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 578bd4ce7100ae34f98c6b0147fe75cfa0dadbac ] + +While playing with growfs to create a 20TB realtime section on a +filesystem that didn't previously have an rt section, I noticed that +growfs would occasionally shut down the log due to a transaction +reservation overflow. + +xfs_calc_growrtfree_reservation uses the current size of the realtime +summary file (m_rsumsize) to compute the transaction reservation for a +growrtfree transaction. 
The reservations are computed at mount time, +which means that m_rsumsize is zero when growfs starts "freeing" the new +realtime extents into the rt volume. As a result, the transaction is +undersized and fails. + +Fix this by recomputing the transaction reservations every time we +change m_rsumsize. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_rtalloc.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -1070,6 +1070,9 @@ xfs_growfs_rt( + nsbp->sb_rbmblocks; + nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); + nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); ++ /* recompute growfsrt reservation from new rsumsize */ ++ xfs_trans_resv_calc(nmp, &nmp->m_resv); ++ + /* + * Start a transaction, get the log reservation. + */ +@@ -1153,6 +1156,8 @@ error_cancel: + */ + mp->m_rsumlevels = nrsumlevels; + mp->m_rsumsize = nrsumsize; ++ /* recompute growfsrt reservation from new rsumsize */ ++ xfs_trans_resv_calc(mp, &mp->m_resv); + + error = xfs_trans_commit(tp); + if (error) diff --git a/queue-6.1/xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch b/queue-6.1/xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch new file mode 100644 index 0000000000..118b6ef5ac --- /dev/null +++ b/queue-6.1/xfs-remove-conditional-building-of-rt-geometry-validator-functions.patch @@ -0,0 +1,170 @@ +From stable+bounces-124386-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:50 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:49 -0700 +Subject: xfs: remove conditional building of rt geometry validator functions +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. 
Wong" , Christoph Hellwig , Chandan Babu R , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-30-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 881f78f472556ed05588172d5b5676b48dc48240 ] + +[ 6.1: used 6.6 backport to minimize conflicts ] + +[backport: resolve merge conflicts due to refactoring rtbitmap/summary +macros and accessors] + +I mistakenly turned off CONFIG_XFS_RT in the Kconfig file for arm64 +variant of the djwong-wtf git branch. Unfortunately, it took me a good +hour to figure out that RT wasn't built because this is what got printed +to dmesg: + +XFS (sda2): realtime geometry sanity check failed +XFS (sda2): Metadata corruption detected at xfs_sb_read_verify+0x170/0x190 [xfs], xfs_sb block 0x0 + +Whereas I would have expected: + +XFS (sda2): Not built with CONFIG_XFS_RT +XFS (sda2): RT mount failed + +The root cause of these problems is the conditional compilation of the +new functions xfs_validate_rtextents and xfs_compute_rextslog that I +introduced in the two commits listed below. The !RT versions of these +functions return false and 0, respectively, which causes primary +superblock validation to fail, which explains the first message. + +Move the two functions to other parts of libxfs that are not +conditionally defined by CONFIG_XFS_RT and remove the broken stubs so +that validation works again. + +Fixes: e14293803f4e ("xfs: don't allow overly small or large realtime volumes") +Fixes: a6a38f309afc ("xfs: make rextslog computation consistent with mkfs") +Signed-off-by: "Darrick J. Wong" +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_rtbitmap.c | 14 -------------- + fs/xfs/libxfs/xfs_rtbitmap.h | 16 ---------------- + fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ + fs/xfs/libxfs/xfs_sb.h | 2 ++ + fs/xfs/libxfs/xfs_types.h | 12 ++++++++++++ + fs/xfs/scrub/rtbitmap.c | 1 + + 6 files changed, 29 insertions(+), 30 deletions(-) + +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -1131,17 +1131,3 @@ xfs_rtalloc_extent_is_free( + return 0; + } + +-/* +- * Compute the maximum level number of the realtime summary file, as defined by +- * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct +- * use of rt volumes with more than 2^32 extents. +- */ +-uint8_t +-xfs_compute_rextslog( +- xfs_rtbxlen_t rtextents) +-{ +- if (!rtextents) +- return 0; +- return xfs_highbit64(rtextents); +-} +- +--- a/fs/xfs/libxfs/xfs_rtbitmap.h ++++ b/fs/xfs/libxfs/xfs_rtbitmap.h +@@ -71,20 +71,6 @@ xfs_rtfree_extent( + int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen); + +-uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); +- +-/* Do we support an rt volume having this number of rtextents? 
*/ +-static inline bool +-xfs_validate_rtextents( +- xfs_rtbxlen_t rtextents) +-{ +- /* No runt rt volumes */ +- if (rtextents == 0) +- return false; +- +- return true; +-} +- + #else /* CONFIG_XFS_RT */ + # define xfs_rtfree_extent(t,b,l) (-ENOSYS) + # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +@@ -92,8 +78,6 @@ xfs_validate_rtextents( + # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) + # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) + # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) +-# define xfs_compute_rextslog(rtx) (0) +-# define xfs_validate_rtextents(rtx) (false) + #endif /* CONFIG_XFS_RT */ + + #endif /* __XFS_RTBITMAP_H__ */ +--- a/fs/xfs/libxfs/xfs_sb.c ++++ b/fs/xfs/libxfs/xfs_sb.c +@@ -1367,3 +1367,17 @@ xfs_validate_stripe_geometry( + } + return true; + } ++ ++/* ++ * Compute the maximum level number of the realtime summary file, as defined by ++ * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct ++ * use of rt volumes with more than 2^32 extents. ++ */ ++uint8_t ++xfs_compute_rextslog( ++ xfs_rtbxlen_t rtextents) ++{ ++ if (!rtextents) ++ return 0; ++ return xfs_highbit64(rtextents); ++} +--- a/fs/xfs/libxfs/xfs_sb.h ++++ b/fs/xfs/libxfs/xfs_sb.h +@@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct x + extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool silent); + ++uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); ++ + #endif /* __XFS_SB_H__ */ +--- a/fs/xfs/libxfs/xfs_types.h ++++ b/fs/xfs/libxfs/xfs_types.h +@@ -228,4 +228,16 @@ bool xfs_verify_fileoff(struct xfs_mount + bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, + xfs_fileoff_t len); + ++/* Do we support an rt volume having this number of rtextents? 
*/ ++static inline bool ++xfs_validate_rtextents( ++ xfs_rtbxlen_t rtextents) ++{ ++ /* No runt rt volumes */ ++ if (rtextents == 0) ++ return false; ++ ++ return true; ++} ++ + #endif /* __XFS_TYPES_H__ */ +--- a/fs/xfs/scrub/rtbitmap.c ++++ b/fs/xfs/scrub/rtbitmap.c +@@ -14,6 +14,7 @@ + #include "xfs_rtbitmap.h" + #include "xfs_inode.h" + #include "xfs_bmap.h" ++#include "xfs_sb.h" + #include "scrub/scrub.h" + #include "scrub/common.h" + diff --git a/queue-6.1/xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch b/queue-6.1/xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch new file mode 100644 index 0000000000..00e1583113 --- /dev/null +++ b/queue-6.1/xfs-remove-unused-fields-from-struct-xbtree_ifakeroot.patch @@ -0,0 +1,42 @@ +From stable+bounces-124377-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:37 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:40 -0700 +Subject: xfs: remove unused fields from struct xbtree_ifakeroot +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Dave Chinner , Leah Rumancik +Message-ID: <20250313202550.2257219-21-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 4c8ecd1cfdd01fb727121035014d9f654a30bdf2 ] + +Remove these unused fields since nobody uses them. They should have +been removed years ago in a different cleanup series from Christoph +Hellwig. + +Fixes: daf83964a3681 ("xfs: move the per-fork nextents fields into struct xfs_ifork") +Fixes: f7e67b20ecbbc ("xfs: move the fork format fields into struct xfs_ifork") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_btree_staging.h | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/fs/xfs/libxfs/xfs_btree_staging.h ++++ b/fs/xfs/libxfs/xfs_btree_staging.h +@@ -37,12 +37,6 @@ struct xbtree_ifakeroot { + + /* Number of bytes available for this fork in the inode. 
*/ + unsigned int if_fork_size; +- +- /* Fork format. */ +- unsigned int if_format; +- +- /* Number of records. */ +- unsigned int if_extents; + }; + + /* Cursor interactions with fake roots for inode-rooted btrees. */ diff --git a/queue-6.1/xfs-reserve-less-log-space-when-recovering-log-intent-items.patch b/queue-6.1/xfs-reserve-less-log-space-when-recovering-log-intent-items.patch new file mode 100644 index 0000000000..1e2218819d --- /dev/null +++ b/queue-6.1/xfs-reserve-less-log-space-when-recovering-log-intent-items.patch @@ -0,0 +1,289 @@ +From stable+bounces-124366-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:23 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:29 -0700 +Subject: xfs: reserve less log space when recovering log intent items +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Wengang Wang , Srikanth C S , Dave Chinner , Leah Rumancik +Message-ID: <20250313202550.2257219-10-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 3c919b0910906cc69d76dea214776f0eac73358b ] + +Wengang Wang reports that a customer's system was running a number of +truncate operations on a filesystem with a very small log. Contention +on the reserve heads led to other threads stalling on smaller updates +(e.g. mtime updates) long enough to result in the node being rebooted +on account of the lack of responsiveness. The node failed to recover +because log recovery of an EFI became stuck waiting for a grant of +reserve space. From Wengang's report: + +"For the file deletion, log bytes are reserved basing on +xfs_mount->tr_itruncate which is: + + tr_logres = 175488, + tr_logcount = 2, + tr_logflags = XFS_TRANS_PERM_LOG_RES, + +"You see it's a permanent log reservation with two log operations (two +transactions in rolling mode). After calculation (xlog_calc_unit_res() +adds space for various log headers), the final log space needed per +transaction changes from 175488 to 180208 bytes. 
So the total log +space needed is 360416 bytes (180208 * 2). [That quantity] of log space +(360416 bytes) needs to be reserved for both run time inode removing +(xfs_inactive_truncate()) and EFI recover (xfs_efi_item_recover())." + +In other words, runtime pre-reserves 360K of space in anticipation of +running a chain of two transactions in which each transaction gets a +180K reservation. + +Now that we've allocated the transaction, we delete the bmap mapping, +log an EFI to free the space, and roll the transaction as part of +finishing the deferops chain. Rolling creates a new xfs_trans which +shares its ticket with the old transaction. Next, xfs_trans_roll calls +__xfs_trans_commit with regrant == true, which calls xlog_cil_commit +with the same regrant parameter. + +xlog_cil_commit calls xfs_log_ticket_regrant, which decrements t_cnt and +subtracts t_curr_res from the reservation and write heads. + +If the filesystem is fresh and the first transaction only used (say) +20K, then t_curr_res will be 160K, and we give that much reservation +back to the reservation head. Or if the file is really fragmented and +the first transaction actually uses 170K, then t_curr_res will be 10K, +and that's what we give back to the reservation. + +Having done that, we're now headed into the second transaction with an +EFI and 180K of reservation. Other threads apparently consumed all the +reservation for smaller transactions, such as timestamp updates. + +Now let's say the first transaction gets written to disk and we crash +without ever completing the second transaction. Now we remount the fs, +log recovery finds the unfinished EFI, and calls xfs_efi_recover to +finish the EFI. However, xfs_efi_recover starts a new tr_itruncate +transaction, which asks for 360K log reservation. This is a lot more +than the 180K that we had reserved at the time of the crash. 
If the +first EFI to be recovered is also pinning the tail of the log, we will +be unable to free any space in the log, and recovery livelocks. + +Wengang confirmed this: + +"Now we have the second transaction which has 180208 log bytes reserved +too. The second transaction is supposed to process intents including +extent freeing. With my hacking patch, I blocked the extent freeing 5 +hours. So in that 5 hours, 180208 (NOT 360416) log bytes are reserved. + +"With my test case, other transactions (update timestamps) then happen. +As my hacking patch pins the journal tail, those timestamp-updating +transactions finally use up (almost) all the left available log space +(in memory in on disk). And finally the on disk (and in memory) +available log space goes down near to 180208 bytes. Those 180208 bytes +are reserved by [the] second (extent-free) transaction [in the chain]." + +Wengang and I noticed that EFI recovery starts a transaction, completes +one step of the chain, and commits the transaction without completing +any other steps of the chain. Those subsequent steps are completed by +xlog_finish_defer_ops, which allocates yet another transaction to +finish the rest of the chain. That transaction gets the same tr_logres +as the head transaction, but with tr_logcount = 1 to force regranting +with every roll to avoid livelocks. + +In other words, we already figured this out in commit 929b92f64048d +("xfs: xfs_defer_capture should absorb remaining transaction +reservation"), but should have applied that logic to each intent item's +recovery function. For Wengang's case, the xfs_trans_alloc call in the +EFI recovery function should only be asking for a single transaction's +worth of log reservation -- 180K, not 360K. + +Quoting Wengang again: + +"With log recovery, during EFI recovery, we use tr_itruncate again to +reserve two transactions that needs 360416 log bytes. Reserving 360416 +bytes fails [stalls] because we now only have about 180208 available. 
+ +"Actually during the EFI recover, we only need one transaction to free +the extents just like the 2nd transaction at RUNTIME. So it only needs +to reserve 180208 rather than 360416 bytes. We have (a bit) more than +180208 available log bytes on disk, so [if we decrease the reservation +to 180K] the reservation goes and the recovery [finishes]. That is to +say: we can fix the log recover part to fix the issue. We can introduce +a new xfs_trans_res xfs_mount->tr_ext_free + +{ + tr_logres = 175488, + tr_logcount = 0, + tr_logflags = 0, +} + +"and use tr_ext_free instead of tr_itruncate in EFI recover." + +However, I don't think it quite makes sense to create an entirely new +transaction reservation type to handle single-stepping during log +recovery. Instead, we should copy the transaction reservation +information in the xfs_mount, change tr_logcount to 1, and pass that +into xfs_trans_alloc. We know this won't risk changing the min log size +computation since we always ask for a fraction of the reservation for +all known transaction types. + +This looks like it's been lurking in the codebase since commit +3d3c8b5222b92, which changed the xfs_trans_reserve call in +xlog_recover_process_efi to use the tr_logcount in tr_itruncate. +That changed the EFI recovery transaction from making a +non-XFS_TRANS_PERM_LOG_RES request for one transaction's worth of log +space to a XFS_TRANS_PERM_LOG_RES request for two transactions worth. + +Fixes: 3d3c8b5222b92 ("xfs: refactor xfs_trans_reserve() interface") +Complements: 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation") +Suggested-by: Wengang Wang +Cc: Srikanth C S +[djwong: apply the same transformation to all log intent recovery] +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_log_recover.h | 22 ++++++++++++++++++++++ + fs/xfs/xfs_attr_item.c | 7 ++++--- + fs/xfs/xfs_bmap_item.c | 4 +++- + fs/xfs/xfs_extfree_item.c | 4 +++- + fs/xfs/xfs_refcount_item.c | 6 ++++-- + fs/xfs/xfs_rmap_item.c | 6 ++++-- + 6 files changed, 40 insertions(+), 9 deletions(-) + +--- a/fs/xfs/libxfs/xfs_log_recover.h ++++ b/fs/xfs/libxfs/xfs_log_recover.h +@@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct + #define xlog_check_buf_cancel_table(log) do { } while (0) + #endif + ++/* ++ * Transform a regular reservation into one suitable for recovery of a log ++ * intent item. ++ * ++ * Intent recovery only runs a single step of the transaction chain and defers ++ * the rest to a separate transaction. Therefore, we reduce logcount to 1 here ++ * to avoid livelocks if the log grant space is nearly exhausted due to the ++ * recovered intent pinning the tail. Keep the same logflags to avoid tripping ++ * asserts elsewhere. Struct copies abound below. 
++ */ ++static inline struct xfs_trans_res ++xlog_recover_resv(const struct xfs_trans_res *r) ++{ ++ struct xfs_trans_res ret = { ++ .tr_logres = r->tr_logres, ++ .tr_logcount = 1, ++ .tr_logflags = r->tr_logflags, ++ }; ++ ++ return ret; ++} ++ + #endif /* __XFS_LOG_RECOVER_H__ */ +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -547,7 +547,7 @@ xfs_attri_item_recover( + struct xfs_inode *ip; + struct xfs_da_args *args; + struct xfs_trans *tp; +- struct xfs_trans_res tres; ++ struct xfs_trans_res resv; + struct xfs_attri_log_format *attrp; + struct xfs_attri_log_nameval *nv = attrip->attri_nameval; + int error; +@@ -618,8 +618,9 @@ xfs_attri_item_recover( + goto out; + } + +- xfs_init_attr_trans(args, &tres, &total); +- error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp); ++ xfs_init_attr_trans(args, &resv, &total); ++ resv = xlog_recover_resv(&resv); ++ error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp); + if (error) + goto out; + +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -457,6 +457,7 @@ xfs_bui_item_recover( + struct list_head *capture_list) + { + struct xfs_bmap_intent fake = { }; ++ struct xfs_trans_res resv; + struct xfs_bui_log_item *buip = BUI_ITEM(lip); + struct xfs_trans *tp; + struct xfs_inode *ip = NULL; +@@ -482,7 +483,8 @@ xfs_bui_item_recover( + return error; + + /* Allocate transaction and do the work. 
*/ +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, ++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); ++ error = xfs_trans_alloc(mp, &resv, + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); + if (error) + goto err_rele; +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -598,6 +598,7 @@ xfs_efi_item_recover( + struct xfs_log_item *lip, + struct list_head *capture_list) + { ++ struct xfs_trans_res resv; + struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_mount *mp = lip->li_log->l_mp; + struct xfs_efd_log_item *efdp; +@@ -620,7 +621,8 @@ xfs_efi_item_recover( + } + } + +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); ++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); ++ error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); + if (error) + return error; + efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -453,6 +453,7 @@ xfs_cui_item_recover( + struct xfs_log_item *lip, + struct list_head *capture_list) + { ++ struct xfs_trans_res resv; + struct xfs_cui_log_item *cuip = CUI_ITEM(lip); + struct xfs_cud_log_item *cudp; + struct xfs_trans *tp; +@@ -490,8 +491,9 @@ xfs_cui_item_recover( + * doesn't fit. We need to reserve enough blocks to handle a + * full btree split on either end of the refcount range. 
+ */ +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, +- mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); ++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); ++ error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, ++ XFS_TRANS_RESERVE, &tp); + if (error) + return error; + +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -492,6 +492,7 @@ xfs_rui_item_recover( + struct xfs_log_item *lip, + struct list_head *capture_list) + { ++ struct xfs_trans_res resv; + struct xfs_rui_log_item *ruip = RUI_ITEM(lip); + struct xfs_map_extent *rmap; + struct xfs_rud_log_item *rudp; +@@ -519,8 +520,9 @@ xfs_rui_item_recover( + } + } + +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, +- mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); ++ resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); ++ error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0, ++ XFS_TRANS_RESERVE, &tp); + if (error) + return error; + rudp = xfs_trans_get_rud(tp, ruip); diff --git a/queue-6.1/xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch b/queue-6.1/xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch new file mode 100644 index 0000000000..a611821b04 --- /dev/null +++ b/queue-6.1/xfs-reset-xfs_attr_incomplete-filter-on-node-removal.patch @@ -0,0 +1,51 @@ +From stable+bounces-124385-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:47 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:48 -0700 +Subject: xfs: reset XFS_ATTR_INCOMPLETE filter on node removal +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Andrey Albershteyn , Christoph Hellwig , Chandan Babu R , Leah Rumancik , "Darrick J. Wong" +Message-ID: <20250313202550.2257219-29-leah.rumancik@gmail.com> + +From: Andrey Albershteyn + +[ Upstream commit 82ef1a5356572219f41f9123ca047259a77bd67b ] + +In XFS_DAS_NODE_REMOVE_ATTR case, xfs_attr_mode_remove_attr() sets +filter to XFS_ATTR_INCOMPLETE. 
The filter is then reset in +xfs_attr_complete_op() if XFS_DA_OP_REPLACE operation is performed. + +The filter is not reset though if XFS just removes the attribute +(args->value == NULL) with xfs_attr_defer_remove(). attr code goes +to XFS_DAS_DONE state. + +Fix this by always resetting XFS_ATTR_INCOMPLETE filter. The replace +operation already resets this filter anyway and others are +completed at this step hence don't need it. + +Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") +Signed-off-by: Andrey Albershteyn +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_attr.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/xfs/libxfs/xfs_attr.c ++++ b/fs/xfs/libxfs/xfs_attr.c +@@ -421,10 +421,10 @@ xfs_attr_complete_op( + bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; + + args->op_flags &= ~XFS_DA_OP_REPLACE; +- if (do_replace) { +- args->attr_filter &= ~XFS_ATTR_INCOMPLETE; ++ args->attr_filter &= ~XFS_ATTR_INCOMPLETE; ++ if (do_replace) + return replace_state; +- } ++ + return XFS_DAS_DONE; + } + diff --git a/queue-6.1/xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch b/queue-6.1/xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch new file mode 100644 index 0000000000..4d05b3ca93 --- /dev/null +++ b/queue-6.1/xfs-transfer-recovered-intent-item-ownership-in-iop_recover.patch @@ -0,0 +1,137 @@ +From stable+bounces-124373-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:34 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:36 -0700 +Subject: xfs: transfer recovered intent item ownership in ->iop_recover +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Leah Rumancik +Message-ID: <20250313202550.2257219-17-leah.rumancik@gmail.com> + +From: "Darrick J. 
Wong" + +[ Upstream commit deb4cd8ba87f17b12c72b3827820d9c703e9fd95 ] + +Now that we pass the xfs_defer_pending object into the intent item +recovery functions, we know exactly when ownership of the sole refcount +passes from the recovery context to the intent done item. At that +point, we need to null out dfp_intent so that the recovery mechanism +won't release it. This should fix the UAF problem reported by Long Li. + +Note that we still want to recreate the full deferred work state. That +will be addressed in the next patches. + +Fixes: 2e76f188fd90 ("xfs: cancel intents immediately if process_intents fails") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_log_recover.h | 2 ++ + fs/xfs/xfs_attr_item.c | 1 + + fs/xfs/xfs_bmap_item.c | 2 ++ + fs/xfs/xfs_extfree_item.c | 2 ++ + fs/xfs/xfs_log_recover.c | 19 ++++++++++++------- + fs/xfs/xfs_refcount_item.c | 1 + + fs/xfs/xfs_rmap_item.c | 2 ++ + 7 files changed, 22 insertions(+), 7 deletions(-) + +--- a/fs/xfs/libxfs/xfs_log_recover.h ++++ b/fs/xfs/libxfs/xfs_log_recover.h +@@ -155,5 +155,7 @@ xlog_recover_resv(const struct xfs_trans + + void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip, + xfs_lsn_t lsn, unsigned int dfp_type); ++void xlog_recover_transfer_intent(struct xfs_trans *tp, ++ struct xfs_defer_pending *dfp); + + #endif /* __XFS_LOG_RECOVER_H__ */ +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -632,6 +632,7 @@ xfs_attri_item_recover( + + args->trans = tp; + done_item = xfs_trans_get_attrd(tp, attrip); ++ xlog_recover_transfer_intent(tp, dfp); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -491,6 +491,8 @@ xfs_bui_item_recover( + goto err_rele; + + budp = xfs_trans_get_bud(tp, buip); ++ xlog_recover_transfer_intent(tp, dfp); ++ + xfs_ilock(ip, 
XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -626,7 +626,9 @@ xfs_efi_item_recover( + error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); + if (error) + return error; ++ + efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); ++ xlog_recover_transfer_intent(tp, dfp); + + for (i = 0; i < efip->efi_format.efi_nextents; i++) { + struct xfs_extent_free_item fake = { +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2593,13 +2593,6 @@ xlog_recover_process_intents( + break; + } + +- /* +- * XXX: @lip could have been freed, so detach the log item from +- * the pending item before freeing the pending item. This does +- * not fix the existing UAF bug that occurs if ->iop_recover +- * fails after creating the intent done item. +- */ +- dfp->dfp_intent = NULL; + xfs_defer_cancel_recovery(log->l_mp, dfp); + } + if (error) +@@ -2634,6 +2627,18 @@ xlog_recover_cancel_intents( + } + + /* ++ * Transfer ownership of the recovered log intent item to the recovery ++ * transaction. ++ */ ++void ++xlog_recover_transfer_intent( ++ struct xfs_trans *tp, ++ struct xfs_defer_pending *dfp) ++{ ++ dfp->dfp_intent = NULL; ++} ++ ++/* + * This routine performs a transaction to null out a bad inode pointer + * in an agi unlinked inode hash bucket. 
+ */ +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -499,6 +499,7 @@ xfs_cui_item_recover( + return error; + + cudp = xfs_trans_get_cud(tp, cuip); ++ xlog_recover_transfer_intent(tp, dfp); + + for (i = 0; i < cuip->cui_format.cui_nextents; i++) { + struct xfs_refcount_intent fake = { }; +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -526,7 +526,9 @@ xfs_rui_item_recover( + XFS_TRANS_RESERVE, &tp); + if (error) + return error; ++ + rudp = xfs_trans_get_rud(tp, ruip); ++ xlog_recover_transfer_intent(tp, dfp); + + for (i = 0; i < ruip->rui_format.rui_nextents; i++) { + rmap = &ruip->rui_format.rui_extents[i]; diff --git a/queue-6.1/xfs-update-dir3-leaf-block-metadata-after-swap.patch b/queue-6.1/xfs-update-dir3-leaf-block-metadata-after-swap.patch new file mode 100644 index 0000000000..2df302e10a --- /dev/null +++ b/queue-6.1/xfs-update-dir3-leaf-block-metadata-after-swap.patch @@ -0,0 +1,71 @@ +From stable+bounces-124384-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:48 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:47 -0700 +Subject: xfs: update dir3 leaf block metadata after swap +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Zhang Tianci , Dave Chinner , "Darrick J. Wong" , Chandan Babu R , Leah Rumancik +Message-ID: <20250313202550.2257219-28-leah.rumancik@gmail.com> + +From: Zhang Tianci + +[ Upstream commit 5759aa4f956034b289b0ae2c99daddfc775442e1 ] + +xfs_da3_swap_lastblock() copy the last block content to the dead block, +but do not update the metadata in it. We need update some metadata +for some kinds of type block, such as dir3 leafn block records its +blkno, we shall update it to the dead block blkno. Otherwise, +before write the xfs_buf to disk, the verify_write() will fail in +blk_hdr->blkno != xfs_buf->b_bn, then xfs will be shutdown. 
+ +We will get this warning: + + XFS (dm-0): Metadata corruption detected at xfs_dir3_leaf_verify+0xa8/0xe0 [xfs], xfs_dir3_leafn block 0x178 + XFS (dm-0): Unmount and run xfs_repair + XFS (dm-0): First 128 bytes of corrupted metadata buffer: + 00000000e80f1917: 00 80 00 0b 00 80 00 07 3d ff 00 00 00 00 00 00 ........=....... + 000000009604c005: 00 00 00 00 00 00 01 a0 00 00 00 00 00 00 00 00 ................ + 000000006b6fb2bf: e4 44 e3 97 b5 64 44 41 8b 84 60 0e 50 43 d9 bf .D...dDA..`.PC.. + 00000000678978a2: 00 00 00 00 00 00 00 83 01 73 00 93 00 00 00 00 .........s...... + 00000000b28b247c: 99 29 1d 38 00 00 00 00 99 29 1d 40 00 00 00 00 .).8.....).@.... + 000000002b2a662c: 99 29 1d 48 00 00 00 00 99 49 11 00 00 00 00 00 .).H.....I...... + 00000000ea2ffbb8: 99 49 11 08 00 00 45 25 99 49 11 10 00 00 48 fe .I....E%.I....H. + 0000000069e86440: 99 49 11 18 00 00 4c 6b 99 49 11 20 00 00 4d 97 .I....Lk.I. ..M. + XFS (dm-0): xfs_do_force_shutdown(0x8) called from line 1423 of file fs/xfs/xfs_buf.c. Return address = 00000000c0ff63c1 + XFS (dm-0): Corruption of in-memory data detected. Shutting down filesystem + XFS (dm-0): Please umount the filesystem and rectify the problem(s) + +>>From the log above, we know xfs_buf->b_no is 0x178, but the block's hdr record +its blkno is 0x1a0. + +Fixes: 24df33b45ecf ("xfs: add CRC checking to dir2 leaf blocks") +Signed-off-by: Zhang Tianci +Suggested-by: Dave Chinner +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_da_btree.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -2316,10 +2316,17 @@ xfs_da3_swap_lastblock( + return error; + /* + * Copy the last block into the dead buffer and log it. ++ * On CRC-enabled file systems, also update the stamped in blkno. 
+ */ + memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); ++ if (xfs_has_crc(mp)) { ++ struct xfs_da3_blkinfo *da3 = dead_buf->b_addr; ++ ++ da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf)); ++ } + xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); + dead_info = dead_buf->b_addr; ++ + /* + * Get values from the moved block. + */ diff --git a/queue-6.1/xfs-use-deferred-frees-for-btree-block-freeing.patch b/queue-6.1/xfs-use-deferred-frees-for-btree-block-freeing.patch new file mode 100644 index 0000000000..63b234cff4 --- /dev/null +++ b/queue-6.1/xfs-use-deferred-frees-for-btree-block-freeing.patch @@ -0,0 +1,298 @@ +From stable+bounces-124365-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:21 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:28 -0700 +Subject: xfs: use deferred frees for btree block freeing +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Dave Chinner , "Darrick J. Wong" , Chandan Babu R , Leah Rumancik +Message-ID: <20250313202550.2257219-9-leah.rumancik@gmail.com> + +From: Dave Chinner + +[ Upstream commit b742d7b4f0e03df25c2a772adcded35044b625ca ] + +[ 6.1: resolved conflict in xfs_extfree_item.c ] + +Btrees that aren't freespace management trees use the normal extent +allocation and freeing routines for their blocks. Hence when a btree +block is freed, a direct call to xfs_free_extent() is made and the +extent is immediately freed. This puts the entire free space +management btrees under this path, so we are stacking btrees on +btrees in the call stack. The inobt, finobt and refcount btrees +all do this. + +However, the bmap btree does not do this - it calls +xfs_free_extent_later() to defer the extent free operation via an +XEFI and hence it gets processed in deferred operation processing +during the commit of the primary transaction (i.e. via intent +chaining). 
+ +We need to change xfs_free_extent() to behave in a non-blocking +manner so that we can avoid deadlocks with busy extents near ENOSPC +in transactions that free multiple extents. Inserting or removing a +record from a btree can cause a multi-level tree merge operation and +that will free multiple blocks from the btree in a single +transaction. i.e. we can call xfs_free_extent() multiple times, and +hence the btree manipulation transaction is vulnerable to this busy +extent deadlock vector. + +To fix this, convert all the remaining callers of xfs_free_extent() +to use xfs_free_extent_later() to queue XEFIs and hence defer +processing of the extent frees to a context that can be safely +restarted if a deadlock condition is detected. + +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag.c | 2 +- + fs/xfs/libxfs/xfs_alloc.c | 4 ++++ + fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- + fs/xfs/libxfs/xfs_bmap.c | 8 +++++--- + fs/xfs/libxfs/xfs_bmap_btree.c | 3 ++- + fs/xfs/libxfs/xfs_ialloc.c | 8 ++++---- + fs/xfs/libxfs/xfs_ialloc_btree.c | 3 +-- + fs/xfs/libxfs/xfs_refcount.c | 9 ++++++--- + fs/xfs/libxfs/xfs_refcount_btree.c | 8 +------- + fs/xfs/xfs_extfree_item.c | 3 ++- + fs/xfs/xfs_reflink.c | 3 ++- + 11 files changed, 33 insertions(+), 26 deletions(-) + +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -907,7 +907,7 @@ xfs_ag_shrink_space( + goto resv_err; + + err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, +- true); ++ XFS_AG_RESV_NONE, true); + if (err2) + goto resv_err; + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2507,6 +2507,7 @@ xfs_defer_agfl_block( + xefi->xefi_startblock = fsbno; + xefi->xefi_blockcount = 1; + xefi->xefi_owner = oinfo->oi_owner; ++ xefi->xefi_agresv = XFS_AG_RESV_AGFL; + + trace_xfs_agfl_free_defer(mp, 
agno, 0, agbno, 1); + +@@ -2524,6 +2525,7 @@ __xfs_free_extent_later( + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, ++ enum xfs_ag_resv_type type, + bool skip_discard) + { + struct xfs_extent_free_item *xefi; +@@ -2544,6 +2546,7 @@ __xfs_free_extent_later( + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); + #endif + ASSERT(xfs_extfree_item_cache != NULL); ++ ASSERT(type != XFS_AG_RESV_AGFL); + + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; +@@ -2552,6 +2555,7 @@ __xfs_free_extent_later( + GFP_KERNEL | __GFP_NOFAIL); + xefi->xefi_startblock = bno; + xefi->xefi_blockcount = (xfs_extlen_t)len; ++ xefi->xefi_agresv = type; + if (skip_discard) + xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { +--- a/fs/xfs/libxfs/xfs_alloc.h ++++ b/fs/xfs/libxfs/xfs_alloc.h +@@ -215,7 +215,7 @@ xfs_buf_to_agfl_bno( + + int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, +- bool skip_discard); ++ enum xfs_ag_resv_type type, bool skip_discard); + + /* + * List of extents to be free "later". 
+@@ -227,6 +227,7 @@ struct xfs_extent_free_item { + xfs_fsblock_t xefi_startblock;/* starting fs block number */ + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + unsigned int xefi_flags; ++ enum xfs_ag_resv_type xefi_agresv; + }; + + #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ +@@ -238,9 +239,10 @@ xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, +- const struct xfs_owner_info *oinfo) ++ const struct xfs_owner_info *oinfo, ++ enum xfs_ag_resv_type type) + { +- return __xfs_free_extent_later(tp, bno, len, oinfo, false); ++ return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); + } + + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents( + return error; + + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); +- error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ++ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, ++ XFS_AG_RESV_NONE); + if (error) + return error; + +@@ -5208,8 +5209,9 @@ xfs_bmap_del_extent_real( + } else { + error = __xfs_free_extent_later(tp, del->br_startblock, + del->br_blockcount, NULL, +- (bflags & XFS_BMAPI_NODISCARD) || +- del->br_state == XFS_EXT_UNWRITTEN); ++ XFS_AG_RESV_NONE, ++ ((bflags & XFS_BMAPI_NODISCARD) || ++ del->br_state == XFS_EXT_UNWRITTEN)); + if (error) + goto done; + } +--- a/fs/xfs/libxfs/xfs_bmap_btree.c ++++ b/fs/xfs/libxfs/xfs_bmap_btree.c +@@ -288,7 +288,8 @@ xfs_bmbt_free_block( + int error; + + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); +- error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); ++ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, ++ XFS_AG_RESV_NONE); + if (error) + return error; + +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -1846,8 +1846,8 @@ xfs_difree_inode_chunk( + /* not sparse, calculate extent info directly */ + return xfs_free_extent_later(tp, + 
XFS_AGB_TO_FSB(mp, agno, sagbno), +- M_IGEO(mp)->ialloc_blks, +- &XFS_RMAP_OINFO_INODES); ++ M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, ++ XFS_AG_RESV_NONE); + } + + /* holemask is only 16-bits (fits in an unsigned long) */ +@@ -1892,8 +1892,8 @@ xfs_difree_inode_chunk( + ASSERT(agbno % mp->m_sb.sb_spino_align == 0); + ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); + error = xfs_free_extent_later(tp, +- XFS_AGB_TO_FSB(mp, agno, agbno), +- contigblk, &XFS_RMAP_OINFO_INODES); ++ XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, ++ &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); + if (error) + return error; + +--- a/fs/xfs/libxfs/xfs_ialloc_btree.c ++++ b/fs/xfs/libxfs/xfs_ialloc_btree.c +@@ -160,8 +160,7 @@ __xfs_inobt_free_block( + + xfs_inobt_mod_blockcount(cur, -1); + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); +- return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, +- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, ++ return xfs_free_extent_later(cur->bc_tp, fsbno, 1, + &XFS_RMAP_OINFO_INOBT, resv); + } + +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1130,7 +1130,8 @@ xfs_refcount_adjust_extents( + cur->bc_ag.pag->pag_agno, + tmp.rc_startblock); + error = xfs_free_extent_later(cur->bc_tp, fsbno, +- tmp.rc_blockcount, NULL); ++ tmp.rc_blockcount, NULL, ++ XFS_AG_RESV_NONE); + if (error) + goto out_error; + } +@@ -1191,7 +1192,8 @@ xfs_refcount_adjust_extents( + cur->bc_ag.pag->pag_agno, + ext.rc_startblock); + error = xfs_free_extent_later(cur->bc_tp, fsbno, +- ext.rc_blockcount, NULL); ++ ext.rc_blockcount, NULL, ++ XFS_AG_RESV_NONE); + if (error) + goto out_error; + } +@@ -1963,7 +1965,8 @@ xfs_refcount_recover_cow_leftovers( + + /* Free the block. 
*/ + error = xfs_free_extent_later(tp, fsb, +- rr->rr_rrec.rc_blockcount, NULL); ++ rr->rr_rrec.rc_blockcount, NULL, ++ XFS_AG_RESV_NONE); + if (error) + goto out_trans; + +--- a/fs/xfs/libxfs/xfs_refcount_btree.c ++++ b/fs/xfs/libxfs/xfs_refcount_btree.c +@@ -106,19 +106,13 @@ xfs_refcountbt_free_block( + struct xfs_buf *agbp = cur->bc_ag.agbp; + struct xfs_agf *agf = agbp->b_addr; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); +- int error; + + trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); + be32_add_cpu(&agf->agf_refcount_blocks, -1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); +- error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, +- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, ++ return xfs_free_extent_later(cur->bc_tp, fsbno, 1, + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); +- if (error) +- return error; +- +- return error; + } + + STATIC int +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -369,7 +369,7 @@ xfs_trans_free_extent( + + pag = xfs_perag_get(mp, agno); + error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount, +- &oinfo, XFS_AG_RESV_NONE, ++ &oinfo, xefi->xefi_agresv, + xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); + xfs_perag_put(pag); + +@@ -628,6 +628,7 @@ xfs_efi_item_recover( + for (i = 0; i < efip->efi_format.efi_nextents; i++) { + struct xfs_extent_free_item fake = { + .xefi_owner = XFS_RMAP_OWN_UNKNOWN, ++ .xefi_agresv = XFS_AG_RESV_NONE, + }; + struct xfs_extent *extp; + +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -619,7 +619,8 @@ xfs_reflink_cancel_cow_blocks( + del.br_blockcount); + + error = xfs_free_extent_later(*tpp, del.br_startblock, +- del.br_blockcount, NULL); ++ del.br_blockcount, NULL, ++ XFS_AG_RESV_NONE); + if (error) + break; + diff --git a/queue-6.1/xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch 
b/queue-6.1/xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch new file mode 100644 index 0000000000..dd75c779ef --- /dev/null +++ b/queue-6.1/xfs-use-xfs_defer_pending-objects-to-recover-intent-items.patch @@ -0,0 +1,527 @@ +From stable+bounces-124371-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:31 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:34 -0700 +Subject: xfs: use xfs_defer_pending objects to recover intent items +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, "Darrick J. Wong" , Christoph Hellwig , Catherine Hoang , Greg Kroah-Hartman , Leah Rumancik +Message-ID: <20250313202550.2257219-15-leah.rumancik@gmail.com> + +From: "Darrick J. Wong" + +[ Upstream commit 03f7767c9f6120ac933378fdec3bfd78bf07bc11 ] + +[ 6.1: resolved conflict in xfs_defer.c ] + +One thing I never quite got around to doing is porting the log intent +item recovery code to reconstruct the deferred pending work state. As a +result, each intent item open codes xfs_defer_finish_one in its recovery +method, because that's what the EFI code did before xfs_defer.c even +existed. + +This is a gross thing to have left unfixed -- if an EFI cannot proceed +due to busy extents, we end up creating separate new EFIs for each +unfinished work item, which is a change in behavior from what runtime +would have done. + +Worse yet, Long Li pointed out that there's a UAF in the recovery code. +The ->commit_pass2 function adds the intent item to the AIL and drops +the refcount. The one remaining refcount is now owned by the recovery +mechanism (aka the log intent items in the AIL) with the intent of +giving the refcount to the intent done item in the ->iop_recover +function. + +However, if something fails later in recovery, xlog_recover_finish will +walk the recovered intent items in the AIL and release them. 
If the CIL +hasn't been pushed before that point (which is possible since we don't +force the log until later) then the intent done release will try to free +its associated intent, which has already been freed. + +This patch starts to address this mess by having the ->commit_pass2 +functions recreate the xfs_defer_pending state. The next few patches +will fix the recovery functions. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_defer.c | 103 ++++++++++++++++++++++++++--------- + fs/xfs/libxfs/xfs_defer.h | 5 + + fs/xfs/libxfs/xfs_log_recover.h | 3 + + fs/xfs/xfs_attr_item.c | 10 --- + fs/xfs/xfs_bmap_item.c | 9 +-- + fs/xfs/xfs_extfree_item.c | 9 +-- + fs/xfs/xfs_log.c | 1 + fs/xfs/xfs_log_priv.h | 1 + fs/xfs/xfs_log_recover.c | 115 ++++++++++++++++++++-------------------- + fs/xfs/xfs_refcount_item.c | 9 +-- + fs/xfs/xfs_rmap_item.c | 9 +-- + 11 files changed, 158 insertions(+), 116 deletions(-) + +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -245,23 +245,52 @@ xfs_defer_create_intents( + return ret; + } + +-STATIC void ++static inline void + xfs_defer_pending_abort( + struct xfs_mount *mp, ++ struct xfs_defer_pending *dfp) ++{ ++ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; ++ ++ trace_xfs_defer_pending_abort(mp, dfp); ++ ++ if (dfp->dfp_intent && !dfp->dfp_done) { ++ ops->abort_intent(dfp->dfp_intent); ++ dfp->dfp_intent = NULL; ++ } ++} ++ ++static inline void ++xfs_defer_pending_cancel_work( ++ struct xfs_mount *mp, ++ struct xfs_defer_pending *dfp) ++{ ++ const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; ++ struct list_head *pwi; ++ struct list_head *n; ++ ++ trace_xfs_defer_cancel_list(mp, dfp); ++ ++ list_del(&dfp->dfp_list); ++ list_for_each_safe(pwi, n, 
&dfp->dfp_work) { ++ list_del(pwi); ++ dfp->dfp_count--; ++ ops->cancel_item(pwi); ++ } ++ ASSERT(dfp->dfp_count == 0); ++ kmem_cache_free(xfs_defer_pending_cache, dfp); ++} ++ ++STATIC void ++xfs_defer_pending_abort_list( ++ struct xfs_mount *mp, + struct list_head *dop_list) + { + struct xfs_defer_pending *dfp; +- const struct xfs_defer_op_type *ops; + + /* Abort intent items that don't have a done item. */ +- list_for_each_entry(dfp, dop_list, dfp_list) { +- ops = defer_op_types[dfp->dfp_type]; +- trace_xfs_defer_pending_abort(mp, dfp); +- if (dfp->dfp_intent && !dfp->dfp_done) { +- ops->abort_intent(dfp->dfp_intent); +- dfp->dfp_intent = NULL; +- } +- } ++ list_for_each_entry(dfp, dop_list, dfp_list) ++ xfs_defer_pending_abort(mp, dfp); + } + + /* Abort all the intents that were committed. */ +@@ -271,7 +300,7 @@ xfs_defer_trans_abort( + struct list_head *dop_pending) + { + trace_xfs_defer_trans_abort(tp, _RET_IP_); +- xfs_defer_pending_abort(tp->t_mountp, dop_pending); ++ xfs_defer_pending_abort_list(tp->t_mountp, dop_pending); + } + + /* +@@ -389,26 +418,13 @@ xfs_defer_cancel_list( + { + struct xfs_defer_pending *dfp; + struct xfs_defer_pending *pli; +- struct list_head *pwi; +- struct list_head *n; +- const struct xfs_defer_op_type *ops; + + /* + * Free the pending items. Caller should already have arranged + * for the intent items to be released. 
+ */ +- list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) { +- ops = defer_op_types[dfp->dfp_type]; +- trace_xfs_defer_cancel_list(mp, dfp); +- list_del(&dfp->dfp_list); +- list_for_each_safe(pwi, n, &dfp->dfp_work) { +- list_del(pwi); +- dfp->dfp_count--; +- ops->cancel_item(pwi); +- } +- ASSERT(dfp->dfp_count == 0); +- kmem_cache_free(xfs_defer_pending_cache, dfp); +- } ++ list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) ++ xfs_defer_pending_cancel_work(mp, dfp); + } + + /* +@@ -664,6 +680,39 @@ xfs_defer_add( + } + + /* ++ * Create a pending deferred work item to replay the recovered intent item ++ * and add it to the list. ++ */ ++void ++xfs_defer_start_recovery( ++ struct xfs_log_item *lip, ++ enum xfs_defer_ops_type dfp_type, ++ struct list_head *r_dfops) ++{ ++ struct xfs_defer_pending *dfp; ++ ++ dfp = kmem_cache_zalloc(xfs_defer_pending_cache, ++ GFP_NOFS | __GFP_NOFAIL); ++ dfp->dfp_type = dfp_type; ++ dfp->dfp_intent = lip; ++ INIT_LIST_HEAD(&dfp->dfp_work); ++ list_add_tail(&dfp->dfp_list, r_dfops); ++} ++ ++/* ++ * Cancel a deferred work item created to recover a log intent item. @dfp ++ * will be freed after this function returns. ++ */ ++void ++xfs_defer_cancel_recovery( ++ struct xfs_mount *mp, ++ struct xfs_defer_pending *dfp) ++{ ++ xfs_defer_pending_abort(mp, dfp); ++ xfs_defer_pending_cancel_work(mp, dfp); ++} ++ ++/* + * Move deferred ops from one transaction to another and reset the source to + * initial state. This is primarily used to carry state forward across + * transaction rolls with pending dfops. 
+@@ -767,7 +816,7 @@ xfs_defer_ops_capture_abort( + { + unsigned short i; + +- xfs_defer_pending_abort(mp, &dfc->dfc_dfops); ++ xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops); + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) +--- a/fs/xfs/libxfs/xfs_defer.h ++++ b/fs/xfs/libxfs/xfs_defer.h +@@ -125,6 +125,11 @@ void xfs_defer_ops_capture_abort(struct + struct xfs_defer_capture *d); + void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + ++void xfs_defer_start_recovery(struct xfs_log_item *lip, ++ enum xfs_defer_ops_type dfp_type, struct list_head *r_dfops); ++void xfs_defer_cancel_recovery(struct xfs_mount *mp, ++ struct xfs_defer_pending *dfp); ++ + int __init xfs_defer_init_item_caches(void); + void xfs_defer_destroy_item_caches(void); + +--- a/fs/xfs/libxfs/xfs_log_recover.h ++++ b/fs/xfs/libxfs/xfs_log_recover.h +@@ -153,4 +153,7 @@ xlog_recover_resv(const struct xfs_trans + return ret; + } + ++void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip, ++ xfs_lsn_t lsn, unsigned int dfp_type); ++ + #endif /* __XFS_LOG_RECOVER_H__ */ +--- a/fs/xfs/xfs_attr_item.c ++++ b/fs/xfs/xfs_attr_item.c +@@ -772,14 +772,8 @@ xlog_recover_attri_commit_pass2( + attrip = xfs_attri_init(mp, nv); + memcpy(&attrip->attri_format, attri_formatp, len); + +- /* +- * The ATTRI has two references. One for the ATTRD and one for ATTRI to +- * ensure it makes it into the AIL. Insert the ATTRI into the AIL +- * directly and drop the ATTRI reference. Note that +- * xfs_trans_ail_update() drops the AIL lock. 
+- */ +- xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn); +- xfs_attri_release(attrip); ++ xlog_recover_intent_item(log, &attrip->attri_item, lsn, ++ XFS_DEFER_OPS_TYPE_ATTR); + xfs_attri_log_nameval_put(nv); + return 0; + } +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -646,12 +646,9 @@ xlog_recover_bui_commit_pass2( + buip = xfs_bui_init(mp); + xfs_bui_copy_format(&buip->bui_format, bui_formatp); + atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents); +- /* +- * Insert the intent into the AIL directly and drop one reference so +- * that finishing or canceling the work will drop the other. +- */ +- xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn); +- xfs_bui_release(buip); ++ ++ xlog_recover_intent_item(log, &buip->bui_item, lsn, ++ XFS_DEFER_OPS_TYPE_BMAP); + return 0; + } + +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -736,12 +736,9 @@ xlog_recover_efi_commit_pass2( + return error; + } + atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents); +- /* +- * Insert the intent into the AIL directly and drop one reference so +- * that finishing or canceling the work will drop the other. 
+- */ +- xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn); +- xfs_efi_release(efip); ++ ++ xlog_recover_intent_item(log, &efip->efi_item, lsn, ++ XFS_DEFER_OPS_TYPE_FREE); + return 0; + } + +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1540,6 +1540,7 @@ xlog_alloc_log( + log->l_covered_state = XLOG_STATE_COVER_IDLE; + set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); + INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); ++ INIT_LIST_HEAD(&log->r_dfops); + + log->l_prev_block = -1; + /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -403,6 +403,7 @@ struct xlog { + long l_opstate; /* operational state */ + uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ + struct list_head *l_buf_cancel_table; ++ struct list_head r_dfops; /* recovered log intent items */ + int l_iclog_hsize; /* size of iclog header */ + int l_iclog_heads; /* # of iclog header sectors */ + uint l_sectBBsize; /* sector size in BBs (2^n) */ +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -1723,30 +1723,24 @@ xlog_clear_stale_blocks( + */ + void + xlog_recover_release_intent( +- struct xlog *log, +- unsigned short intent_type, +- uint64_t intent_id) ++ struct xlog *log, ++ unsigned short intent_type, ++ uint64_t intent_id) + { +- struct xfs_ail_cursor cur; +- struct xfs_log_item *lip; +- struct xfs_ail *ailp = log->l_ailp; +- +- spin_lock(&ailp->ail_lock); +- for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; +- lip = xfs_trans_ail_cursor_next(ailp, &cur)) { ++ struct xfs_defer_pending *dfp, *n; ++ ++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { ++ struct xfs_log_item *lip = dfp->dfp_intent; ++ + if (lip->li_type != intent_type) + continue; + if (!lip->li_ops->iop_match(lip, intent_id)) + continue; + +- spin_unlock(&ailp->ail_lock); +- lip->li_ops->iop_release(lip); +- spin_lock(&ailp->ail_lock); +- break; +- } ++ ASSERT(xlog_item_is_intent(lip)); + +- 
xfs_trans_ail_cursor_done(&cur); +- spin_unlock(&ailp->ail_lock); ++ xfs_defer_cancel_recovery(log->l_mp, dfp); ++ } + } + + int +@@ -1939,6 +1933,29 @@ xlog_buf_readahead( + xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops); + } + ++/* ++ * Create a deferred work structure for resuming and tracking the progress of a ++ * log intent item that was found during recovery. ++ */ ++void ++xlog_recover_intent_item( ++ struct xlog *log, ++ struct xfs_log_item *lip, ++ xfs_lsn_t lsn, ++ unsigned int dfp_type) ++{ ++ ASSERT(xlog_item_is_intent(lip)); ++ ++ xfs_defer_start_recovery(lip, dfp_type, &log->r_dfops); ++ ++ /* ++ * Insert the intent into the AIL directly and drop one reference so ++ * that finishing or canceling the work will drop the other. ++ */ ++ xfs_trans_ail_insert(log->l_ailp, lip, lsn); ++ lip->li_ops->iop_unpin(lip, 0); ++} ++ + STATIC int + xlog_recover_items_pass2( + struct xlog *log, +@@ -2536,29 +2553,22 @@ xlog_abort_defer_ops( + */ + STATIC int + xlog_recover_process_intents( +- struct xlog *log) ++ struct xlog *log) + { + LIST_HEAD(capture_list); +- struct xfs_ail_cursor cur; +- struct xfs_log_item *lip; +- struct xfs_ail *ailp; +- int error = 0; ++ struct xfs_defer_pending *dfp, *n; ++ int error = 0; + #if defined(DEBUG) || defined(XFS_WARN) +- xfs_lsn_t last_lsn; +-#endif ++ xfs_lsn_t last_lsn; + +- ailp = log->l_ailp; +- spin_lock(&ailp->ail_lock); +-#if defined(DEBUG) || defined(XFS_WARN) + last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); + #endif +- for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); +- lip != NULL; +- lip = xfs_trans_ail_cursor_next(ailp, &cur)) { +- const struct xfs_item_ops *ops; + +- if (!xlog_item_is_intent(lip)) +- break; ++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { ++ struct xfs_log_item *lip = dfp->dfp_intent; ++ const struct xfs_item_ops *ops = lip->li_ops; ++ ++ ASSERT(xlog_item_is_intent(lip)); + + /* + * We should never see a redo item with a LSN higher than +@@ 
-2576,19 +2586,22 @@ xlog_recover_process_intents( + * The recovery function can free the log item, so we must not + * access lip after it returns. + */ +- spin_unlock(&ailp->ail_lock); +- ops = lip->li_ops; + error = ops->iop_recover(lip, &capture_list); +- spin_lock(&ailp->ail_lock); + if (error) { + trace_xlog_intent_recovery_failed(log->l_mp, error, + ops->iop_recover); + break; + } +- } + +- xfs_trans_ail_cursor_done(&cur); +- spin_unlock(&ailp->ail_lock); ++ /* ++ * XXX: @lip could have been freed, so detach the log item from ++ * the pending item before freeing the pending item. This does ++ * not fix the existing UAF bug that occurs if ->iop_recover ++ * fails after creating the intent done item. ++ */ ++ dfp->dfp_intent = NULL; ++ xfs_defer_cancel_recovery(log->l_mp, dfp); ++ } + if (error) + goto err; + +@@ -2609,27 +2622,15 @@ err: + */ + STATIC void + xlog_recover_cancel_intents( +- struct xlog *log) ++ struct xlog *log) + { +- struct xfs_log_item *lip; +- struct xfs_ail_cursor cur; +- struct xfs_ail *ailp; +- +- ailp = log->l_ailp; +- spin_lock(&ailp->ail_lock); +- lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); +- while (lip != NULL) { +- if (!xlog_item_is_intent(lip)) +- break; ++ struct xfs_defer_pending *dfp, *n; + +- spin_unlock(&ailp->ail_lock); +- lip->li_ops->iop_release(lip); +- spin_lock(&ailp->ail_lock); +- lip = xfs_trans_ail_cursor_next(ailp, &cur); +- } ++ list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { ++ ASSERT(xlog_item_is_intent(dfp->dfp_intent)); + +- xfs_trans_ail_cursor_done(&cur); +- spin_unlock(&ailp->ail_lock); ++ xfs_defer_cancel_recovery(log->l_mp, dfp); ++ } + } + + /* +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -668,12 +668,9 @@ xlog_recover_cui_commit_pass2( + cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); + xfs_cui_copy_format(&cuip->cui_format, cui_formatp); + atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); +- /* +- * Insert the intent into the AIL directly and 
drop one reference so +- * that finishing or canceling the work will drop the other. +- */ +- xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn); +- xfs_cui_release(cuip); ++ ++ xlog_recover_intent_item(log, &cuip->cui_item, lsn, ++ XFS_DEFER_OPS_TYPE_REFCOUNT); + return 0; + } + +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -682,12 +682,9 @@ xlog_recover_rui_commit_pass2( + ruip = xfs_rui_init(mp, rui_formatp->rui_nextents); + xfs_rui_copy_format(&ruip->rui_format, rui_formatp); + atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents); +- /* +- * Insert the intent into the AIL directly and drop one reference so +- * that finishing or canceling the work will drop the other. +- */ +- xfs_trans_ail_insert(log->l_ailp, &ruip->rui_item, lsn); +- xfs_rui_release(ruip); ++ ++ xlog_recover_intent_item(log, &ruip->rui_item, lsn, ++ XFS_DEFER_OPS_TYPE_RMAP); + return 0; + } + diff --git a/queue-6.1/xfs-validate-block-number-being-freed-before-adding-to-xefi.patch b/queue-6.1/xfs-validate-block-number-being-freed-before-adding-to-xefi.patch new file mode 100644 index 0000000000..ad1bc8666c --- /dev/null +++ b/queue-6.1/xfs-validate-block-number-being-freed-before-adding-to-xefi.patch @@ -0,0 +1,311 @@ +From stable+bounces-124363-greg=kroah.com@vger.kernel.org Thu Mar 13 21:26:20 2025 +From: Leah Rumancik +Date: Thu, 13 Mar 2025 13:25:26 -0700 +Subject: xfs: validate block number being freed before adding to xefi +To: stable@vger.kernel.org +Cc: xfs-stable@lists.linux.dev, Dave Chinner , Christoph Hellwig , "Darrick J. Wong" , Dave Chinner , Leah Rumancik +Message-ID: <20250313202550.2257219-7-leah.rumancik@gmail.com> + +From: Dave Chinner + +[ Upstream commit 7dfee17b13e5024c5c0ab1911859ded4182de3e5 ] + +Bad things happen in deferred extent freeing operations if it is +passed a bad block number in the xefi. This can come from a bogus +agno/agbno pair from deferred agfl freeing, or just a bad fsbno +being passed to __xfs_free_extent_later(). 
Either way, it's very +difficult to diagnose where a null perag oops in EFI creation +is coming from when the operation that queued the xefi has already +been completed and there's no longer any trace of it around.... + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ag.c | 5 ++++- + fs/xfs/libxfs/xfs_alloc.c | 16 +++++++++++++--- + fs/xfs/libxfs/xfs_alloc.h | 6 +++--- + fs/xfs/libxfs/xfs_bmap.c | 10 ++++++++-- + fs/xfs/libxfs/xfs_bmap_btree.c | 7 +++++-- + fs/xfs/libxfs/xfs_ialloc.c | 24 ++++++++++++++++-------- + fs/xfs/libxfs/xfs_refcount.c | 13 ++++++++++--- + fs/xfs/xfs_reflink.c | 4 +++- + 8 files changed, 62 insertions(+), 23 deletions(-) + +--- a/fs/xfs/libxfs/xfs_ag.c ++++ b/fs/xfs/libxfs/xfs_ag.c +@@ -906,7 +906,10 @@ xfs_ag_shrink_space( + if (err2 != -ENOSPC) + goto resv_err; + +- __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); ++ err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, ++ true); ++ if (err2) ++ goto resv_err; + + /* + * Roll the transaction before trying to re-init the per-ag +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2485,7 +2485,7 @@ xfs_agfl_reset( + * the real allocation can proceed. Deferring the free disconnects freeing up + * the AGFL slot from freeing the block. + */ +-STATIC void ++static int + xfs_defer_agfl_block( + struct xfs_trans *tp, + xfs_agnumber_t agno, +@@ -2504,16 +2504,20 @@ xfs_defer_agfl_block( + xefi->xefi_blockcount = 1; + xefi->xefi_owner = oinfo->oi_owner; + ++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) ++ return -EFSCORRUPTED; ++ + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); + + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); ++ return 0; + } + + /* + * Add the extent to the list of extents to be free at transaction end. 
+ * The list is maintained sorted (by block number). + */ +-void ++int + __xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, +@@ -2540,6 +2544,9 @@ __xfs_free_extent_later( + #endif + ASSERT(xfs_extfree_item_cache != NULL); + ++ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) ++ return -EFSCORRUPTED; ++ + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); + xefi->xefi_startblock = bno; +@@ -2561,6 +2568,7 @@ __xfs_free_extent_later( + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); ++ return 0; + } + + #ifdef DEBUG +@@ -2720,7 +2728,9 @@ xfs_alloc_fix_freelist( + goto out_agbp_relse; + + /* defer agfl frees */ +- xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); ++ error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); ++ if (error) ++ goto out_agbp_relse; + } + + targs.tp = tp; +--- a/fs/xfs/libxfs/xfs_alloc.h ++++ b/fs/xfs/libxfs/xfs_alloc.h +@@ -213,7 +213,7 @@ xfs_buf_to_agfl_bno( + return bp->b_addr; + } + +-void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, ++int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, + bool skip_discard); + +@@ -233,14 +233,14 @@ struct xfs_extent_free_item { + #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ + #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ + +-static inline void ++static inline int + xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo) + { +- __xfs_free_extent_later(tp, bno, len, oinfo, false); ++ return __xfs_free_extent_later(tp, bno, len, oinfo, false); + } + + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents( + cblock = XFS_BUF_TO_BLOCK(cbp); + if ((error = 
xfs_btree_check_block(cur, cblock, 0, cbp))) + return error; ++ + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); +- xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ++ error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); ++ if (error) ++ return error; ++ + ip->i_nblocks--; + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, cbp); +@@ -5202,10 +5206,12 @@ xfs_bmap_del_extent_real( + if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { + xfs_refcount_decrease_extent(tp, del); + } else { +- __xfs_free_extent_later(tp, del->br_startblock, ++ error = __xfs_free_extent_later(tp, del->br_startblock, + del->br_blockcount, NULL, + (bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN); ++ if (error) ++ goto done; + } + } + +--- a/fs/xfs/libxfs/xfs_bmap_btree.c ++++ b/fs/xfs/libxfs/xfs_bmap_btree.c +@@ -285,11 +285,14 @@ xfs_bmbt_free_block( + struct xfs_trans *tp = cur->bc_tp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); + struct xfs_owner_info oinfo; ++ int error; + + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); +- xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); +- ip->i_nblocks--; ++ error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); ++ if (error) ++ return error; + ++ ip->i_nblocks--; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + return 0; +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -1827,7 +1827,7 @@ xfs_dialloc( + * might be sparse and only free the regions that are allocated as part of the + * chunk. 
+ */ +-STATIC void ++static int + xfs_difree_inode_chunk( + struct xfs_trans *tp, + xfs_agnumber_t agno, +@@ -1844,10 +1844,10 @@ xfs_difree_inode_chunk( + + if (!xfs_inobt_issparse(rec->ir_holemask)) { + /* not sparse, calculate extent info directly */ +- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), +- M_IGEO(mp)->ialloc_blks, +- &XFS_RMAP_OINFO_INODES); +- return; ++ return xfs_free_extent_later(tp, ++ XFS_AGB_TO_FSB(mp, agno, sagbno), ++ M_IGEO(mp)->ialloc_blks, ++ &XFS_RMAP_OINFO_INODES); + } + + /* holemask is only 16-bits (fits in an unsigned long) */ +@@ -1864,6 +1864,8 @@ xfs_difree_inode_chunk( + XFS_INOBT_HOLEMASK_BITS); + nextbit = startidx + 1; + while (startidx < XFS_INOBT_HOLEMASK_BITS) { ++ int error; ++ + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, + nextbit); + /* +@@ -1889,8 +1891,11 @@ xfs_difree_inode_chunk( + + ASSERT(agbno % mp->m_sb.sb_spino_align == 0); + ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); +- xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), +- contigblk, &XFS_RMAP_OINFO_INODES); ++ error = xfs_free_extent_later(tp, ++ XFS_AGB_TO_FSB(mp, agno, agbno), ++ contigblk, &XFS_RMAP_OINFO_INODES); ++ if (error) ++ return error; + + /* reset range to current bit and carry on... 
*/ + startidx = endidx = nextbit; +@@ -1898,6 +1903,7 @@ xfs_difree_inode_chunk( + next: + nextbit++; + } ++ return 0; + } + + STATIC int +@@ -1998,7 +2004,9 @@ xfs_difree_inobt( + goto error0; + } + +- xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); ++ error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); ++ if (error) ++ goto error0; + } else { + xic->deleted = false; + +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1129,8 +1129,10 @@ xfs_refcount_adjust_extents( + fsbno = XFS_AGB_TO_FSB(cur->bc_mp, + cur->bc_ag.pag->pag_agno, + tmp.rc_startblock); +- xfs_free_extent_later(cur->bc_tp, fsbno, ++ error = xfs_free_extent_later(cur->bc_tp, fsbno, + tmp.rc_blockcount, NULL); ++ if (error) ++ goto out_error; + } + + (*agbno) += tmp.rc_blockcount; +@@ -1188,8 +1190,10 @@ xfs_refcount_adjust_extents( + fsbno = XFS_AGB_TO_FSB(cur->bc_mp, + cur->bc_ag.pag->pag_agno, + ext.rc_startblock); +- xfs_free_extent_later(cur->bc_tp, fsbno, ++ error = xfs_free_extent_later(cur->bc_tp, fsbno, + ext.rc_blockcount, NULL); ++ if (error) ++ goto out_error; + } + + skip: +@@ -1958,7 +1962,10 @@ xfs_refcount_recover_cow_leftovers( + rr->rr_rrec.rc_blockcount); + + /* Free the block. */ +- xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); ++ error = xfs_free_extent_later(tp, fsb, ++ rr->rr_rrec.rc_blockcount, NULL); ++ if (error) ++ goto out_trans; + + error = xfs_trans_commit(tp); + if (error) +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -618,8 +618,10 @@ xfs_reflink_cancel_cow_blocks( + xfs_refcount_free_cow_extent(*tpp, del.br_startblock, + del.br_blockcount); + +- xfs_free_extent_later(*tpp, del.br_startblock, ++ error = xfs_free_extent_later(*tpp, del.br_startblock, + del.br_blockcount, NULL); ++ if (error) ++ break; + + /* Roll the transaction */ + error = xfs_defer_finish(tpp);