arm64-mte-do-not-allow-prot_mte-on-map_hugetlb-user-mappings.patch
+xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch
+xfs-validate-inumber-in-xfs_iget.patch
+xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch
+xfs-fix-a-typo.patch
+xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch
+xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch
+xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch
+xfs-return-bool-from-xfs_attr3_leaf_add.patch
+xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch
+xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch
+xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch
+xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch
+xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch
+xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch
+xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch
+xfs-remove-empty-declartion-in-header-file.patch
+xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch
+xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch
+xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch
+xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch
+xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch
+xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch
+xfs-streamline-xfs_filestream_pick_ag.patch
+xfs-check-for-delayed-allocations-before-setting-extsize.patch
--- /dev/null
+From stable+bounces-113967-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:45 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:02 -0800
+Subject: xfs: assert a valid limit in xfs_rtfind_forw
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-2-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6d2db12d56a389b3e8efa236976f8dc3a8ae00f0 upstream.
+
+Protect against developers passing stupid limits when refactoring the
+RT code once again.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -288,6 +288,8 @@ xfs_rtfind_forw(
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ int word; /* word number in the buffer */
+
++ ASSERT(start <= limit);
++
+ /*
+ * Compute and read in starting bitmap block for starting block.
+ */
--- /dev/null
+From stable+bounces-113979-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:07 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:14 -0800
+Subject: xfs: call xfs_bmap_exact_minlen_extent_alloc from xfs_bmap_btalloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-14-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 405ee87c6938f67e6ab62a3f8f85b3c60a093886 upstream.
+
+[backport: dependency of 6aac770]
+
+xfs_bmap_exact_minlen_extent_alloc duplicates the args setup in
+xfs_bmap_btalloc. Switch to call it from xfs_bmap_btalloc after
+doing the basic setup.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 61 ++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 48 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3390,28 +3390,17 @@ xfs_bmap_process_allocated_extent(
+
+ static int
+ xfs_bmap_exact_minlen_extent_alloc(
+- struct xfs_bmalloca *ap)
++ struct xfs_bmalloca *ap,
++ struct xfs_alloc_arg *args)
+ {
+- struct xfs_mount *mp = ap->ip->i_mount;
+- struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
+- xfs_fileoff_t orig_offset;
+- xfs_extlen_t orig_length;
+- int error;
+-
+- ASSERT(ap->length);
+-
+ if (ap->minlen != 1) {
+- ap->blkno = NULLFSBLOCK;
+- ap->length = 0;
++ args->fsbno = NULLFSBLOCK;
+ return 0;
+ }
+
+- orig_offset = ap->offset;
+- orig_length = ap->length;
+-
+- args.alloc_minlen_only = 1;
+-
+- xfs_bmap_compute_alignments(ap, &args);
++ args->alloc_minlen_only = 1;
++ args->minlen = args->maxlen = ap->minlen;
++ args->total = ap->total;
+
+ /*
+ * Unlike the longest extent available in an AG, we don't track
+@@ -3421,33 +3410,9 @@ xfs_bmap_exact_minlen_extent_alloc(
+ * we need not be concerned about a drop in performance in
+ * "debug only" code paths.
+ */
+- ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
+-
+- args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
+- args.minlen = args.maxlen = ap->minlen;
+- args.total = ap->total;
++ ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
+
+- args.alignment = 1;
+- args.minalignslop = 0;
+-
+- args.minleft = ap->minleft;
+- args.wasdel = ap->wasdel;
+- args.resv = XFS_AG_RESV_NONE;
+- args.datatype = ap->datatype;
+-
+- error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
+- if (error)
+- return error;
+-
+- if (args.fsbno != NULLFSBLOCK) {
+- xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
+- orig_length);
+- } else {
+- ap->blkno = NULLFSBLOCK;
+- ap->length = 0;
+- }
+-
+- return 0;
++ return xfs_alloc_vextent_first_ag(args, ap->blkno);
+ }
+
+ /*
+@@ -3706,8 +3671,11 @@ xfs_bmap_btalloc(
+ /* Trim the allocation back to the maximum an AG can fit. */
+ args.maxlen = min(ap->length, mp->m_ag_max_usable);
+
+- if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+- xfs_inode_is_filestream(ap->ip))
++ if (unlikely(XFS_TEST_ERROR(false, mp,
++ XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
++ error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
++ else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
++ xfs_inode_is_filestream(ap->ip))
+ error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
+ else
+ error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
+@@ -4128,9 +4096,6 @@ xfs_bmapi_allocate(
+ if ((bma->datatype & XFS_ALLOC_USERDATA) &&
+ XFS_IS_REALTIME_INODE(bma->ip))
+ error = xfs_bmap_rtalloc(bma);
+- else if (unlikely(XFS_TEST_ERROR(false, mp,
+- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+- error = xfs_bmap_exact_minlen_extent_alloc(bma);
+ else
+ error = xfs_bmap_btalloc(bma);
+ if (error)
--- /dev/null
+From stable+bounces-113989-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:36 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:25 -0800
+Subject: xfs: Check for delayed allocations before setting extsize
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-25-catherine.hoang@oracle.com>
+
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+
+commit 2a492ff66673c38a77d0815d67b9a8cce2ef57f8 upstream.
+
+Extsize should only be allowed to be set on files with no data in them.
+For this, we check if the files have extents but fail to check whether
+delayed extents are present. This patch adds that check.
+
+While we are at it, also refactor this check into a helper since
+it's used in some other places as well like xfs_inactive() or
+xfs_ioctl_setattr_xflags()
+
+**Without the patch (SUCCEEDS)**
+
+$ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536'
+
+wrote 1024/1024 bytes at offset 0
+1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec)
+
+**With the patch (FAILS as expected)**
+
+$ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536'
+
+wrote 1024/1024 bytes at offset 0
+1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec)
+xfs_io: FS_IOC_FSSETXATTR testfile: Invalid argument
+
+Fixes: e94af02a9cd7 ("[XFS] fix old xfs_setattr mis-merge from irix; mostly harmless esp if not using xfs rt")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c | 2 +-
+ fs/xfs/xfs_inode.h | 5 +++++
+ fs/xfs/xfs_ioctl.c | 4 ++--
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1758,7 +1758,7 @@ xfs_inactive(
+
+ if (S_ISREG(VFS_I(ip)->i_mode) &&
+ (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
+- ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
++ xfs_inode_has_filedata(ip)))
+ truncate = 1;
+
+ if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -286,6 +286,11 @@ static inline bool xfs_is_metadata_inode
+ xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
+ }
+
++static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
++{
++ return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0;
++}
++
+ /*
+ * Check if an inode has any data in the COW fork. This might be often false
+ * even for inodes with the reflink flag when there is no pending COW operation.
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1126,7 +1126,7 @@ xfs_ioctl_setattr_xflags(
+
+ if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
+ /* Can't change realtime flag if any extents are allocated. */
+- if (ip->i_df.if_nextents || ip->i_delayed_blks)
++ if (xfs_inode_has_filedata(ip))
+ return -EINVAL;
+
+ /*
+@@ -1247,7 +1247,7 @@ xfs_ioctl_setattr_check_extsize(
+ if (!fa->fsx_valid)
+ return 0;
+
+- if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
++ if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) &&
+ XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
+ return -EINVAL;
+
--- /dev/null
+From stable+bounces-113975-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:58 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:10 -0800
+Subject: xfs: distinguish extra split from real ENOSPC from xfs_attr3_leaf_split
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-10-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit a5f73342abe1f796140f6585e43e2aa7bc1b7975 upstream.
+
+xfs_attr3_leaf_split propagates the need for an extra btree split as
+-ENOSPC to its only caller, but the same return value can also be
+returned from xfs_da_grow_inode when it fails to find free space.
+
+Distinguish the two cases by returning 1 for the extra split case instead
+of overloading -ENOSPC.
+
+This can be triggered relatively easily with the pending realtime group
+support and a file system with a lot of small zones that use metadata
+space on the main device.  In this case roughly every 5th to 10th run of
+xfs/538 runs into the following assert:
+
+ ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
+
+in xfs_attr3_leaf_split caused by an allocation failure. Note that
+the allocation failure is caused by another bug that will be fixed
+subsequently, but this commit at least sorts out the error handling.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c | 5 ++++-
+ fs/xfs/libxfs/xfs_da_btree.c | 5 +++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -1340,6 +1340,9 @@ xfs_attr3_leaf_create(
+
+ /*
+ * Split the leaf node, rebalance, then add the new entry.
++ *
++ * Returns 0 if the entry was added, 1 if a further split is needed or a
++ * negative error number otherwise.
+ */
+ int
+ xfs_attr3_leaf_split(
+@@ -1396,7 +1399,7 @@ xfs_attr3_leaf_split(
+ oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+ newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+ if (!added)
+- return -ENOSPC;
++ return 1;
+ return 0;
+ }
+
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -522,9 +522,8 @@ xfs_da3_split(
+ switch (oldblk->magic) {
+ case XFS_ATTR_LEAF_MAGIC:
+ error = xfs_attr3_leaf_split(state, oldblk, newblk);
+- if ((error != 0) && (error != -ENOSPC)) {
++ if (error < 0)
+ return error; /* GROT: attr is inconsistent */
+- }
+ if (!error) {
+ addblk = newblk;
+ break;
+@@ -546,6 +545,8 @@ xfs_da3_split(
+ error = xfs_attr3_leaf_split(state, newblk,
+ &state->extrablk);
+ }
++ if (error == 1)
++ return -ENOSPC;
+ if (error)
+ return error; /* GROT: attr inconsistent */
+ addblk = newblk;
--- /dev/null
+From stable+bounces-113976-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:00 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:11 -0800
+Subject: xfs: distinguish extra split from real ENOSPC from xfs_attr_node_try_addname
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-11-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b3f4e84e2f438a119b7ca8684a25452b3e57c0f0 upstream.
+
+Just like xfs_attr3_leaf_split, xfs_attr_node_try_addname can return
+-ENOSPC both for an actual failure to allocate a disk block and to
+signal the caller to convert the format of the attr fork.  Use magic
+1 to ask for the conversion here as well.
+
+Note that unlike the similar issue in xfs_attr3_leaf_split, this one was
+only found by code review.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -543,7 +543,7 @@ xfs_attr_node_addname(
+ return error;
+
+ error = xfs_attr_node_try_addname(attr);
+- if (error == -ENOSPC) {
++ if (error == 1) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
+ return error;
+@@ -1380,9 +1380,12 @@ error:
+ /*
+ * Add a name to a Btree-format attribute list.
+ *
+- * This will involve walking down the Btree, and may involve splitting
+- * leaf nodes and even splitting intermediate nodes up to and including
+- * the root node (a special case of an intermediate node).
++ * This will involve walking down the Btree, and may involve splitting leaf
++ * nodes and even splitting intermediate nodes up to and including the root
++ * node (a special case of an intermediate node).
++ *
++ * If the tree was still in single leaf format and needs to converted to
++ * real node format return 1 and let the caller handle that.
+ */
+ static int
+ xfs_attr_node_try_addname(
+@@ -1404,7 +1407,7 @@ xfs_attr_node_try_addname(
+ * out-of-line values so it looked like it *might*
+ * have been a b-tree. Let the caller deal with this.
+ */
+- error = -ENOSPC;
++ error = 1;
+ goto out;
+ }
+
--- /dev/null
+From stable+bounces-113972-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:54 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:07 -0800
+Subject: xfs: don't free cowblocks from under dirty pagecache on unshare
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-7-catherine.hoang@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 4390f019ad7866c3791c3d768d2ff185d89e8ebe upstream.
+
+fallocate unshare mode explicitly breaks extent sharing. When a
+command completes, it checks the data fork for any remaining shared
+extents to determine whether the reflink inode flag and COW fork
+preallocation can be removed. This logic doesn't consider in-core
+pagecache and I/O state, however, which means we can unsafely remove
+COW fork blocks that are still needed under certain conditions.
+
+For example, consider the following command sequence:
+
+xfs_io -fc "pwrite 0 1k" -c "reflink <file> 0 256k 1k" \
+ -c "pwrite 0 32k" -c "funshare 0 1k" <file>
+
+This allocates a data block at offset 0, shares it, and then
+overwrites it with a larger buffered write. The overwrite triggers
+COW fork preallocation, 32 blocks by default, which maps the entire
+32k write to delalloc in the COW fork. All but the shared block at
+offset 0 remain hole mapped in the data fork. The unshare command
+redirties and flushes the folio at offset 0, removing the only
+shared extent from the inode. Since the inode no longer maps shared
+extents, unshare purges the COW fork before the remaining 28k may
+have written back.
+
+This leaves dirty pagecache backed by holes, which writeback quietly
+skips, thus leaving clean, non-zeroed pagecache over holes in the
+file. To verify, fiemap shows holes in the first 32k of the file and
+reads return different data across a remount:
+
+$ xfs_io -c "fiemap -v" <file>
+<file>:
+ EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS
+ ...
+ 1: [8..511]: hole 504
+ ...
+$ xfs_io -c "pread -v 4k 8" <file>
+00001000: cd cd cd cd cd cd cd cd ........
+$ umount <mnt>; mount <dev> <mnt>
+$ xfs_io -c "pread -v 4k 8" <file>
+00001000: 00 00 00 00 00 00 00 00 ........
+
+To avoid this problem, make unshare follow the same rules used for
+background cowblock scanning and never purge the COW fork for inodes
+with dirty pagecache or in-flight I/O.
+
+Fixes: 46afb0628b86347 ("xfs: only flush the unshared range in xfs_reflink_unshare")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 8 +-------
+ fs/xfs/xfs_reflink.c | 3 +++
+ fs/xfs/xfs_reflink.h | 19 +++++++++++++++++++
+ 3 files changed, 23 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1271,13 +1271,7 @@ xfs_prep_free_cowblocks(
+ */
+ if (!sync && inode_is_open_for_write(VFS_I(ip)))
+ return false;
+- if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+- mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+- atomic_read(&VFS_I(ip)->i_dio_count))
+- return false;
+-
+- return true;
++ return xfs_can_free_cowblocks(ip);
+ }
+
+ /*
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1600,6 +1600,9 @@ xfs_reflink_clear_inode_flag(
+
+ ASSERT(xfs_is_reflink_inode(ip));
+
++ if (!xfs_can_free_cowblocks(ip))
++ return 0;
++
+ error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+ if (error || needs_flag)
+ return error;
+--- a/fs/xfs/xfs_reflink.h
++++ b/fs/xfs/xfs_reflink.h
+@@ -16,6 +16,25 @@ static inline bool xfs_is_cow_inode(stru
+ return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
+ }
+
++/*
++ * Check whether it is safe to free COW fork blocks from an inode. It is unsafe
++ * to do so when an inode has dirty cache or I/O in-flight, even if no shared
++ * extents exist in the data fork, because outstanding I/O may target blocks
++ * that were speculatively allocated to the COW fork.
++ */
++static inline bool
++xfs_can_free_cowblocks(struct xfs_inode *ip)
++{
++ struct inode *inode = VFS_I(ip);
++
++ if ((inode->i_state & I_DIRTY_PAGES) ||
++ mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
++ mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
++ atomic_read(&inode->i_dio_count))
++ return false;
++ return true;
++}
++
+ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
+ struct xfs_bmbt_irec *irec, bool *shared);
+ int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
--- /dev/null
+From stable+bounces-113978-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:07 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:13 -0800
+Subject: xfs: don't ifdef around the exact minlen allocations
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-13-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b611fddc0435738e64453bbf1dadd4b12a801858 upstream.
+
+Exact minlen allocations only exist as an error injection tool for debug
+builds. Currently this is implemented using ifdefs, which means the code
+isn't even compiled for non-XFS_DEBUG builds. Enhance the compile test
+coverage by always building the code and using the compiler's dead code
+elimination to remove it from the generated binary instead.
+
+The only downside is that the alloc_minlen_only field is unconditionally
+added to struct xfs_alloc_args now, but by moving it around and packing
+it tightly this doesn't actually increase the size of the structure.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 7 ++-----
+ fs/xfs/libxfs/xfs_alloc.h | 4 +---
+ fs/xfs/libxfs/xfs_bmap.c | 6 ------
+ 3 files changed, 3 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2581,7 +2581,6 @@ __xfs_free_extent_later(
+ return 0;
+ }
+
+-#ifdef DEBUG
+ /*
+ * Check if an AGF has a free extent record whose length is equal to
+ * args->minlen.
+@@ -2620,7 +2619,6 @@ out:
+
+ return error;
+ }
+-#endif
+
+ /*
+ * Decide whether to use this allocation group for this allocation.
+@@ -2694,15 +2692,14 @@ xfs_alloc_fix_freelist(
+ if (!xfs_alloc_space_available(args, need, alloc_flags))
+ goto out_agbp_relse;
+
+-#ifdef DEBUG
+- if (args->alloc_minlen_only) {
++ if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
+ int stat;
+
+ error = xfs_exact_minlen_extent_available(args, agbp, &stat);
+ if (error || !stat)
+ goto out_agbp_relse;
+ }
+-#endif
++
+ /*
+ * Make the freelist shorter if it's too long.
+ *
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg {
+ int datatype; /* mask defining data type treatment */
+ char wasdel; /* set if allocation was prev delayed */
+ char wasfromfl; /* set if allocation is from freelist */
++ bool alloc_minlen_only; /* allocate exact minlen extent */
+ struct xfs_owner_info oinfo; /* owner of blocks being allocated */
+ enum xfs_ag_resv_type resv; /* block reservation to use */
+-#ifdef DEBUG
+- bool alloc_minlen_only; /* allocate exact minlen extent */
+-#endif
+ } xfs_alloc_arg_t;
+
+ /*
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3388,7 +3388,6 @@ xfs_bmap_process_allocated_extent(
+ xfs_bmap_btalloc_accounting(ap, args);
+ }
+
+-#ifdef DEBUG
+ static int
+ xfs_bmap_exact_minlen_extent_alloc(
+ struct xfs_bmalloca *ap)
+@@ -3450,11 +3449,6 @@ xfs_bmap_exact_minlen_extent_alloc(
+
+ return 0;
+ }
+-#else
+-
+-#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
+-
+-#endif
+
+ /*
+ * If we are not low on available data blocks and we are allocating at
--- /dev/null
+From stable+bounces-113990-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:37 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:21 -0800
+Subject: xfs: don't use __GFP_RETRY_MAYFAIL in xfs_initialize_perag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-21-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 069cf5e32b700f94c6ac60f6171662bdfb04f325 upstream.
+
+[backport: uses kmem_zalloc instead of kzalloc]
+
+__GFP_RETRY_MAYFAIL increases the likelihood of allocations failing,
+which isn't really helpful during log recovery. Remove the flag and
+stick to the default GFP_KERNEL policies.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -370,7 +370,7 @@ xfs_initialize_perag(
+ int error;
+
+ for (index = old_agcount; index < new_agcount; index++) {
+- pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
++ pag = kmem_zalloc(sizeof(*pag), 0);
+ if (!pag) {
+ error = -ENOMEM;
+ goto out_unwind_new_pags;
--- /dev/null
+From stable+bounces-113985-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:18 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:20 -0800
+Subject: xfs: error out when a superblock buffer update reduces the agcount
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-20-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b882b0f8138ffa935834e775953f1630f89bbb62 upstream.
+
+XFS currently does not support reducing the agcount, so error out if
+a logged sb buffer tries to shrink the agcount.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf_item_recover.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -713,6 +713,11 @@ xlog_recover_do_primary_sb_buffer(
+ */
+ xfs_sb_from_disk(&mp->m_sb, dsb);
+
++ if (mp->m_sb.sb_agcount < orig_agcount) {
++ xfs_alert(mp, "Shrinking AG count in log recovery not supported");
++ return -EFSCORRUPTED;
++ }
++
+ /*
+ * Initialize the new perags, and also update various block and inode
+ * allocator setting based off the number of AGs or total blocks.
--- /dev/null
+From stable+bounces-113969-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:47 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:04 -0800
+Subject: xfs: fix a sloppy memory handling bug in xfs_iroot_realloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-4-catherine.hoang@oracle.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit de55149b6639e903c4d06eb0474ab2c05060e61d upstream.
+
+While refactoring code, I noticed that when xfs_iroot_realloc tries to
+shrink a bmbt root block, it allocates a smaller new block and then
+copies "records" and pointers to the new block. However, bmbt root
+blocks cannot ever be leaves, which means that it's not technically
+correct to copy records. We /should/ be copying keys.
+
+Note that this has never resulted in actual memory corruption because
+sizeof(bmbt_rec) == (sizeof(bmbt_key) + sizeof(bmbt_ptr)). However,
+this will no longer be true when we start adding realtime rmap stuff,
+so fix this now.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_fork.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -449,15 +449,15 @@ xfs_iroot_realloc(
+ }
+
+ /*
+- * Only copy the records and pointers if there are any.
++ * Only copy the keys and pointers if there are any.
+ */
+ if (new_max > 0) {
+ /*
+- * First copy the records.
++ * First copy the keys.
+ */
+- op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+- np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
+- memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
++ op = (char *)XFS_BMBT_KEY_ADDR(mp, ifp->if_broot, 1);
++ np = (char *)XFS_BMBT_KEY_ADDR(mp, new_broot, 1);
++ memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_key_t));
+
+ /*
+ * Then copy the pointers.
--- /dev/null
+From stable+bounces-113970-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:52 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:05 -0800
+Subject: xfs: fix a typo
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-5-catherine.hoang@oracle.com>
+
+From: Andrew Kreimer <algonell@gmail.com>
+
+commit 77bfe1b11ea0c0c4b0ce19b742cd1aa82f60e45d upstream.
+
+Fix a typo in comments.
+
+Signed-off-by: Andrew Kreimer <algonell@gmail.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_recover.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -1820,7 +1820,7 @@ xlog_find_item_ops(
+ * from the transaction. However, we can't do that until after we've
+ * replayed all the other items because they may be dependent on the
+ * cancelled buffer and replaying the cancelled buffer can remove it
+- * form the cancelled buffer table. Hence they have tobe done last.
++ * form the cancelled buffer table. Hence they have to be done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ * read the buffer and replay changes into it. For filesystems using the
--- /dev/null
+From stable+bounces-113977-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:01 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:12 -0800
+Subject: xfs: fold xfs_bmap_alloc_userdata into xfs_bmapi_allocate
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-12-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 865469cd41bce2b04bef9539cbf70676878bc8df upstream.
+
+[backport: dependency of 6aac770]
+
+Userdata and metadata allocations end up in the same allocation helpers.
+Remove the separate xfs_bmap_alloc_userdata function to make this more
+clear.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 73 ++++++++++++++++++-----------------------------
+ 1 file changed, 28 insertions(+), 45 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4078,43 +4078,6 @@ out:
+ }
+
+ static int
+-xfs_bmap_alloc_userdata(
+- struct xfs_bmalloca *bma)
+-{
+- struct xfs_mount *mp = bma->ip->i_mount;
+- int whichfork = xfs_bmapi_whichfork(bma->flags);
+- int error;
+-
+- /*
+- * Set the data type being allocated. For the data fork, the first data
+- * in the file is treated differently to all other allocations. For the
+- * attribute fork, we only need to ensure the allocated range is not on
+- * the busy list.
+- */
+- bma->datatype = XFS_ALLOC_NOBUSY;
+- if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+- bma->datatype |= XFS_ALLOC_USERDATA;
+- if (bma->offset == 0)
+- bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+-
+- if (mp->m_dalign && bma->length >= mp->m_dalign) {
+- error = xfs_bmap_isaeof(bma, whichfork);
+- if (error)
+- return error;
+- }
+-
+- if (XFS_IS_REALTIME_INODE(bma->ip))
+- return xfs_bmap_rtalloc(bma);
+- }
+-
+- if (unlikely(XFS_TEST_ERROR(false, mp,
+- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+- return xfs_bmap_exact_minlen_extent_alloc(bma);
+-
+- return xfs_bmap_btalloc(bma);
+-}
+-
+-static int
+ xfs_bmapi_allocate(
+ struct xfs_bmalloca *bma)
+ {
+@@ -4147,15 +4110,35 @@ xfs_bmapi_allocate(
+ else
+ bma->minlen = 1;
+
+- if (bma->flags & XFS_BMAPI_METADATA) {
+- if (unlikely(XFS_TEST_ERROR(false, mp,
+- XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+- error = xfs_bmap_exact_minlen_extent_alloc(bma);
+- else
+- error = xfs_bmap_btalloc(bma);
+- } else {
+- error = xfs_bmap_alloc_userdata(bma);
++ if (!(bma->flags & XFS_BMAPI_METADATA)) {
++ /*
++ * For the data and COW fork, the first data in the file is
++ * treated differently to all other allocations. For the
++ * attribute fork, we only need to ensure the allocated range
++ * is not on the busy list.
++ */
++ bma->datatype = XFS_ALLOC_NOBUSY;
++ if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
++ bma->datatype |= XFS_ALLOC_USERDATA;
++ if (bma->offset == 0)
++ bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
++
++ if (mp->m_dalign && bma->length >= mp->m_dalign) {
++ error = xfs_bmap_isaeof(bma, whichfork);
++ if (error)
++ return error;
++ }
++ }
+ }
++
++ if ((bma->datatype & XFS_ALLOC_USERDATA) &&
++ XFS_IS_REALTIME_INODE(bma->ip))
++ error = xfs_bmap_rtalloc(bma);
++ else if (unlikely(XFS_TEST_ERROR(false, mp,
++ XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
++ error = xfs_bmap_exact_minlen_extent_alloc(bma);
++ else
++ error = xfs_bmap_btalloc(bma);
+ if (error)
+ return error;
+ if (bma->blkno == NULLFSBLOCK)
--- /dev/null
+From stable+bounces-113973-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:56 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:08 -0800
+Subject: xfs: merge xfs_attr_leaf_try_add into xfs_attr_leaf_addname
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-8-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b1c649da15c2e4c86344c8e5af69c8afa215efec upstream.
+
+[backport: dependency of a5f7334 and b3f4e84]
+
+xfs_attr_leaf_try_add is only called by xfs_attr_leaf_addname, and
+merging the two will simplify a following error handling fix.
+
+To facilitate this, move the remote block state save/restore helpers up in
+the file so that they don't need forward declarations now.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c | 176 +++++++++++++++++++----------------------------
+ 1 file changed, 74 insertions(+), 102 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -50,7 +50,6 @@ STATIC int xfs_attr_shortform_addname(xf
+ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
+ STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
+ STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
+-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
+
+ /*
+ * Internal routines when attribute list is more than one block.
+@@ -401,6 +400,33 @@ out:
+ return error;
+ }
+
++/* Save the current remote block info and clear the current pointers. */
++static void
++xfs_attr_save_rmt_blk(
++ struct xfs_da_args *args)
++{
++ args->blkno2 = args->blkno;
++ args->index2 = args->index;
++ args->rmtblkno2 = args->rmtblkno;
++ args->rmtblkcnt2 = args->rmtblkcnt;
++ args->rmtvaluelen2 = args->rmtvaluelen;
++ args->rmtblkno = 0;
++ args->rmtblkcnt = 0;
++ args->rmtvaluelen = 0;
++}
++
++/* Set stored info about a remote block */
++static void
++xfs_attr_restore_rmt_blk(
++ struct xfs_da_args *args)
++{
++ args->blkno = args->blkno2;
++ args->index = args->index2;
++ args->rmtblkno = args->rmtblkno2;
++ args->rmtblkcnt = args->rmtblkcnt2;
++ args->rmtvaluelen = args->rmtvaluelen2;
++}
++
+ /*
+ * Handle the state change on completion of a multi-state attr operation.
+ *
+@@ -428,49 +454,77 @@ xfs_attr_complete_op(
+ return XFS_DAS_DONE;
+ }
+
++/*
++ * Try to add an attribute to an inode in leaf form.
++ */
+ static int
+ xfs_attr_leaf_addname(
+ struct xfs_attr_intent *attr)
+ {
+ struct xfs_da_args *args = attr->xattri_da_args;
++ struct xfs_buf *bp;
+ int error;
+
+ ASSERT(xfs_attr_is_leaf(args->dp));
+
++ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
++ if (error)
++ return error;
++
+ /*
+- * Use the leaf buffer we may already hold locked as a result of
+- * a sf-to-leaf conversion.
++ * Look up the xattr name to set the insertion point for the new xattr.
+ */
+- error = xfs_attr_leaf_try_add(args);
+-
+- if (error == -ENOSPC) {
+- error = xfs_attr3_leaf_to_node(args);
+- if (error)
+- return error;
++ error = xfs_attr3_leaf_lookup_int(bp, args);
++ switch (error) {
++ case -ENOATTR:
++ if (args->op_flags & XFS_DA_OP_REPLACE)
++ goto out_brelse;
++ break;
++ case -EEXIST:
++ if (!(args->op_flags & XFS_DA_OP_REPLACE))
++ goto out_brelse;
+
++ trace_xfs_attr_leaf_replace(args);
+ /*
+- * We're not in leaf format anymore, so roll the transaction and
+- * retry the add to the newly allocated node block.
++ * Save the existing remote attr state so that the current
++ * values reflect the state of the new attribute we are about to
++ * add, not the attribute we just found and will remove later.
+ */
+- attr->xattri_dela_state = XFS_DAS_NODE_ADD;
+- goto out;
++ xfs_attr_save_rmt_blk(args);
++ break;
++ case 0:
++ break;
++ default:
++ goto out_brelse;
+ }
+- if (error)
+- return error;
+
+ /*
+ * We need to commit and roll if we need to allocate remote xattr blocks
+ * or perform more xattr manipulations. Otherwise there is nothing more
+ * to do and we can return success.
+ */
+- if (args->rmtblkno)
++ error = xfs_attr3_leaf_add(bp, args);
++ if (error) {
++ if (error != -ENOSPC)
++ return error;
++ error = xfs_attr3_leaf_to_node(args);
++ if (error)
++ return error;
++
++ attr->xattri_dela_state = XFS_DAS_NODE_ADD;
++ } else if (args->rmtblkno) {
+ attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
+- else
+- attr->xattri_dela_state = xfs_attr_complete_op(attr,
+- XFS_DAS_LEAF_REPLACE);
+-out:
++ } else {
++ attr->xattri_dela_state =
++ xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE);
++ }
++
+ trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+ return error;
++
++out_brelse:
++ xfs_trans_brelse(args->trans, bp);
++ return error;
+ }
+
+ /*
+@@ -1164,88 +1218,6 @@ xfs_attr_shortform_addname(
+ * External routines when attribute list is one block
+ *========================================================================*/
+
+-/* Save the current remote block info and clear the current pointers. */
+-static void
+-xfs_attr_save_rmt_blk(
+- struct xfs_da_args *args)
+-{
+- args->blkno2 = args->blkno;
+- args->index2 = args->index;
+- args->rmtblkno2 = args->rmtblkno;
+- args->rmtblkcnt2 = args->rmtblkcnt;
+- args->rmtvaluelen2 = args->rmtvaluelen;
+- args->rmtblkno = 0;
+- args->rmtblkcnt = 0;
+- args->rmtvaluelen = 0;
+-}
+-
+-/* Set stored info about a remote block */
+-static void
+-xfs_attr_restore_rmt_blk(
+- struct xfs_da_args *args)
+-{
+- args->blkno = args->blkno2;
+- args->index = args->index2;
+- args->rmtblkno = args->rmtblkno2;
+- args->rmtblkcnt = args->rmtblkcnt2;
+- args->rmtvaluelen = args->rmtvaluelen2;
+-}
+-
+-/*
+- * Tries to add an attribute to an inode in leaf form
+- *
+- * This function is meant to execute as part of a delayed operation and leaves
+- * the transaction handling to the caller. On success the attribute is added
+- * and the inode and transaction are left dirty. If there is not enough space,
+- * the attr data is converted to node format and -ENOSPC is returned. Caller is
+- * responsible for handling the dirty inode and transaction or adding the attr
+- * in node format.
+- */
+-STATIC int
+-xfs_attr_leaf_try_add(
+- struct xfs_da_args *args)
+-{
+- struct xfs_buf *bp;
+- int error;
+-
+- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+- if (error)
+- return error;
+-
+- /*
+- * Look up the xattr name to set the insertion point for the new xattr.
+- */
+- error = xfs_attr3_leaf_lookup_int(bp, args);
+- switch (error) {
+- case -ENOATTR:
+- if (args->op_flags & XFS_DA_OP_REPLACE)
+- goto out_brelse;
+- break;
+- case -EEXIST:
+- if (!(args->op_flags & XFS_DA_OP_REPLACE))
+- goto out_brelse;
+-
+- trace_xfs_attr_leaf_replace(args);
+- /*
+- * Save the existing remote attr state so that the current
+- * values reflect the state of the new attribute we are about to
+- * add, not the attribute we just found and will remove later.
+- */
+- xfs_attr_save_rmt_blk(args);
+- break;
+- case 0:
+- break;
+- default:
+- goto out_brelse;
+- }
+-
+- return xfs_attr3_leaf_add(bp, args);
+-
+-out_brelse:
+- xfs_trans_brelse(args->trans, bp);
+- return error;
+-}
+-
+ /*
+ * Return EEXIST if attr is found, or ENOATTR if not
+ */
--- /dev/null
+From stable+bounces-113983-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:16 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:18 -0800
+Subject: xfs: pass the exact range to initialize to xfs_initialize_perag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-18-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 82742f8c3f1a93787a05a00aca50c2a565231f84 upstream.
+
+[backport: dependency of 6a18765b]
+
+Currently only the new agcount is passed to xfs_initialize_perag, which
+requires lookups of existing AGs to skip them and complicates error
+handling. Also pass the previous agcount so that the range that
+xfs_initialize_perag operates on is exactly defined. That way the
+extra lookups can be avoided, and error handling can clean up the
+exact range from the old count to the last added perag structure.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 28 ++++++----------------------
+ fs/xfs/libxfs/xfs_ag.h | 5 +++--
+ fs/xfs/xfs_fsops.c | 18 ++++++++----------
+ fs/xfs/xfs_log_recover.c | 5 +++--
+ fs/xfs/xfs_mount.c | 4 ++--
+ 5 files changed, 22 insertions(+), 38 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -360,27 +360,16 @@ xfs_free_unused_perag_range(
+ int
+ xfs_initialize_perag(
+ struct xfs_mount *mp,
+- xfs_agnumber_t agcount,
++ xfs_agnumber_t old_agcount,
++ xfs_agnumber_t new_agcount,
+ xfs_rfsblock_t dblocks,
+ xfs_agnumber_t *maxagi)
+ {
+ struct xfs_perag *pag;
+ xfs_agnumber_t index;
+- xfs_agnumber_t first_initialised = NULLAGNUMBER;
+ int error;
+
+- /*
+- * Walk the current per-ag tree so we don't try to initialise AGs
+- * that already exist (growfs case). Allocate and insert all the
+- * AGs we don't find ready for initialisation.
+- */
+- for (index = 0; index < agcount; index++) {
+- pag = xfs_perag_get(mp, index);
+- if (pag) {
+- xfs_perag_put(pag);
+- continue;
+- }
+-
++ for (index = old_agcount; index < new_agcount; index++) {
+ pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+ if (!pag) {
+ error = -ENOMEM;
+@@ -425,21 +414,17 @@ xfs_initialize_perag(
+ /* Active ref owned by mount indicates AG is online. */
+ atomic_set(&pag->pag_active_ref, 1);
+
+- /* first new pag is fully initialized */
+- if (first_initialised == NULLAGNUMBER)
+- first_initialised = index;
+-
+ /*
+ * Pre-calculated geometry
+ */
+- pag->block_count = __xfs_ag_block_count(mp, index, agcount,
++ pag->block_count = __xfs_ag_block_count(mp, index, new_agcount,
+ dblocks);
+ pag->min_block = XFS_AGFL_BLOCK(mp);
+ __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ &pag->agino_max);
+ }
+
+- index = xfs_set_inode_alloc(mp, agcount);
++ index = xfs_set_inode_alloc(mp, new_agcount);
+
+ if (maxagi)
+ *maxagi = index;
+@@ -455,8 +440,7 @@ out_remove_pag:
+ out_free_pag:
+ kmem_free(pag);
+ out_unwind_new_pags:
+- /* unwind any prior newly initialized pags */
+- xfs_free_unused_perag_range(mp, first_initialised, agcount);
++ xfs_free_unused_perag_range(mp, old_agcount, index);
+ return error;
+ }
+
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -135,8 +135,9 @@ __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_
+
+ void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
+ xfs_agnumber_t agend);
+-int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
+- xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
++int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount,
++ xfs_agnumber_t agcount, xfs_rfsblock_t dcount,
++ xfs_agnumber_t *maxagi);
+ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
+ void xfs_free_perag(struct xfs_mount *mp);
+
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -87,6 +87,7 @@ xfs_growfs_data_private(
+ struct xfs_mount *mp, /* mount point for filesystem */
+ struct xfs_growfs_data *in) /* growfs data input struct */
+ {
++ xfs_agnumber_t oagcount = mp->m_sb.sb_agcount;
+ struct xfs_buf *bp;
+ int error;
+ xfs_agnumber_t nagcount;
+@@ -94,7 +95,6 @@ xfs_growfs_data_private(
+ xfs_rfsblock_t nb, nb_div, nb_mod;
+ int64_t delta;
+ bool lastag_extended = false;
+- xfs_agnumber_t oagcount;
+ struct xfs_trans *tp;
+ struct aghdr_init_data id = {};
+ struct xfs_perag *last_pag;
+@@ -138,16 +138,14 @@ xfs_growfs_data_private(
+ if (delta == 0)
+ return 0;
+
+- oagcount = mp->m_sb.sb_agcount;
+- /* allocate the new per-ag structures */
+- if (nagcount > oagcount) {
+- error = xfs_initialize_perag(mp, nagcount, nb, &nagimax);
+- if (error)
+- return error;
+- } else if (nagcount < oagcount) {
+- /* TODO: shrinking the entire AGs hasn't yet completed */
++ /* TODO: shrinking the entire AGs hasn't yet completed */
++ if (nagcount < oagcount)
+ return -EINVAL;
+- }
++
++ /* allocate the new per-ag structures */
++ error = xfs_initialize_perag(mp, oagcount, nagcount, nb, &nagimax);
++ if (error)
++ return error;
+
+ if (delta > 0)
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -3317,6 +3317,7 @@ xlog_do_recover(
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp = mp->m_sb_bp;
+ struct xfs_sb *sbp = &mp->m_sb;
++ xfs_agnumber_t orig_agcount = sbp->sb_agcount;
+ int error;
+
+ trace_xfs_log_recover(log, head_blk, tail_blk);
+@@ -3365,8 +3366,8 @@ xlog_do_recover(
+ /* re-initialise in-core superblock and geometry structures */
+ mp->m_features |= xfs_sb_version_to_features(sbp);
+ xfs_reinit_percpu_counters(mp);
+- error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
+- &mp->m_maxagi);
++ error = xfs_initialize_perag(mp, orig_agcount, sbp->sb_agcount,
++ sbp->sb_dblocks, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+ return error;
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -797,8 +797,8 @@ xfs_mountfs(
+ /*
+ * Allocate and initialize the per-ag data.
+ */
+- error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks,
+- &mp->m_maxagi);
++ error = xfs_initialize_perag(mp, 0, sbp->sb_agcount,
++ mp->m_sb.sb_dblocks, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed per-ag init: %d", error);
+ goto out_free_dir;
--- /dev/null
+From stable+bounces-113987-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:28 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:23 -0800
+Subject: xfs: Reduce unnecessary searches when searching for the best extents
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-23-catherine.hoang@oracle.com>
+
+From: Chi Zhiling <chizhiling@kylinos.cn>
+
+commit 3ef22684038aa577c10972ee9c6a2455f5fac941 upstream.
+
+Recently, we found that the CPU spent a lot of time in
+xfs_alloc_ag_vextent_size when the filesystem has millions of fragmented
+spaces.
+
+The reason is that we conducted much extra searching for extents that
+could not yield a better result, and these searches would cost a lot of
+time when there were millions of extents to search through. Even if we
+get the same result length, we don't switch our choice to the new one,
+so we can definitely terminate the search early.
+
+Since the result length cannot exceed the found length, when the found
+length equals the best result length we already have, we can conclude
+the search.
+
+We did a test in that filesystem:
+[root@localhost ~]# xfs_db -c freesp /dev/vdb
+ from to extents blocks pct
+ 1 1 215 215 0.01
+ 2 3 994476 1988952 99.99
+
+Before this patch:
+ 0) | xfs_alloc_ag_vextent_size [xfs]() {
+ 0) * 15597.94 us | }
+
+After this patch:
+ 0) | xfs_alloc_ag_vextent_size [xfs]() {
+ 0) 19.176 us | }
+
+Signed-off-by: Chi Zhiling <chizhiling@kylinos.cn>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -1783,7 +1783,7 @@ restart:
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
+- if (flen < bestrlen)
++ if (flen <= bestrlen)
+ break;
+ busy = xfs_alloc_compute_aligned(args, fbno, flen,
+ &rbno, &rlen, &busy_gen);
--- /dev/null
+From stable+bounces-113982-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:13 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:17 -0800
+Subject: xfs: Remove empty declartion in header file
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-17-catherine.hoang@oracle.com>
+
+From: Zhang Zekun <zhangzekun11@huawei.com>
+
+commit f6225eebd76f371dab98b4d1c1a7c1e255190aef upstream.
+
+The definition of xfs_attr_use_log_assist() has been removed since
+commit d9c61ccb3b09 ("xfs: move xfs_attr_use_log_assist out of xfs_log.c").
+So, remove the empty declaration in the header file.
+
+Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -161,6 +161,5 @@ bool xlog_force_shutdown(struct xlog *
+
+ void xlog_use_incompat_feat(struct xlog *log);
+ void xlog_drop_incompat_feat(struct xlog *log);
+-int xfs_attr_use_log_assist(struct xfs_mount *mp);
+
+ #endif /* __XFS_LOG_H__ */
--- /dev/null
+From stable+bounces-113974-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:56 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:09 -0800
+Subject: xfs: return bool from xfs_attr3_leaf_add
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-9-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 346c1d46d4c631c0c88592d371f585214d714da4 upstream.
+
+[backport: dependency of a5f7334 and b3f4e84]
+
+xfs_attr3_leaf_add only has two potential return values, indicating if the
+entry could be added or not. Replace the errno return with a bool so that
+ENOSPC from it can't easily be confused with a real ENOSPC.
+
+Remove the return value from the xfs_attr3_leaf_add_work helper entirely,
+as it always returns 0.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c | 13 +++++--------
+ fs/xfs/libxfs/xfs_attr_leaf.c | 37 +++++++++++++++++++------------------
+ fs/xfs/libxfs/xfs_attr_leaf.h | 2 +-
+ 3 files changed, 25 insertions(+), 27 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -503,10 +503,7 @@ xfs_attr_leaf_addname(
+ * or perform more xattr manipulations. Otherwise there is nothing more
+ * to do and we can return success.
+ */
+- error = xfs_attr3_leaf_add(bp, args);
+- if (error) {
+- if (error != -ENOSPC)
+- return error;
++ if (!xfs_attr3_leaf_add(bp, args)) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
+ return error;
+@@ -520,7 +517,7 @@ xfs_attr_leaf_addname(
+ }
+
+ trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+- return error;
++ return 0;
+
+ out_brelse:
+ xfs_trans_brelse(args->trans, bp);
+@@ -1393,21 +1390,21 @@ xfs_attr_node_try_addname(
+ {
+ struct xfs_da_state *state = attr->xattri_da_state;
+ struct xfs_da_state_blk *blk;
+- int error;
++ int error = 0;
+
+ trace_xfs_attr_node_addname(state->args);
+
+ blk = &state->path.blk[state->path.active-1];
+ ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+
+- error = xfs_attr3_leaf_add(blk->bp, state->args);
+- if (error == -ENOSPC) {
++ if (!xfs_attr3_leaf_add(blk->bp, state->args)) {
+ if (state->path.active == 1) {
+ /*
+ * Its really a single leaf node, but it had
+ * out-of-line values so it looked like it *might*
+ * have been a b-tree. Let the caller deal with this.
+ */
++ error = -ENOSPC;
+ goto out;
+ }
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -46,7 +46,7 @@
+ */
+ STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
+ xfs_dablk_t which_block, struct xfs_buf **bpp);
+-STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
++STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
+ struct xfs_attr3_icleaf_hdr *ichdr,
+ struct xfs_da_args *args, int freemap_index);
+ STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
+@@ -990,10 +990,8 @@ xfs_attr_shortform_to_leaf(
+ }
+ error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
+ ASSERT(error == -ENOATTR);
+- error = xfs_attr3_leaf_add(bp, &nargs);
+- ASSERT(error != -ENOSPC);
+- if (error)
+- goto out;
++ if (!xfs_attr3_leaf_add(bp, &nargs))
++ ASSERT(0);
+ sfe = xfs_attr_sf_nextentry(sfe);
+ }
+ error = 0;
+@@ -1349,8 +1347,9 @@ xfs_attr3_leaf_split(
+ struct xfs_da_state_blk *oldblk,
+ struct xfs_da_state_blk *newblk)
+ {
+- xfs_dablk_t blkno;
+- int error;
++ bool added;
++ xfs_dablk_t blkno;
++ int error;
+
+ trace_xfs_attr_leaf_split(state->args);
+
+@@ -1385,10 +1384,10 @@ xfs_attr3_leaf_split(
+ */
+ if (state->inleaf) {
+ trace_xfs_attr_leaf_add_old(state->args);
+- error = xfs_attr3_leaf_add(oldblk->bp, state->args);
++ added = xfs_attr3_leaf_add(oldblk->bp, state->args);
+ } else {
+ trace_xfs_attr_leaf_add_new(state->args);
+- error = xfs_attr3_leaf_add(newblk->bp, state->args);
++ added = xfs_attr3_leaf_add(newblk->bp, state->args);
+ }
+
+ /*
+@@ -1396,13 +1395,15 @@ xfs_attr3_leaf_split(
+ */
+ oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+ newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+- return error;
++ if (!added)
++ return -ENOSPC;
++ return 0;
+ }
+
+ /*
+ * Add a name to the leaf attribute list structure.
+ */
+-int
++bool
+ xfs_attr3_leaf_add(
+ struct xfs_buf *bp,
+ struct xfs_da_args *args)
+@@ -1411,6 +1412,7 @@ xfs_attr3_leaf_add(
+ struct xfs_attr3_icleaf_hdr ichdr;
+ int tablesize;
+ int entsize;
++ bool added = true;
+ int sum;
+ int tmp;
+ int i;
+@@ -1439,7 +1441,7 @@ xfs_attr3_leaf_add(
+ if (ichdr.freemap[i].base < ichdr.firstused)
+ tmp += sizeof(xfs_attr_leaf_entry_t);
+ if (ichdr.freemap[i].size >= tmp) {
+- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
++ xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
+ goto out_log_hdr;
+ }
+ sum += ichdr.freemap[i].size;
+@@ -1451,7 +1453,7 @@ xfs_attr3_leaf_add(
+ * no good and we should just give up.
+ */
+ if (!ichdr.holes && sum < entsize)
+- return -ENOSPC;
++ return false;
+
+ /*
+ * Compact the entries to coalesce free space.
+@@ -1464,24 +1466,24 @@ xfs_attr3_leaf_add(
+ * free region, in freemap[0]. If it is not big enough, give up.
+ */
+ if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
+- tmp = -ENOSPC;
++ added = false;
+ goto out_log_hdr;
+ }
+
+- tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
++ xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
+
+ out_log_hdr:
+ xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
+ xfs_trans_log_buf(args->trans, bp,
+ XFS_DA_LOGRANGE(leaf, &leaf->hdr,
+ xfs_attr3_leaf_hdr_size(leaf)));
+- return tmp;
++ return added;
+ }
+
+ /*
+ * Add a name to a leaf attribute list structure.
+ */
+-STATIC int
++STATIC void
+ xfs_attr3_leaf_add_work(
+ struct xfs_buf *bp,
+ struct xfs_attr3_icleaf_hdr *ichdr,
+@@ -1599,7 +1601,6 @@ xfs_attr3_leaf_add_work(
+ }
+ }
+ ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
+- return 0;
+ }
+
+ /*
+--- a/fs/xfs/libxfs/xfs_attr_leaf.h
++++ b/fs/xfs/libxfs/xfs_attr_leaf.h
+@@ -78,7 +78,7 @@ int xfs_attr3_leaf_split(struct xfs_da_s
+ int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
+ struct xfs_da_args *args);
+ int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
+-int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
++bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
+ struct xfs_da_args *args);
+ int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
+ struct xfs_da_args *args);
--- /dev/null
+From stable+bounces-113971-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:53 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:06 -0800
+Subject: xfs: skip background cowblock trims on inodes open for write
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-6-catherine.hoang@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 90a71daaf73f5d39bb0cbb3c7ab6af942fe6233e upstream.
+
+The background blockgc scanner runs on a 5m interval by default and
+trims preallocation (post-eof and cow fork) from inodes that are
+otherwise idle. Idle effectively means that iolock can be acquired
+without blocking and that the inode has no dirty pagecache or I/O in
+flight.
+
+This simple mechanism and heuristic has worked fairly well for
+post-eof speculative preallocations. Support for reflink and COW
+fork preallocations came sometime later and plugged into the same
+mechanism, with similar heuristics. Some recent testing has shown
+that COW fork preallocation may be notably more sensitive to blockgc
+processing than post-eof preallocation, however.
+
+For example, consider an 8GB reflinked file with a COW extent size
+hint of 1MB. A worst case fully randomized overwrite of this file
+results in ~8k extents of an average size of ~1MB. If the same
+workload is interrupted a couple times for blockgc processing
+(assuming the file goes idle), the resulting extent count explodes
+to over 100k extents with an average size <100kB. This is
+significantly worse than ideal and essentially defeats the COW
+extent size hint mechanism.
+
+While this particular test is instrumented, it reflects a fairly
+reasonable pattern in practice where random I/Os might spread out
+over a large period of time with varying periods of (in)activity.
+For example, consider a cloned disk image file for a VM or container
+with long uptime and variable and bursty usage. A background blockgc
+scan that races and processes the image file when it happens to be
+clean and idle can have a significant effect on the future
+fragmentation level of the file, even when still in use.
+
+To help combat this, update the heuristic to skip cowblocks inodes
+that are currently opened for write access during non-sync blockgc
+scans. This allows COW fork preallocations to persist for as long as
+possible unless otherwise needed for functional purposes (i.e. a
+sync scan), the file is idle and closed, or the inode is being
+evicted from cache. While here, update the comments to help
+distinguish performance oriented heuristics from the logic that
+exists to maintain functional correctness.
+
+Suggested-by: Darrick Wong <djwong@kernel.org>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 31 +++++++++++++++++++++++--------
+ 1 file changed, 23 insertions(+), 8 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1234,14 +1234,17 @@ xfs_inode_clear_eofblocks_tag(
+ }
+
+ /*
+- * Set ourselves up to free CoW blocks from this file. If it's already clean
+- * then we can bail out quickly, but otherwise we must back off if the file
+- * is undergoing some kind of write.
++ * Prepare to free COW fork blocks from an inode.
+ */
+ static bool
+ xfs_prep_free_cowblocks(
+- struct xfs_inode *ip)
++ struct xfs_inode *ip,
++ struct xfs_icwalk *icw)
+ {
++ bool sync;
++
++ sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
++
+ /*
+ * Just clear the tag if we have an empty cow fork or none at all. It's
+ * possible the inode was fully unshared since it was originally tagged.
+@@ -1253,9 +1256,21 @@ xfs_prep_free_cowblocks(
+ }
+
+ /*
+- * If the mapping is dirty or under writeback we cannot touch the
+- * CoW fork. Leave it alone if we're in the midst of a directio.
++ * A cowblocks trim of an inode can have a significant effect on
++ * fragmentation even when a reasonable COW extent size hint is set.
++ * Therefore, we prefer to not process cowblocks unless they are clean
++ * and idle. We can never process a cowblocks inode that is dirty or has
++ * in-flight I/O under any circumstances, because outstanding writeback
++ * or dio expects targeted COW fork blocks exist through write
++ * completion where they can be remapped into the data fork.
++ *
++ * Therefore, the heuristic used here is to never process inodes
++ * currently opened for write from background (i.e. non-sync) scans. For
++ * sync scans, use the pagecache/dio state of the inode to ensure we
++ * never free COW fork blocks out from under pending I/O.
+ */
++ if (!sync && inode_is_open_for_write(VFS_I(ip)))
++ return false;
+ if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+ mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+@@ -1291,7 +1306,7 @@ xfs_inode_free_cowblocks(
+ if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
+ return 0;
+
+- if (!xfs_prep_free_cowblocks(ip))
++ if (!xfs_prep_free_cowblocks(ip, icw))
+ return 0;
+
+ if (!xfs_icwalk_match(ip, icw))
+@@ -1320,7 +1335,7 @@ xfs_inode_free_cowblocks(
+ * Check again, nobody else should be able to dirty blocks or change
+ * the reflink iflag now that we have the first two locks held.
+ */
+- if (xfs_prep_free_cowblocks(ip))
++ if (xfs_prep_free_cowblocks(ip, icw))
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
+ return ret;
+ }
--- /dev/null
+From stable+bounces-113988-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:29 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:24 -0800
+Subject: xfs: streamline xfs_filestream_pick_ag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-24-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 81a1e1c32ef474c20ccb9f730afe1ac25b1c62a4 upstream.
+
+Directly return the error from xfs_bmap_longest_free_extent instead
+of breaking from the loop and handling it there, and use a done
+label to directly jump to the exit when we found a suitable perag
+structure to reduce the indentation level and pag/max_pag check
+complexity in the tail of the function.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_filestream.c | 96 +++++++++++++++++++++++-------------------------
+ 1 file changed, 46 insertions(+), 50 deletions(-)
+
+--- a/fs/xfs/xfs_filestream.c
++++ b/fs/xfs/xfs_filestream.c
+@@ -67,22 +67,28 @@ xfs_filestream_pick_ag(
+ xfs_extlen_t minfree, maxfree = 0;
+ xfs_agnumber_t agno;
+ bool first_pass = true;
+- int err;
+
+ /* 2% of an AG's blocks must be free for it to be chosen. */
+ minfree = mp->m_sb.sb_agblocks / 50;
+
+ restart:
+ for_each_perag_wrap(mp, start_agno, agno, pag) {
++ int err;
++
+ trace_xfs_filestream_scan(pag, pino);
++
+ *longest = 0;
+ err = xfs_bmap_longest_free_extent(pag, NULL, longest);
+ if (err) {
+- if (err != -EAGAIN)
+- break;
+- /* Couldn't lock the AGF, skip this AG. */
+- err = 0;
+- continue;
++ if (err == -EAGAIN) {
++ /* Couldn't lock the AGF, skip this AG. */
++ err = 0;
++ continue;
++ }
++ xfs_perag_rele(pag);
++ if (max_pag)
++ xfs_perag_rele(max_pag);
++ return err;
+ }
+
+ /* Keep track of the AG with the most free blocks. */
+@@ -107,7 +113,9 @@ restart:
+ !(flags & XFS_PICK_USERDATA) ||
+ (flags & XFS_PICK_LOWSPACE))) {
+ /* Break out, retaining the reference on the AG. */
+- break;
++ if (max_pag)
++ xfs_perag_rele(max_pag);
++ goto done;
+ }
+ }
+
+@@ -115,56 +123,44 @@ restart:
+ atomic_dec(&pag->pagf_fstrms);
+ }
+
+- if (err) {
+- xfs_perag_rele(pag);
+- if (max_pag)
+- xfs_perag_rele(max_pag);
+- return err;
++ /*
++ * Allow a second pass to give xfs_bmap_longest_free_extent() another
++ * attempt at locking AGFs that it might have skipped over before we
++ * fail.
++ */
++ if (first_pass) {
++ first_pass = false;
++ goto restart;
+ }
+
+- if (!pag) {
+- /*
+- * Allow a second pass to give xfs_bmap_longest_free_extent()
+- * another attempt at locking AGFs that it might have skipped
+- * over before we fail.
+- */
+- if (first_pass) {
+- first_pass = false;
+- goto restart;
+- }
+-
+- /*
+- * We must be low on data space, so run a final lowspace
+- * optimised selection pass if we haven't already.
+- */
+- if (!(flags & XFS_PICK_LOWSPACE)) {
+- flags |= XFS_PICK_LOWSPACE;
+- goto restart;
+- }
+-
+- /*
+- * No unassociated AGs are available, so select the AG with the
+- * most free space, regardless of whether it's already in use by
+- * another filestream. It none suit, just use whatever AG we can
+- * grab.
+- */
+- if (!max_pag) {
+- for_each_perag_wrap(args->mp, 0, start_agno, pag) {
+- max_pag = pag;
+- break;
+- }
++ /*
++ * We must be low on data space, so run a final lowspace optimised
++ * selection pass if we haven't already.
++ */
++ if (!(flags & XFS_PICK_LOWSPACE)) {
++ flags |= XFS_PICK_LOWSPACE;
++ goto restart;
++ }
+
+- /* Bail if there are no AGs at all to select from. */
+- if (!max_pag)
+- return -ENOSPC;
++ /*
++ * No unassociated AGs are available, so select the AG with the most
++ * free space, regardless of whether it's already in use by another
++ * filestream. If none suit, just use whatever AG we can grab.
++ */
++ if (!max_pag) {
++ for_each_perag_wrap(args->mp, 0, start_agno, pag) {
++ max_pag = pag;
++ break;
+ }
+
+- pag = max_pag;
+- atomic_inc(&pag->pagf_fstrms);
+- } else if (max_pag) {
+- xfs_perag_rele(max_pag);
++ /* Bail if there are no AGs at all to select from. */
++ if (!max_pag)
++ return -ENOSPC;
+ }
+
++ pag = max_pag;
++ atomic_inc(&pag->pagf_fstrms);
++done:
+ trace_xfs_filestream_pick(pag, pino);
+ args->pag = pag;
+ return 0;
--- /dev/null
+From stable+bounces-113980-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:12 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:15 -0800
+Subject: xfs: support lowmode allocations in xfs_bmap_exact_minlen_extent_alloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-15-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6aac77059881e4419df499392c995bf02fb9630b upstream.
+
+Currently the debug-only xfs_bmap_exact_minlen_extent_alloc allocation
+variant fails to drop into the lowmode last resort allocator, and
+thus can sometimes fail allocations for which the caller has a
+transaction block reservation.
+
+Fix this by using xfs_bmap_btalloc_low_space to do the actual allocation.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3412,7 +3412,13 @@ xfs_bmap_exact_minlen_extent_alloc(
+ */
+ ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
+
+- return xfs_alloc_vextent_first_ag(args, ap->blkno);
++ /*
++ * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
++ * iteration and then drops args->total to args->minlen, which might be
++ * required to find an allocation for the transaction reservation when
++ * the file system is very full.
++ */
++ return xfs_bmap_btalloc_low_space(ap, args);
+ }
+
+ /*
--- /dev/null
+From stable+bounces-113984-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:18 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:19 -0800
+Subject: xfs: update the file system geometry after recoverying superblock buffers
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-19-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6a18765b54e2e52aebcdb84c3b4f4d1f7cb2c0ca upstream.
+
+Primary superblock buffers that change the file system geometry after a
+growfs operation can affect the operation of later CIL checkpoints that
+make use of the newly added space and allocation groups.
+
+Apply the changes to the in-memory structures as part of recovery pass 2,
+to ensure recovery works fine for such cases.
+
+In the future we should apply the logic to other updates such as feature
+bits as well.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf_item_recover.c | 52 ++++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/xfs_log_recover.c | 8 ------
+ 2 files changed, 52 insertions(+), 8 deletions(-)
+
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -22,6 +22,9 @@
+ #include "xfs_inode.h"
+ #include "xfs_dir2.h"
+ #include "xfs_quota.h"
++#include "xfs_alloc.h"
++#include "xfs_ag.h"
++#include "xfs_sb.h"
+
+ /*
+ * This is the number of entries in the l_buf_cancel_table used during
+@@ -685,6 +688,49 @@ xlog_recover_do_inode_buffer(
+ }
+
+ /*
++ * Update the in-memory superblock and perag structures from the primary SB
++ * buffer.
++ *
++ * This is required because transactions running after growfs may require the
++ * updated values to be set in a previous fully committed transaction.
++ */
++static int
++xlog_recover_do_primary_sb_buffer(
++ struct xfs_mount *mp,
++ struct xlog_recover_item *item,
++ struct xfs_buf *bp,
++ struct xfs_buf_log_format *buf_f,
++ xfs_lsn_t current_lsn)
++{
++ struct xfs_dsb *dsb = bp->b_addr;
++ xfs_agnumber_t orig_agcount = mp->m_sb.sb_agcount;
++ int error;
++
++ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
++
++ /*
++ * Update the in-core super block from the freshly recovered on-disk one.
++ */
++ xfs_sb_from_disk(&mp->m_sb, dsb);
++
++ /*
++ * Initialize the new perags, and also update various block and inode
++ * allocator setting based off the number of AGs or total blocks.
++ * Because of the latter this also needs to happen if the agcount did
++ * not change.
++ */
++ error = xfs_initialize_perag(mp, orig_agcount,
++ mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks,
++ &mp->m_maxagi);
++ if (error) {
++ xfs_warn(mp, "Failed recovery per-ag init: %d", error);
++ return error;
++ }
++ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
++ return 0;
++}
++
++/*
+ * V5 filesystems know the age of the buffer on disk being recovered. We can
+ * have newer objects on disk than we are replaying, and so for these cases we
+ * don't want to replay the current change as that will make the buffer contents
+@@ -967,6 +1013,12 @@ xlog_recover_buf_commit_pass2(
+ dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+ if (!dirty)
+ goto out_release;
++ } else if ((xfs_blft_from_flags(buf_f) & XFS_BLFT_SB_BUF) &&
++ xfs_buf_daddr(bp) == 0) {
++ error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f,
++ current_lsn);
++ if (error)
++ goto out_release;
+ } else {
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+ }
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -3317,7 +3317,6 @@ xlog_do_recover(
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_buf *bp = mp->m_sb_bp;
+ struct xfs_sb *sbp = &mp->m_sb;
+- xfs_agnumber_t orig_agcount = sbp->sb_agcount;
+ int error;
+
+ trace_xfs_log_recover(log, head_blk, tail_blk);
+@@ -3366,13 +3365,6 @@ xlog_do_recover(
+ /* re-initialise in-core superblock and geometry structures */
+ mp->m_features |= xfs_sb_version_to_features(sbp);
+ xfs_reinit_percpu_counters(mp);
+- error = xfs_initialize_perag(mp, orig_agcount, sbp->sb_agcount,
+- sbp->sb_dblocks, &mp->m_maxagi);
+- if (error) {
+- xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+- return error;
+- }
+- mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+
+ /* Normal transactions can now occur */
+ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
--- /dev/null
+From stable+bounces-113986-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:27 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:22 -0800
+Subject: xfs: update the pag for the last AG at recovery time
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-22-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 4a201dcfa1ff0dcfe4348c40f3ad8bd68b97eb6c upstream.
+
+Currently log recovery never updates the in-core perag values for the
+last allocation group when they were grown by growfs. This leads to
+btree record validation failures for the alloc, ialloc or finobt
+trees if a transaction references this new space.
+
+Found by Brian's new growfs recovery stress test.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c | 17 +++++++++++++++++
+ fs/xfs/libxfs/xfs_ag.h | 1 +
+ fs/xfs/xfs_buf_item_recover.c | 19 ++++++++++++++++---
+ 3 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -358,6 +358,23 @@ xfs_free_unused_perag_range(
+ }
+
+ int
++xfs_update_last_ag_size(
++ struct xfs_mount *mp,
++ xfs_agnumber_t prev_agcount)
++{
++ struct xfs_perag *pag = xfs_perag_grab(mp, prev_agcount - 1);
++
++ if (!pag)
++ return -EFSCORRUPTED;
++ pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
++ mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
++ __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
++ &pag->agino_max);
++ xfs_perag_rele(pag);
++ return 0;
++}
++
++int
+ xfs_initialize_perag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t old_agcount,
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -140,6 +140,7 @@ int xfs_initialize_perag(struct xfs_moun
+ xfs_agnumber_t *maxagi);
+ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
+ void xfs_free_perag(struct xfs_mount *mp);
++int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount);
+
+ /* Passive AG references */
+ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -708,6 +708,11 @@ xlog_recover_do_primary_sb_buffer(
+
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+
++ if (orig_agcount == 0) {
++ xfs_alert(mp, "Trying to grow file system without AGs");
++ return -EFSCORRUPTED;
++ }
++
+ /*
+ * Update the in-core super block from the freshly recovered on-disk one.
+ */
+@@ -719,14 +724,22 @@ xlog_recover_do_primary_sb_buffer(
+ }
+
+ /*
++ * Growfs can also grow the last existing AG. In this case we also need
++ * to update the length in the in-core perag structure and values
++ * depending on it.
++ */
++ error = xfs_update_last_ag_size(mp, orig_agcount);
++ if (error)
++ return error;
++
++ /*
+ * Initialize the new perags, and also update various block and inode
+ * allocator setting based off the number of AGs or total blocks.
+ * Because of the latter this also needs to happen if the agcount did
+ * not change.
+ */
+- error = xfs_initialize_perag(mp, orig_agcount,
+- mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks,
+- &mp->m_maxagi);
++ error = xfs_initialize_perag(mp, orig_agcount, mp->m_sb.sb_agcount,
++ mp->m_sb.sb_dblocks, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed recovery per-ag init: %d", error);
+ return error;
--- /dev/null
+From stable+bounces-113981-greg=kroah.com@vger.kernel.org Wed Feb 5 22:41:10 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:16 -0800
+Subject: xfs: Use try_cmpxchg() in xlog_cil_insert_pcp_aggregate()
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-16-catherine.hoang@oracle.com>
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 20195d011c840b01fa91a85ebcd099ca95fbf8fc upstream.
+
+Use !try_cmpxchg instead of cmpxchg (*ptr, old, new) != old in
+xlog_cil_insert_pcp_aggregate(). x86 CMPXCHG instruction returns
+success in ZF flag, so this change saves a compare after cmpxchg.
+
+Also, try_cmpxchg implicitly assigns old *ptr value to "old" when
+cmpxchg fails. There is no need to re-read the value in the loop.
+
+Note that the value from *ptr should be read using READ_ONCE to
+prevent the compiler from merging, refetching or reordering the read.
+
+No functional change intended.
+
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@infradead.org>
+Cc: Chandan Babu R <chandan.babu@oracle.com>
+Cc: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c | 11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -156,7 +156,6 @@ xlog_cil_insert_pcp_aggregate(
+ struct xfs_cil *cil,
+ struct xfs_cil_ctx *ctx)
+ {
+- struct xlog_cil_pcp *cilpcp;
+ int cpu;
+ int count = 0;
+
+@@ -171,13 +170,11 @@ xlog_cil_insert_pcp_aggregate(
+ * structures that could have a nonzero space_used.
+ */
+ for_each_cpu(cpu, &ctx->cil_pcpmask) {
+- int old, prev;
++ struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
++ int old = READ_ONCE(cilpcp->space_used);
+
+- cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+- do {
+- old = cilpcp->space_used;
+- prev = cmpxchg(&cilpcp->space_used, old, 0);
+- } while (old != prev);
++ while (!try_cmpxchg(&cilpcp->space_used, &old, 0))
++ ;
+ count += old;
+ }
+ atomic_add(count, &ctx->space_used);
--- /dev/null
+From stable+bounces-113968-greg=kroah.com@vger.kernel.org Wed Feb 5 22:40:46 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed, 5 Feb 2025 13:40:03 -0800
+Subject: xfs: validate inumber in xfs_iget
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-3-catherine.hoang@oracle.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 05aba1953f4a6e2b48e13c610e8a4545ba4ef509 upstream.
+
+Actually use the inumber validator to check the argument passed in here.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -748,7 +748,7 @@ xfs_iget(
+ ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
+
+ /* reject inode numbers outside existing AGs */
+- if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
++ if (!xfs_verify_ino(mp, ino))
+ return -EINVAL;
+
+ XFS_STATS_INC(mp, xs_ig_attempts);