]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Feb 2025 15:23:26 +0000 (16:23 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 21 Feb 2025 15:23:26 +0000 (16:23 +0100)
added patches:
xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch
xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch
xfs-check-for-delayed-allocations-before-setting-extsize.patch
xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch
xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch
xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch
xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch
xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch
xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch
xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch
xfs-fix-a-typo.patch
xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch
xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch
xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch
xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch
xfs-remove-empty-declartion-in-header-file.patch
xfs-return-bool-from-xfs_attr3_leaf_add.patch
xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch
xfs-streamline-xfs_filestream_pick_ag.patch
xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch
xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch
xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch
xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch
xfs-validate-inumber-in-xfs_iget.patch

25 files changed:
queue-6.6/series
queue-6.6/xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch [new file with mode: 0644]
queue-6.6/xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch [new file with mode: 0644]
queue-6.6/xfs-check-for-delayed-allocations-before-setting-extsize.patch [new file with mode: 0644]
queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch [new file with mode: 0644]
queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch [new file with mode: 0644]
queue-6.6/xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch [new file with mode: 0644]
queue-6.6/xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch [new file with mode: 0644]
queue-6.6/xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch [new file with mode: 0644]
queue-6.6/xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch [new file with mode: 0644]
queue-6.6/xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch [new file with mode: 0644]
queue-6.6/xfs-fix-a-typo.patch [new file with mode: 0644]
queue-6.6/xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch [new file with mode: 0644]
queue-6.6/xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch [new file with mode: 0644]
queue-6.6/xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch [new file with mode: 0644]
queue-6.6/xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch [new file with mode: 0644]
queue-6.6/xfs-remove-empty-declartion-in-header-file.patch [new file with mode: 0644]
queue-6.6/xfs-return-bool-from-xfs_attr3_leaf_add.patch [new file with mode: 0644]
queue-6.6/xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch [new file with mode: 0644]
queue-6.6/xfs-streamline-xfs_filestream_pick_ag.patch [new file with mode: 0644]
queue-6.6/xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch [new file with mode: 0644]
queue-6.6/xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch [new file with mode: 0644]
queue-6.6/xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch [new file with mode: 0644]
queue-6.6/xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch [new file with mode: 0644]
queue-6.6/xfs-validate-inumber-in-xfs_iget.patch [new file with mode: 0644]

index 23ce9dbdbe823e09ad321f378651161caaa4197a..414dd126c579ff5ca26435009f103fca16de7108 100644 (file)
@@ -1 +1,25 @@
 arm64-mte-do-not-allow-prot_mte-on-map_hugetlb-user-mappings.patch
+xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch
+xfs-validate-inumber-in-xfs_iget.patch
+xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch
+xfs-fix-a-typo.patch
+xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch
+xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch
+xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch
+xfs-return-bool-from-xfs_attr3_leaf_add.patch
+xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch
+xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch
+xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch
+xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch
+xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch
+xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch
+xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch
+xfs-remove-empty-declartion-in-header-file.patch
+xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch
+xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch
+xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch
+xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch
+xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch
+xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch
+xfs-streamline-xfs_filestream_pick_ag.patch
+xfs-check-for-delayed-allocations-before-setting-extsize.patch
diff --git a/queue-6.6/xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch b/queue-6.6/xfs-assert-a-valid-limit-in-xfs_rtfind_forw.patch
new file mode 100644 (file)
index 0000000..bee8ae8
--- /dev/null
@@ -0,0 +1,36 @@
+From stable+bounces-113967-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:45 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:02 -0800
+Subject: xfs: assert a valid limit in xfs_rtfind_forw
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-2-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6d2db12d56a389b3e8efa236976f8dc3a8ae00f0 upstream.
+
+Protect against developers passing stupid limits when refactoring the
+RT code once again.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_rtbitmap.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/xfs/libxfs/xfs_rtbitmap.c
++++ b/fs/xfs/libxfs/xfs_rtbitmap.c
+@@ -288,6 +288,8 @@ xfs_rtfind_forw(
+       xfs_rtword_t    wdiff;          /* difference from wanted value */
+       int             word;           /* word number in the buffer */
++      ASSERT(start <= limit);
++
+       /*
+        * Compute and read in starting bitmap block for starting block.
+        */
diff --git a/queue-6.6/xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch b/queue-6.6/xfs-call-xfs_bmap_exact_minlen_extent_alloc-from-xfs_bmap_btalloc.patch
new file mode 100644 (file)
index 0000000..3b8aa6f
--- /dev/null
@@ -0,0 +1,125 @@
+From stable+bounces-113979-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:07 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:14 -0800
+Subject: xfs: call xfs_bmap_exact_minlen_extent_alloc from xfs_bmap_btalloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-14-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 405ee87c6938f67e6ab62a3f8f85b3c60a093886 upstream.
+
+[backport: dependency of 6aac770]
+
+xfs_bmap_exact_minlen_extent_alloc duplicates the args setup in
+xfs_bmap_btalloc.  Switch to call it from xfs_bmap_btalloc after
+doing the basic setup.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |   61 ++++++++++-------------------------------------
+ 1 file changed, 13 insertions(+), 48 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3390,28 +3390,17 @@ xfs_bmap_process_allocated_extent(
+ static int
+ xfs_bmap_exact_minlen_extent_alloc(
+-      struct xfs_bmalloca     *ap)
++      struct xfs_bmalloca     *ap,
++      struct xfs_alloc_arg    *args)
+ {
+-      struct xfs_mount        *mp = ap->ip->i_mount;
+-      struct xfs_alloc_arg    args = { .tp = ap->tp, .mp = mp };
+-      xfs_fileoff_t           orig_offset;
+-      xfs_extlen_t            orig_length;
+-      int                     error;
+-
+-      ASSERT(ap->length);
+-
+       if (ap->minlen != 1) {
+-              ap->blkno = NULLFSBLOCK;
+-              ap->length = 0;
++              args->fsbno = NULLFSBLOCK;
+               return 0;
+       }
+-      orig_offset = ap->offset;
+-      orig_length = ap->length;
+-
+-      args.alloc_minlen_only = 1;
+-
+-      xfs_bmap_compute_alignments(ap, &args);
++      args->alloc_minlen_only = 1;
++      args->minlen = args->maxlen = ap->minlen;
++      args->total = ap->total;
+       /*
+        * Unlike the longest extent available in an AG, we don't track
+@@ -3421,33 +3410,9 @@ xfs_bmap_exact_minlen_extent_alloc(
+        * we need not be concerned about a drop in performance in
+        * "debug only" code paths.
+        */
+-      ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
+-
+-      args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
+-      args.minlen = args.maxlen = ap->minlen;
+-      args.total = ap->total;
++      ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
+-      args.alignment = 1;
+-      args.minalignslop = 0;
+-
+-      args.minleft = ap->minleft;
+-      args.wasdel = ap->wasdel;
+-      args.resv = XFS_AG_RESV_NONE;
+-      args.datatype = ap->datatype;
+-
+-      error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
+-      if (error)
+-              return error;
+-
+-      if (args.fsbno != NULLFSBLOCK) {
+-              xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
+-                      orig_length);
+-      } else {
+-              ap->blkno = NULLFSBLOCK;
+-              ap->length = 0;
+-      }
+-
+-      return 0;
++      return xfs_alloc_vextent_first_ag(args, ap->blkno);
+ }
+ /*
+@@ -3706,8 +3671,11 @@ xfs_bmap_btalloc(
+       /* Trim the allocation back to the maximum an AG can fit. */
+       args.maxlen = min(ap->length, mp->m_ag_max_usable);
+-      if ((ap->datatype & XFS_ALLOC_USERDATA) &&
+-          xfs_inode_is_filestream(ap->ip))
++      if (unlikely(XFS_TEST_ERROR(false, mp,
++                      XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
++              error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
++      else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
++                      xfs_inode_is_filestream(ap->ip))
+               error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
+       else
+               error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
+@@ -4128,9 +4096,6 @@ xfs_bmapi_allocate(
+       if ((bma->datatype & XFS_ALLOC_USERDATA) &&
+           XFS_IS_REALTIME_INODE(bma->ip))
+               error = xfs_bmap_rtalloc(bma);
+-      else if (unlikely(XFS_TEST_ERROR(false, mp,
+-                      XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+-              error = xfs_bmap_exact_minlen_extent_alloc(bma);
+       else
+               error = xfs_bmap_btalloc(bma);
+       if (error)
diff --git a/queue-6.6/xfs-check-for-delayed-allocations-before-setting-extsize.patch b/queue-6.6/xfs-check-for-delayed-allocations-before-setting-extsize.patch
new file mode 100644 (file)
index 0000000..626d3a0
--- /dev/null
@@ -0,0 +1,95 @@
+From stable+bounces-113989-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:36 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:25 -0800
+Subject: xfs: Check for delayed allocations before setting extsize
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-25-catherine.hoang@oracle.com>
+
+From: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+
+commit 2a492ff66673c38a77d0815d67b9a8cce2ef57f8 upstream.
+
+Extsize should only be allowed to be set on files with no data in it.
+For this, we check if the files have extents but fail to check if
+delayed extents are present. This patch adds that check.
+
+While we are at it, also refactor this check into a helper since
+it's used in some other places as well like xfs_inactive() or
+xfs_ioctl_setattr_xflags()
+
+**Without the patch (SUCCEEDS)**
+
+$ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536'
+
+wrote 1024/1024 bytes at offset 0
+1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec)
+
+**With the patch (FAILS as expected)**
+
+$ xfs_io -c 'open -f testfile' -c 'pwrite 0 1024' -c 'extsize 65536'
+
+wrote 1024/1024 bytes at offset 0
+1 KiB, 1 ops; 0.0002 sec (4.628 MiB/sec and 4739.3365 ops/sec)
+xfs_io: FS_IOC_FSSETXATTR testfile: Invalid argument
+
+Fixes: e94af02a9cd7 ("[XFS] fix old xfs_setattr mis-merge from irix; mostly harmless esp if not using xfs rt")
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Signed-off-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |    2 +-
+ fs/xfs/xfs_inode.h |    5 +++++
+ fs/xfs/xfs_ioctl.c |    4 ++--
+ 3 files changed, 8 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1758,7 +1758,7 @@ xfs_inactive(
+       if (S_ISREG(VFS_I(ip)->i_mode) &&
+           (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
+-           ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
++           xfs_inode_has_filedata(ip)))
+               truncate = 1;
+       if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -286,6 +286,11 @@ static inline bool xfs_is_metadata_inode
+               xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
+ }
++static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
++{
++      return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0;
++}
++
+ /*
+  * Check if an inode has any data in the COW fork.  This might be often false
+  * even for inodes with the reflink flag when there is no pending COW operation.
+--- a/fs/xfs/xfs_ioctl.c
++++ b/fs/xfs/xfs_ioctl.c
+@@ -1126,7 +1126,7 @@ xfs_ioctl_setattr_xflags(
+       if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
+               /* Can't change realtime flag if any extents are allocated. */
+-              if (ip->i_df.if_nextents || ip->i_delayed_blks)
++              if (xfs_inode_has_filedata(ip))
+                       return -EINVAL;
+               /*
+@@ -1247,7 +1247,7 @@ xfs_ioctl_setattr_check_extsize(
+       if (!fa->fsx_valid)
+               return 0;
+-      if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
++      if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) &&
+           XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
+               return -EINVAL;
diff --git a/queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch b/queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr3_leaf_split.patch
new file mode 100644 (file)
index 0000000..61746bb
--- /dev/null
@@ -0,0 +1,84 @@
+From stable+bounces-113975-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:58 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:10 -0800
+Subject: xfs: distinguish extra split from real ENOSPC from xfs_attr3_leaf_split
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-10-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit a5f73342abe1f796140f6585e43e2aa7bc1b7975 upstream.
+
+xfs_attr3_leaf_split propagates the need for an extra btree split as
+-ENOSPC to its only caller, but the same return value can also be
+returned from xfs_da_grow_inode when it fails to find free space.
+
+Distinguish the two cases by returning 1 for the extra split case instead
+of overloading -ENOSPC.
+
+This can be triggered relatively easily with the pending realtime group
+support and a file system with a lot of small zones that use metadata
+space on the main device.  In this case about every 5th-10th run of
+xfs/538 runs into the following assert:
+
+       ASSERT(oldblk->magic == XFS_ATTR_LEAF_MAGIC);
+
+in xfs_attr3_leaf_split caused by an allocation failure.  Note that
+the allocation failure is caused by another bug that will be fixed
+subsequently, but this commit at least sorts out the error handling.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c |    5 ++++-
+ fs/xfs/libxfs/xfs_da_btree.c  |    5 +++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -1340,6 +1340,9 @@ xfs_attr3_leaf_create(
+ /*
+  * Split the leaf node, rebalance, then add the new entry.
++ *
++ * Returns 0 if the entry was added, 1 if a further split is needed or a
++ * negative error number otherwise.
+  */
+ int
+ xfs_attr3_leaf_split(
+@@ -1396,7 +1399,7 @@ xfs_attr3_leaf_split(
+       oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+       if (!added)
+-              return -ENOSPC;
++              return 1;
+       return 0;
+ }
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -522,9 +522,8 @@ xfs_da3_split(
+               switch (oldblk->magic) {
+               case XFS_ATTR_LEAF_MAGIC:
+                       error = xfs_attr3_leaf_split(state, oldblk, newblk);
+-                      if ((error != 0) && (error != -ENOSPC)) {
++                      if (error < 0)
+                               return error;   /* GROT: attr is inconsistent */
+-                      }
+                       if (!error) {
+                               addblk = newblk;
+                               break;
+@@ -546,6 +545,8 @@ xfs_da3_split(
+                               error = xfs_attr3_leaf_split(state, newblk,
+                                                           &state->extrablk);
+                       }
++                      if (error == 1)
++                              return -ENOSPC;
+                       if (error)
+                               return error;   /* GROT: attr inconsistent */
+                       addblk = newblk;
diff --git a/queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch b/queue-6.6/xfs-distinguish-extra-split-from-real-enospc-from-xfs_attr_node_try_addname.patch
new file mode 100644 (file)
index 0000000..70521c8
--- /dev/null
@@ -0,0 +1,66 @@
+From stable+bounces-113976-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:00 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:11 -0800
+Subject: xfs: distinguish extra split from real ENOSPC from xfs_attr_node_try_addname
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-11-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b3f4e84e2f438a119b7ca8684a25452b3e57c0f0 upstream.
+
+Just like xfs_attr3_leaf_split, xfs_attr_node_try_addname can return
+-ENOSPC both for an actual failure to allocate a disk block, but also
+to signal the caller to convert the format of the attr fork.  Use magic
+1 to ask for the conversion here as well.
+
+Note that unlike the similar issue in xfs_attr3_leaf_split, this one was
+only found by code review.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c |   13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -543,7 +543,7 @@ xfs_attr_node_addname(
+               return error;
+       error = xfs_attr_node_try_addname(attr);
+-      if (error == -ENOSPC) {
++      if (error == 1) {
+               error = xfs_attr3_leaf_to_node(args);
+               if (error)
+                       return error;
+@@ -1380,9 +1380,12 @@ error:
+ /*
+  * Add a name to a Btree-format attribute list.
+  *
+- * This will involve walking down the Btree, and may involve splitting
+- * leaf nodes and even splitting intermediate nodes up to and including
+- * the root node (a special case of an intermediate node).
++ * This will involve walking down the Btree, and may involve splitting leaf
++ * nodes and even splitting intermediate nodes up to and including the root
++ * node (a special case of an intermediate node).
++ *
++ * If the tree was still in single leaf format and needs to converted to
++ * real node format return 1 and let the caller handle that.
+  */
+ static int
+ xfs_attr_node_try_addname(
+@@ -1404,7 +1407,7 @@ xfs_attr_node_try_addname(
+                        * out-of-line values so it looked like it *might*
+                        * have been a b-tree. Let the caller deal with this.
+                        */
+-                      error = -ENOSPC;
++                      error = 1;
+                       goto out;
+               }
diff --git a/queue-6.6/xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch b/queue-6.6/xfs-don-t-free-cowblocks-from-under-dirty-pagecache-on-unshare.patch
new file mode 100644 (file)
index 0000000..d2a01e7
--- /dev/null
@@ -0,0 +1,125 @@
+From stable+bounces-113972-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:54 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:07 -0800
+Subject: xfs: don't free cowblocks from under dirty pagecache on unshare
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-7-catherine.hoang@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 4390f019ad7866c3791c3d768d2ff185d89e8ebe upstream.
+
+fallocate unshare mode explicitly breaks extent sharing. When a
+command completes, it checks the data fork for any remaining shared
+extents to determine whether the reflink inode flag and COW fork
+preallocation can be removed. This logic doesn't consider in-core
+pagecache and I/O state, however, which means we can unsafely remove
+COW fork blocks that are still needed under certain conditions.
+
+For example, consider the following command sequence:
+
+xfs_io -fc "pwrite 0 1k" -c "reflink <file> 0 256k 1k" \
+       -c "pwrite 0 32k" -c "funshare 0 1k" <file>
+
+This allocates a data block at offset 0, shares it, and then
+overwrites it with a larger buffered write. The overwrite triggers
+COW fork preallocation, 32 blocks by default, which maps the entire
+32k write to delalloc in the COW fork. All but the shared block at
+offset 0 remains hole mapped in the data fork. The unshare command
+redirties and flushes the folio at offset 0, removing the only
+shared extent from the inode. Since the inode no longer maps shared
+extents, unshare purges the COW fork before the remaining 28k may
+have been written back.
+
+This leaves dirty pagecache backed by holes, which writeback quietly
+skips, thus leaving clean, non-zeroed pagecache over holes in the
+file. To verify, fiemap shows holes in the first 32k of the file and
+reads return different data across a remount:
+
+$ xfs_io -c "fiemap -v" <file>
+<file>:
+ EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
+   ...
+   1: [8..511]:        hole               504
+   ...
+$ xfs_io -c "pread -v 4k 8" <file>
+00001000:  cd cd cd cd cd cd cd cd  ........
+$ umount <mnt>; mount <dev> <mnt>
+$ xfs_io -c "pread -v 4k 8" <file>
+00001000:  00 00 00 00 00 00 00 00  ........
+
+To avoid this problem, make unshare follow the same rules used for
+background cowblock scanning and never purge the COW fork for inodes
+with dirty pagecache or in-flight I/O.
+
+Fixes: 46afb0628b86347 ("xfs: only flush the unshared range in xfs_reflink_unshare")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c  |    8 +-------
+ fs/xfs/xfs_reflink.c |    3 +++
+ fs/xfs/xfs_reflink.h |   19 +++++++++++++++++++
+ 3 files changed, 23 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1271,13 +1271,7 @@ xfs_prep_free_cowblocks(
+        */
+       if (!sync && inode_is_open_for_write(VFS_I(ip)))
+               return false;
+-      if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+-          mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+-          mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+-          atomic_read(&VFS_I(ip)->i_dio_count))
+-              return false;
+-
+-      return true;
++      return xfs_can_free_cowblocks(ip);
+ }
+ /*
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1600,6 +1600,9 @@ xfs_reflink_clear_inode_flag(
+       ASSERT(xfs_is_reflink_inode(ip));
++      if (!xfs_can_free_cowblocks(ip))
++              return 0;
++
+       error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
+       if (error || needs_flag)
+               return error;
+--- a/fs/xfs/xfs_reflink.h
++++ b/fs/xfs/xfs_reflink.h
+@@ -16,6 +16,25 @@ static inline bool xfs_is_cow_inode(stru
+       return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
+ }
++/*
++ * Check whether it is safe to free COW fork blocks from an inode. It is unsafe
++ * to do so when an inode has dirty cache or I/O in-flight, even if no shared
++ * extents exist in the data fork, because outstanding I/O may target blocks
++ * that were speculatively allocated to the COW fork.
++ */
++static inline bool
++xfs_can_free_cowblocks(struct xfs_inode *ip)
++{
++      struct inode *inode = VFS_I(ip);
++
++      if ((inode->i_state & I_DIRTY_PAGES) ||
++          mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
++          mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
++          atomic_read(&inode->i_dio_count))
++              return false;
++      return true;
++}
++
+ extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
+               struct xfs_bmbt_irec *irec, bool *shared);
+ int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
diff --git a/queue-6.6/xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch b/queue-6.6/xfs-don-t-ifdef-around-the-exact-minlen-allocations.patch
new file mode 100644 (file)
index 0000000..74b9049
--- /dev/null
@@ -0,0 +1,107 @@
+From stable+bounces-113978-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:07 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:13 -0800
+Subject: xfs: don't ifdef around the exact minlen allocations
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-13-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b611fddc0435738e64453bbf1dadd4b12a801858 upstream.
+
+Exact minlen allocations only exist as an error injection tool for debug
+builds.  Currently this is implemented using ifdefs, which means the code
+isn't even compiled for non-XFS_DEBUG builds.  Enhance the compile test
+coverage by always building the code and use the compilers' dead code
+elimination to remove it from the generated binary instead.
+
+The only downside is that the alloc_minlen_only field is unconditionally
+added to struct xfs_alloc_args now, but by moving it around and packing
+it tightly this doesn't actually increase the size of the structure.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c |    7 ++-----
+ fs/xfs/libxfs/xfs_alloc.h |    4 +---
+ fs/xfs/libxfs/xfs_bmap.c  |    6 ------
+ 3 files changed, 3 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -2581,7 +2581,6 @@ __xfs_free_extent_later(
+       return 0;
+ }
+-#ifdef DEBUG
+ /*
+  * Check if an AGF has a free extent record whose length is equal to
+  * args->minlen.
+@@ -2620,7 +2619,6 @@ out:
+       return error;
+ }
+-#endif
+ /*
+  * Decide whether to use this allocation group for this allocation.
+@@ -2694,15 +2692,14 @@ xfs_alloc_fix_freelist(
+       if (!xfs_alloc_space_available(args, need, alloc_flags))
+               goto out_agbp_relse;
+-#ifdef DEBUG
+-      if (args->alloc_minlen_only) {
++      if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
+               int stat;
+               error = xfs_exact_minlen_extent_available(args, agbp, &stat);
+               if (error || !stat)
+                       goto out_agbp_relse;
+       }
+-#endif
++
+       /*
+        * Make the freelist shorter if it's too long.
+        *
+--- a/fs/xfs/libxfs/xfs_alloc.h
++++ b/fs/xfs/libxfs/xfs_alloc.h
+@@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg {
+       int             datatype;       /* mask defining data type treatment */
+       char            wasdel;         /* set if allocation was prev delayed */
+       char            wasfromfl;      /* set if allocation is from freelist */
++      bool            alloc_minlen_only; /* allocate exact minlen extent */
+       struct xfs_owner_info   oinfo;  /* owner of blocks being allocated */
+       enum xfs_ag_resv_type   resv;   /* block reservation to use */
+-#ifdef DEBUG
+-      bool            alloc_minlen_only; /* allocate exact minlen extent */
+-#endif
+ } xfs_alloc_arg_t;
+ /*
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3388,7 +3388,6 @@ xfs_bmap_process_allocated_extent(
+       xfs_bmap_btalloc_accounting(ap, args);
+ }
+-#ifdef DEBUG
+ static int
+ xfs_bmap_exact_minlen_extent_alloc(
+       struct xfs_bmalloca     *ap)
+@@ -3450,11 +3449,6 @@ xfs_bmap_exact_minlen_extent_alloc(
+       return 0;
+ }
+-#else
+-
+-#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
+-
+-#endif
+ /*
+  * If we are not low on available data blocks and we are allocating at
diff --git a/queue-6.6/xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch b/queue-6.6/xfs-don-t-use-__gfp_retry_mayfail-in-xfs_initialize_perag.patch
new file mode 100644 (file)
index 0000000..763bf4d
--- /dev/null
@@ -0,0 +1,40 @@
+From stable+bounces-113990-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:37 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:21 -0800
+Subject: xfs: don't use __GFP_RETRY_MAYFAIL in xfs_initialize_perag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-21-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 069cf5e32b700f94c6ac60f6171662bdfb04f325 upstream.
+
+[backport: uses kmem_zalloc instead of kzalloc]
+
+__GFP_RETRY_MAYFAIL increases the likelihood that allocations fail,
+which isn't really helpful during log recovery.  Remove the flag and
+stick to the default GFP_KERNEL policies.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -370,7 +370,7 @@ xfs_initialize_perag(
+       int                     error;
+       for (index = old_agcount; index < new_agcount; index++) {
+-              pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
++              pag = kmem_zalloc(sizeof(*pag), 0);
+               if (!pag) {
+                       error = -ENOMEM;
+                       goto out_unwind_new_pags;
diff --git a/queue-6.6/xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch b/queue-6.6/xfs-error-out-when-a-superblock-buffer-update-reduces-the-agcount.patch
new file mode 100644 (file)
index 0000000..6d081a4
--- /dev/null
@@ -0,0 +1,40 @@
+From stable+bounces-113985-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:18 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:20 -0800
+Subject: xfs: error out when a superblock buffer update reduces the agcount
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-20-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b882b0f8138ffa935834e775953f1630f89bbb62 upstream.
+
+XFS currently does not support reducing the agcount, so error out if
+a logged sb buffer tries to shrink the agcount.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf_item_recover.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -713,6 +713,11 @@ xlog_recover_do_primary_sb_buffer(
+        */
+       xfs_sb_from_disk(&mp->m_sb, dsb);
++      if (mp->m_sb.sb_agcount < orig_agcount) {
++              xfs_alert(mp, "Shrinking AG count in log recovery not supported");
++              return -EFSCORRUPTED;
++      }
++
+       /*
+        * Initialize the new perags, and also update various block and inode
+        * allocator setting based off the number of AGs or total blocks.
diff --git a/queue-6.6/xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch b/queue-6.6/xfs-fix-a-sloppy-memory-handling-bug-in-xfs_iroot_realloc.patch
new file mode 100644 (file)
index 0000000..965f5db
--- /dev/null
@@ -0,0 +1,55 @@
+From stable+bounces-113969-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:47 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:04 -0800
+Subject: xfs: fix a sloppy memory handling bug in xfs_iroot_realloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-4-catherine.hoang@oracle.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit de55149b6639e903c4d06eb0474ab2c05060e61d upstream.
+
+While refactoring code, I noticed that when xfs_iroot_realloc tries to
+shrink a bmbt root block, it allocates a smaller new block and then
+copies "records" and pointers to the new block.  However, bmbt root
+blocks cannot ever be leaves, which means that it's not technically
+correct to copy records.  We /should/ be copying keys.
+
+Note that this has never resulted in actual memory corruption because
+sizeof(bmbt_rec) == (sizeof(bmbt_key) + sizeof(bmbt_ptr)).  However,
+this will no longer be true when we start adding realtime rmap stuff,
+so fix this now.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_fork.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_inode_fork.c
++++ b/fs/xfs/libxfs/xfs_inode_fork.c
+@@ -449,15 +449,15 @@ xfs_iroot_realloc(
+       }
+       /*
+-       * Only copy the records and pointers if there are any.
++       * Only copy the keys and pointers if there are any.
+        */
+       if (new_max > 0) {
+               /*
+-               * First copy the records.
++               * First copy the keys.
+                */
+-              op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+-              np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
+-              memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
++              op = (char *)XFS_BMBT_KEY_ADDR(mp, ifp->if_broot, 1);
++              np = (char *)XFS_BMBT_KEY_ADDR(mp, new_broot, 1);
++              memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_key_t));
+               /*
+                * Then copy the pointers.
diff --git a/queue-6.6/xfs-fix-a-typo.patch b/queue-6.6/xfs-fix-a-typo.patch
new file mode 100644 (file)
index 0000000..25b6ead
--- /dev/null
@@ -0,0 +1,35 @@
+From stable+bounces-113970-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:52 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:05 -0800
+Subject: xfs: fix a typo
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-5-catherine.hoang@oracle.com>
+
+From: Andrew Kreimer <algonell@gmail.com>
+
+commit 77bfe1b11ea0c0c4b0ce19b742cd1aa82f60e45d upstream.
+
+Fix a typo in comments.
+
+Signed-off-by: Andrew Kreimer <algonell@gmail.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_recover.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -1820,7 +1820,7 @@ xlog_find_item_ops(
+  *       from the transaction. However, we can't do that until after we've
+  *       replayed all the other items because they may be dependent on the
+  *       cancelled buffer and replaying the cancelled buffer can remove it
+- *       form the cancelled buffer table. Hence they have tobe done last.
++ *       form the cancelled buffer table. Hence they have to be done last.
+  *
+  *    3. Inode allocation buffers must be replayed before inode items that
+  *       read the buffer and replay changes into it. For filesystems using the
diff --git a/queue-6.6/xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch b/queue-6.6/xfs-fold-xfs_bmap_alloc_userdata-into-xfs_bmapi_allocate.patch
new file mode 100644 (file)
index 0000000..46ed70b
--- /dev/null
@@ -0,0 +1,118 @@
+From stable+bounces-113977-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:01 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:12 -0800
+Subject: xfs: fold xfs_bmap_alloc_userdata into xfs_bmapi_allocate
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-12-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 865469cd41bce2b04bef9539cbf70676878bc8df upstream.
+
+[backport: dependency of 6aac770]
+
+Userdata and metadata allocations end up in the same allocation helpers.
+Remove the separate xfs_bmap_alloc_userdata function to make this more
+clear.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |   73 ++++++++++++++++++-----------------------------
+ 1 file changed, 28 insertions(+), 45 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -4078,43 +4078,6 @@ out:
+ }
+ static int
+-xfs_bmap_alloc_userdata(
+-      struct xfs_bmalloca     *bma)
+-{
+-      struct xfs_mount        *mp = bma->ip->i_mount;
+-      int                     whichfork = xfs_bmapi_whichfork(bma->flags);
+-      int                     error;
+-
+-      /*
+-       * Set the data type being allocated. For the data fork, the first data
+-       * in the file is treated differently to all other allocations. For the
+-       * attribute fork, we only need to ensure the allocated range is not on
+-       * the busy list.
+-       */
+-      bma->datatype = XFS_ALLOC_NOBUSY;
+-      if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
+-              bma->datatype |= XFS_ALLOC_USERDATA;
+-              if (bma->offset == 0)
+-                      bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+-
+-              if (mp->m_dalign && bma->length >= mp->m_dalign) {
+-                      error = xfs_bmap_isaeof(bma, whichfork);
+-                      if (error)
+-                              return error;
+-              }
+-
+-              if (XFS_IS_REALTIME_INODE(bma->ip))
+-                      return xfs_bmap_rtalloc(bma);
+-      }
+-
+-      if (unlikely(XFS_TEST_ERROR(false, mp,
+-                      XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+-              return xfs_bmap_exact_minlen_extent_alloc(bma);
+-
+-      return xfs_bmap_btalloc(bma);
+-}
+-
+-static int
+ xfs_bmapi_allocate(
+       struct xfs_bmalloca     *bma)
+ {
+@@ -4147,15 +4110,35 @@ xfs_bmapi_allocate(
+       else
+               bma->minlen = 1;
+-      if (bma->flags & XFS_BMAPI_METADATA) {
+-              if (unlikely(XFS_TEST_ERROR(false, mp,
+-                              XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
+-                      error = xfs_bmap_exact_minlen_extent_alloc(bma);
+-              else
+-                      error = xfs_bmap_btalloc(bma);
+-      } else {
+-              error = xfs_bmap_alloc_userdata(bma);
++      if (!(bma->flags & XFS_BMAPI_METADATA)) {
++              /*
++               * For the data and COW fork, the first data in the file is
++               * treated differently to all other allocations. For the
++               * attribute fork, we only need to ensure the allocated range
++               * is not on the busy list.
++               */
++              bma->datatype = XFS_ALLOC_NOBUSY;
++              if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
++                      bma->datatype |= XFS_ALLOC_USERDATA;
++                      if (bma->offset == 0)
++                              bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
++
++                      if (mp->m_dalign && bma->length >= mp->m_dalign) {
++                              error = xfs_bmap_isaeof(bma, whichfork);
++                              if (error)
++                                      return error;
++                      }
++              }
+       }
++
++      if ((bma->datatype & XFS_ALLOC_USERDATA) &&
++          XFS_IS_REALTIME_INODE(bma->ip))
++              error = xfs_bmap_rtalloc(bma);
++      else if (unlikely(XFS_TEST_ERROR(false, mp,
++                      XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
++              error = xfs_bmap_exact_minlen_extent_alloc(bma);
++      else
++              error = xfs_bmap_btalloc(bma);
+       if (error)
+               return error;
+       if (bma->blkno == NULLFSBLOCK)
diff --git a/queue-6.6/xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch b/queue-6.6/xfs-merge-xfs_attr_leaf_try_add-into-xfs_attr_leaf_addname.patch
new file mode 100644 (file)
index 0000000..a1bd576
--- /dev/null
@@ -0,0 +1,260 @@
+From stable+bounces-113973-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:56 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:08 -0800
+Subject: xfs: merge xfs_attr_leaf_try_add into xfs_attr_leaf_addname
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-8-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit b1c649da15c2e4c86344c8e5af69c8afa215efec upstream.
+
+[backport: dependency of a5f7334 and b3f4e84]
+
+xfs_attr_leaf_try_add is only called by xfs_attr_leaf_addname, and
+merging the two will simplify a following error handling fix.
+
+To facilitate this, move the remote block state save/restore helpers up in
+the file so that they don't need forward declarations.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c |  176 +++++++++++++++++++----------------------------
+ 1 file changed, 74 insertions(+), 102 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -50,7 +50,6 @@ STATIC int xfs_attr_shortform_addname(xf
+ STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
+ STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
+ STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
+-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
+ /*
+  * Internal routines when attribute list is more than one block.
+@@ -401,6 +400,33 @@ out:
+       return error;
+ }
++/* Save the current remote block info and clear the current pointers. */
++static void
++xfs_attr_save_rmt_blk(
++      struct xfs_da_args      *args)
++{
++      args->blkno2 = args->blkno;
++      args->index2 = args->index;
++      args->rmtblkno2 = args->rmtblkno;
++      args->rmtblkcnt2 = args->rmtblkcnt;
++      args->rmtvaluelen2 = args->rmtvaluelen;
++      args->rmtblkno = 0;
++      args->rmtblkcnt = 0;
++      args->rmtvaluelen = 0;
++}
++
++/* Set stored info about a remote block */
++static void
++xfs_attr_restore_rmt_blk(
++      struct xfs_da_args      *args)
++{
++      args->blkno = args->blkno2;
++      args->index = args->index2;
++      args->rmtblkno = args->rmtblkno2;
++      args->rmtblkcnt = args->rmtblkcnt2;
++      args->rmtvaluelen = args->rmtvaluelen2;
++}
++
+ /*
+  * Handle the state change on completion of a multi-state attr operation.
+  *
+@@ -428,49 +454,77 @@ xfs_attr_complete_op(
+       return XFS_DAS_DONE;
+ }
++/*
++ * Try to add an attribute to an inode in leaf form.
++ */
+ static int
+ xfs_attr_leaf_addname(
+       struct xfs_attr_intent  *attr)
+ {
+       struct xfs_da_args      *args = attr->xattri_da_args;
++      struct xfs_buf          *bp;
+       int                     error;
+       ASSERT(xfs_attr_is_leaf(args->dp));
++      error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
++      if (error)
++              return error;
++
+       /*
+-       * Use the leaf buffer we may already hold locked as a result of
+-       * a sf-to-leaf conversion.
++       * Look up the xattr name to set the insertion point for the new xattr.
+        */
+-      error = xfs_attr_leaf_try_add(args);
+-
+-      if (error == -ENOSPC) {
+-              error = xfs_attr3_leaf_to_node(args);
+-              if (error)
+-                      return error;
++      error = xfs_attr3_leaf_lookup_int(bp, args);
++      switch (error) {
++      case -ENOATTR:
++              if (args->op_flags & XFS_DA_OP_REPLACE)
++                      goto out_brelse;
++              break;
++      case -EEXIST:
++              if (!(args->op_flags & XFS_DA_OP_REPLACE))
++                      goto out_brelse;
++              trace_xfs_attr_leaf_replace(args);
+               /*
+-               * We're not in leaf format anymore, so roll the transaction and
+-               * retry the add to the newly allocated node block.
++               * Save the existing remote attr state so that the current
++               * values reflect the state of the new attribute we are about to
++               * add, not the attribute we just found and will remove later.
+                */
+-              attr->xattri_dela_state = XFS_DAS_NODE_ADD;
+-              goto out;
++              xfs_attr_save_rmt_blk(args);
++              break;
++      case 0:
++              break;
++      default:
++              goto out_brelse;
+       }
+-      if (error)
+-              return error;
+       /*
+        * We need to commit and roll if we need to allocate remote xattr blocks
+        * or perform more xattr manipulations. Otherwise there is nothing more
+        * to do and we can return success.
+        */
+-      if (args->rmtblkno)
++      error = xfs_attr3_leaf_add(bp, args);
++      if (error) {
++              if (error != -ENOSPC)
++                      return error;
++              error = xfs_attr3_leaf_to_node(args);
++              if (error)
++                      return error;
++
++              attr->xattri_dela_state = XFS_DAS_NODE_ADD;
++      } else if (args->rmtblkno) {
+               attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
+-      else
+-              attr->xattri_dela_state = xfs_attr_complete_op(attr,
+-                                                      XFS_DAS_LEAF_REPLACE);
+-out:
++      } else {
++              attr->xattri_dela_state =
++                      xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE);
++      }
++
+       trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+       return error;
++
++out_brelse:
++      xfs_trans_brelse(args->trans, bp);
++      return error;
+ }
+ /*
+@@ -1164,88 +1218,6 @@ xfs_attr_shortform_addname(
+  * External routines when attribute list is one block
+  *========================================================================*/
+-/* Save the current remote block info and clear the current pointers. */
+-static void
+-xfs_attr_save_rmt_blk(
+-      struct xfs_da_args      *args)
+-{
+-      args->blkno2 = args->blkno;
+-      args->index2 = args->index;
+-      args->rmtblkno2 = args->rmtblkno;
+-      args->rmtblkcnt2 = args->rmtblkcnt;
+-      args->rmtvaluelen2 = args->rmtvaluelen;
+-      args->rmtblkno = 0;
+-      args->rmtblkcnt = 0;
+-      args->rmtvaluelen = 0;
+-}
+-
+-/* Set stored info about a remote block */
+-static void
+-xfs_attr_restore_rmt_blk(
+-      struct xfs_da_args      *args)
+-{
+-      args->blkno = args->blkno2;
+-      args->index = args->index2;
+-      args->rmtblkno = args->rmtblkno2;
+-      args->rmtblkcnt = args->rmtblkcnt2;
+-      args->rmtvaluelen = args->rmtvaluelen2;
+-}
+-
+-/*
+- * Tries to add an attribute to an inode in leaf form
+- *
+- * This function is meant to execute as part of a delayed operation and leaves
+- * the transaction handling to the caller.  On success the attribute is added
+- * and the inode and transaction are left dirty.  If there is not enough space,
+- * the attr data is converted to node format and -ENOSPC is returned. Caller is
+- * responsible for handling the dirty inode and transaction or adding the attr
+- * in node format.
+- */
+-STATIC int
+-xfs_attr_leaf_try_add(
+-      struct xfs_da_args      *args)
+-{
+-      struct xfs_buf          *bp;
+-      int                     error;
+-
+-      error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+-      if (error)
+-              return error;
+-
+-      /*
+-       * Look up the xattr name to set the insertion point for the new xattr.
+-       */
+-      error = xfs_attr3_leaf_lookup_int(bp, args);
+-      switch (error) {
+-      case -ENOATTR:
+-              if (args->op_flags & XFS_DA_OP_REPLACE)
+-                      goto out_brelse;
+-              break;
+-      case -EEXIST:
+-              if (!(args->op_flags & XFS_DA_OP_REPLACE))
+-                      goto out_brelse;
+-
+-              trace_xfs_attr_leaf_replace(args);
+-              /*
+-               * Save the existing remote attr state so that the current
+-               * values reflect the state of the new attribute we are about to
+-               * add, not the attribute we just found and will remove later.
+-               */
+-              xfs_attr_save_rmt_blk(args);
+-              break;
+-      case 0:
+-              break;
+-      default:
+-              goto out_brelse;
+-      }
+-
+-      return xfs_attr3_leaf_add(bp, args);
+-
+-out_brelse:
+-      xfs_trans_brelse(args->trans, bp);
+-      return error;
+-}
+-
+ /*
+  * Return EEXIST if attr is found, or ENOATTR if not
+  */
diff --git a/queue-6.6/xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch b/queue-6.6/xfs-pass-the-exact-range-to-initialize-to-xfs_initialize_perag.patch
new file mode 100644 (file)
index 0000000..6c6e4f3
--- /dev/null
@@ -0,0 +1,193 @@
+From stable+bounces-113983-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:16 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:18 -0800
+Subject: xfs: pass the exact range to initialize to xfs_initialize_perag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-18-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 82742f8c3f1a93787a05a00aca50c2a565231f84 upstream.
+
+[backport: dependency of 6a18765b]
+
+Currently only the new agcount is passed to xfs_initialize_perag, which
+requires lookups of existing AGs to skip them and complicates error
+handling.  Also pass the previous agcount so that the range that
+xfs_initialize_perag operates on is exactly defined.  That way the
+extra lookups can be avoided, and error handling can clean up the
+exact range from the old count to the last added perag structure.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c   |   28 ++++++----------------------
+ fs/xfs/libxfs/xfs_ag.h   |    5 +++--
+ fs/xfs/xfs_fsops.c       |   18 ++++++++----------
+ fs/xfs/xfs_log_recover.c |    5 +++--
+ fs/xfs/xfs_mount.c       |    4 ++--
+ 5 files changed, 22 insertions(+), 38 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -360,27 +360,16 @@ xfs_free_unused_perag_range(
+ int
+ xfs_initialize_perag(
+       struct xfs_mount        *mp,
+-      xfs_agnumber_t          agcount,
++      xfs_agnumber_t          old_agcount,
++      xfs_agnumber_t          new_agcount,
+       xfs_rfsblock_t          dblocks,
+       xfs_agnumber_t          *maxagi)
+ {
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          index;
+-      xfs_agnumber_t          first_initialised = NULLAGNUMBER;
+       int                     error;
+-      /*
+-       * Walk the current per-ag tree so we don't try to initialise AGs
+-       * that already exist (growfs case). Allocate and insert all the
+-       * AGs we don't find ready for initialisation.
+-       */
+-      for (index = 0; index < agcount; index++) {
+-              pag = xfs_perag_get(mp, index);
+-              if (pag) {
+-                      xfs_perag_put(pag);
+-                      continue;
+-              }
+-
++      for (index = old_agcount; index < new_agcount; index++) {
+               pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+               if (!pag) {
+                       error = -ENOMEM;
+@@ -425,21 +414,17 @@ xfs_initialize_perag(
+               /* Active ref owned by mount indicates AG is online. */
+               atomic_set(&pag->pag_active_ref, 1);
+-              /* first new pag is fully initialized */
+-              if (first_initialised == NULLAGNUMBER)
+-                      first_initialised = index;
+-
+               /*
+                * Pre-calculated geometry
+                */
+-              pag->block_count = __xfs_ag_block_count(mp, index, agcount,
++              pag->block_count = __xfs_ag_block_count(mp, index, new_agcount,
+                               dblocks);
+               pag->min_block = XFS_AGFL_BLOCK(mp);
+               __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+                               &pag->agino_max);
+       }
+-      index = xfs_set_inode_alloc(mp, agcount);
++      index = xfs_set_inode_alloc(mp, new_agcount);
+       if (maxagi)
+               *maxagi = index;
+@@ -455,8 +440,7 @@ out_remove_pag:
+ out_free_pag:
+       kmem_free(pag);
+ out_unwind_new_pags:
+-      /* unwind any prior newly initialized pags */
+-      xfs_free_unused_perag_range(mp, first_initialised, agcount);
++      xfs_free_unused_perag_range(mp, old_agcount, index);
+       return error;
+ }
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -135,8 +135,9 @@ __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_
+ void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
+                       xfs_agnumber_t agend);
+-int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
+-                      xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
++int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount,
++              xfs_agnumber_t agcount, xfs_rfsblock_t dcount,
++              xfs_agnumber_t *maxagi);
+ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
+ void xfs_free_perag(struct xfs_mount *mp);
+--- a/fs/xfs/xfs_fsops.c
++++ b/fs/xfs/xfs_fsops.c
+@@ -87,6 +87,7 @@ xfs_growfs_data_private(
+       struct xfs_mount        *mp,            /* mount point for filesystem */
+       struct xfs_growfs_data  *in)            /* growfs data input struct */
+ {
++      xfs_agnumber_t          oagcount = mp->m_sb.sb_agcount;
+       struct xfs_buf          *bp;
+       int                     error;
+       xfs_agnumber_t          nagcount;
+@@ -94,7 +95,6 @@ xfs_growfs_data_private(
+       xfs_rfsblock_t          nb, nb_div, nb_mod;
+       int64_t                 delta;
+       bool                    lastag_extended = false;
+-      xfs_agnumber_t          oagcount;
+       struct xfs_trans        *tp;
+       struct aghdr_init_data  id = {};
+       struct xfs_perag        *last_pag;
+@@ -138,16 +138,14 @@ xfs_growfs_data_private(
+       if (delta == 0)
+               return 0;
+-      oagcount = mp->m_sb.sb_agcount;
+-      /* allocate the new per-ag structures */
+-      if (nagcount > oagcount) {
+-              error = xfs_initialize_perag(mp, nagcount, nb, &nagimax);
+-              if (error)
+-                      return error;
+-      } else if (nagcount < oagcount) {
+-              /* TODO: shrinking the entire AGs hasn't yet completed */
++      /* TODO: shrinking the entire AGs hasn't yet completed */
++      if (nagcount < oagcount)
+               return -EINVAL;
+-      }
++
++      /* allocate the new per-ag structures */
++      error = xfs_initialize_perag(mp, oagcount, nagcount, nb, &nagimax);
++      if (error)
++              return error;
+       if (delta > 0)
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -3317,6 +3317,7 @@ xlog_do_recover(
+       struct xfs_mount        *mp = log->l_mp;
+       struct xfs_buf          *bp = mp->m_sb_bp;
+       struct xfs_sb           *sbp = &mp->m_sb;
++      xfs_agnumber_t          orig_agcount = sbp->sb_agcount;
+       int                     error;
+       trace_xfs_log_recover(log, head_blk, tail_blk);
+@@ -3365,8 +3366,8 @@ xlog_do_recover(
+       /* re-initialise in-core superblock and geometry structures */
+       mp->m_features |= xfs_sb_version_to_features(sbp);
+       xfs_reinit_percpu_counters(mp);
+-      error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks,
+-                      &mp->m_maxagi);
++      error = xfs_initialize_perag(mp, orig_agcount, sbp->sb_agcount,
++                      sbp->sb_dblocks, &mp->m_maxagi);
+       if (error) {
+               xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+               return error;
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -797,8 +797,8 @@ xfs_mountfs(
+       /*
+        * Allocate and initialize the per-ag data.
+        */
+-      error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks,
+-                      &mp->m_maxagi);
++      error = xfs_initialize_perag(mp, 0, sbp->sb_agcount,
++                      mp->m_sb.sb_dblocks, &mp->m_maxagi);
+       if (error) {
+               xfs_warn(mp, "Failed per-ag init: %d", error);
+               goto out_free_dir;
diff --git a/queue-6.6/xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch b/queue-6.6/xfs-reduce-unnecessary-searches-when-searching-for-the-best-extents.patch
new file mode 100644 (file)
index 0000000..60edee0
--- /dev/null
@@ -0,0 +1,61 @@
+From stable+bounces-113987-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:28 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:23 -0800
+Subject: xfs: Reduce unnecessary searches when searching for the best extents
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-23-catherine.hoang@oracle.com>
+
+From: Chi Zhiling <chizhiling@kylinos.cn>
+
+commit 3ef22684038aa577c10972ee9c6a2455f5fac941 upstream.
+
+Recently, we found that the CPU spent a lot of time in
+xfs_alloc_ag_vextent_size when the filesystem has millions of fragmented
+spaces.
+
+The reason is that we conducted much extra searching for extents that
+could not yield a better result, and these searches would cost a lot of
+time when there were millions of extents to search through. Even if we
+get the same result length, we don't switch our choice to the new one,
+so we can definitely terminate the search early.
+
+Since the result length cannot exceed the found length, when the found
+length equals the best result length we already have, we can conclude
+the search.
+
+We did a test in that filesystem:
+[root@localhost ~]# xfs_db -c freesp /dev/vdb
+   from      to extents  blocks    pct
+      1       1     215     215   0.01
+      2       3  994476 1988952  99.99
+
+Before this patch:
+ 0)               |  xfs_alloc_ag_vextent_size [xfs]() {
+ 0) * 15597.94 us |  }
+
+After this patch:
+ 0)               |  xfs_alloc_ag_vextent_size [xfs]() {
+ 0)   19.176 us    |  }
+
+Signed-off-by: Chi Zhiling <chizhiling@kylinos.cn>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -1783,7 +1783,7 @@ restart:
+                               error = -EFSCORRUPTED;
+                               goto error0;
+                       }
+-                      if (flen < bestrlen)
++                      if (flen <= bestrlen)
+                               break;
+                       busy = xfs_alloc_compute_aligned(args, fbno, flen,
+                                       &rbno, &rlen, &busy_gen);
diff --git a/queue-6.6/xfs-remove-empty-declartion-in-header-file.patch b/queue-6.6/xfs-remove-empty-declartion-in-header-file.patch
new file mode 100644 (file)
index 0000000..38c39df
--- /dev/null
@@ -0,0 +1,35 @@
+From stable+bounces-113982-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:13 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:17 -0800
+Subject: xfs: Remove empty declartion in header file
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-17-catherine.hoang@oracle.com>
+
+From: Zhang Zekun <zhangzekun11@huawei.com>
+
+commit f6225eebd76f371dab98b4d1c1a7c1e255190aef upstream.
+
+The definition of xfs_attr_use_log_assist() has been removed since
+commit d9c61ccb3b09 ("xfs: move xfs_attr_use_log_assist out of xfs_log.c").
+So, remove the empty declaration from the header file.
+
+Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.h |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -161,6 +161,5 @@ bool         xlog_force_shutdown(struct xlog *
+ void xlog_use_incompat_feat(struct xlog *log);
+ void xlog_drop_incompat_feat(struct xlog *log);
+-int xfs_attr_use_log_assist(struct xfs_mount *mp);
+ #endif        /* __XFS_LOG_H__ */
diff --git a/queue-6.6/xfs-return-bool-from-xfs_attr3_leaf_add.patch b/queue-6.6/xfs-return-bool-from-xfs_attr3_leaf_add.patch
new file mode 100644 (file)
index 0000000..7be996d
--- /dev/null
@@ -0,0 +1,222 @@
+From stable+bounces-113974-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:56 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:09 -0800
+Subject: xfs: return bool from xfs_attr3_leaf_add
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-9-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 346c1d46d4c631c0c88592d371f585214d714da4 upstream.
+
+[backport: dependency of a5f7334 and b3f4e84]
+
+xfs_attr3_leaf_add only has two potential return values, indicating if the
+entry could be added or not.  Replace the errno return with a bool so that
+ENOSPC from it can't easily be confused with a real ENOSPC.
+
+Remove the return value from the xfs_attr3_leaf_add_work helper entirely,
+as it always returns 0.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c      |   13 +++++--------
+ fs/xfs/libxfs/xfs_attr_leaf.c |   37 +++++++++++++++++++------------------
+ fs/xfs/libxfs/xfs_attr_leaf.h |    2 +-
+ 3 files changed, 25 insertions(+), 27 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -503,10 +503,7 @@ xfs_attr_leaf_addname(
+        * or perform more xattr manipulations. Otherwise there is nothing more
+        * to do and we can return success.
+        */
+-      error = xfs_attr3_leaf_add(bp, args);
+-      if (error) {
+-              if (error != -ENOSPC)
+-                      return error;
++      if (!xfs_attr3_leaf_add(bp, args)) {
+               error = xfs_attr3_leaf_to_node(args);
+               if (error)
+                       return error;
+@@ -520,7 +517,7 @@ xfs_attr_leaf_addname(
+       }
+       trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
+-      return error;
++      return 0;
+ out_brelse:
+       xfs_trans_brelse(args->trans, bp);
+@@ -1393,21 +1390,21 @@ xfs_attr_node_try_addname(
+ {
+       struct xfs_da_state             *state = attr->xattri_da_state;
+       struct xfs_da_state_blk         *blk;
+-      int                             error;
++      int                             error = 0;
+       trace_xfs_attr_node_addname(state->args);
+       blk = &state->path.blk[state->path.active-1];
+       ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+-      error = xfs_attr3_leaf_add(blk->bp, state->args);
+-      if (error == -ENOSPC) {
++      if (!xfs_attr3_leaf_add(blk->bp, state->args)) {
+               if (state->path.active == 1) {
+                       /*
+                        * Its really a single leaf node, but it had
+                        * out-of-line values so it looked like it *might*
+                        * have been a b-tree. Let the caller deal with this.
+                        */
++                      error = -ENOSPC;
+                       goto out;
+               }
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -46,7 +46,7 @@
+  */
+ STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
+                                xfs_dablk_t which_block, struct xfs_buf **bpp);
+-STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
++STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
+                                  struct xfs_attr3_icleaf_hdr *ichdr,
+                                  struct xfs_da_args *args, int freemap_index);
+ STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
+@@ -990,10 +990,8 @@ xfs_attr_shortform_to_leaf(
+               }
+               error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
+               ASSERT(error == -ENOATTR);
+-              error = xfs_attr3_leaf_add(bp, &nargs);
+-              ASSERT(error != -ENOSPC);
+-              if (error)
+-                      goto out;
++              if (!xfs_attr3_leaf_add(bp, &nargs))
++                      ASSERT(0);
+               sfe = xfs_attr_sf_nextentry(sfe);
+       }
+       error = 0;
+@@ -1349,8 +1347,9 @@ xfs_attr3_leaf_split(
+       struct xfs_da_state_blk *oldblk,
+       struct xfs_da_state_blk *newblk)
+ {
+-      xfs_dablk_t blkno;
+-      int error;
++      bool                    added;
++      xfs_dablk_t             blkno;
++      int                     error;
+       trace_xfs_attr_leaf_split(state->args);
+@@ -1385,10 +1384,10 @@ xfs_attr3_leaf_split(
+        */
+       if (state->inleaf) {
+               trace_xfs_attr_leaf_add_old(state->args);
+-              error = xfs_attr3_leaf_add(oldblk->bp, state->args);
++              added = xfs_attr3_leaf_add(oldblk->bp, state->args);
+       } else {
+               trace_xfs_attr_leaf_add_new(state->args);
+-              error = xfs_attr3_leaf_add(newblk->bp, state->args);
++              added = xfs_attr3_leaf_add(newblk->bp, state->args);
+       }
+       /*
+@@ -1396,13 +1395,15 @@ xfs_attr3_leaf_split(
+        */
+       oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
+       newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
+-      return error;
++      if (!added)
++              return -ENOSPC;
++      return 0;
+ }
+ /*
+  * Add a name to the leaf attribute list structure.
+  */
+-int
++bool
+ xfs_attr3_leaf_add(
+       struct xfs_buf          *bp,
+       struct xfs_da_args      *args)
+@@ -1411,6 +1412,7 @@ xfs_attr3_leaf_add(
+       struct xfs_attr3_icleaf_hdr ichdr;
+       int                     tablesize;
+       int                     entsize;
++      bool                    added = true;
+       int                     sum;
+       int                     tmp;
+       int                     i;
+@@ -1439,7 +1441,7 @@ xfs_attr3_leaf_add(
+               if (ichdr.freemap[i].base < ichdr.firstused)
+                       tmp += sizeof(xfs_attr_leaf_entry_t);
+               if (ichdr.freemap[i].size >= tmp) {
+-                      tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
++                      xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
+                       goto out_log_hdr;
+               }
+               sum += ichdr.freemap[i].size;
+@@ -1451,7 +1453,7 @@ xfs_attr3_leaf_add(
+        * no good and we should just give up.
+        */
+       if (!ichdr.holes && sum < entsize)
+-              return -ENOSPC;
++              return false;
+       /*
+        * Compact the entries to coalesce free space.
+@@ -1464,24 +1466,24 @@ xfs_attr3_leaf_add(
+        * free region, in freemap[0].  If it is not big enough, give up.
+        */
+       if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
+-              tmp = -ENOSPC;
++              added = false;
+               goto out_log_hdr;
+       }
+-      tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
++      xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
+ out_log_hdr:
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
+       xfs_trans_log_buf(args->trans, bp,
+               XFS_DA_LOGRANGE(leaf, &leaf->hdr,
+                               xfs_attr3_leaf_hdr_size(leaf)));
+-      return tmp;
++      return added;
+ }
+ /*
+  * Add a name to a leaf attribute list structure.
+  */
+-STATIC int
++STATIC void
+ xfs_attr3_leaf_add_work(
+       struct xfs_buf          *bp,
+       struct xfs_attr3_icleaf_hdr *ichdr,
+@@ -1599,7 +1601,6 @@ xfs_attr3_leaf_add_work(
+               }
+       }
+       ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
+-      return 0;
+ }
+ /*
+--- a/fs/xfs/libxfs/xfs_attr_leaf.h
++++ b/fs/xfs/libxfs/xfs_attr_leaf.h
+@@ -78,7 +78,7 @@ int  xfs_attr3_leaf_split(struct xfs_da_s
+ int   xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
+                                       struct xfs_da_args *args);
+ int   xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
+-int   xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
++bool  xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
+                                struct xfs_da_args *args);
+ int   xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
+                                   struct xfs_da_args *args);
diff --git a/queue-6.6/xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch b/queue-6.6/xfs-skip-background-cowblock-trims-on-inodes-open-for-write.patch
new file mode 100644 (file)
index 0000000..d567d6c
--- /dev/null
@@ -0,0 +1,129 @@
+From stable+bounces-113971-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:53 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:06 -0800
+Subject: xfs: skip background cowblock trims on inodes open for write
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-6-catherine.hoang@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 90a71daaf73f5d39bb0cbb3c7ab6af942fe6233e upstream.
+
+The background blockgc scanner runs on a 5m interval by default and
+trims preallocation (post-eof and cow fork) from inodes that are
+otherwise idle. Idle effectively means that iolock can be acquired
+without blocking and that the inode has no dirty pagecache or I/O in
+flight.
+
+This simple mechanism and heuristic has worked fairly well for
+post-eof speculative preallocations. Support for reflink and COW
+fork preallocations came sometime later and plugged into the same
+mechanism, with similar heuristics. Some recent testing has shown
+that COW fork preallocation may be notably more sensitive to blockgc
+processing than post-eof preallocation, however.
+
+For example, consider an 8GB reflinked file with a COW extent size
+hint of 1MB. A worst case fully randomized overwrite of this file
+results in ~8k extents of an average size of ~1MB. If the same
+workload is interrupted a couple times for blockgc processing
+(assuming the file goes idle), the resulting extent count explodes
+to over 100k extents with an average size <100kB. This is
+significantly worse than ideal and essentially defeats the COW
+extent size hint mechanism.
+
+While this particular test is instrumented, it reflects a fairly
+reasonable pattern in practice where random I/Os might spread out
+over a large period of time with varying periods of (in)activity.
+For example, consider a cloned disk image file for a VM or container
+with long uptime and variable and bursty usage. A background blockgc
+scan that races and processes the image file when it happens to be
+clean and idle can have a significant effect on the future
+fragmentation level of the file, even when still in use.
+
+To help combat this, update the heuristic to skip cowblocks inodes
+that are currently opened for write access during non-sync blockgc
+scans. This allows COW fork preallocations to persist for as long as
+possible unless otherwise needed for functional purposes (i.e. a
+sync scan), the file is idle and closed, or the inode is being
+evicted from cache. While here, update the comments to help
+distinguish performance oriented heuristics from the logic that
+exists to maintain functional correctness.
+
+Suggested-by: Darrick Wong <djwong@kernel.org>
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c |   31 +++++++++++++++++++++++--------
+ 1 file changed, 23 insertions(+), 8 deletions(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -1234,14 +1234,17 @@ xfs_inode_clear_eofblocks_tag(
+ }
+ /*
+- * Set ourselves up to free CoW blocks from this file.  If it's already clean
+- * then we can bail out quickly, but otherwise we must back off if the file
+- * is undergoing some kind of write.
++ * Prepare to free COW fork blocks from an inode.
+  */
+ static bool
+ xfs_prep_free_cowblocks(
+-      struct xfs_inode        *ip)
++      struct xfs_inode        *ip,
++      struct xfs_icwalk       *icw)
+ {
++      bool                    sync;
++
++      sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
++
+       /*
+        * Just clear the tag if we have an empty cow fork or none at all. It's
+        * possible the inode was fully unshared since it was originally tagged.
+@@ -1253,9 +1256,21 @@ xfs_prep_free_cowblocks(
+       }
+       /*
+-       * If the mapping is dirty or under writeback we cannot touch the
+-       * CoW fork.  Leave it alone if we're in the midst of a directio.
++       * A cowblocks trim of an inode can have a significant effect on
++       * fragmentation even when a reasonable COW extent size hint is set.
++       * Therefore, we prefer to not process cowblocks unless they are clean
++       * and idle. We can never process a cowblocks inode that is dirty or has
++       * in-flight I/O under any circumstances, because outstanding writeback
++       * or dio expects targeted COW fork blocks exist through write
++       * completion where they can be remapped into the data fork.
++       *
++       * Therefore, the heuristic used here is to never process inodes
++       * currently opened for write from background (i.e. non-sync) scans. For
++       * sync scans, use the pagecache/dio state of the inode to ensure we
++       * never free COW fork blocks out from under pending I/O.
+        */
++      if (!sync && inode_is_open_for_write(VFS_I(ip)))
++              return false;
+       if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
+           mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+           mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+@@ -1291,7 +1306,7 @@ xfs_inode_free_cowblocks(
+       if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
+               return 0;
+-      if (!xfs_prep_free_cowblocks(ip))
++      if (!xfs_prep_free_cowblocks(ip, icw))
+               return 0;
+       if (!xfs_icwalk_match(ip, icw))
+@@ -1320,7 +1335,7 @@ xfs_inode_free_cowblocks(
+        * Check again, nobody else should be able to dirty blocks or change
+        * the reflink iflag now that we have the first two locks held.
+        */
+-      if (xfs_prep_free_cowblocks(ip))
++      if (xfs_prep_free_cowblocks(ip, icw))
+               ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
+       return ret;
+ }
diff --git a/queue-6.6/xfs-streamline-xfs_filestream_pick_ag.patch b/queue-6.6/xfs-streamline-xfs_filestream_pick_ag.patch
new file mode 100644 (file)
index 0000000..b64ccb9
--- /dev/null
@@ -0,0 +1,164 @@
+From stable+bounces-113988-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:29 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:24 -0800
+Subject: xfs: streamline xfs_filestream_pick_ag
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-24-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 81a1e1c32ef474c20ccb9f730afe1ac25b1c62a4 upstream.
+
+Directly return the error from xfs_bmap_longest_free_extent instead
+of breaking from the loop and handling it there, and use a done
+label to directly jump to the exit when we found a suitable perag
+structure to reduce the indentation level and pag/max_pag check
+complexity in the tail of the function.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_filestream.c |   96 +++++++++++++++++++++++-------------------------
+ 1 file changed, 46 insertions(+), 50 deletions(-)
+
+--- a/fs/xfs/xfs_filestream.c
++++ b/fs/xfs/xfs_filestream.c
+@@ -67,22 +67,28 @@ xfs_filestream_pick_ag(
+       xfs_extlen_t            minfree, maxfree = 0;
+       xfs_agnumber_t          agno;
+       bool                    first_pass = true;
+-      int                     err;
+       /* 2% of an AG's blocks must be free for it to be chosen. */
+       minfree = mp->m_sb.sb_agblocks / 50;
+ restart:
+       for_each_perag_wrap(mp, start_agno, agno, pag) {
++              int             err;
++
+               trace_xfs_filestream_scan(pag, pino);
++
+               *longest = 0;
+               err = xfs_bmap_longest_free_extent(pag, NULL, longest);
+               if (err) {
+-                      if (err != -EAGAIN)
+-                              break;
+-                      /* Couldn't lock the AGF, skip this AG. */
+-                      err = 0;
+-                      continue;
++                      if (err == -EAGAIN) {
++                              /* Couldn't lock the AGF, skip this AG. */
++                              err = 0;
++                              continue;
++                      }
++                      xfs_perag_rele(pag);
++                      if (max_pag)
++                              xfs_perag_rele(max_pag);
++                      return err;
+               }
+               /* Keep track of the AG with the most free blocks. */
+@@ -107,7 +113,9 @@ restart:
+                            !(flags & XFS_PICK_USERDATA) ||
+                            (flags & XFS_PICK_LOWSPACE))) {
+                               /* Break out, retaining the reference on the AG. */
+-                              break;
++                              if (max_pag)
++                                      xfs_perag_rele(max_pag);
++                              goto done;
+                       }
+               }
+@@ -115,56 +123,44 @@ restart:
+               atomic_dec(&pag->pagf_fstrms);
+       }
+-      if (err) {
+-              xfs_perag_rele(pag);
+-              if (max_pag)
+-                      xfs_perag_rele(max_pag);
+-              return err;
++      /*
++       * Allow a second pass to give xfs_bmap_longest_free_extent() another
++       * attempt at locking AGFs that it might have skipped over before we
++       * fail.
++       */
++      if (first_pass) {
++              first_pass = false;
++              goto restart;
+       }
+-      if (!pag) {
+-              /*
+-               * Allow a second pass to give xfs_bmap_longest_free_extent()
+-               * another attempt at locking AGFs that it might have skipped
+-               * over before we fail.
+-               */
+-              if (first_pass) {
+-                      first_pass = false;
+-                      goto restart;
+-              }
+-
+-              /*
+-               * We must be low on data space, so run a final lowspace
+-               * optimised selection pass if we haven't already.
+-               */
+-              if (!(flags & XFS_PICK_LOWSPACE)) {
+-                      flags |= XFS_PICK_LOWSPACE;
+-                      goto restart;
+-              }
+-
+-              /*
+-               * No unassociated AGs are available, so select the AG with the
+-               * most free space, regardless of whether it's already in use by
+-               * another filestream. It none suit, just use whatever AG we can
+-               * grab.
+-               */
+-              if (!max_pag) {
+-                      for_each_perag_wrap(args->mp, 0, start_agno, pag) {
+-                              max_pag = pag;
+-                              break;
+-                      }
++      /*
++       * We must be low on data space, so run a final lowspace optimised
++       * selection pass if we haven't already.
++       */
++      if (!(flags & XFS_PICK_LOWSPACE)) {
++              flags |= XFS_PICK_LOWSPACE;
++              goto restart;
++      }
+-                      /* Bail if there are no AGs at all to select from. */
+-                      if (!max_pag)
+-                              return -ENOSPC;
++      /*
++       * No unassociated AGs are available, so select the AG with the most
++       * free space, regardless of whether it's already in use by another
++       * filestream. If none suit, just use whatever AG we can grab.
++       */
++      if (!max_pag) {
++              for_each_perag_wrap(args->mp, 0, start_agno, pag) {
++                      max_pag = pag;
++                      break;
+               }
+-              pag = max_pag;
+-              atomic_inc(&pag->pagf_fstrms);
+-      } else if (max_pag) {
+-              xfs_perag_rele(max_pag);
++              /* Bail if there are no AGs at all to select from. */
++              if (!max_pag)
++                      return -ENOSPC;
+       }
++      pag = max_pag;
++      atomic_inc(&pag->pagf_fstrms);
++done:
+       trace_xfs_filestream_pick(pag, pino);
+       args->pag = pag;
+       return 0;
diff --git a/queue-6.6/xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch b/queue-6.6/xfs-support-lowmode-allocations-in-xfs_bmap_exact_minlen_extent_alloc.patch
new file mode 100644 (file)
index 0000000..5488296
--- /dev/null
@@ -0,0 +1,46 @@
+From stable+bounces-113980-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:12 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:15 -0800
+Subject: xfs: support lowmode allocations in xfs_bmap_exact_minlen_extent_alloc
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-15-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6aac77059881e4419df499392c995bf02fb9630b upstream.
+
+Currently the debug-only xfs_bmap_exact_minlen_extent_alloc allocation
+variant fails to drop into the lowmode last resort allocator, and
+thus can sometimes fail allocations for which the caller has a
+transaction block reservation.
+
+Fix this by using xfs_bmap_btalloc_low_space to do the actual allocation.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -3412,7 +3412,13 @@ xfs_bmap_exact_minlen_extent_alloc(
+        */
+       ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
+-      return xfs_alloc_vextent_first_ag(args, ap->blkno);
++      /*
++       * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
++       * iteration and then drops args->total to args->minlen, which might be
++       * required to find an allocation for the transaction reservation when
++       * the file system is very full.
++       */
++      return xfs_bmap_btalloc_low_space(ap, args);
+ }
+ /*
diff --git a/queue-6.6/xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch b/queue-6.6/xfs-update-the-file-system-geometry-after-recoverying-superblock-buffers.patch
new file mode 100644 (file)
index 0000000..fc22ce8
--- /dev/null
@@ -0,0 +1,133 @@
+From stable+bounces-113984-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:18 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:19 -0800
+Subject: xfs: update the file system geometry after recoverying superblock buffers
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-19-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 6a18765b54e2e52aebcdb84c3b4f4d1f7cb2c0ca upstream.
+
+Primary superblock buffers that change the file system geometry after a
+growfs operation can affect the operation of later CIL checkpoints that
+make use of the newly added space and allocation groups.
+
+Apply the changes to the in-memory structures as part of recovery pass 2,
+to ensure recovery works fine for such cases.
+
+In the future we should apply the logic to other updates such as feature
+bits as well.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf_item_recover.c |   52 ++++++++++++++++++++++++++++++++++++++++++
+ fs/xfs/xfs_log_recover.c      |    8 ------
+ 2 files changed, 52 insertions(+), 8 deletions(-)
+
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -22,6 +22,9 @@
+ #include "xfs_inode.h"
+ #include "xfs_dir2.h"
+ #include "xfs_quota.h"
++#include "xfs_alloc.h"
++#include "xfs_ag.h"
++#include "xfs_sb.h"
+ /*
+  * This is the number of entries in the l_buf_cancel_table used during
+@@ -685,6 +688,49 @@ xlog_recover_do_inode_buffer(
+ }
+ /*
++ * Update the in-memory superblock and perag structures from the primary SB
++ * buffer.
++ *
++ * This is required because transactions running after growfs may require the
++ * updated values to be set in a previous fully committed transaction.
++ */
++static int
++xlog_recover_do_primary_sb_buffer(
++      struct xfs_mount                *mp,
++      struct xlog_recover_item        *item,
++      struct xfs_buf                  *bp,
++      struct xfs_buf_log_format       *buf_f,
++      xfs_lsn_t                       current_lsn)
++{
++      struct xfs_dsb                  *dsb = bp->b_addr;
++      xfs_agnumber_t                  orig_agcount = mp->m_sb.sb_agcount;
++      int                             error;
++
++      xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
++
++      /*
++       * Update the in-core super block from the freshly recovered on-disk one.
++       */
++      xfs_sb_from_disk(&mp->m_sb, dsb);
++
++      /*
++       * Initialize the new perags, and also update various block and inode
++       * allocator setting based off the number of AGs or total blocks.
++       * Because of the latter this also needs to happen if the agcount did
++       * not change.
++       */
++      error = xfs_initialize_perag(mp, orig_agcount,
++                      mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks,
++                      &mp->m_maxagi);
++      if (error) {
++              xfs_warn(mp, "Failed recovery per-ag init: %d", error);
++              return error;
++      }
++      mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
++      return 0;
++}
++
++/*
+  * V5 filesystems know the age of the buffer on disk being recovered. We can
+  * have newer objects on disk than we are replaying, and so for these cases we
+  * don't want to replay the current change as that will make the buffer contents
+@@ -967,6 +1013,12 @@ xlog_recover_buf_commit_pass2(
+               dirty = xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
+               if (!dirty)
+                       goto out_release;
++      } else if ((xfs_blft_from_flags(buf_f) & XFS_BLFT_SB_BUF) &&
++                      xfs_buf_daddr(bp) == 0) {
++              error = xlog_recover_do_primary_sb_buffer(mp, item, bp, buf_f,
++                              current_lsn);
++              if (error)
++                      goto out_release;
+       } else {
+               xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
+       }
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -3317,7 +3317,6 @@ xlog_do_recover(
+       struct xfs_mount        *mp = log->l_mp;
+       struct xfs_buf          *bp = mp->m_sb_bp;
+       struct xfs_sb           *sbp = &mp->m_sb;
+-      xfs_agnumber_t          orig_agcount = sbp->sb_agcount;
+       int                     error;
+       trace_xfs_log_recover(log, head_blk, tail_blk);
+@@ -3366,13 +3365,6 @@ xlog_do_recover(
+       /* re-initialise in-core superblock and geometry structures */
+       mp->m_features |= xfs_sb_version_to_features(sbp);
+       xfs_reinit_percpu_counters(mp);
+-      error = xfs_initialize_perag(mp, orig_agcount, sbp->sb_agcount,
+-                      sbp->sb_dblocks, &mp->m_maxagi);
+-      if (error) {
+-              xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+-              return error;
+-      }
+-      mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+       /* Normal transactions can now occur */
+       clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
diff --git a/queue-6.6/xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch b/queue-6.6/xfs-update-the-pag-for-the-last-ag-at-recovery-time.patch
new file mode 100644 (file)
index 0000000..187571e
--- /dev/null
@@ -0,0 +1,107 @@
+From stable+bounces-113986-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:27 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:22 -0800
+Subject: xfs: update the pag for the last AG at recovery time
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-22-catherine.hoang@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 4a201dcfa1ff0dcfe4348c40f3ad8bd68b97eb6c upstream.
+
+Currently log recovery never updates the in-core perag values for the
+last allocation group when they were grown by growfs.  This leads to
+btree record validation failures for the alloc, ialloc or finobt
+trees if a transaction references this new space.
+
+Found by Brian's new growfs recovery stress test.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_ag.c        |   17 +++++++++++++++++
+ fs/xfs/libxfs/xfs_ag.h        |    1 +
+ fs/xfs/xfs_buf_item_recover.c |   19 ++++++++++++++++---
+ 3 files changed, 34 insertions(+), 3 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_ag.c
++++ b/fs/xfs/libxfs/xfs_ag.c
+@@ -358,6 +358,23 @@ xfs_free_unused_perag_range(
+ }
+ int
++xfs_update_last_ag_size(
++      struct xfs_mount        *mp,
++      xfs_agnumber_t          prev_agcount)
++{
++      struct xfs_perag        *pag = xfs_perag_grab(mp, prev_agcount - 1);
++
++      if (!pag)
++              return -EFSCORRUPTED;
++      pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
++                      mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
++      __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
++                      &pag->agino_max);
++      xfs_perag_rele(pag);
++      return 0;
++}
++
++int
+ xfs_initialize_perag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          old_agcount,
+--- a/fs/xfs/libxfs/xfs_ag.h
++++ b/fs/xfs/libxfs/xfs_ag.h
+@@ -140,6 +140,7 @@ int xfs_initialize_perag(struct xfs_moun
+               xfs_agnumber_t *maxagi);
+ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
+ void xfs_free_perag(struct xfs_mount *mp);
++int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount);
+ /* Passive AG references */
+ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -708,6 +708,11 @@ xlog_recover_do_primary_sb_buffer(
+       xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
++      if (orig_agcount == 0) {
++              xfs_alert(mp, "Trying to grow file system without AGs");
++              return -EFSCORRUPTED;
++      }
++
+       /*
+        * Update the in-core super block from the freshly recovered on-disk one.
+        */
+@@ -719,14 +724,22 @@ xlog_recover_do_primary_sb_buffer(
+       }
+       /*
++       * Growfs can also grow the last existing AG.  In this case we also need
++       * to update the length in the in-core perag structure and values
++       * depending on it.
++       */
++      error = xfs_update_last_ag_size(mp, orig_agcount);
++      if (error)
++              return error;
++
++      /*
+        * Initialize the new perags, and also update various block and inode
+        * allocator setting based off the number of AGs or total blocks.
+        * Because of the latter this also needs to happen if the agcount did
+        * not change.
+        */
+-      error = xfs_initialize_perag(mp, orig_agcount,
+-                      mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks,
+-                      &mp->m_maxagi);
++      error = xfs_initialize_perag(mp, orig_agcount, mp->m_sb.sb_agcount,
++                      mp->m_sb.sb_dblocks, &mp->m_maxagi);
+       if (error) {
+               xfs_warn(mp, "Failed recovery per-ag init: %d", error);
+               return error;
diff --git a/queue-6.6/xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch b/queue-6.6/xfs-use-try_cmpxchg-in-xlog_cil_insert_pcp_aggregate.patch
new file mode 100644 (file)
index 0000000..4bbf5c8
--- /dev/null
@@ -0,0 +1,65 @@
+From stable+bounces-113981-greg=kroah.com@vger.kernel.org Wed Feb  5 22:41:10 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:16 -0800
+Subject: xfs: Use try_cmpxchg() in xlog_cil_insert_pcp_aggregate()
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-16-catherine.hoang@oracle.com>
+
+From: Uros Bizjak <ubizjak@gmail.com>
+
+commit 20195d011c840b01fa91a85ebcd099ca95fbf8fc upstream.
+
+Use !try_cmpxchg instead of cmpxchg (*ptr, old, new) != old in
+xlog_cil_insert_pcp_aggregate().  x86 CMPXCHG instruction returns
+success in ZF flag, so this change saves a compare after cmpxchg.
+
+Also, try_cmpxchg implicitly assigns old *ptr value to "old" when
+cmpxchg fails. There is no need to re-read the value in the loop.
+
+Note that the value from *ptr should be read using READ_ONCE to
+prevent the compiler from merging, refetching or reordering the read.
+
+No functional change intended.
+
+Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
+Reviewed-by: Christoph Hellwig <hch@infradead.org>
+Cc: Chandan Babu R <chandan.babu@oracle.com>
+Cc: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c |   11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -156,7 +156,6 @@ xlog_cil_insert_pcp_aggregate(
+       struct xfs_cil          *cil,
+       struct xfs_cil_ctx      *ctx)
+ {
+-      struct xlog_cil_pcp     *cilpcp;
+       int                     cpu;
+       int                     count = 0;
+@@ -171,13 +170,11 @@ xlog_cil_insert_pcp_aggregate(
+        * structures that could have a nonzero space_used.
+        */
+       for_each_cpu(cpu, &ctx->cil_pcpmask) {
+-              int     old, prev;
++              struct xlog_cil_pcp     *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
++              int                     old = READ_ONCE(cilpcp->space_used);
+-              cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+-              do {
+-                      old = cilpcp->space_used;
+-                      prev = cmpxchg(&cilpcp->space_used, old, 0);
+-              } while (old != prev);
++              while (!try_cmpxchg(&cilpcp->space_used, &old, 0))
++                      ;
+               count += old;
+       }
+       atomic_add(count, &ctx->space_used);
diff --git a/queue-6.6/xfs-validate-inumber-in-xfs_iget.patch b/queue-6.6/xfs-validate-inumber-in-xfs_iget.patch
new file mode 100644 (file)
index 0000000..cdf1bfe
--- /dev/null
@@ -0,0 +1,35 @@
+From stable+bounces-113968-greg=kroah.com@vger.kernel.org Wed Feb  5 22:40:46 2025
+From: Catherine Hoang <catherine.hoang@oracle.com>
+Date: Wed,  5 Feb 2025 13:40:03 -0800
+Subject: xfs: validate inumber in xfs_iget
+To: stable@vger.kernel.org
+Cc: xfs-stable@lists.linux.dev
+Message-ID: <20250205214025.72516-3-catherine.hoang@oracle.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 05aba1953f4a6e2b48e13c610e8a4545ba4ef509 upstream.
+
+Actually use the inumber validator to check the argument passed in here.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Catherine Hoang <catherine.hoang@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_icache.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_icache.c
++++ b/fs/xfs/xfs_icache.c
+@@ -748,7 +748,7 @@ xfs_iget(
+       ASSERT((lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) == 0);
+       /* reject inode numbers outside existing AGs */
+-      if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
++      if (!xfs_verify_ino(mp, ino))
+               return -EINVAL;
+       XFS_STATS_INC(mp, xs_ig_attempts);