git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 26 Oct 2022 14:52:12 +0000 (16:52 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 26 Oct 2022 14:52:12 +0000 (16:52 +0200)
added patches:
xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch
xfs-check-owner-of-dir3-blocks.patch
xfs-check-owner-of-dir3-data-blocks.patch
xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch
xfs-factor-common-ail-item-deletion-code.patch
xfs-factor-out-a-new-xfs_log_force_inode-helper.patch
xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch
xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch
xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch
xfs-fix-use-after-free-on-cil-context-on-shutdown.patch
xfs-lower-cil-flush-limit-for-large-logs.patch
xfs-move-inode-flush-to-the-sync-workqueue.patch
xfs-open-code-insert-range-extent-split-helper.patch
xfs-preserve-default-grace-interval-during-quotacheck.patch
xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch
xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch
xfs-remove-the-xfs_dq_logitem_t-typedef.patch
xfs-remove-the-xfs_qoff_logitem_t-typedef.patch
xfs-replace-function-declaration-by-actual-definition.patch
xfs-rework-collapse-range-into-an-atomic-operation.patch
xfs-rework-insert-range-into-an-atomic-operation.patch
xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch
xfs-throttle-commits-on-delayed-background-cil-push.patch
xfs-trylock-underlying-buffer-on-dquot-flush.patch
xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch
xfs-xfs_buf_corruption_error-should-take-__this_address.patch

28 files changed:
queue-4.14/series [new file with mode: 0644]
queue-5.4/series [new file with mode: 0644]
queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch [new file with mode: 0644]
queue-5.4/xfs-check-owner-of-dir3-blocks.patch [new file with mode: 0644]
queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch [new file with mode: 0644]
queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch [new file with mode: 0644]
queue-5.4/xfs-factor-common-ail-item-deletion-code.patch [new file with mode: 0644]
queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch [new file with mode: 0644]
queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch [new file with mode: 0644]
queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch [new file with mode: 0644]
queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch [new file with mode: 0644]
queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch [new file with mode: 0644]
queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch [new file with mode: 0644]
queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch [new file with mode: 0644]
queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch [new file with mode: 0644]
queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch [new file with mode: 0644]
queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch [new file with mode: 0644]
queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch [new file with mode: 0644]
queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch [new file with mode: 0644]
queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch [new file with mode: 0644]
queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch [new file with mode: 0644]
queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch [new file with mode: 0644]
queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch [new file with mode: 0644]
queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch [new file with mode: 0644]
queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch [new file with mode: 0644]
queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch [new file with mode: 0644]
queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch [new file with mode: 0644]
queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch [new file with mode: 0644]

diff --git a/queue-4.14/series b/queue-4.14/series
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/queue-5.4/series b/queue-5.4/series
new file mode 100644 (file)
index 0000000..b8be03f
--- /dev/null
@@ -0,0 +1,26 @@
+xfs-open-code-insert-range-extent-split-helper.patch
+xfs-rework-insert-range-into-an-atomic-operation.patch
+xfs-rework-collapse-range-into-an-atomic-operation.patch
+xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch
+xfs-xfs_buf_corruption_error-should-take-__this_address.patch
+xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch
+xfs-check-owner-of-dir3-data-blocks.patch
+xfs-check-owner-of-dir3-blocks.patch
+xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch
+xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch
+xfs-remove-the-xfs_dq_logitem_t-typedef.patch
+xfs-remove-the-xfs_qoff_logitem_t-typedef.patch
+xfs-replace-function-declaration-by-actual-definition.patch
+xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch
+xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch
+xfs-preserve-default-grace-interval-during-quotacheck.patch
+xfs-lower-cil-flush-limit-for-large-logs.patch
+xfs-throttle-commits-on-delayed-background-cil-push.patch
+xfs-factor-common-ail-item-deletion-code.patch
+xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch
+xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch
+xfs-trylock-underlying-buffer-on-dquot-flush.patch
+xfs-factor-out-a-new-xfs_log_force_inode-helper.patch
+xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch
+xfs-move-inode-flush-to-the-sync-workqueue.patch
+xfs-fix-use-after-free-on-cil-context-on-shutdown.patch
diff --git a/queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch b/queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch
new file mode 100644 (file)
index 0000000..9e75a46
--- /dev/null
@@ -0,0 +1,286 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:21 +0530
+Subject: xfs: add a function to deal with corrupt buffers post-verifiers
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-5-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 8d57c21600a514d7a9237327c2496ae159bab5bb upstream.
+
+Add a helper function to get rid of buffers that we have decided are
+corrupt after the verifiers have run.  This function is intended to
+handle metadata checks that can't happen in the verifiers, such as
+inter-block relationship checking.  Note that we now mark the buffer
+stale so that it will not end up on any LRU and will be purged on
+release.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c     |    2 +-
+ fs/xfs/libxfs/xfs_attr_leaf.c |    6 +++---
+ fs/xfs/libxfs/xfs_btree.c     |    2 +-
+ fs/xfs/libxfs/xfs_da_btree.c  |   10 +++++-----
+ fs/xfs/libxfs/xfs_dir2_leaf.c |    2 +-
+ fs/xfs/libxfs/xfs_dir2_node.c |    6 +++---
+ fs/xfs/xfs_attr_inactive.c    |    6 +++---
+ fs/xfs/xfs_attr_list.c        |    2 +-
+ fs/xfs/xfs_buf.c              |   22 ++++++++++++++++++++++
+ fs/xfs/xfs_buf.h              |    2 ++
+ fs/xfs/xfs_error.c            |    2 ++
+ fs/xfs/xfs_inode.c            |    4 ++--
+ 12 files changed, 46 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -685,7 +685,7 @@ xfs_alloc_update_counters(
+       xfs_trans_agblocks_delta(tp, len);
+       if (unlikely(be32_to_cpu(agf->agf_freeblks) >
+                    be32_to_cpu(agf->agf_length))) {
+-              xfs_buf_corruption_error(agbp);
++              xfs_buf_mark_corrupt(agbp);
+               return -EFSCORRUPTED;
+       }
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -2288,7 +2288,7 @@ xfs_attr3_leaf_lookup_int(
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
+       entries = xfs_attr3_leaf_entryp(leaf);
+       if (ichdr.count >= args->geo->blksize / 8) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               return -EFSCORRUPTED;
+       }
+@@ -2307,11 +2307,11 @@ xfs_attr3_leaf_lookup_int(
+                       break;
+       }
+       if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               return -EFSCORRUPTED;
+       }
+       if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               return -EFSCORRUPTED;
+       }
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -1820,7 +1820,7 @@ xfs_btree_lookup_get_block(
+ out_bad:
+       *blkp = NULL;
+-      xfs_buf_corruption_error(bp);
++      xfs_buf_mark_corrupt(bp);
+       xfs_trans_brelse(cur->bc_tp, bp);
+       return -EFSCORRUPTED;
+ }
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -504,7 +504,7 @@ xfs_da3_split(
+       node = oldblk->bp->b_addr;
+       if (node->hdr.info.forw) {
+               if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+-                      xfs_buf_corruption_error(oldblk->bp);
++                      xfs_buf_mark_corrupt(oldblk->bp);
+                       error = -EFSCORRUPTED;
+                       goto out;
+               }
+@@ -517,7 +517,7 @@ xfs_da3_split(
+       node = oldblk->bp->b_addr;
+       if (node->hdr.info.back) {
+               if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+-                      xfs_buf_corruption_error(oldblk->bp);
++                      xfs_buf_mark_corrupt(oldblk->bp);
+                       error = -EFSCORRUPTED;
+                       goto out;
+               }
+@@ -1544,7 +1544,7 @@ xfs_da3_node_lookup_int(
+               }
+               if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
+-                      xfs_buf_corruption_error(blk->bp);
++                      xfs_buf_mark_corrupt(blk->bp);
+                       return -EFSCORRUPTED;
+               }
+@@ -1559,7 +1559,7 @@ xfs_da3_node_lookup_int(
+               /* Tree taller than we can handle; bail out! */
+               if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
+-                      xfs_buf_corruption_error(blk->bp);
++                      xfs_buf_mark_corrupt(blk->bp);
+                       return -EFSCORRUPTED;
+               }
+@@ -1567,7 +1567,7 @@ xfs_da3_node_lookup_int(
+               if (blkno == args->geo->leafblk)
+                       expected_level = nodehdr.level - 1;
+               else if (expected_level != nodehdr.level) {
+-                      xfs_buf_corruption_error(blk->bp);
++                      xfs_buf_mark_corrupt(blk->bp);
+                       return -EFSCORRUPTED;
+               } else
+                       expected_level--;
+--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
+@@ -1344,7 +1344,7 @@ xfs_dir2_leaf_removename(
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       bestsp = xfs_dir2_leaf_bests_p(ltp);
+       if (be16_to_cpu(bestsp[db]) != oldbest) {
+-              xfs_buf_corruption_error(lbp);
++              xfs_buf_mark_corrupt(lbp);
+               return -EFSCORRUPTED;
+       }
+       /*
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -375,7 +375,7 @@ xfs_dir2_leaf_to_node(
+       ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+       if (be32_to_cpu(ltp->bestcount) >
+                               (uint)dp->i_d.di_size / args->geo->blksize) {
+-              xfs_buf_corruption_error(lbp);
++              xfs_buf_mark_corrupt(lbp);
+               return -EFSCORRUPTED;
+       }
+@@ -449,7 +449,7 @@ xfs_dir2_leafn_add(
+        * into other peoples memory
+        */
+       if (index < 0) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               return -EFSCORRUPTED;
+       }
+@@ -745,7 +745,7 @@ xfs_dir2_leafn_lookup_for_entry(
+       xfs_dir3_leaf_check(dp, bp);
+       if (leafhdr.count <= 0) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               return -EFSCORRUPTED;
+       }
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -145,7 +145,7 @@ xfs_attr3_node_inactive(
+        * Since this code is recursive (gasp!) we must protect ourselves.
+        */
+       if (level > XFS_DA_NODE_MAXDEPTH) {
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               xfs_trans_brelse(*trans, bp);   /* no locks for later trans */
+               return -EFSCORRUPTED;
+       }
+@@ -196,7 +196,7 @@ xfs_attr3_node_inactive(
+                       error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
+                       break;
+               default:
+-                      xfs_buf_corruption_error(child_bp);
++                      xfs_buf_mark_corrupt(child_bp);
+                       xfs_trans_brelse(*trans, child_bp);
+                       error = -EFSCORRUPTED;
+                       break;
+@@ -281,7 +281,7 @@ xfs_attr3_root_inactive(
+               break;
+       default:
+               error = -EFSCORRUPTED;
+-              xfs_buf_corruption_error(bp);
++              xfs_buf_mark_corrupt(bp);
+               xfs_trans_brelse(*trans, bp);
+               break;
+       }
+--- a/fs/xfs/xfs_attr_list.c
++++ b/fs/xfs/xfs_attr_list.c
+@@ -271,7 +271,7 @@ xfs_attr_node_list_lookup(
+       return 0;
+ out_corruptbuf:
+-      xfs_buf_corruption_error(bp);
++      xfs_buf_mark_corrupt(bp);
+       xfs_trans_brelse(tp, bp);
+       return -EFSCORRUPTED;
+ }
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1547,6 +1547,28 @@ xfs_buf_zero(
+ }
+ /*
++ * Log a message about and stale a buffer that a caller has decided is corrupt.
++ *
++ * This function should be called for the kinds of metadata corruption that
++ * cannot be detected from a verifier, such as incorrect inter-block relationship
++ * data.  Do /not/ call this function from a verifier function.
++ *
++ * The buffer must be XBF_DONE prior to the call.  Afterwards, the buffer will
++ * be marked stale, but b_error will not be set.  The caller is responsible for
++ * releasing the buffer or fixing it.
++ */
++void
++__xfs_buf_mark_corrupt(
++      struct xfs_buf          *bp,
++      xfs_failaddr_t          fa)
++{
++      ASSERT(bp->b_flags & XBF_DONE);
++
++      xfs_buf_corruption_error(bp);
++      xfs_buf_stale(bp);
++}
++
++/*
+  *    Handling of buffer targets (buftargs).
+  */
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -270,6 +270,8 @@ static inline int xfs_buf_submit(struct
+ }
+ void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
++void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
++#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
+ /* Buffer Utility Routines */
+ extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -345,6 +345,8 @@ xfs_corruption_error(
+  * Complain about the kinds of metadata corruption that we can't detect from a
+  * verifier, such as incorrect inter-block relationship data.  Does not set
+  * bp->b_error.
++ *
++ * Call xfs_buf_mark_corrupt, not this function.
+  */
+ void
+ xfs_buf_corruption_error(
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2149,7 +2149,7 @@ xfs_iunlink_update_bucket(
+        * head of the list.
+        */
+       if (old_value == new_agino) {
+-              xfs_buf_corruption_error(agibp);
++              xfs_buf_mark_corrupt(agibp);
+               return -EFSCORRUPTED;
+       }
+@@ -2283,7 +2283,7 @@ xfs_iunlink(
+       next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+       if (next_agino == agino ||
+           !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+-              xfs_buf_corruption_error(agibp);
++              xfs_buf_mark_corrupt(agibp);
+               return -EFSCORRUPTED;
+       }
diff --git a/queue-5.4/xfs-check-owner-of-dir3-blocks.patch b/queue-5.4/xfs-check-owner-of-dir3-blocks.patch
new file mode 100644 (file)
index 0000000..73ac452
--- /dev/null
@@ -0,0 +1,77 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:25 +0530
+Subject: xfs: check owner of dir3 blocks
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-9-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1b2c1a63b678d63e9c98314d44413f5af79c9c80 upstream.
+
+Check the owner field of dir3 block headers.  If it's corrupt, release
+the buffer and return EFSCORRUPTED.  All callers handle this properly.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_block.c |   33 +++++++++++++++++++++++++++++++--
+ 1 file changed, 31 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_block.c
++++ b/fs/xfs/libxfs/xfs_dir2_block.c
+@@ -114,6 +114,23 @@ const struct xfs_buf_ops xfs_dir3_block_
+       .verify_struct = xfs_dir3_block_verify,
+ };
++static xfs_failaddr_t
++xfs_dir3_block_header_check(
++      struct xfs_inode        *dp,
++      struct xfs_buf          *bp)
++{
++      struct xfs_mount        *mp = dp->i_mount;
++
++      if (xfs_sb_version_hascrc(&mp->m_sb)) {
++              struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
++
++              if (be64_to_cpu(hdr3->owner) != dp->i_ino)
++                      return __this_address;
++      }
++
++      return NULL;
++}
++
+ int
+ xfs_dir3_block_read(
+       struct xfs_trans        *tp,
+@@ -121,12 +138,24 @@ xfs_dir3_block_read(
+       struct xfs_buf          **bpp)
+ {
+       struct xfs_mount        *mp = dp->i_mount;
++      xfs_failaddr_t          fa;
+       int                     err;
+       err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
+-      if (!err && tp && *bpp)
+-              xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
++      if (err || !*bpp)
++              return err;
++
++      /* Check things that we can't do in the verifier. */
++      fa = xfs_dir3_block_header_check(dp, *bpp);
++      if (fa) {
++              __xfs_buf_mark_corrupt(*bpp, fa);
++              xfs_trans_brelse(tp, *bpp);
++              *bpp = NULL;
++              return -EFSCORRUPTED;
++      }
++
++      xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
+       return err;
+ }
diff --git a/queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch b/queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch
new file mode 100644 (file)
index 0000000..8445c7d
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:24 +0530
+Subject: xfs: check owner of dir3 data blocks
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-8-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit a10c21ed5d5241d11cf1d5a4556730840572900b upstream.
+
+[Slightly edit xfs_dir3_data_read() to work with existing mapped_bno argument instead
+of flag values introduced in later kernels]
+
+Check the owner field of dir3 data block headers.  If it's corrupt,
+release the buffer and return EFSCORRUPTED.  All callers handle this
+properly.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_data.c |   32 ++++++++++++++++++++++++++++++--
+ 1 file changed, 30 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_data.c
++++ b/fs/xfs/libxfs/xfs_dir2_data.c
+@@ -348,6 +348,22 @@ static const struct xfs_buf_ops xfs_dir3
+       .verify_write = xfs_dir3_data_write_verify,
+ };
++static xfs_failaddr_t
++xfs_dir3_data_header_check(
++      struct xfs_inode        *dp,
++      struct xfs_buf          *bp)
++{
++      struct xfs_mount        *mp = dp->i_mount;
++
++      if (xfs_sb_version_hascrc(&mp->m_sb)) {
++              struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
++
++              if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
++                      return __this_address;
++      }
++
++      return NULL;
++}
+ int
+ xfs_dir3_data_read(
+@@ -357,12 +373,24 @@ xfs_dir3_data_read(
+       xfs_daddr_t             mapped_bno,
+       struct xfs_buf          **bpp)
+ {
++      xfs_failaddr_t          fa;
+       int                     err;
+       err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+                               XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+-      if (!err && tp && *bpp)
+-              xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
++      if (err || !*bpp)
++              return err;
++
++      /* Check things that we can't do in the verifier. */
++      fa = xfs_dir3_data_header_check(dp, *bpp);
++      if (fa) {
++              __xfs_buf_mark_corrupt(*bpp, fa);
++              xfs_trans_brelse(tp, *bpp);
++              *bpp = NULL;
++              return -EFSCORRUPTED;
++      }
++
++      xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
+       return err;
+ }
diff --git a/queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch b/queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch
new file mode 100644 (file)
index 0000000..6064a9b
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:38 +0530
+Subject: xfs: don't write a corrupt unmount record to force summary counter recalc
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-22-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 5cc3c006eb45524860c4d1dd4dd7ad4a506bf3f5 upstream.
+
+[ Modify fs/xfs/xfs_log.c to include the changes at locations suitable for
+  5.4-lts kernel ]
+
+In commit f467cad95f5e3, I added the ability to force a recalculation of
+the filesystem summary counters if they seemed incorrect.  This was done
+(not entirely correctly) by tweaking the log code to write an unmount
+record without the UMOUNT_TRANS flag set.  At next mount, the log
+recovery code will fail to find the unmount record and go into recovery,
+which triggers the recalculation.
+
+What actually gets written to the log is what ought to be an unmount
+record, but without any flags set to indicate what kind of record it
+actually is.  This worked to trigger the recalculation, but we shouldn't
+write bogus log records when we could simply write nothing.
+
+Fixes: f467cad95f5e3 ("xfs: force summary counter recalc at next mount")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c |   26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -837,19 +837,6 @@ xfs_log_write_unmount_record(
+       if (error)
+               goto out_err;
+-      /*
+-       * If we think the summary counters are bad, clear the unmount header
+-       * flag in the unmount record so that the summary counters will be
+-       * recalculated during log recovery at next mount.  Refer to
+-       * xlog_check_unmount_rec for more details.
+-       */
+-      if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
+-                      XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+-              xfs_alert(mp, "%s: will fix summary counters at next mount",
+-                              __func__);
+-              flags &= ~XLOG_UNMOUNT_TRANS;
+-      }
+-
+       /* remove inited flag, and account for space used */
+       tic->t_flags = 0;
+       tic->t_curr_res -= sizeof(magic);
+@@ -932,6 +919,19 @@ xfs_log_unmount_write(xfs_mount_t *mp)
+       } while (iclog != first_iclog);
+ #endif
+       if (! (XLOG_FORCED_SHUTDOWN(log))) {
++              /*
++               * If we think the summary counters are bad, avoid writing the
++               * unmount record to force log recovery at next mount, after
++               * which the summary counters will be recalculated.  Refer to
++               * xlog_check_unmount_rec for more details.
++               */
++              if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS),
++                              mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
++                      xfs_alert(mp,
++                              "%s: will fix summary counters at next mount",
++                              __func__);
++                      return 0;
++              }
+               xfs_log_write_unmount_record(mp);
+       } else {
+               /*
diff --git a/queue-5.4/xfs-factor-common-ail-item-deletion-code.patch b/queue-5.4/xfs-factor-common-ail-item-deletion-code.patch
new file mode 100644 (file)
index 0000000..9ff67aa
--- /dev/null
@@ -0,0 +1,147 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:36 +0530
+Subject: xfs: factor common AIL item deletion code
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-20-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 4165994ac9672d91134675caa6de3645a9ace6c8 upstream.
+
+Factor the common AIL deletion code that does all the wakeups into a
+helper so we only have one copy of this somewhat tricky code to
+interface with all the wakeups necessary when the LSN of the log
+tail changes.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode_item.c |   12 +-----------
+ fs/xfs/xfs_trans_ail.c  |   48 ++++++++++++++++++++++++++----------------------
+ fs/xfs/xfs_trans_priv.h |    4 +++-
+ 3 files changed, 30 insertions(+), 34 deletions(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -744,17 +744,7 @@ xfs_iflush_done(
+                               xfs_clear_li_failed(blip);
+                       }
+               }
+-
+-              if (mlip_changed) {
+-                      if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+-                              xlog_assign_tail_lsn_locked(ailp->ail_mount);
+-                      if (list_empty(&ailp->ail_head))
+-                              wake_up_all(&ailp->ail_empty);
+-              }
+-              spin_unlock(&ailp->ail_lock);
+-
+-              if (mlip_changed)
+-                      xfs_log_space_wake(ailp->ail_mount);
++              xfs_ail_update_finish(ailp, mlip_changed);
+       }
+       /*
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -680,6 +680,27 @@ xfs_ail_push_all_sync(
+       finish_wait(&ailp->ail_empty, &wait);
+ }
++void
++xfs_ail_update_finish(
++      struct xfs_ail          *ailp,
++      bool                    do_tail_update) __releases(ailp->ail_lock)
++{
++      struct xfs_mount        *mp = ailp->ail_mount;
++
++      if (!do_tail_update) {
++              spin_unlock(&ailp->ail_lock);
++              return;
++      }
++
++      if (!XFS_FORCED_SHUTDOWN(mp))
++              xlog_assign_tail_lsn_locked(mp);
++
++      if (list_empty(&ailp->ail_head))
++              wake_up_all(&ailp->ail_empty);
++      spin_unlock(&ailp->ail_lock);
++      xfs_log_space_wake(mp);
++}
++
+ /*
+  * xfs_trans_ail_update - bulk AIL insertion operation.
+  *
+@@ -739,15 +760,7 @@ xfs_trans_ail_update_bulk(
+       if (!list_empty(&tmp))
+               xfs_ail_splice(ailp, cur, &tmp, lsn);
+-      if (mlip_changed) {
+-              if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+-                      xlog_assign_tail_lsn_locked(ailp->ail_mount);
+-              spin_unlock(&ailp->ail_lock);
+-
+-              xfs_log_space_wake(ailp->ail_mount);
+-      } else {
+-              spin_unlock(&ailp->ail_lock);
+-      }
++      xfs_ail_update_finish(ailp, mlip_changed);
+ }
+ bool
+@@ -791,10 +804,10 @@ void
+ xfs_trans_ail_delete(
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip,
+-      int                     shutdown_type) __releases(ailp->ail_lock)
++      int                     shutdown_type)
+ {
+       struct xfs_mount        *mp = ailp->ail_mount;
+-      bool                    mlip_changed;
++      bool                    need_update;
+       if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
+               spin_unlock(&ailp->ail_lock);
+@@ -807,17 +820,8 @@ xfs_trans_ail_delete(
+               return;
+       }
+-      mlip_changed = xfs_ail_delete_one(ailp, lip);
+-      if (mlip_changed) {
+-              if (!XFS_FORCED_SHUTDOWN(mp))
+-                      xlog_assign_tail_lsn_locked(mp);
+-              if (list_empty(&ailp->ail_head))
+-                      wake_up_all(&ailp->ail_empty);
+-      }
+-
+-      spin_unlock(&ailp->ail_lock);
+-      if (mlip_changed)
+-              xfs_log_space_wake(ailp->ail_mount);
++      need_update = xfs_ail_delete_one(ailp, lip);
++      xfs_ail_update_finish(ailp, need_update);
+ }
+ int
+--- a/fs/xfs/xfs_trans_priv.h
++++ b/fs/xfs/xfs_trans_priv.h
+@@ -92,8 +92,10 @@ xfs_trans_ail_update(
+ }
+ bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
++void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update)
++                      __releases(ailp->ail_lock);
+ void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+-              int shutdown_type) __releases(ailp->ail_lock);
++              int shutdown_type);
+ static inline void
+ xfs_trans_ail_remove(
diff --git a/queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch b/queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch
new file mode 100644 (file)
index 0000000..1fdab10
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:40 +0530
+Subject: xfs: factor out a new xfs_log_force_inode helper
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-24-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 54fbdd1035e3a4e4f4082c335b095426cdefd092 upstream.
+
+Create a new helper to force the log up to the last LSN touching an
+inode.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_export.c |   14 +-------------
+ fs/xfs/xfs_file.c   |   12 +-----------
+ fs/xfs/xfs_inode.c  |   19 +++++++++++++++++++
+ fs/xfs/xfs_inode.h  |    1 +
+ 4 files changed, 22 insertions(+), 24 deletions(-)
+
+--- a/fs/xfs/xfs_export.c
++++ b/fs/xfs/xfs_export.c
+@@ -15,7 +15,6 @@
+ #include "xfs_trans.h"
+ #include "xfs_inode_item.h"
+ #include "xfs_icache.h"
+-#include "xfs_log.h"
+ #include "xfs_pnfs.h"
+ /*
+@@ -221,18 +220,7 @@ STATIC int
+ xfs_fs_nfs_commit_metadata(
+       struct inode            *inode)
+ {
+-      struct xfs_inode        *ip = XFS_I(inode);
+-      struct xfs_mount        *mp = ip->i_mount;
+-      xfs_lsn_t               lsn = 0;
+-
+-      xfs_ilock(ip, XFS_ILOCK_SHARED);
+-      if (xfs_ipincount(ip))
+-              lsn = ip->i_itemp->ili_last_lsn;
+-      xfs_iunlock(ip, XFS_ILOCK_SHARED);
+-
+-      if (!lsn)
+-              return 0;
+-      return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
++      return xfs_log_force_inode(XFS_I(inode));
+ }
+ const struct export_operations xfs_export_operations = {
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -80,19 +80,9 @@ xfs_dir_fsync(
+       int                     datasync)
+ {
+       struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
+-      struct xfs_mount        *mp = ip->i_mount;
+-      xfs_lsn_t               lsn = 0;
+       trace_xfs_dir_fsync(ip);
+-
+-      xfs_ilock(ip, XFS_ILOCK_SHARED);
+-      if (xfs_ipincount(ip))
+-              lsn = ip->i_itemp->ili_last_lsn;
+-      xfs_iunlock(ip, XFS_ILOCK_SHARED);
+-
+-      if (!lsn)
+-              return 0;
+-      return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
++      return xfs_log_force_inode(ip);
+ }
+ STATIC int
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3973,3 +3973,22 @@ xfs_irele(
+       trace_xfs_irele(ip, _RET_IP_);
+       iput(VFS_I(ip));
+ }
++
++/*
++ * Ensure all committed transactions touching the inode are written to the log.
++ */
++int
++xfs_log_force_inode(
++      struct xfs_inode        *ip)
++{
++      xfs_lsn_t               lsn = 0;
++
++      xfs_ilock(ip, XFS_ILOCK_SHARED);
++      if (xfs_ipincount(ip))
++              lsn = ip->i_itemp->ili_last_lsn;
++      xfs_iunlock(ip, XFS_ILOCK_SHARED);
++
++      if (!lsn)
++              return 0;
++      return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
++}
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -441,6 +441,7 @@ int                xfs_itruncate_extents_flags(struct
+                               struct xfs_inode *, int, xfs_fsize_t, int);
+ void          xfs_iext_realloc(xfs_inode_t *, int, int);
++int           xfs_log_force_inode(struct xfs_inode *ip);
+ void          xfs_iunpin_wait(xfs_inode_t *);
+ #define xfs_ipincount(ip)     ((unsigned int) atomic_read(&ip->i_pincount))
diff --git a/queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch b/queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch
new file mode 100644 (file)
index 0000000..c6aa200
--- /dev/null
@@ -0,0 +1,90 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:31 +0530
+Subject: xfs: factor out quotaoff intent AIL removal and memory free
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-15-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 854f82b1f6039a418b7d1407513f8640e05fd73f upstream.
+
+AIL removal of the quotaoff start intent and free of both intents is
+hardcoded to the ->iop_committed() handler of the end intent. Factor
+out the start intent handling code so it can be used in a future
+patch to properly handle quotaoff errors. Use xfs_trans_ail_remove()
+instead of the _delete() variant to acquire the AIL lock and also
+handle cases where an intent might not reside in the AIL at the
+time of a failure.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot_item.c |   29 ++++++++++++++++++++---------
+ fs/xfs/xfs_dquot_item.h |    1 +
+ 2 files changed, 21 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -307,18 +307,10 @@ xfs_qm_qoffend_logitem_committed(
+ {
+       struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+       struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+-      struct xfs_ail          *ailp = qfs->qql_item.li_ailp;
+-      /*
+-       * Delete the qoff-start logitem from the AIL.
+-       * xfs_trans_ail_delete() drops the AIL lock.
+-       */
+-      spin_lock(&ailp->ail_lock);
+-      xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
++      xfs_qm_qoff_logitem_relse(qfs);
+-      kmem_free(qfs->qql_item.li_lv_shadow);
+       kmem_free(lip->li_lv_shadow);
+-      kmem_free(qfs);
+       kmem_free(qfe);
+       return (xfs_lsn_t)-1;
+ }
+@@ -337,6 +329,25 @@ static const struct xfs_item_ops xfs_qm_
+ };
+ /*
++ * Delete the quotaoff intent from the AIL and free it. On success,
++ * this should only be called for the start item. It can be used for
++ * either item on shutdown or abort.
++ */
++void
++xfs_qm_qoff_logitem_relse(
++      struct xfs_qoff_logitem *qoff)
++{
++      struct xfs_log_item     *lip = &qoff->qql_item;
++
++      ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) ||
++             test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
++             XFS_FORCED_SHUTDOWN(lip->li_mountp));
++      xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
++      kmem_free(lip->li_lv_shadow);
++      kmem_free(qoff);
++}
++
++/*
+  * Allocate and initialize an quotaoff item of the correct quota type(s).
+  */
+ struct xfs_qoff_logitem *
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -28,6 +28,7 @@ void xfs_qm_dquot_logitem_init(struct xf
+ struct xfs_qoff_logitem       *xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
+               struct xfs_qoff_logitem *start,
+               uint flags);
++void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *);
+ struct xfs_qoff_logitem       *xfs_trans_get_qoff_item(struct xfs_trans *tp,
+               struct xfs_qoff_logitem *startqoff,
+               uint flags);
diff --git a/queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch b/queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch
new file mode 100644 (file)
index 0000000..009582b
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:23 +0530
+Subject: xfs: fix buffer corruption reporting when xfs_dir3_free_header_check fails
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-7-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit ce99494c9699df58b31d0a839e957f86cd58c755 upstream.
+
+xfs_verifier_error is supposed to be called on a corrupt metadata buffer
+from within a buffer verifier function, whereas xfs_buf_mark_corrupt
+is the function to be called when a piece of code has read a buffer and
+catches something that a read verifier cannot.  The first function sets
+b_error anticipating that the low level buffer handling code will see
+the nonzero b_error and clear XBF_DONE on the buffer, whereas the second
+function does not.
+
+Since xfs_dir3_free_header_check examines fields in the dir free block
+header that require more context than can be provided to read verifiers,
+we must call xfs_buf_mark_corrupt when it finds a problem.
+
+Switching the calls has a secondary effect that we no longer corrupt the
+buffer state by setting b_error and leaving XBF_DONE set.  When /that/
+happens, we'll trip over various state assertions (most commonly the
+b_error check in xfs_buf_reverify) on a subsequent attempt to read the
+buffer.
+
+Fixes: bc1a09b8e334bf5f ("xfs: refactor verifier callers to print address of failing check")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_node.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -208,7 +208,7 @@ __xfs_dir3_free_read(
+       /* Check things that we can't do in the verifier. */
+       fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+       if (fa) {
+-              xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
++              __xfs_buf_mark_corrupt(*bpp, fa);
+               xfs_trans_brelse(tp, *bpp);
+               *bpp = NULL;
+               return -EFSCORRUPTED;
diff --git a/queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch b/queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch
new file mode 100644 (file)
index 0000000..ff01634
--- /dev/null
@@ -0,0 +1,143 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:32 +0530
+Subject: xfs: fix unmount hang and memory leak on shutdown during quotaoff
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-16-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 8a62714313391b9b2297d67c341b35edbf46c279 upstream.
+
+AIL removal of the quotaoff start intent and free of both quotaoff
+intents is currently limited to the ->iop_committed() handler of the
+end intent. This executes when the end intent is committed to the
+on-disk log and marks the completion of the operation. The problem
+with this is it assumes the success of the operation. If a shutdown
+or other error occurs during the quotaoff, it's possible for the
+quotaoff task to exit without removing the start intent from the
+AIL. This results in an unmount hang as the AIL cannot be emptied.
+Further, no other codepath frees the intents and so this is also a
+memory leak vector.
+
+First, update the high level quotaoff error path to directly remove
+and free the quotaoff start intent if it still exists in the AIL at
+the time of the error. Next, update both of the start and end
+quotaoff intents with an ->iop_release() callback to properly handle
+transaction abort.
+
+This means that if the quotaoff start transaction aborts, it frees
+the start intent in the transaction commit path. If the filesystem
+shuts down before the end transaction allocates, the quotaoff
+sequence removes and frees the start intent. If the end transaction
+aborts, it removes the start intent and frees both. This ensures
+that a shutdown does not result in a hung unmount and that memory is
+not leaked regardless of when a quotaoff error occurs.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot_item.c  |   15 +++++++++++++++
+ fs/xfs/xfs_qm_syscalls.c |   13 +++++++------
+ 2 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -315,17 +315,32 @@ xfs_qm_qoffend_logitem_committed(
+       return (xfs_lsn_t)-1;
+ }
++STATIC void
++xfs_qm_qoff_logitem_release(
++      struct xfs_log_item     *lip)
++{
++      struct xfs_qoff_logitem *qoff = QOFF_ITEM(lip);
++
++      if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
++              if (qoff->qql_start_lip)
++                      xfs_qm_qoff_logitem_relse(qoff->qql_start_lip);
++              xfs_qm_qoff_logitem_relse(qoff);
++      }
++}
++
+ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_committed  = xfs_qm_qoffend_logitem_committed,
+       .iop_push       = xfs_qm_qoff_logitem_push,
++      .iop_release    = xfs_qm_qoff_logitem_release,
+ };
+ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+       .iop_size       = xfs_qm_qoff_logitem_size,
+       .iop_format     = xfs_qm_qoff_logitem_format,
+       .iop_push       = xfs_qm_qoff_logitem_push,
++      .iop_release    = xfs_qm_qoff_logitem_release,
+ };
+ /*
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -29,8 +29,6 @@ xfs_qm_log_quotaoff(
+       int                     error;
+       struct xfs_qoff_logitem *qoffi;
+-      *qoffstartp = NULL;
+-
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+       if (error)
+               goto out;
+@@ -62,7 +60,7 @@ out:
+ STATIC int
+ xfs_qm_log_quotaoff_end(
+       struct xfs_mount        *mp,
+-      struct xfs_qoff_logitem *startqoff,
++      struct xfs_qoff_logitem **startqoff,
+       uint                    flags)
+ {
+       struct xfs_trans        *tp;
+@@ -73,9 +71,10 @@ xfs_qm_log_quotaoff_end(
+       if (error)
+               return error;
+-      qoffi = xfs_trans_get_qoff_item(tp, startqoff,
++      qoffi = xfs_trans_get_qoff_item(tp, *startqoff,
+                                       flags & XFS_ALL_QUOTA_ACCT);
+       xfs_trans_log_quotaoff_item(tp, qoffi);
++      *startqoff = NULL;
+       /*
+        * We have to make sure that the transaction is secure on disk before we
+@@ -103,7 +102,7 @@ xfs_qm_scall_quotaoff(
+       uint                    dqtype;
+       int                     error;
+       uint                    inactivate_flags;
+-      struct xfs_qoff_logitem *qoffstart;
++      struct xfs_qoff_logitem *qoffstart = NULL;
+       /*
+        * No file system can have quotas enabled on disk but not in core.
+@@ -228,7 +227,7 @@ xfs_qm_scall_quotaoff(
+        * So, we have QUOTAOFF start and end logitems; the start
+        * logitem won't get overwritten until the end logitem appears...
+        */
+-      error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
++      error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags);
+       if (error) {
+               /* We're screwed now. Shutdown is the only option. */
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+@@ -261,6 +260,8 @@ xfs_qm_scall_quotaoff(
+       }
+ out_unlock:
++      if (error && qoffstart)
++              xfs_qm_qoff_logitem_relse(qoffstart);
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
+ }
diff --git a/queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch b/queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch
new file mode 100644 (file)
index 0000000..85aecc7
--- /dev/null
@@ -0,0 +1,122 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:43 +0530
+Subject: xfs: fix use-after-free on CIL context on shutdown
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-27-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit c7f87f3984cfa1e6d32806a715f35c5947ad9c09 upstream.
+
+xlog_wait() on the CIL context can reference a freed context if the
+waiter doesn't get scheduled before the CIL context is freed. This
+can happen when a task is on the hard throttle and the CIL push
+aborts due to a shutdown. This was detected by generic/019:
+
+thread 1                       thread 2
+
+__xfs_trans_commit
+ xfs_log_commit_cil
+  <CIL size over hard throttle limit>
+  xlog_wait
+   schedule
+                               xlog_cil_push_work
+                               wake_up_all
+                               <shutdown aborts commit>
+                               xlog_cil_committed
+                               kmem_free
+
+   remove_wait_queue
+    spin_lock_irqsave --> UAF
+
+Fix it by moving the wait queue to the CIL rather than keeping it in
+the CIL context that gets freed on push completion. Because the
+wait queue is now independent of the CIL context and we might have
+multiple contexts in flight at once, only wake the waiters on the
+push throttle when the context we are pushing is over the hard
+throttle size threshold.
+
+Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push")
+Reported-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c  |   10 +++++-----
+ fs/xfs/xfs_log_priv.h |    2 +-
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -673,7 +673,8 @@ xlog_cil_push(
+       /*
+        * Wake up any background push waiters now this context is being pushed.
+        */
+-      wake_up_all(&ctx->push_wait);
++      if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
++              wake_up_all(&cil->xc_push_wait);
+       /*
+        * Check if we've anything to push. If there is nothing, then we don't
+@@ -745,13 +746,12 @@ xlog_cil_push(
+       /*
+        * initialise the new context and attach it to the CIL. Then attach
+-       * the current context to the CIL committing lsit so it can be found
++       * the current context to the CIL committing list so it can be found
+        * during log forces to extract the commit lsn of the sequence that
+        * needs to be forced.
+        */
+       INIT_LIST_HEAD(&new_ctx->committing);
+       INIT_LIST_HEAD(&new_ctx->busy_extents);
+-      init_waitqueue_head(&new_ctx->push_wait);
+       new_ctx->sequence = ctx->sequence + 1;
+       new_ctx->cil = cil;
+       cil->xc_ctx = new_ctx;
+@@ -946,7 +946,7 @@ xlog_cil_push_background(
+       if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+               trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+               ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+-              xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
++              xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
+               return;
+       }
+@@ -1222,12 +1222,12 @@ xlog_cil_init(
+       INIT_LIST_HEAD(&cil->xc_committing);
+       spin_lock_init(&cil->xc_cil_lock);
+       spin_lock_init(&cil->xc_push_lock);
++      init_waitqueue_head(&cil->xc_push_wait);
+       init_rwsem(&cil->xc_ctx_lock);
+       init_waitqueue_head(&cil->xc_commit_wait);
+       INIT_LIST_HEAD(&ctx->committing);
+       INIT_LIST_HEAD(&ctx->busy_extents);
+-      init_waitqueue_head(&ctx->push_wait);
+       ctx->sequence = 1;
+       ctx->cil = cil;
+       cil->xc_ctx = ctx;
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -247,7 +247,6 @@ struct xfs_cil_ctx {
+       struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
+       struct list_head        iclog_entry;
+       struct list_head        committing;     /* ctx committing list */
+-      wait_queue_head_t       push_wait;      /* background push throttle */
+       struct work_struct      discard_endio_work;
+ };
+@@ -281,6 +280,7 @@ struct xfs_cil {
+       wait_queue_head_t       xc_commit_wait;
+       xfs_lsn_t               xc_current_sequence;
+       struct work_struct      xc_push_work;
++      wait_queue_head_t       xc_push_wait;   /* background push throttle */
+ } ____cacheline_aligned_in_smp;
+ /*
diff --git a/queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch b/queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch
new file mode 100644 (file)
index 0000000..aff6233
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:34 +0530
+Subject: xfs: Lower CIL flush limit for large logs
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-18-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 108a42358a05312b2128533c6462a3fdeb410bdf upstream.
+
+The current CIL size aggregation limit is 1/8th the log size. This
+means for large logs we might be aggregating at least 250MB of dirty objects
+in memory before the CIL is flushed to the journal. With CIL shadow
+buffers sitting around, this means the CIL is often consuming >500MB
+of temporary memory that is all allocated under GFP_NOFS conditions.
+
+Flushing the CIL can take some time to do if there is other IO
+ongoing, and can introduce substantial log force latency by itself.
+It also pins the memory until the objects are in the AIL and can be
+written back and reclaimed by shrinkers. Hence this threshold also
+tends to determine the minimum amount of memory XFS can operate in
+under heavy modification without triggering the OOM killer.
+
+Modify the CIL space limit to prevent such huge amounts of pinned
+metadata from aggregating. We can have 2MB of log IO in flight at
+once, so limit aggregation to 16x this size. This threshold was
+chosen as it has little impact on performance (on 16-way fsmark) or log
+traffic but pins a lot less memory on large logs especially under
+heavy memory pressure.  An aggregation limit of 8x had 5-10%
+performance degradation and a 50% increase in log throughput for
+the same workload, so clearly that was too small for highly
+concurrent workloads on large logs.
+
+This was found via trace analysis of AIL behaviour. e.g. insertion
+from a single CIL flush:
+
+xfs_ail_insert: old lsn 0/0 new lsn 1/3033090 type XFS_LI_INODE flags IN_AIL
+
+$ grep xfs_ail_insert /mnt/scratch/s.t |grep "new lsn 1/3033090" |wc -l
+1721823
+$
+
+So there were 1.7 million objects inserted into the AIL from this
+CIL checkpoint, the first at 2323.392108, the last at 2325.667566 which
+was the end of the trace (i.e. it hadn't finished). Clearly a major
+problem.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_priv.h |   29 +++++++++++++++++++++++------
+ 1 file changed, 23 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -323,13 +323,30 @@ struct xfs_cil {
+  * tries to keep 25% of the log free, so we need to keep below that limit or we
+  * risk running out of free log space to start any new transactions.
+  *
+- * In order to keep background CIL push efficient, we will set a lower
+- * threshold at which background pushing is attempted without blocking current
+- * transaction commits.  A separate, higher bound defines when CIL pushes are
+- * enforced to ensure we stay within our maximum checkpoint size bounds.
+- * threshold, yet give us plenty of space for aggregation on large logs.
++ * In order to keep background CIL push efficient, we only need to ensure the
++ * CIL is large enough to maintain sufficient in-memory relogging to avoid
++ * repeated physical writes of frequently modified metadata. If we allow the CIL
++ * to grow to a substantial fraction of the log, then we may be pinning hundreds
++ * of megabytes of metadata in memory until the CIL flushes. This can cause
++ * issues when we are running low on memory - pinned memory cannot be reclaimed,
++ * and the CIL consumes a lot of memory. Hence we need to set an upper physical
++ * size limit for the CIL that limits the maximum amount of memory pinned by the
++ * CIL but does not limit performance by reducing relogging efficiency
++ * significantly.
++ *
++ * As such, the CIL push threshold ends up being the smaller of two thresholds:
++ * - a threshold large enough that it allows CIL to be pushed and progress to be
++ *   made without excessive blocking of incoming transaction commits. This is
++ *   defined to be 12.5% of the log space - half the 25% push threshold of the
++ *   AIL.
++ * - small enough that it doesn't pin excessive amounts of memory but maintains
++ *   close to peak relogging efficiency. This is defined to be 16x the iclog
++ *   buffer window (32MB) as measurements have shown this to be roughly the
++ *   point of diminishing performance increases under highly concurrent
++ *   modification workloads.
+  */
+-#define XLOG_CIL_SPACE_LIMIT(log)     (log->l_logsize >> 3)
++#define XLOG_CIL_SPACE_LIMIT(log)     \
++      min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
+ /*
+  * ticket grant locks, queues and accounting have their own cachlines
diff --git a/queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch b/queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch
new file mode 100644 (file)
index 0000000..1f2d708
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:42 +0530
+Subject: xfs: move inode flush to the sync workqueue
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-26-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit f0f7a674d4df1510d8ca050a669e1420cf7d7fab upstream.
+
+[ Modify fs/xfs/xfs_super.c to include the changes at locations suitable for
+ 5.4-lts kernel ]
+
+Move the inode dirty data flushing to a workqueue so that multiple
+threads can take advantage of a single thread's flushing work.  The
+ratelimiting technique used in bdd4ee4 was not successful, because
+threads that skipped the inode flush scan due to ratelimiting would
+ENOSPC early, which caused occasional (but noticeable) changes in
+behavior and sporadic fstest regressions.
+
+Therefore, make all the writer threads wait on a single inode flush,
+which eliminates both the stampeding hordes of flushers and the small
+window in which a write could fail with ENOSPC because it lost the
+ratelimit race even after another thread freed space.
+
+Fixes: c6425702f21e ("xfs: ratelimit inode flush on buffered write ENOSPC")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_mount.h |    5 +++++
+ fs/xfs/xfs_super.c |   28 +++++++++++++++++++++++-----
+ 2 files changed, 28 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -179,6 +179,11 @@ typedef struct xfs_mount {
+       struct xfs_error_cfg    m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
+       struct xstats           m_stats;        /* per-fs stats */
++      /*
++       * Workqueue item so that we can coalesce multiple inode flush attempts
++       * into a single flush.
++       */
++      struct work_struct      m_flush_inodes_work;
+       struct workqueue_struct *m_buf_workqueue;
+       struct workqueue_struct *m_unwritten_workqueue;
+       struct workqueue_struct *m_cil_workqueue;
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -840,6 +840,20 @@ xfs_destroy_mount_workqueues(
+       destroy_workqueue(mp->m_buf_workqueue);
+ }
++static void
++xfs_flush_inodes_worker(
++      struct work_struct      *work)
++{
++      struct xfs_mount        *mp = container_of(work, struct xfs_mount,
++                                                 m_flush_inodes_work);
++      struct super_block      *sb = mp->m_super;
++
++      if (down_read_trylock(&sb->s_umount)) {
++              sync_inodes_sb(sb);
++              up_read(&sb->s_umount);
++      }
++}
++
+ /*
+  * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
+  * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
+@@ -850,12 +864,15 @@ void
+ xfs_flush_inodes(
+       struct xfs_mount        *mp)
+ {
+-      struct super_block      *sb = mp->m_super;
++      /*
++       * If flush_work() returns true then that means we waited for a flush
++       * which was already in progress.  Don't bother running another scan.
++       */
++      if (flush_work(&mp->m_flush_inodes_work))
++              return;
+-      if (down_read_trylock(&sb->s_umount)) {
+-              sync_inodes_sb(sb);
+-              up_read(&sb->s_umount);
+-      }
++      queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
++      flush_work(&mp->m_flush_inodes_work);
+ }
+ /* Catch misguided souls that try to use this interface on XFS */
+@@ -1532,6 +1549,7 @@ xfs_mount_alloc(
+       spin_lock_init(&mp->m_perag_lock);
+       mutex_init(&mp->m_growlock);
+       atomic_set(&mp->m_active_trans, 0);
++      INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
+       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+       INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
+       INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
diff --git a/queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch b/queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch
new file mode 100644 (file)
index 0000000..b3de731
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:18 +0530
+Subject: xfs: open code insert range extent split helper
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-2-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit b73df17e4c5ba977205253fb7ef54267717a3cba upstream.
+
+The insert range operation currently splits the extent at the target
+offset in a separate transaction and lock cycle from the one that
+shifts extents. In preparation for reworking insert range into an
+atomic operation, lift the code into the caller so it can be easily
+condensed to a single rolling transaction and lock cycle and
+eliminate the helper. No functional changes.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c |   32 ++------------------------------
+ fs/xfs/libxfs/xfs_bmap.h |    3 ++-
+ fs/xfs/xfs_bmap_util.c   |   14 +++++++++++++-
+ 3 files changed, 17 insertions(+), 32 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -5925,8 +5925,8 @@ del_cursor:
+  * @split_fsb is a block where the extents is split.  If split_fsb lies in a
+  * hole or the first block of extents, just return 0.
+  */
+-STATIC int
+-xfs_bmap_split_extent_at(
++int
++xfs_bmap_split_extent(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           split_fsb)
+@@ -6037,34 +6037,6 @@ del_cursor:
+       return error;
+ }
+-int
+-xfs_bmap_split_extent(
+-      struct xfs_inode        *ip,
+-      xfs_fileoff_t           split_fsb)
+-{
+-      struct xfs_mount        *mp = ip->i_mount;
+-      struct xfs_trans        *tp;
+-      int                     error;
+-
+-      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+-                      XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+-      if (error)
+-              return error;
+-
+-      xfs_ilock(ip, XFS_ILOCK_EXCL);
+-      xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+-
+-      error = xfs_bmap_split_extent_at(tp, ip, split_fsb);
+-      if (error)
+-              goto out;
+-
+-      return xfs_trans_commit(tp);
+-
+-out:
+-      xfs_trans_cancel(tp);
+-      return error;
+-}
+-
+ /* Deferred mapping is only for real extents in the data fork. */
+ static bool
+ xfs_bmap_is_update_needed(
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -222,7 +222,8 @@ int        xfs_bmap_can_insert_extents(struct x
+ int   xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+               xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+               bool *done, xfs_fileoff_t stop_fsb);
+-int   xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
++int   xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip,
++              xfs_fileoff_t split_offset);
+ int   xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+               xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+               struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1326,7 +1326,19 @@ xfs_insert_file_space(
+        * is not the starting block of extent, we need to split the extent at
+        * stop_fsb.
+        */
+-      error = xfs_bmap_split_extent(ip, stop_fsb);
++      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
++                      XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
++      if (error)
++              return error;
++
++      xfs_ilock(ip, XFS_ILOCK_EXCL);
++      xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++
++      error = xfs_bmap_split_extent(tp, ip, stop_fsb);
++      if (error)
++              goto out_trans_cancel;
++
++      error = xfs_trans_commit(tp);
+       if (error)
+               return error;
diff --git a/queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch b/queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch
new file mode 100644 (file)
index 0000000..8798d5a
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:33 +0530
+Subject: xfs: preserve default grace interval during quotacheck
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-17-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 5885539f0af371024d07afd14974bfdc3fff84c5 upstream.
+
+When quotacheck runs, it zeroes all the timer fields in every dquot.
+Unfortunately, it also does this to the root dquot, which erases any
+preconfigured grace intervals and warning limits that the administrator
+may have set.  Worse yet, the incore copies of those variables remain
+set.  This cache coherence problem manifests itself as the grace
+interval mysteriously being reset back to the defaults at the /next/
+mount.
+
+Fix it by not resetting the root disk dquot's timer and warning fields.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -875,12 +875,20 @@ xfs_qm_reset_dqcounts(
+               ddq->d_bcount = 0;
+               ddq->d_icount = 0;
+               ddq->d_rtbcount = 0;
+-              ddq->d_btimer = 0;
+-              ddq->d_itimer = 0;
+-              ddq->d_rtbtimer = 0;
+-              ddq->d_bwarns = 0;
+-              ddq->d_iwarns = 0;
+-              ddq->d_rtbwarns = 0;
++
++              /*
++               * dquot id 0 stores the default grace period and the maximum
++               * warning limit that were set by the administrator, so we
++               * should not reset them.
++               */
++              if (ddq->d_id != 0) {
++                      ddq->d_btimer = 0;
++                      ddq->d_itimer = 0;
++                      ddq->d_rtbtimer = 0;
++                      ddq->d_bwarns = 0;
++                      ddq->d_iwarns = 0;
++                      ddq->d_rtbwarns = 0;
++              }
+               if (xfs_sb_version_hascrc(&mp->m_sb)) {
+                       xfs_update_cksum((char *)&dqb[j],
diff --git a/queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch b/queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch
new file mode 100644 (file)
index 0000000..f602de3
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:41 +0530
+Subject: xfs: reflink should force the log out if mounted with wsync
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-25-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 5833112df7e9a306af9af09c60127b92ed723962 upstream.
+
+Reflink should force the log out to disk if the filesystem was mounted
+with wsync, the same as most other operations in xfs.
+
+[Note: XFS_MOUNT_WSYNC is set when the admin mounts the filesystem
+with either the 'wsync' or 'sync' mount options, which effectively means
+that we're classifying reflink/dedupe as IO operations and making them
+synchronous when required.]
+
+Fixes: 3fc9f5e409319 ("xfs: remove xfs_reflink_remap_range")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+[darrick: add more to the changelog]
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_file.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1044,7 +1044,11 @@ xfs_file_remap_range(
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+                       remap_flags);
++      if (ret)
++              goto out_unlock;
++      if (mp->m_flags & XFS_MOUNT_WSYNC)
++              xfs_log_force_inode(dest);
+ out_unlock:
+       xfs_reflink_remap_unlock(file_in, file_out);
+       if (ret)
diff --git a/queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch b/queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch
new file mode 100644 (file)
index 0000000..153d541
--- /dev/null
@@ -0,0 +1,489 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:27 +0530
+Subject: xfs: remove the xfs_disk_dquot_t and xfs_dquot_t
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-11-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit aefe69a45d84901c702f87672ec1e93de1d03f73 upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix some of the comments]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dquot_buf.c  |    8 +--
+ fs/xfs/libxfs/xfs_format.h     |   10 ++--
+ fs/xfs/libxfs/xfs_trans_resv.c |    2 
+ fs/xfs/xfs_dquot.c             |   18 +++----
+ fs/xfs/xfs_dquot.h             |   98 ++++++++++++++++++++---------------------
+ fs/xfs/xfs_log_recover.c       |    5 +-
+ fs/xfs/xfs_qm.c                |   30 ++++++------
+ fs/xfs/xfs_qm_bhv.c            |    6 +-
+ fs/xfs/xfs_trans_dquot.c       |   44 +++++++++---------
+ 9 files changed, 112 insertions(+), 109 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dquot_buf.c
++++ b/fs/xfs/libxfs/xfs_dquot_buf.c
+@@ -35,10 +35,10 @@ xfs_calc_dquots_per_chunk(
+ xfs_failaddr_t
+ xfs_dquot_verify(
+-      struct xfs_mount *mp,
+-      xfs_disk_dquot_t *ddq,
+-      xfs_dqid_t       id,
+-      uint             type)    /* used only during quotacheck */
++      struct xfs_mount        *mp,
++      struct xfs_disk_dquot   *ddq,
++      xfs_dqid_t              id,
++      uint                    type)   /* used only during quotacheck */
+ {
+       /*
+        * We can encounter an uninitialized dquot buffer for 2 reasons:
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -1144,11 +1144,11 @@ static inline void xfs_dinode_put_rdev(s
+ /*
+  * This is the main portion of the on-disk representation of quota
+- * information for a user. This is the q_core of the xfs_dquot_t that
++ * information for a user. This is the q_core of the struct xfs_dquot that
+  * is kept in kernel memory. We pad this with some more expansion room
+  * to construct the on disk structure.
+  */
+-typedef struct        xfs_disk_dquot {
++struct xfs_disk_dquot {
+       __be16          d_magic;        /* dquot magic = XFS_DQUOT_MAGIC */
+       __u8            d_version;      /* dquot version */
+       __u8            d_flags;        /* XFS_DQ_USER/PROJ/GROUP */
+@@ -1171,15 +1171,15 @@ typedef struct xfs_disk_dquot {
+       __be32          d_rtbtimer;     /* similar to above; for RT disk blocks */
+       __be16          d_rtbwarns;     /* warnings issued wrt RT disk blocks */
+       __be16          d_pad;
+-} xfs_disk_dquot_t;
++};
+ /*
+  * This is what goes on disk. This is separated from the xfs_disk_dquot because
+  * carrying the unnecessary padding would be a waste of memory.
+  */
+ typedef struct xfs_dqblk {
+-      xfs_disk_dquot_t  dd_diskdq;    /* portion that lives incore as well */
+-      char              dd_fill[4];   /* filling for posterity */
++      struct xfs_disk_dquot   dd_diskdq; /* portion living incore as well */
++      char                    dd_fill[4];/* filling for posterity */
+       /*
+        * These two are only present on filesystems with the CRC bits set.
+--- a/fs/xfs/libxfs/xfs_trans_resv.c
++++ b/fs/xfs/libxfs/xfs_trans_resv.c
+@@ -776,7 +776,7 @@ xfs_calc_clear_agi_bucket_reservation(
+ /*
+  * Adjusting quota limits.
+- *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
++ *    the disk quota buffer: sizeof(struct xfs_disk_dquot)
+  */
+ STATIC uint
+ xfs_calc_qm_setqlim_reservation(void)
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -48,7 +48,7 @@ static struct lock_class_key xfs_dquot_p
+  */
+ void
+ xfs_qm_dqdestroy(
+-      xfs_dquot_t     *dqp)
++      struct xfs_dquot        *dqp)
+ {
+       ASSERT(list_empty(&dqp->q_lru));
+@@ -113,8 +113,8 @@ xfs_qm_adjust_dqlimits(
+  */
+ void
+ xfs_qm_adjust_dqtimers(
+-      xfs_mount_t             *mp,
+-      xfs_disk_dquot_t        *d)
++      struct xfs_mount        *mp,
++      struct xfs_disk_dquot   *d)
+ {
+       ASSERT(d->d_id);
+@@ -497,7 +497,7 @@ xfs_dquot_from_disk(
+       struct xfs_disk_dquot   *ddqp = bp->b_addr + dqp->q_bufoffset;
+       /* copy everything from disk dquot to the incore dquot */
+-      memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
++      memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot));
+       /*
+        * Reservation counters are defined as reservation plus current usage
+@@ -989,7 +989,7 @@ xfs_qm_dqput(
+  */
+ void
+ xfs_qm_dqrele(
+-      xfs_dquot_t     *dqp)
++      struct xfs_dquot        *dqp)
+ {
+       if (!dqp)
+               return;
+@@ -1019,7 +1019,7 @@ xfs_qm_dqflush_done(
+       struct xfs_log_item     *lip)
+ {
+       xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
+-      xfs_dquot_t             *dqp = qip->qli_dquot;
++      struct xfs_dquot        *dqp = qip->qli_dquot;
+       struct xfs_ail          *ailp = lip->li_ailp;
+       /*
+@@ -1129,7 +1129,7 @@ xfs_qm_dqflush(
+       }
+       /* This is the only portion of data that needs to persist */
+-      memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
++      memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot));
+       /*
+        * Clear the dirty field and remember the flush lsn for later use.
+@@ -1187,8 +1187,8 @@ out_unlock:
+  */
+ void
+ xfs_dqlock2(
+-      xfs_dquot_t     *d1,
+-      xfs_dquot_t     *d2)
++      struct xfs_dquot        *d1,
++      struct xfs_dquot        *d2)
+ {
+       if (d1 && d2) {
+               ASSERT(d1 != d2);
+--- a/fs/xfs/xfs_dquot.h
++++ b/fs/xfs/xfs_dquot.h
+@@ -30,33 +30,36 @@ enum {
+ /*
+  * The incore dquot structure
+  */
+-typedef struct xfs_dquot {
+-      uint             dq_flags;      /* various flags (XFS_DQ_*) */
+-      struct list_head q_lru;         /* global free list of dquots */
+-      struct xfs_mount*q_mount;       /* filesystem this relates to */
+-      uint             q_nrefs;       /* # active refs from inodes */
+-      xfs_daddr_t      q_blkno;       /* blkno of dquot buffer */
+-      int              q_bufoffset;   /* off of dq in buffer (# dquots) */
+-      xfs_fileoff_t    q_fileoffset;  /* offset in quotas file */
+-
+-      xfs_disk_dquot_t q_core;        /* actual usage & quotas */
+-      xfs_dq_logitem_t q_logitem;     /* dquot log item */
+-      xfs_qcnt_t       q_res_bcount;  /* total regular nblks used+reserved */
+-      xfs_qcnt_t       q_res_icount;  /* total inos allocd+reserved */
+-      xfs_qcnt_t       q_res_rtbcount;/* total realtime blks used+reserved */
+-      xfs_qcnt_t       q_prealloc_lo_wmark;/* prealloc throttle wmark */
+-      xfs_qcnt_t       q_prealloc_hi_wmark;/* prealloc disabled wmark */
+-      int64_t          q_low_space[XFS_QLOWSP_MAX];
+-      struct mutex     q_qlock;       /* quota lock */
+-      struct completion q_flush;      /* flush completion queue */
+-      atomic_t          q_pincount;   /* dquot pin count */
+-      wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
+-} xfs_dquot_t;
++struct xfs_dquot {
++      uint                    dq_flags;
++      struct list_head        q_lru;
++      struct xfs_mount        *q_mount;
++      uint                    q_nrefs;
++      xfs_daddr_t             q_blkno;
++      int                     q_bufoffset;
++      xfs_fileoff_t           q_fileoffset;
++
++      struct xfs_disk_dquot   q_core;
++      xfs_dq_logitem_t        q_logitem;
++      /* total regular nblks used+reserved */
++      xfs_qcnt_t              q_res_bcount;
++      /* total inos allocd+reserved */
++      xfs_qcnt_t              q_res_icount;
++      /* total realtime blks used+reserved */
++      xfs_qcnt_t              q_res_rtbcount;
++      xfs_qcnt_t              q_prealloc_lo_wmark;
++      xfs_qcnt_t              q_prealloc_hi_wmark;
++      int64_t                 q_low_space[XFS_QLOWSP_MAX];
++      struct mutex            q_qlock;
++      struct completion       q_flush;
++      atomic_t                q_pincount;
++      struct wait_queue_head  q_pinwait;
++};
+ /*
+  * Lock hierarchy for q_qlock:
+  *    XFS_QLOCK_NORMAL is the implicit default,
+- *    XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
++ *    XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+  */
+ enum {
+       XFS_QLOCK_NORMAL = 0,
+@@ -64,21 +67,21 @@ enum {
+ };
+ /*
+- * Manage the q_flush completion queue embedded in the dquot.  This completion
++ * Manage the q_flush completion queue embedded in the dquot. This completion
+  * queue synchronizes processes attempting to flush the in-core dquot back to
+  * disk.
+  */
+-static inline void xfs_dqflock(xfs_dquot_t *dqp)
++static inline void xfs_dqflock(struct xfs_dquot *dqp)
+ {
+       wait_for_completion(&dqp->q_flush);
+ }
+-static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp)
++static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp)
+ {
+       return try_wait_for_completion(&dqp->q_flush);
+ }
+-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
++static inline void xfs_dqfunlock(struct xfs_dquot *dqp)
+ {
+       complete(&dqp->q_flush);
+ }
+@@ -112,7 +115,7 @@ static inline int xfs_this_quota_on(stru
+       }
+ }
+-static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
++static inline struct xfs_dquot *xfs_inode_dquot(struct xfs_inode *ip, int type)
+ {
+       switch (type & XFS_DQ_ALLTYPES) {
+       case XFS_DQ_USER:
+@@ -147,31 +150,30 @@ static inline bool xfs_dquot_lowsp(struc
+ #define XFS_QM_ISPDQ(dqp)     ((dqp)->dq_flags & XFS_DQ_PROJ)
+ #define XFS_QM_ISGDQ(dqp)     ((dqp)->dq_flags & XFS_DQ_GROUP)
+-extern void           xfs_qm_dqdestroy(xfs_dquot_t *);
+-extern int            xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
+-extern void           xfs_qm_dqunpin_wait(xfs_dquot_t *);
+-extern void           xfs_qm_adjust_dqtimers(xfs_mount_t *,
+-                                      xfs_disk_dquot_t *);
+-extern void           xfs_qm_adjust_dqlimits(struct xfs_mount *,
+-                                             struct xfs_dquot *);
+-extern xfs_dqid_t     xfs_qm_id_for_quotatype(struct xfs_inode *ip,
+-                                      uint type);
+-extern int            xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
++void          xfs_qm_dqdestroy(struct xfs_dquot *dqp);
++int           xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
++void          xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
++void          xfs_qm_adjust_dqtimers(struct xfs_mount *mp,
++                                              struct xfs_disk_dquot *d);
++void          xfs_qm_adjust_dqlimits(struct xfs_mount *mp,
++                                              struct xfs_dquot *d);
++xfs_dqid_t    xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type);
++int           xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
+                                       uint type, bool can_alloc,
+                                       struct xfs_dquot **dqpp);
+-extern int            xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
+-                                      bool can_alloc,
+-                                      struct xfs_dquot **dqpp);
+-extern int            xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
++int           xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
++                                              bool can_alloc,
++                                              struct xfs_dquot **dqpp);
++int           xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
+                                       uint type, struct xfs_dquot **dqpp);
+-extern int            xfs_qm_dqget_uncached(struct xfs_mount *mp,
+-                                      xfs_dqid_t id, uint type,
+-                                      struct xfs_dquot **dqpp);
+-extern void           xfs_qm_dqput(xfs_dquot_t *);
++int           xfs_qm_dqget_uncached(struct xfs_mount *mp,
++                                              xfs_dqid_t id, uint type,
++                                              struct xfs_dquot **dqpp);
++void          xfs_qm_dqput(struct xfs_dquot *dqp);
+-extern void           xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
++void          xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+-extern void           xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
++void          xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
+ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
+ {
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2577,6 +2577,7 @@ xlog_recover_do_reg_buffer(
+       int                     bit;
+       int                     nbits;
+       xfs_failaddr_t          fa;
++      const size_t            size_disk_dquot = sizeof(struct xfs_disk_dquot);
+       trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+@@ -2619,7 +2620,7 @@ xlog_recover_do_reg_buffer(
+                                       "XFS: NULL dquot in %s.", __func__);
+                               goto next;
+                       }
+-                      if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
++                      if (item->ri_buf[i].i_len < size_disk_dquot) {
+                               xfs_alert(mp,
+                                       "XFS: dquot too small (%d) in %s.",
+                                       item->ri_buf[i].i_len, __func__);
+@@ -3250,7 +3251,7 @@ xlog_recover_dquot_pass2(
+               xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
+               return -EFSCORRUPTED;
+       }
+-      if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
++      if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
+               xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
+                       item->ri_buf[1].i_len, __func__);
+               return -EFSCORRUPTED;
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -244,14 +244,14 @@ xfs_qm_unmount_quotas(
+ STATIC int
+ xfs_qm_dqattach_one(
+-      xfs_inode_t     *ip,
+-      xfs_dqid_t      id,
+-      uint            type,
+-      bool            doalloc,
+-      xfs_dquot_t     **IO_idqpp)
++      struct xfs_inode        *ip,
++      xfs_dqid_t              id,
++      uint                    type,
++      bool                    doalloc,
++      struct xfs_dquot        **IO_idqpp)
+ {
+-      xfs_dquot_t     *dqp;
+-      int             error;
++      struct xfs_dquot        *dqp;
++      int                     error;
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       error = 0;
+@@ -544,8 +544,8 @@ xfs_qm_set_defquota(
+       uint            type,
+       xfs_quotainfo_t *qinf)
+ {
+-      xfs_dquot_t             *dqp;
+-      struct xfs_def_quota    *defq;
++      struct xfs_dquot        *dqp;
++      struct xfs_def_quota    *defq;
+       struct xfs_disk_dquot   *ddqp;
+       int                     error;
+@@ -1746,14 +1746,14 @@ error_rele:
+  * Actually transfer ownership, and do dquot modifications.
+  * These were already reserved.
+  */
+-xfs_dquot_t *
++struct xfs_dquot *
+ xfs_qm_vop_chown(
+-      xfs_trans_t     *tp,
+-      xfs_inode_t     *ip,
+-      xfs_dquot_t     **IO_olddq,
+-      xfs_dquot_t     *newdq)
++      struct xfs_trans        *tp,
++      struct xfs_inode        *ip,
++      struct xfs_dquot        **IO_olddq,
++      struct xfs_dquot        *newdq)
+ {
+-      xfs_dquot_t     *prevdq;
++      struct xfs_dquot        *prevdq;
+       uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
+                                XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+--- a/fs/xfs/xfs_qm_bhv.c
++++ b/fs/xfs/xfs_qm_bhv.c
+@@ -54,11 +54,11 @@ xfs_fill_statvfs_from_dquot(
+  */
+ void
+ xfs_qm_statvfs(
+-      xfs_inode_t             *ip,
++      struct xfs_inode        *ip,
+       struct kstatfs          *statp)
+ {
+-      xfs_mount_t             *mp = ip->i_mount;
+-      xfs_dquot_t             *dqp;
++      struct xfs_mount        *mp = ip->i_mount;
++      struct xfs_dquot        *dqp;
+       if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) {
+               xfs_fill_statvfs_from_dquot(statp, dqp);
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -25,8 +25,8 @@ STATIC void  xfs_trans_alloc_dqinfo(xfs_t
+  */
+ void
+ xfs_trans_dqjoin(
+-      xfs_trans_t     *tp,
+-      xfs_dquot_t     *dqp)
++      struct xfs_trans        *tp,
++      struct xfs_dquot        *dqp)
+ {
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       ASSERT(dqp->q_logitem.qli_dquot == dqp);
+@@ -49,8 +49,8 @@ xfs_trans_dqjoin(
+  */
+ void
+ xfs_trans_log_dquot(
+-      xfs_trans_t     *tp,
+-      xfs_dquot_t     *dqp)
++      struct xfs_trans        *tp,
++      struct xfs_dquot        *dqp)
+ {
+       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+@@ -486,12 +486,12 @@ xfs_trans_apply_dquot_deltas(
+  */
+ void
+ xfs_trans_unreserve_and_mod_dquots(
+-      xfs_trans_t             *tp)
++      struct xfs_trans        *tp)
+ {
+       int                     i, j;
+-      xfs_dquot_t             *dqp;
++      struct xfs_dquot        *dqp;
+       struct xfs_dqtrx        *qtrx, *qa;
+-      bool                    locked;
++      bool                    locked;
+       if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+               return;
+@@ -571,21 +571,21 @@ xfs_quota_warn(
+  */
+ STATIC int
+ xfs_trans_dqresv(
+-      xfs_trans_t     *tp,
+-      xfs_mount_t     *mp,
+-      xfs_dquot_t     *dqp,
+-      int64_t         nblks,
+-      long            ninos,
+-      uint            flags)
+-{
+-      xfs_qcnt_t      hardlimit;
+-      xfs_qcnt_t      softlimit;
+-      time_t          timer;
+-      xfs_qwarncnt_t  warns;
+-      xfs_qwarncnt_t  warnlimit;
+-      xfs_qcnt_t      total_count;
+-      xfs_qcnt_t      *resbcountp;
+-      xfs_quotainfo_t *q = mp->m_quotainfo;
++      struct xfs_trans        *tp,
++      struct xfs_mount        *mp,
++      struct xfs_dquot        *dqp,
++      int64_t                 nblks,
++      long                    ninos,
++      uint                    flags)
++{
++      xfs_qcnt_t              hardlimit;
++      xfs_qcnt_t              softlimit;
++      time_t                  timer;
++      xfs_qwarncnt_t          warns;
++      xfs_qwarncnt_t          warnlimit;
++      xfs_qcnt_t              total_count;
++      xfs_qcnt_t              *resbcountp;
++      xfs_quotainfo_t         *q = mp->m_quotainfo;
+       struct xfs_def_quota    *defq;
diff --git a/queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch b/queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch
new file mode 100644 (file)
index 0000000..bd86591
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:28 +0530
+Subject: xfs: remove the xfs_dq_logitem_t typedef
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-12-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit fd8b81dbbb23d4a3508cfac83256b4f5e770941c upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c      |    2 +-
+ fs/xfs/xfs_dquot.h      |    2 +-
+ fs/xfs/xfs_dquot_item.h |   10 +++++-----
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -1018,7 +1018,7 @@ xfs_qm_dqflush_done(
+       struct xfs_buf          *bp,
+       struct xfs_log_item     *lip)
+ {
+-      xfs_dq_logitem_t        *qip = (struct xfs_dq_logitem *)lip;
++      struct xfs_dq_logitem   *qip = (struct xfs_dq_logitem *)lip;
+       struct xfs_dquot        *dqp = qip->qli_dquot;
+       struct xfs_ail          *ailp = lip->li_ailp;
+--- a/fs/xfs/xfs_dquot.h
++++ b/fs/xfs/xfs_dquot.h
+@@ -40,7 +40,7 @@ struct xfs_dquot {
+       xfs_fileoff_t           q_fileoffset;
+       struct xfs_disk_dquot   q_core;
+-      xfs_dq_logitem_t        q_logitem;
++      struct xfs_dq_logitem   q_logitem;
+       /* total regular nblks used+reserved */
+       xfs_qcnt_t              q_res_bcount;
+       /* total inos allocd+reserved */
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -11,11 +11,11 @@ struct xfs_trans;
+ struct xfs_mount;
+ struct xfs_qoff_logitem;
+-typedef struct xfs_dq_logitem {
+-      struct xfs_log_item      qli_item;         /* common portion */
+-      struct xfs_dquot        *qli_dquot;        /* dquot ptr */
+-      xfs_lsn_t                qli_flush_lsn;    /* lsn at last flush */
+-} xfs_dq_logitem_t;
++struct xfs_dq_logitem {
++      struct xfs_log_item      qli_item;      /* common portion */
++      struct xfs_dquot        *qli_dquot;     /* dquot ptr */
++      xfs_lsn_t                qli_flush_lsn; /* lsn at last flush */
++};
+ typedef struct xfs_qoff_logitem {
+       struct xfs_log_item      qql_item;      /* common portion */
diff --git a/queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch b/queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch
new file mode 100644 (file)
index 0000000..e506edb
--- /dev/null
@@ -0,0 +1,183 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:29 +0530
+Subject: xfs: remove the xfs_qoff_logitem_t typedef
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-13-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit d0bdfb106907e4a3ef4f25f6d27e392abf41f3a0 upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix a comment]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_trans_resv.c |    4 ++--
+ fs/xfs/xfs_dquot_item.h        |   28 +++++++++++++++-------------
+ fs/xfs/xfs_qm_syscalls.c       |   29 ++++++++++++++++-------------
+ fs/xfs/xfs_trans_dquot.c       |   12 ++++++------
+ 4 files changed, 39 insertions(+), 34 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_trans_resv.c
++++ b/fs/xfs/libxfs/xfs_trans_resv.c
+@@ -800,7 +800,7 @@ xfs_calc_qm_dqalloc_reservation(
+ /*
+  * Turning off quotas.
+- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
++ *    the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
+  *    the superblock for the quota flags: sector size
+  */
+ STATIC uint
+@@ -813,7 +813,7 @@ xfs_calc_qm_quotaoff_reservation(
+ /*
+  * End of turning off quotas.
+- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
++ *    the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
+  */
+ STATIC uint
+ xfs_calc_qm_quotaoff_end_reservation(void)
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -12,24 +12,26 @@ struct xfs_mount;
+ struct xfs_qoff_logitem;
+ struct xfs_dq_logitem {
+-      struct xfs_log_item      qli_item;      /* common portion */
++      struct xfs_log_item     qli_item;       /* common portion */
+       struct xfs_dquot        *qli_dquot;     /* dquot ptr */
+-      xfs_lsn_t                qli_flush_lsn; /* lsn at last flush */
++      xfs_lsn_t               qli_flush_lsn;  /* lsn at last flush */
+ };
+-typedef struct xfs_qoff_logitem {
+-      struct xfs_log_item      qql_item;      /* common portion */
+-      struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
++struct xfs_qoff_logitem {
++      struct xfs_log_item     qql_item;       /* common portion */
++      struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+       unsigned int            qql_flags;
+-} xfs_qoff_logitem_t;
++};
+-extern void              xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+-                                      struct xfs_qoff_logitem *, uint);
+-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+-                                      struct xfs_qoff_logitem *, uint);
+-extern void              xfs_trans_log_quotaoff_item(struct xfs_trans *,
+-                                      struct xfs_qoff_logitem *);
++void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
++struct xfs_qoff_logitem       *xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
++              struct xfs_qoff_logitem *start,
++              uint flags);
++struct xfs_qoff_logitem       *xfs_trans_get_qoff_item(struct xfs_trans *tp,
++              struct xfs_qoff_logitem *startqoff,
++              uint flags);
++void xfs_trans_log_quotaoff_item(struct xfs_trans *tp,
++              struct xfs_qoff_logitem *qlp);
+ #endif        /* __XFS_DQUOT_ITEM_H__ */
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -19,9 +19,12 @@
+ #include "xfs_qm.h"
+ #include "xfs_icache.h"
+-STATIC int    xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+-STATIC int    xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+-                                      uint);
++STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp,
++                                      struct xfs_qoff_logitem **qoffstartp,
++                                      uint flags);
++STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp,
++                                      struct xfs_qoff_logitem *startqoff,
++                                      uint flags);
+ /*
+  * Turn off quota accounting and/or enforcement for all udquots and/or
+@@ -40,7 +43,7 @@ xfs_qm_scall_quotaoff(
+       uint                    dqtype;
+       int                     error;
+       uint                    inactivate_flags;
+-      xfs_qoff_logitem_t      *qoffstart;
++      struct xfs_qoff_logitem *qoffstart;
+       /*
+        * No file system can have quotas enabled on disk but not in core.
+@@ -540,13 +543,13 @@ out_unlock:
+ STATIC int
+ xfs_qm_log_quotaoff_end(
+-      xfs_mount_t             *mp,
+-      xfs_qoff_logitem_t      *startqoff,
++      struct xfs_mount        *mp,
++      struct xfs_qoff_logitem *startqoff,
+       uint                    flags)
+ {
+-      xfs_trans_t             *tp;
++      struct xfs_trans        *tp;
+       int                     error;
+-      xfs_qoff_logitem_t      *qoffi;
++      struct xfs_qoff_logitem *qoffi;
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+       if (error)
+@@ -568,13 +571,13 @@ xfs_qm_log_quotaoff_end(
+ STATIC int
+ xfs_qm_log_quotaoff(
+-      xfs_mount_t            *mp,
+-      xfs_qoff_logitem_t     **qoffstartp,
+-      uint                   flags)
++      struct xfs_mount        *mp,
++      struct xfs_qoff_logitem **qoffstartp,
++      uint                    flags)
+ {
+-      xfs_trans_t            *tp;
++      struct xfs_trans        *tp;
+       int                     error;
+-      xfs_qoff_logitem_t     *qoffi;
++      struct xfs_qoff_logitem *qoffi;
+       *qoffstartp = NULL;
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -824,13 +824,13 @@ xfs_trans_reserve_quota_nblks(
+ /*
+  * This routine is called to allocate a quotaoff log item.
+  */
+-xfs_qoff_logitem_t *
++struct xfs_qoff_logitem *
+ xfs_trans_get_qoff_item(
+-      xfs_trans_t             *tp,
+-      xfs_qoff_logitem_t      *startqoff,
++      struct xfs_trans        *tp,
++      struct xfs_qoff_logitem *startqoff,
+       uint                    flags)
+ {
+-      xfs_qoff_logitem_t      *q;
++      struct xfs_qoff_logitem *q;
+       ASSERT(tp != NULL);
+@@ -852,8 +852,8 @@ xfs_trans_get_qoff_item(
+  */
+ void
+ xfs_trans_log_quotaoff_item(
+-      xfs_trans_t             *tp,
+-      xfs_qoff_logitem_t      *qlp)
++      struct xfs_trans        *tp,
++      struct xfs_qoff_logitem *qlp)
+ {
+       tp->t_flags |= XFS_TRANS_DIRTY;
+       set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags);
diff --git a/queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch b/queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch
new file mode 100644 (file)
index 0000000..794eee6
--- /dev/null
@@ -0,0 +1,179 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:30 +0530
+Subject: xfs: Replace function declaration by actual definition
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-14-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit 1cc95e6f0d7cfd61c9d3c5cdd4e7345b173f764f upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix typo in subject line]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm_syscalls.c |  140 ++++++++++++++++++++++-------------------------
+ 1 file changed, 66 insertions(+), 74 deletions(-)
+
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -19,12 +19,72 @@
+ #include "xfs_qm.h"
+ #include "xfs_icache.h"
+-STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp,
+-                                      struct xfs_qoff_logitem **qoffstartp,
+-                                      uint flags);
+-STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp,
+-                                      struct xfs_qoff_logitem *startqoff,
+-                                      uint flags);
++STATIC int
++xfs_qm_log_quotaoff(
++      struct xfs_mount        *mp,
++      struct xfs_qoff_logitem **qoffstartp,
++      uint                    flags)
++{
++      struct xfs_trans        *tp;
++      int                     error;
++      struct xfs_qoff_logitem *qoffi;
++
++      *qoffstartp = NULL;
++
++      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
++      if (error)
++              goto out;
++
++      qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
++      xfs_trans_log_quotaoff_item(tp, qoffi);
++
++      spin_lock(&mp->m_sb_lock);
++      mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
++      spin_unlock(&mp->m_sb_lock);
++
++      xfs_log_sb(tp);
++
++      /*
++       * We have to make sure that the transaction is secure on disk before we
++       * return and actually stop quota accounting. So, make it synchronous.
++       * We don't care about quotoff's performance.
++       */
++      xfs_trans_set_sync(tp);
++      error = xfs_trans_commit(tp);
++      if (error)
++              goto out;
++
++      *qoffstartp = qoffi;
++out:
++      return error;
++}
++
++STATIC int
++xfs_qm_log_quotaoff_end(
++      struct xfs_mount        *mp,
++      struct xfs_qoff_logitem *startqoff,
++      uint                    flags)
++{
++      struct xfs_trans        *tp;
++      int                     error;
++      struct xfs_qoff_logitem *qoffi;
++
++      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
++      if (error)
++              return error;
++
++      qoffi = xfs_trans_get_qoff_item(tp, startqoff,
++                                      flags & XFS_ALL_QUOTA_ACCT);
++      xfs_trans_log_quotaoff_item(tp, qoffi);
++
++      /*
++       * We have to make sure that the transaction is secure on disk before we
++       * return and actually stop quota accounting. So, make it synchronous.
++       * We don't care about quotoff's performance.
++       */
++      xfs_trans_set_sync(tp);
++      return xfs_trans_commit(tp);
++}
+ /*
+  * Turn off quota accounting and/or enforcement for all udquots and/or
+@@ -541,74 +601,6 @@ out_unlock:
+       return error;
+ }
+-STATIC int
+-xfs_qm_log_quotaoff_end(
+-      struct xfs_mount        *mp,
+-      struct xfs_qoff_logitem *startqoff,
+-      uint                    flags)
+-{
+-      struct xfs_trans        *tp;
+-      int                     error;
+-      struct xfs_qoff_logitem *qoffi;
+-
+-      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+-      if (error)
+-              return error;
+-
+-      qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+-                                      flags & XFS_ALL_QUOTA_ACCT);
+-      xfs_trans_log_quotaoff_item(tp, qoffi);
+-
+-      /*
+-       * We have to make sure that the transaction is secure on disk before we
+-       * return and actually stop quota accounting. So, make it synchronous.
+-       * We don't care about quotoff's performance.
+-       */
+-      xfs_trans_set_sync(tp);
+-      return xfs_trans_commit(tp);
+-}
+-
+-
+-STATIC int
+-xfs_qm_log_quotaoff(
+-      struct xfs_mount        *mp,
+-      struct xfs_qoff_logitem **qoffstartp,
+-      uint                    flags)
+-{
+-      struct xfs_trans        *tp;
+-      int                     error;
+-      struct xfs_qoff_logitem *qoffi;
+-
+-      *qoffstartp = NULL;
+-
+-      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+-      if (error)
+-              goto out;
+-
+-      qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+-      xfs_trans_log_quotaoff_item(tp, qoffi);
+-
+-      spin_lock(&mp->m_sb_lock);
+-      mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+-      spin_unlock(&mp->m_sb_lock);
+-
+-      xfs_log_sb(tp);
+-
+-      /*
+-       * We have to make sure that the transaction is secure on disk before we
+-       * return and actually stop quota accounting. So, make it synchronous.
+-       * We don't care about quotoff's performance.
+-       */
+-      xfs_trans_set_sync(tp);
+-      error = xfs_trans_commit(tp);
+-      if (error)
+-              goto out;
+-
+-      *qoffstartp = qoffi;
+-out:
+-      return error;
+-}
+-
+ /* Fill out the quota context. */
+ static void
+ xfs_qm_scall_getquota_fill_qc(
diff --git a/queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch b/queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch
new file mode 100644 (file)
index 0000000..0404a44
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:20 +0530
+Subject: xfs: rework collapse range into an atomic operation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-4-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 211683b21de959a647de74faedfdd8a5d189327e upstream.
+
+The collapse range operation uses a unique transaction and ilock
+cycle for the hole punch and each extent shift iteration of the
+overall operation. While the hole punch is safe as a separate
+operation due to the iolock, cycling the ilock after each extent
+shift is risky w.r.t. concurrent operations, similar to insert range.
+
+To avoid this problem, make collapse range atomic with respect to
+ilock. Hold the ilock across the entire operation, replace the
+individual transactions with a single rolling transaction sequence
+and finish dfops on each iteration to perform pending frees and roll
+the transaction. Remove the unnecessary quota reservation as
+collapse range can only ever merge extents (and thus remove extent
+records and potentially free bmap blocks). The dfops call
+automatically relogs the inode to keep it moving in the log. This
+guarantees that nothing else can change the extent mapping of an
+inode while a collapse range operation is in progress.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_bmap_util.c |   29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1237,7 +1237,6 @@ xfs_collapse_file_space(
+       int                     error;
+       xfs_fileoff_t           next_fsb = XFS_B_TO_FSB(mp, offset + len);
+       xfs_fileoff_t           shift_fsb = XFS_B_TO_FSB(mp, len);
+-      uint                    resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+       bool                    done = false;
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+@@ -1253,32 +1252,34 @@ xfs_collapse_file_space(
+       if (error)
+               return error;
+-      while (!error && !done) {
+-              error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+-                                      &tp);
+-              if (error)
+-                      break;
++      error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
++      if (error)
++              return error;
+-              xfs_ilock(ip, XFS_ILOCK_EXCL);
+-              error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+-                              ip->i_gdquot, ip->i_pdquot, resblks, 0,
+-                              XFS_QMOPT_RES_REGBLKS);
+-              if (error)
+-                      goto out_trans_cancel;
+-              xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++      xfs_ilock(ip, XFS_ILOCK_EXCL);
++      xfs_trans_ijoin(tp, ip, 0);
++      while (!done) {
+               error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
+                               &done);
+               if (error)
+                       goto out_trans_cancel;
++              if (done)
++                      break;
+-              error = xfs_trans_commit(tp);
++              /* finish any deferred frees and roll the transaction */
++              error = xfs_defer_finish(&tp);
++              if (error)
++                      goto out_trans_cancel;
+       }
++      error = xfs_trans_commit(tp);
++      xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+ out_trans_cancel:
+       xfs_trans_cancel(tp);
++      xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+ }
diff --git a/queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch b/queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch
new file mode 100644 (file)
index 0000000..d60dd75
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:19 +0530
+Subject: xfs: rework insert range into an atomic operation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-3-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit dd87f87d87fa4359a54e7b44549742f579e3e805 upstream.
+
+The insert range operation uses a unique transaction and ilock cycle
+for the extent split and each extent shift iteration of the overall
+operation. While this works, it risks racing with other
+operations in subtle ways such as COW writeback modifying an extent
+tree in the middle of a shift operation.
+
+To avoid this problem, make insert range atomic with respect to
+ilock. Hold the ilock across the entire operation, replace the
+individual transactions with a single rolling transaction sequence
+and relog the inode to keep it moving in the log. This guarantees
+that nothing else can change the extent mapping of an inode while
+an insert range operation is in progress.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_bmap_util.c |   32 +++++++++++++-------------------
+ 1 file changed, 13 insertions(+), 19 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1321,47 +1321,41 @@ xfs_insert_file_space(
+       if (error)
+               return error;
+-      /*
+-       * The extent shifting code works on extent granularity. So, if stop_fsb
+-       * is not the starting block of extent, we need to split the extent at
+-       * stop_fsb.
+-       */
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+       if (error)
+               return error;
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+-      xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++      xfs_trans_ijoin(tp, ip, 0);
++      /*
++       * The extent shifting code works on extent granularity. So, if stop_fsb
++       * is not the starting block of extent, we need to split the extent at
++       * stop_fsb.
++       */
+       error = xfs_bmap_split_extent(tp, ip, stop_fsb);
+       if (error)
+               goto out_trans_cancel;
+-      error = xfs_trans_commit(tp);
+-      if (error)
+-              return error;
+-
+-      while (!error && !done) {
+-              error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
+-                                      &tp);
++      do {
++              error = xfs_trans_roll_inode(&tp, ip);
+               if (error)
+-                      break;
++                      goto out_trans_cancel;
+-              xfs_ilock(ip, XFS_ILOCK_EXCL);
+-              xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+               error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
+                               &done, stop_fsb);
+               if (error)
+                       goto out_trans_cancel;
++      } while (!done);
+-              error = xfs_trans_commit(tp);
+-      }
+-
++      error = xfs_trans_commit(tp);
++      xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+ out_trans_cancel:
+       xfs_trans_cancel(tp);
++      xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       return error;
+ }
diff --git a/queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch b/queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch
new file mode 100644 (file)
index 0000000..81f18a4
--- /dev/null
@@ -0,0 +1,220 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:37 +0530
+Subject: xfs: tail updates only need to occur when LSN changes
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-21-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 8eb807bd839938b45bf7a97f0568d2a845ba6929 upstream.
+
+We currently wake anything waiting on the log tail to move whenever
+the log item at the tail of the log is removed. Historically this
+was fine behaviour because there were very few items at any given
+LSN. But with delayed logging, there may be thousands of items at
+any given LSN, and we can't move the tail until they are all gone.
+
+Hence if we are removing them in near tail-first order, we might be
+waking up processes waiting on the tail LSN to change (e.g. log
+space waiters) repeatedly without them being able to make progress.
+This also occurs with the new sync push waiters, and can result in
+thousands of spurious wakeups every second when under heavy direct
+reclaim pressure.
+
+To fix this, check that the tail LSN has actually changed on the
+AIL before triggering wakeups. This will reduce the number of
+spurious wakeups when doing bulk AIL removal and make this code much
+more efficient.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode_item.c |   18 ++++++++++++----
+ fs/xfs/xfs_trans_ail.c  |   52 +++++++++++++++++++++++++++++++++---------------
+ fs/xfs/xfs_trans_priv.h |    4 +--
+ 3 files changed, 51 insertions(+), 23 deletions(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -732,19 +732,27 @@ xfs_iflush_done(
+        * holding the lock before removing the inode from the AIL.
+        */
+       if (need_ail) {
+-              bool                    mlip_changed = false;
++              xfs_lsn_t       tail_lsn = 0;
+               /* this is an opencoded batch version of xfs_trans_ail_delete */
+               spin_lock(&ailp->ail_lock);
+               list_for_each_entry(blip, &tmp, li_bio_list) {
+                       if (INODE_ITEM(blip)->ili_logged &&
+-                          blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
+-                              mlip_changed |= xfs_ail_delete_one(ailp, blip);
+-                      else {
++                          blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) {
++                              /*
++                               * xfs_ail_update_finish() only cares about the
++                               * lsn of the first tail item removed, any
++                               * others will be at the same or higher lsn so
++                               * we just ignore them.
++                               */
++                              xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip);
++                              if (!tail_lsn && lsn)
++                                      tail_lsn = lsn;
++                      } else {
+                               xfs_clear_li_failed(blip);
+                       }
+               }
+-              xfs_ail_update_finish(ailp, mlip_changed);
++              xfs_ail_update_finish(ailp, tail_lsn);
+       }
+       /*
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -108,17 +108,25 @@ xfs_ail_next(
+  * We need the AIL lock in order to get a coherent read of the lsn of the last
+  * item in the AIL.
+  */
++static xfs_lsn_t
++__xfs_ail_min_lsn(
++      struct xfs_ail          *ailp)
++{
++      struct xfs_log_item     *lip = xfs_ail_min(ailp);
++
++      if (lip)
++              return lip->li_lsn;
++      return 0;
++}
++
+ xfs_lsn_t
+ xfs_ail_min_lsn(
+       struct xfs_ail          *ailp)
+ {
+-      xfs_lsn_t               lsn = 0;
+-      struct xfs_log_item     *lip;
++      xfs_lsn_t               lsn;
+       spin_lock(&ailp->ail_lock);
+-      lip = xfs_ail_min(ailp);
+-      if (lip)
+-              lsn = lip->li_lsn;
++      lsn = __xfs_ail_min_lsn(ailp);
+       spin_unlock(&ailp->ail_lock);
+       return lsn;
+@@ -683,11 +691,12 @@ xfs_ail_push_all_sync(
+ void
+ xfs_ail_update_finish(
+       struct xfs_ail          *ailp,
+-      bool                    do_tail_update) __releases(ailp->ail_lock)
++      xfs_lsn_t               old_lsn) __releases(ailp->ail_lock)
+ {
+       struct xfs_mount        *mp = ailp->ail_mount;
+-      if (!do_tail_update) {
++      /* if the tail lsn hasn't changed, don't do updates or wakeups. */
++      if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+               spin_unlock(&ailp->ail_lock);
+               return;
+       }
+@@ -732,7 +741,7 @@ xfs_trans_ail_update_bulk(
+       xfs_lsn_t               lsn) __releases(ailp->ail_lock)
+ {
+       struct xfs_log_item     *mlip;
+-      int                     mlip_changed = 0;
++      xfs_lsn_t               tail_lsn = 0;
+       int                     i;
+       LIST_HEAD(tmp);
+@@ -747,9 +756,10 @@ xfs_trans_ail_update_bulk(
+                               continue;
+                       trace_xfs_ail_move(lip, lip->li_lsn, lsn);
++                      if (mlip == lip && !tail_lsn)
++                              tail_lsn = lip->li_lsn;
++
+                       xfs_ail_delete(ailp, lip);
+-                      if (mlip == lip)
+-                              mlip_changed = 1;
+               } else {
+                       trace_xfs_ail_insert(lip, 0, lsn);
+               }
+@@ -760,15 +770,23 @@ xfs_trans_ail_update_bulk(
+       if (!list_empty(&tmp))
+               xfs_ail_splice(ailp, cur, &tmp, lsn);
+-      xfs_ail_update_finish(ailp, mlip_changed);
++      xfs_ail_update_finish(ailp, tail_lsn);
+ }
+-bool
++/*
++ * Delete one log item from the AIL.
++ *
++ * If this item was at the tail of the AIL, return the LSN of the log item so
++ * that we can use it to check if the LSN of the tail of the log has moved
++ * when finishing up the AIL delete process in xfs_ail_update_finish().
++ */
++xfs_lsn_t
+ xfs_ail_delete_one(
+       struct xfs_ail          *ailp,
+       struct xfs_log_item     *lip)
+ {
+       struct xfs_log_item     *mlip = xfs_ail_min(ailp);
++      xfs_lsn_t               lsn = lip->li_lsn;
+       trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
+       xfs_ail_delete(ailp, lip);
+@@ -776,7 +794,9 @@ xfs_ail_delete_one(
+       clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
+       lip->li_lsn = 0;
+-      return mlip == lip;
++      if (mlip == lip)
++              return lsn;
++      return 0;
+ }
+ /**
+@@ -807,7 +827,7 @@ xfs_trans_ail_delete(
+       int                     shutdown_type)
+ {
+       struct xfs_mount        *mp = ailp->ail_mount;
+-      bool                    need_update;
++      xfs_lsn_t               tail_lsn;
+       if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
+               spin_unlock(&ailp->ail_lock);
+@@ -820,8 +840,8 @@ xfs_trans_ail_delete(
+               return;
+       }
+-      need_update = xfs_ail_delete_one(ailp, lip);
+-      xfs_ail_update_finish(ailp, need_update);
++      tail_lsn = xfs_ail_delete_one(ailp, lip);
++      xfs_ail_update_finish(ailp, tail_lsn);
+ }
+ int
+--- a/fs/xfs/xfs_trans_priv.h
++++ b/fs/xfs/xfs_trans_priv.h
+@@ -91,8 +91,8 @@ xfs_trans_ail_update(
+       xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+ }
+-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+-void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update)
++xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
++void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
+                       __releases(ailp->ail_lock);
+ void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+               int shutdown_type);
diff --git a/queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch b/queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch
new file mode 100644 (file)
index 0000000..43b7b91
--- /dev/null
@@ -0,0 +1,189 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:35 +0530
+Subject: xfs: Throttle commits on delayed background CIL push
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-19-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 0e7ab7efe77451cba4cbecb6c9f5ef83cf32b36b upstream.
+
+In certain situations the background CIL push can be indefinitely
+delayed. While we have workarounds for the obvious cases now, they
+don't solve the underlying issue. The issue is that there is no
+upper limit on the CIL where we will either force or wait for
+a background push to start, hence allowing the CIL to grow without
+bound until it consumes all log space.
+
+To fix this, add a new wait queue to the CIL which allows background
+pushes to wait for the CIL context to be switched out. This happens
+when the push starts, so it will allow us to block incoming
+transaction commit completion until the push has started. This will
+only affect processes that are running modifications, and only when
+the CIL threshold has been significantly overrun.
+
+This has no apparent impact on performance, and doesn't even trigger
+until over 45 million inodes had been created in a 16-way fsmark
+test on a 2GB log. That was limiting at 64MB of log space used, so
+the active CIL size is only about 3% of the total log in that case.
+The concurrent removal of those files did not trigger the background
+sleep at all.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c  |   37 +++++++++++++++++++++++++++++++++----
+ fs/xfs/xfs_log_priv.h |   24 ++++++++++++++++++++++++
+ fs/xfs/xfs_trace.h    |    1 +
+ 3 files changed, 58 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -671,6 +671,11 @@ xlog_cil_push(
+       ASSERT(push_seq <= ctx->sequence);
+       /*
++       * Wake up any background push waiters now this context is being pushed.
++       */
++      wake_up_all(&ctx->push_wait);
++
++      /*
+        * Check if we've anything to push. If there is nothing, then we don't
+        * move on to a new sequence number and so we have to be able to push
+        * this sequence again later.
+@@ -746,6 +751,7 @@ xlog_cil_push(
+        */
+       INIT_LIST_HEAD(&new_ctx->committing);
+       INIT_LIST_HEAD(&new_ctx->busy_extents);
++      init_waitqueue_head(&new_ctx->push_wait);
+       new_ctx->sequence = ctx->sequence + 1;
+       new_ctx->cil = cil;
+       cil->xc_ctx = new_ctx;
+@@ -900,7 +906,7 @@ xlog_cil_push_work(
+  */
+ static void
+ xlog_cil_push_background(
+-      struct xlog     *log)
++      struct xlog     *log) __releases(cil->xc_ctx_lock)
+ {
+       struct xfs_cil  *cil = log->l_cilp;
+@@ -914,14 +920,36 @@ xlog_cil_push_background(
+        * don't do a background push if we haven't used up all the
+        * space available yet.
+        */
+-      if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
++      if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
++              up_read(&cil->xc_ctx_lock);
+               return;
++      }
+       spin_lock(&cil->xc_push_lock);
+       if (cil->xc_push_seq < cil->xc_current_sequence) {
+               cil->xc_push_seq = cil->xc_current_sequence;
+               queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+       }
++
++      /*
++       * Drop the context lock now, we can't hold that if we need to sleep
++       * because we are over the blocking threshold. The push_lock is still
++       * held, so blocking threshold sleep/wakeup is still correctly
++       * serialised here.
++       */
++      up_read(&cil->xc_ctx_lock);
++
++      /*
++       * If we are well over the space limit, throttle the work that is being
++       * done until the push work on this context has begun.
++       */
++      if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
++              trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
++              ASSERT(cil->xc_ctx->space_used < log->l_logsize);
++              xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
++              return;
++      }
++
+       spin_unlock(&cil->xc_push_lock);
+ }
+@@ -1038,9 +1066,9 @@ xfs_log_commit_cil(
+               if (lip->li_ops->iop_committing)
+                       lip->li_ops->iop_committing(lip, xc_commit_lsn);
+       }
+-      xlog_cil_push_background(log);
+-      up_read(&cil->xc_ctx_lock);
++      /* xlog_cil_push_background() releases cil->xc_ctx_lock */
++      xlog_cil_push_background(log);
+ }
+ /*
+@@ -1199,6 +1227,7 @@ xlog_cil_init(
+       INIT_LIST_HEAD(&ctx->committing);
+       INIT_LIST_HEAD(&ctx->busy_extents);
++      init_waitqueue_head(&ctx->push_wait);
+       ctx->sequence = 1;
+       ctx->cil = cil;
+       cil->xc_ctx = ctx;
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -247,6 +247,7 @@ struct xfs_cil_ctx {
+       struct xfs_log_vec      *lv_chain;      /* logvecs being pushed */
+       struct list_head        iclog_entry;
+       struct list_head        committing;     /* ctx committing list */
++      wait_queue_head_t       push_wait;      /* background push throttle */
+       struct work_struct      discard_endio_work;
+ };
+@@ -344,10 +345,33 @@ struct xfs_cil {
+  *   buffer window (32MB) as measurements have shown this to be roughly the
+  *   point of diminishing performance increases under highly concurrent
+  *   modification workloads.
++ *
++ * To prevent the CIL from overflowing upper commit size bounds, we introduce a
++ * new threshold at which we block committing transactions until the background
++ * CIL commit commences and switches to a new context. While this is not a hard
++ * limit, it forces the process committing a transaction to the CIL to block and
++ * yield the CPU, giving the CIL push work a chance to be scheduled and start
++ * work. This prevents a process running lots of transactions from overfilling
++ * the CIL because it is not yielding the CPU. We set the blocking limit at
++ * twice the background push space threshold so we keep in line with the AIL
++ * push thresholds.
++ *
++ * Note: this is not a -hard- limit as blocking is applied after the transaction
++ * is inserted into the CIL and the push has been triggered. It is largely a
++ * throttling mechanism that allows the CIL push to be scheduled and run. A hard
++ * limit will be difficult to implement without introducing global serialisation
++ * in the CIL commit fast path, and it's not at all clear that we actually need
++ * such hard limits given the ~7 years we've run without a hard limit before
++ * finding the first situation where a checkpoint size overflow actually
++ * occurred. Hence the simple throttle, and an ASSERT check to tell us that
++ * we've overrun the max size.
+  */
+ #define XLOG_CIL_SPACE_LIMIT(log)     \
+       min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
++#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log)    \
++      (XLOG_CIL_SPACE_LIMIT(log) * 2)
++
+ /*
+  * ticket grant locks, queues and accounting have their own cachlines
+  * as these are quite hot and can be operated on concurrently.
+--- a/fs/xfs/xfs_trace.h
++++ b/fs/xfs/xfs_trace.h
+@@ -1011,6 +1011,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_re
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
++DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
+ DECLARE_EVENT_CLASS(xfs_log_item_class,
+       TP_PROTO(struct xfs_log_item *lip),
diff --git a/queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch b/queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch
new file mode 100644 (file)
index 0000000..7c03609
--- /dev/null
@@ -0,0 +1,112 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:39 +0530
+Subject: xfs: trylock underlying buffer on dquot flush
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-23-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 8d3d7e2b35ea7d91d6e085c93b5efecfb0fba307 upstream.
+
+A dquot flush currently blocks on the buffer lock for the underlying
+dquot buffer. In turn, this causes xfsaild to block rather than
+continue processing other items in the meantime. Update
+xfs_qm_dqflush() to trylock the buffer, similar to how inode buffers
+are handled, and return -EAGAIN if the lock fails. Fix up any
+callers that don't currently handle the error properly.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c      |    6 +++---
+ fs/xfs/xfs_dquot_item.c |    3 ++-
+ fs/xfs/xfs_qm.c         |   14 +++++++++-----
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -1105,8 +1105,8 @@ xfs_qm_dqflush(
+        * Get the buffer containing the on-disk dquot
+        */
+       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+-                                 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+-                                 &xfs_dquot_buf_ops);
++                                 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
++                                 &bp, &xfs_dquot_buf_ops);
+       if (error)
+               goto out_unlock;
+@@ -1176,7 +1176,7 @@ xfs_qm_dqflush(
+ out_unlock:
+       xfs_dqfunlock(dqp);
+-      return -EIO;
++      return error;
+ }
+ /*
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push(
+               if (!xfs_buf_delwri_queue(bp, buffer_list))
+                       rval = XFS_ITEM_FLUSHING;
+               xfs_buf_relse(bp);
+-      }
++      } else if (error == -EAGAIN)
++              rval = XFS_ITEM_LOCKED;
+       spin_lock(&lip->li_ailp->ail_lock);
+ out_unlock:
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -121,12 +121,11 @@ xfs_qm_dqpurge(
+ {
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_quotainfo    *qi = mp->m_quotainfo;
++      int                     error = -EAGAIN;
+       xfs_dqlock(dqp);
+-      if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
+-              xfs_dqunlock(dqp);
+-              return -EAGAIN;
+-      }
++      if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0)
++              goto out_unlock;
+       dqp->dq_flags |= XFS_DQ_FREEING;
+@@ -139,7 +138,6 @@ xfs_qm_dqpurge(
+        */
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               struct xfs_buf  *bp = NULL;
+-              int             error;
+               /*
+                * We don't care about getting disk errors here. We need
+@@ -149,6 +147,8 @@ xfs_qm_dqpurge(
+               if (!error) {
+                       error = xfs_bwrite(bp);
+                       xfs_buf_relse(bp);
++              } else if (error == -EAGAIN) {
++                      goto out_unlock;
+               }
+               xfs_dqflock(dqp);
+       }
+@@ -174,6 +174,10 @@ xfs_qm_dqpurge(
+       xfs_qm_dqdestroy(dqp);
+       return 0;
++
++out_unlock:
++      xfs_dqunlock(dqp);
++      return error;
+ }
+ /*
diff --git a/queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch b/queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch
new file mode 100644 (file)
index 0000000..3dc8b69
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:26 +0530
+Subject: xfs: Use scnprintf() for avoiding potential buffer overflow
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-10-chandan.babu@oracle.com>
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 17bb60b74124e9491d593e2601e3afe14daa2f57 upstream.
+
+Since snprintf() returns the would-be-output size instead of the
+actual output size, the succeeding calls may go beyond the given
+buffer limit.  Fix it by replacing with scnprintf().
+
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_stats.c |   10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_stats.c
++++ b/fs/xfs/xfs_stats.c
+@@ -57,13 +57,13 @@ int xfs_stats_format(struct xfsstats __p
+       /* Loop over all stats groups */
+       for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
+-              len += snprintf(buf + len, PATH_MAX - len, "%s",
++              len += scnprintf(buf + len, PATH_MAX - len, "%s",
+                               xstats[i].desc);
+               /* inner loop does each group */
+               for (; j < xstats[i].endpoint; j++)
+-                      len += snprintf(buf + len, PATH_MAX - len, " %u",
++                      len += scnprintf(buf + len, PATH_MAX - len, " %u",
+                                       counter_val(stats, j));
+-              len += snprintf(buf + len, PATH_MAX - len, "\n");
++              len += scnprintf(buf + len, PATH_MAX - len, "\n");
+       }
+       /* extra precision counters */
+       for_each_possible_cpu(i) {
+@@ -72,9 +72,9 @@ int xfs_stats_format(struct xfsstats __p
+               xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
+       }
+-      len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
++      len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
+                       xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+-      len += snprintf(buf + len, PATH_MAX-len, "debug %u\n",
++      len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
+ #if defined(DEBUG)
+               1);
+ #else
diff --git a/queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch b/queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch
new file mode 100644 (file)
index 0000000..7e19e08
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:22 +0530
+Subject: xfs: xfs_buf_corruption_error should take __this_address
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-6-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit e83cf875d67a6cb9ddfaa8b45d2fa93d12b5c66f upstream.
+
+Add a xfs_failaddr_t parameter to this function so that callers can
+potentially pass in (and therefore report) the exact point in the code
+where we decided that a metadata buffer was corrupt.  This enables us to
+wire it up to checking functions that have to run outside of verifiers.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c   |    2 +-
+ fs/xfs/xfs_error.c |    5 +++--
+ fs/xfs/xfs_error.h |    2 +-
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1564,7 +1564,7 @@ __xfs_buf_mark_corrupt(
+ {
+       ASSERT(bp->b_flags & XBF_DONE);
+-      xfs_buf_corruption_error(bp);
++      xfs_buf_corruption_error(bp, fa);
+       xfs_buf_stale(bp);
+ }
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -350,13 +350,14 @@ xfs_corruption_error(
+  */
+ void
+ xfs_buf_corruption_error(
+-      struct xfs_buf          *bp)
++      struct xfs_buf          *bp,
++      xfs_failaddr_t          fa)
+ {
+       struct xfs_mount        *mp = bp->b_mount;
+       xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR,
+                 "Metadata corruption detected at %pS, %s block 0x%llx",
+-                __return_address, bp->b_ops->name, bp->b_bn);
++                fa, bp->b_ops->name, bp->b_bn);
+       xfs_alert(mp, "Unmount and run xfs_repair");
+--- a/fs/xfs/xfs_error.h
++++ b/fs/xfs/xfs_error.h
+@@ -15,7 +15,7 @@ extern void xfs_corruption_error(const c
+                       struct xfs_mount *mp, const void *buf, size_t bufsize,
+                       const char *filename, int linenum,
+                       xfs_failaddr_t failaddr);
+-void xfs_buf_corruption_error(struct xfs_buf *bp);
++void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa);
+ extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
+                       const char *name, const void *buf, size_t bufsz,
+                       xfs_failaddr_t failaddr);