--- /dev/null
+xfs-open-code-insert-range-extent-split-helper.patch
+xfs-rework-insert-range-into-an-atomic-operation.patch
+xfs-rework-collapse-range-into-an-atomic-operation.patch
+xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch
+xfs-xfs_buf_corruption_error-should-take-__this_address.patch
+xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch
+xfs-check-owner-of-dir3-data-blocks.patch
+xfs-check-owner-of-dir3-blocks.patch
+xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch
+xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch
+xfs-remove-the-xfs_dq_logitem_t-typedef.patch
+xfs-remove-the-xfs_qoff_logitem_t-typedef.patch
+xfs-replace-function-declaration-by-actual-definition.patch
+xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch
+xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch
+xfs-preserve-default-grace-interval-during-quotacheck.patch
+xfs-lower-cil-flush-limit-for-large-logs.patch
+xfs-throttle-commits-on-delayed-background-cil-push.patch
+xfs-factor-common-ail-item-deletion-code.patch
+xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch
+xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch
+xfs-trylock-underlying-buffer-on-dquot-flush.patch
+xfs-factor-out-a-new-xfs_log_force_inode-helper.patch
+xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch
+xfs-move-inode-flush-to-the-sync-workqueue.patch
+xfs-fix-use-after-free-on-cil-context-on-shutdown.patch
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:21 +0530
+Subject: xfs: add a function to deal with corrupt buffers post-verifiers
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-5-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 8d57c21600a514d7a9237327c2496ae159bab5bb upstream.
+
+Add a helper function to get rid of buffers that we have decided are
+corrupt after the verifiers have run. This function is intended to
+handle metadata checks that can't happen in the verifiers, such as
+inter-block relationship checking. Note that we now mark the buffer
+stale so that it will not end up on any LRU and will be purged on
+release.
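+
+For illustration only, a post-verifier check is expected to follow
+roughly this pattern (a minimal sketch; the owner field and names are
+placeholders here, the real call sites are in the diff below):
+
+	/* Inter-block check that the read verifier could not perform. */
+	if (be64_to_cpu(hdr->owner) != dp->i_ino) {
+		xfs_buf_mark_corrupt(bp);	/* logs + stales, b_error unset */
+		xfs_trans_brelse(tp, bp);
+		return -EFSCORRUPTED;
+	}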
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_alloc.c | 2 +-
+ fs/xfs/libxfs/xfs_attr_leaf.c | 6 +++---
+ fs/xfs/libxfs/xfs_btree.c | 2 +-
+ fs/xfs/libxfs/xfs_da_btree.c | 10 +++++-----
+ fs/xfs/libxfs/xfs_dir2_leaf.c | 2 +-
+ fs/xfs/libxfs/xfs_dir2_node.c | 6 +++---
+ fs/xfs/xfs_attr_inactive.c | 6 +++---
+ fs/xfs/xfs_attr_list.c | 2 +-
+ fs/xfs/xfs_buf.c | 22 ++++++++++++++++++++++
+ fs/xfs/xfs_buf.h | 2 ++
+ fs/xfs/xfs_error.c | 2 ++
+ fs/xfs/xfs_inode.c | 4 ++--
+ 12 files changed, 46 insertions(+), 20 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_alloc.c
++++ b/fs/xfs/libxfs/xfs_alloc.c
+@@ -685,7 +685,7 @@ xfs_alloc_update_counters(
+ xfs_trans_agblocks_delta(tp, len);
+ if (unlikely(be32_to_cpu(agf->agf_freeblks) >
+ be32_to_cpu(agf->agf_length))) {
+- xfs_buf_corruption_error(agbp);
++ xfs_buf_mark_corrupt(agbp);
+ return -EFSCORRUPTED;
+ }
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -2288,7 +2288,7 @@ xfs_attr3_leaf_lookup_int(
+ xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
+ entries = xfs_attr3_leaf_entryp(leaf);
+ if (ichdr.count >= args->geo->blksize / 8) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -2307,11 +2307,11 @@ xfs_attr3_leaf_lookup_int(
+ break;
+ }
+ if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ return -EFSCORRUPTED;
+ }
+ if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ return -EFSCORRUPTED;
+ }
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -1820,7 +1820,7 @@ xfs_btree_lookup_get_block(
+
+ out_bad:
+ *blkp = NULL;
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(cur->bc_tp, bp);
+ return -EFSCORRUPTED;
+ }
+--- a/fs/xfs/libxfs/xfs_da_btree.c
++++ b/fs/xfs/libxfs/xfs_da_btree.c
+@@ -504,7 +504,7 @@ xfs_da3_split(
+ node = oldblk->bp->b_addr;
+ if (node->hdr.info.forw) {
+ if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+- xfs_buf_corruption_error(oldblk->bp);
++ xfs_buf_mark_corrupt(oldblk->bp);
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+@@ -517,7 +517,7 @@ xfs_da3_split(
+ node = oldblk->bp->b_addr;
+ if (node->hdr.info.back) {
+ if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+- xfs_buf_corruption_error(oldblk->bp);
++ xfs_buf_mark_corrupt(oldblk->bp);
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+@@ -1544,7 +1544,7 @@ xfs_da3_node_lookup_int(
+ }
+
+ if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
+- xfs_buf_corruption_error(blk->bp);
++ xfs_buf_mark_corrupt(blk->bp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -1559,7 +1559,7 @@ xfs_da3_node_lookup_int(
+
+ /* Tree taller than we can handle; bail out! */
+ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
+- xfs_buf_corruption_error(blk->bp);
++ xfs_buf_mark_corrupt(blk->bp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -1567,7 +1567,7 @@ xfs_da3_node_lookup_int(
+ if (blkno == args->geo->leafblk)
+ expected_level = nodehdr.level - 1;
+ else if (expected_level != nodehdr.level) {
+- xfs_buf_corruption_error(blk->bp);
++ xfs_buf_mark_corrupt(blk->bp);
+ return -EFSCORRUPTED;
+ } else
+ expected_level--;
+--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
+@@ -1344,7 +1344,7 @@ xfs_dir2_leaf_removename(
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+ bestsp = xfs_dir2_leaf_bests_p(ltp);
+ if (be16_to_cpu(bestsp[db]) != oldbest) {
+- xfs_buf_corruption_error(lbp);
++ xfs_buf_mark_corrupt(lbp);
+ return -EFSCORRUPTED;
+ }
+ /*
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -375,7 +375,7 @@ xfs_dir2_leaf_to_node(
+ ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+ if (be32_to_cpu(ltp->bestcount) >
+ (uint)dp->i_d.di_size / args->geo->blksize) {
+- xfs_buf_corruption_error(lbp);
++ xfs_buf_mark_corrupt(lbp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -449,7 +449,7 @@ xfs_dir2_leafn_add(
+ * into other peoples memory
+ */
+ if (index < 0) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -745,7 +745,7 @@ xfs_dir2_leafn_lookup_for_entry(
+
+ xfs_dir3_leaf_check(dp, bp);
+ if (leafhdr.count <= 0) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ return -EFSCORRUPTED;
+ }
+
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -145,7 +145,7 @@ xfs_attr3_node_inactive(
+ * Since this code is recursive (gasp!) we must protect ourselves.
+ */
+ if (level > XFS_DA_NODE_MAXDEPTH) {
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(*trans, bp); /* no locks for later trans */
+ return -EFSCORRUPTED;
+ }
+@@ -196,7 +196,7 @@ xfs_attr3_node_inactive(
+ error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
+ break;
+ default:
+- xfs_buf_corruption_error(child_bp);
++ xfs_buf_mark_corrupt(child_bp);
+ xfs_trans_brelse(*trans, child_bp);
+ error = -EFSCORRUPTED;
+ break;
+@@ -281,7 +281,7 @@ xfs_attr3_root_inactive(
+ break;
+ default:
+ error = -EFSCORRUPTED;
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(*trans, bp);
+ break;
+ }
+--- a/fs/xfs/xfs_attr_list.c
++++ b/fs/xfs/xfs_attr_list.c
+@@ -271,7 +271,7 @@ xfs_attr_node_list_lookup(
+ return 0;
+
+ out_corruptbuf:
+- xfs_buf_corruption_error(bp);
++ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(tp, bp);
+ return -EFSCORRUPTED;
+ }
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1547,6 +1547,28 @@ xfs_buf_zero(
+ }
+
+ /*
++ * Log a message about and stale a buffer that a caller has decided is corrupt.
++ *
++ * This function should be called for the kinds of metadata corruption that
++ * cannot be detected from a verifier, such as incorrect inter-block relationship
++ * data. Do /not/ call this function from a verifier function.
++ *
++ * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will
++ * be marked stale, but b_error will not be set. The caller is responsible for
++ * releasing the buffer or fixing it.
++ */
++void
++__xfs_buf_mark_corrupt(
++ struct xfs_buf *bp,
++ xfs_failaddr_t fa)
++{
++ ASSERT(bp->b_flags & XBF_DONE);
++
++ xfs_buf_corruption_error(bp);
++ xfs_buf_stale(bp);
++}
++
++/*
+ * Handling of buffer targets (buftargs).
+ */
+
+--- a/fs/xfs/xfs_buf.h
++++ b/fs/xfs/xfs_buf.h
+@@ -270,6 +270,8 @@ static inline int xfs_buf_submit(struct
+ }
+
+ void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
++void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa);
++#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address)
+
+ /* Buffer Utility Routines */
+ extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -345,6 +345,8 @@ xfs_corruption_error(
+ * Complain about the kinds of metadata corruption that we can't detect from a
+ * verifier, such as incorrect inter-block relationship data. Does not set
+ * bp->b_error.
++ *
++ * Call xfs_buf_mark_corrupt, not this function.
+ */
+ void
+ xfs_buf_corruption_error(
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2149,7 +2149,7 @@ xfs_iunlink_update_bucket(
+ * head of the list.
+ */
+ if (old_value == new_agino) {
+- xfs_buf_corruption_error(agibp);
++ xfs_buf_mark_corrupt(agibp);
+ return -EFSCORRUPTED;
+ }
+
+@@ -2283,7 +2283,7 @@ xfs_iunlink(
+ next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
+ if (next_agino == agino ||
+ !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+- xfs_buf_corruption_error(agibp);
++ xfs_buf_mark_corrupt(agibp);
+ return -EFSCORRUPTED;
+ }
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:25 +0530
+Subject: xfs: check owner of dir3 blocks
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-9-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1b2c1a63b678d63e9c98314d44413f5af79c9c80 upstream.
+
+Check the owner field of dir3 block headers. If it's corrupt, release
+the buffer and return EFSCORRUPTED. All callers handle this properly.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_block.c | 33 +++++++++++++++++++++++++++++++--
+ 1 file changed, 31 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_block.c
++++ b/fs/xfs/libxfs/xfs_dir2_block.c
+@@ -114,6 +114,23 @@ const struct xfs_buf_ops xfs_dir3_block_
+ .verify_struct = xfs_dir3_block_verify,
+ };
+
++static xfs_failaddr_t
++xfs_dir3_block_header_check(
++ struct xfs_inode *dp,
++ struct xfs_buf *bp)
++{
++ struct xfs_mount *mp = dp->i_mount;
++
++ if (xfs_sb_version_hascrc(&mp->m_sb)) {
++ struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
++
++ if (be64_to_cpu(hdr3->owner) != dp->i_ino)
++ return __this_address;
++ }
++
++ return NULL;
++}
++
+ int
+ xfs_dir3_block_read(
+ struct xfs_trans *tp,
+@@ -121,12 +138,24 @@ xfs_dir3_block_read(
+ struct xfs_buf **bpp)
+ {
+ struct xfs_mount *mp = dp->i_mount;
++ xfs_failaddr_t fa;
+ int err;
+
+ err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
+ XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
+- if (!err && tp && *bpp)
+- xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
++ if (err || !*bpp)
++ return err;
++
++ /* Check things that we can't do in the verifier. */
++ fa = xfs_dir3_block_header_check(dp, *bpp);
++ if (fa) {
++ __xfs_buf_mark_corrupt(*bpp, fa);
++ xfs_trans_brelse(tp, *bpp);
++ *bpp = NULL;
++ return -EFSCORRUPTED;
++ }
++
++ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
+ return err;
+ }
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:24 +0530
+Subject: xfs: check owner of dir3 data blocks
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-8-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit a10c21ed5d5241d11cf1d5a4556730840572900b upstream.
+
+[Slightly edit xfs_dir3_data_read() to work with existing mapped_bno argument instead
+of flag values introduced in later kernels]
+
+Check the owner field of dir3 data block headers. If it's corrupt,
+release the buffer and return EFSCORRUPTED. All callers handle this
+properly.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_data.c | 32 ++++++++++++++++++++++++++++++--
+ 1 file changed, 30 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_data.c
++++ b/fs/xfs/libxfs/xfs_dir2_data.c
+@@ -348,6 +348,22 @@ static const struct xfs_buf_ops xfs_dir3
+ .verify_write = xfs_dir3_data_write_verify,
+ };
+
++static xfs_failaddr_t
++xfs_dir3_data_header_check(
++ struct xfs_inode *dp,
++ struct xfs_buf *bp)
++{
++ struct xfs_mount *mp = dp->i_mount;
++
++ if (xfs_sb_version_hascrc(&mp->m_sb)) {
++ struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
++
++ if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
++ return __this_address;
++ }
++
++ return NULL;
++}
+
+ int
+ xfs_dir3_data_read(
+@@ -357,12 +373,24 @@ xfs_dir3_data_read(
+ xfs_daddr_t mapped_bno,
+ struct xfs_buf **bpp)
+ {
++ xfs_failaddr_t fa;
+ int err;
+
+ err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
+ XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+- if (!err && tp && *bpp)
+- xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
++ if (err || !*bpp)
++ return err;
++
++ /* Check things that we can't do in the verifier. */
++ fa = xfs_dir3_data_header_check(dp, *bpp);
++ if (fa) {
++ __xfs_buf_mark_corrupt(*bpp, fa);
++ xfs_trans_brelse(tp, *bpp);
++ *bpp = NULL;
++ return -EFSCORRUPTED;
++ }
++
++ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
+ return err;
+ }
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:38 +0530
+Subject: xfs: don't write a corrupt unmount record to force summary counter recalc
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-22-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 5cc3c006eb45524860c4d1dd4dd7ad4a506bf3f5 upstream.
+
+[ Modify fs/xfs/xfs_log.c to include the changes at locations suitable for
+ 5.4-lts kernel ]
+
+In commit f467cad95f5e3, I added the ability to force a recalculation of
+the filesystem summary counters if they seemed incorrect. This was done
+(not entirely correctly) by tweaking the log code to write an unmount
+record without the UMOUNT_TRANS flag set. At next mount, the log
+recovery code will fail to find the unmount record and go into recovery,
+which triggers the recalculation.
+
+What actually gets written to the log is what ought to be an unmount
+record, but without any flags set to indicate what kind of record it
+actually is. This worked to trigger the recalculation, but we shouldn't
+write bogus log records when we could simply write nothing.
+
+Fixes: f467cad95f5e3 ("xfs: force summary counter recalc at next mount")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -837,19 +837,6 @@ xfs_log_write_unmount_record(
+ if (error)
+ goto out_err;
+
+- /*
+- * If we think the summary counters are bad, clear the unmount header
+- * flag in the unmount record so that the summary counters will be
+- * recalculated during log recovery at next mount. Refer to
+- * xlog_check_unmount_rec for more details.
+- */
+- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
+- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+- xfs_alert(mp, "%s: will fix summary counters at next mount",
+- __func__);
+- flags &= ~XLOG_UNMOUNT_TRANS;
+- }
+-
+ /* remove inited flag, and account for space used */
+ tic->t_flags = 0;
+ tic->t_curr_res -= sizeof(magic);
+@@ -932,6 +919,19 @@ xfs_log_unmount_write(xfs_mount_t *mp)
+ } while (iclog != first_iclog);
+ #endif
+ if (! (XLOG_FORCED_SHUTDOWN(log))) {
++ /*
++ * If we think the summary counters are bad, avoid writing the
++ * unmount record to force log recovery at next mount, after
++ * which the summary counters will be recalculated. Refer to
++ * xlog_check_unmount_rec for more details.
++ */
++ if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS),
++ mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
++ xfs_alert(mp,
++ "%s: will fix summary counters at next mount",
++ __func__);
++ return 0;
++ }
+ xfs_log_write_unmount_record(mp);
+ } else {
+ /*
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:36 +0530
+Subject: xfs: factor common AIL item deletion code
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-20-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 4165994ac9672d91134675caa6de3645a9ace6c8 upstream.
+
+Factor the common AIL deletion code that does all the wakeups into a
+helper so we only have one copy of this somewhat tricky code to
+interface with all the wakeups necessary when the LSN of the log
+tail changes.
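+
+The resulting call pattern is roughly the following (a sketch only; the
+helper drops the AIL lock that the caller must hold on entry):
+
+	spin_lock(&ailp->ail_lock);
+	/* ... */
+	need_update = xfs_ail_delete_one(ailp, lip);
+	xfs_ail_update_finish(ailp, need_update);	/* drops ail_lock */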
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode_item.c | 12 +-----------
+ fs/xfs/xfs_trans_ail.c | 48 ++++++++++++++++++++++++++----------------------
+ fs/xfs/xfs_trans_priv.h | 4 +++-
+ 3 files changed, 30 insertions(+), 34 deletions(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -744,17 +744,7 @@ xfs_iflush_done(
+ xfs_clear_li_failed(blip);
+ }
+ }
+-
+- if (mlip_changed) {
+- if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+- xlog_assign_tail_lsn_locked(ailp->ail_mount);
+- if (list_empty(&ailp->ail_head))
+- wake_up_all(&ailp->ail_empty);
+- }
+- spin_unlock(&ailp->ail_lock);
+-
+- if (mlip_changed)
+- xfs_log_space_wake(ailp->ail_mount);
++ xfs_ail_update_finish(ailp, mlip_changed);
+ }
+
+ /*
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -680,6 +680,27 @@ xfs_ail_push_all_sync(
+ finish_wait(&ailp->ail_empty, &wait);
+ }
+
++void
++xfs_ail_update_finish(
++ struct xfs_ail *ailp,
++ bool do_tail_update) __releases(ailp->ail_lock)
++{
++ struct xfs_mount *mp = ailp->ail_mount;
++
++ if (!do_tail_update) {
++ spin_unlock(&ailp->ail_lock);
++ return;
++ }
++
++ if (!XFS_FORCED_SHUTDOWN(mp))
++ xlog_assign_tail_lsn_locked(mp);
++
++ if (list_empty(&ailp->ail_head))
++ wake_up_all(&ailp->ail_empty);
++ spin_unlock(&ailp->ail_lock);
++ xfs_log_space_wake(mp);
++}
++
+ /*
+ * xfs_trans_ail_update - bulk AIL insertion operation.
+ *
+@@ -739,15 +760,7 @@ xfs_trans_ail_update_bulk(
+ if (!list_empty(&tmp))
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
+
+- if (mlip_changed) {
+- if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+- xlog_assign_tail_lsn_locked(ailp->ail_mount);
+- spin_unlock(&ailp->ail_lock);
+-
+- xfs_log_space_wake(ailp->ail_mount);
+- } else {
+- spin_unlock(&ailp->ail_lock);
+- }
++ xfs_ail_update_finish(ailp, mlip_changed);
+ }
+
+ bool
+@@ -791,10 +804,10 @@ void
+ xfs_trans_ail_delete(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+- int shutdown_type) __releases(ailp->ail_lock)
++ int shutdown_type)
+ {
+ struct xfs_mount *mp = ailp->ail_mount;
+- bool mlip_changed;
++ bool need_update;
+
+ if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
+ spin_unlock(&ailp->ail_lock);
+@@ -807,17 +820,8 @@ xfs_trans_ail_delete(
+ return;
+ }
+
+- mlip_changed = xfs_ail_delete_one(ailp, lip);
+- if (mlip_changed) {
+- if (!XFS_FORCED_SHUTDOWN(mp))
+- xlog_assign_tail_lsn_locked(mp);
+- if (list_empty(&ailp->ail_head))
+- wake_up_all(&ailp->ail_empty);
+- }
+-
+- spin_unlock(&ailp->ail_lock);
+- if (mlip_changed)
+- xfs_log_space_wake(ailp->ail_mount);
++ need_update = xfs_ail_delete_one(ailp, lip);
++ xfs_ail_update_finish(ailp, need_update);
+ }
+
+ int
+--- a/fs/xfs/xfs_trans_priv.h
++++ b/fs/xfs/xfs_trans_priv.h
+@@ -92,8 +92,10 @@ xfs_trans_ail_update(
+ }
+
+ bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
++void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update)
++ __releases(ailp->ail_lock);
+ void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+- int shutdown_type) __releases(ailp->ail_lock);
++ int shutdown_type);
+
+ static inline void
+ xfs_trans_ail_remove(
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:40 +0530
+Subject: xfs: factor out a new xfs_log_force_inode helper
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-24-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 54fbdd1035e3a4e4f4082c335b095426cdefd092 upstream.
+
+Create a new helper to force the log up to the last LSN touching an
+inode.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_export.c | 14 +-------------
+ fs/xfs/xfs_file.c | 12 +-----------
+ fs/xfs/xfs_inode.c | 19 +++++++++++++++++++
+ fs/xfs/xfs_inode.h | 1 +
+ 4 files changed, 22 insertions(+), 24 deletions(-)
+
+--- a/fs/xfs/xfs_export.c
++++ b/fs/xfs/xfs_export.c
+@@ -15,7 +15,6 @@
+ #include "xfs_trans.h"
+ #include "xfs_inode_item.h"
+ #include "xfs_icache.h"
+-#include "xfs_log.h"
+ #include "xfs_pnfs.h"
+
+ /*
+@@ -221,18 +220,7 @@ STATIC int
+ xfs_fs_nfs_commit_metadata(
+ struct inode *inode)
+ {
+- struct xfs_inode *ip = XFS_I(inode);
+- struct xfs_mount *mp = ip->i_mount;
+- xfs_lsn_t lsn = 0;
+-
+- xfs_ilock(ip, XFS_ILOCK_SHARED);
+- if (xfs_ipincount(ip))
+- lsn = ip->i_itemp->ili_last_lsn;
+- xfs_iunlock(ip, XFS_ILOCK_SHARED);
+-
+- if (!lsn)
+- return 0;
+- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
++ return xfs_log_force_inode(XFS_I(inode));
+ }
+
+ const struct export_operations xfs_export_operations = {
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -80,19 +80,9 @@ xfs_dir_fsync(
+ int datasync)
+ {
+ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
+- struct xfs_mount *mp = ip->i_mount;
+- xfs_lsn_t lsn = 0;
+
+ trace_xfs_dir_fsync(ip);
+-
+- xfs_ilock(ip, XFS_ILOCK_SHARED);
+- if (xfs_ipincount(ip))
+- lsn = ip->i_itemp->ili_last_lsn;
+- xfs_iunlock(ip, XFS_ILOCK_SHARED);
+-
+- if (!lsn)
+- return 0;
+- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
++ return xfs_log_force_inode(ip);
+ }
+
+ STATIC int
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -3973,3 +3973,22 @@ xfs_irele(
+ trace_xfs_irele(ip, _RET_IP_);
+ iput(VFS_I(ip));
+ }
++
++/*
++ * Ensure all committed transactions touching the inode are written to the log.
++ */
++int
++xfs_log_force_inode(
++ struct xfs_inode *ip)
++{
++ xfs_lsn_t lsn = 0;
++
++ xfs_ilock(ip, XFS_ILOCK_SHARED);
++ if (xfs_ipincount(ip))
++ lsn = ip->i_itemp->ili_last_lsn;
++ xfs_iunlock(ip, XFS_ILOCK_SHARED);
++
++ if (!lsn)
++ return 0;
++ return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
++}
+--- a/fs/xfs/xfs_inode.h
++++ b/fs/xfs/xfs_inode.h
+@@ -441,6 +441,7 @@ int xfs_itruncate_extents_flags(struct
+ struct xfs_inode *, int, xfs_fsize_t, int);
+ void xfs_iext_realloc(xfs_inode_t *, int, int);
+
++int xfs_log_force_inode(struct xfs_inode *ip);
+ void xfs_iunpin_wait(xfs_inode_t *);
+ #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:31 +0530
+Subject: xfs: factor out quotaoff intent AIL removal and memory free
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-15-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 854f82b1f6039a418b7d1407513f8640e05fd73f upstream.
+
+AIL removal of the quotaoff start intent and free of both intents is
+hardcoded to the ->iop_committed() handler of the end intent. Factor
+out the start intent handling code so it can be used in a future
+patch to properly handle quotaoff errors. Use xfs_trans_ail_remove()
+instead of the _delete() variant to acquire the AIL lock and also
+handle cases where an intent might not reside in the AIL at the
+time of a failure.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot_item.c | 29 ++++++++++++++++++++---------
+ fs/xfs/xfs_dquot_item.h | 1 +
+ 2 files changed, 21 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -307,18 +307,10 @@ xfs_qm_qoffend_logitem_committed(
+ {
+ struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
+ struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
+- struct xfs_ail *ailp = qfs->qql_item.li_ailp;
+
+- /*
+- * Delete the qoff-start logitem from the AIL.
+- * xfs_trans_ail_delete() drops the AIL lock.
+- */
+- spin_lock(&ailp->ail_lock);
+- xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
++ xfs_qm_qoff_logitem_relse(qfs);
+
+- kmem_free(qfs->qql_item.li_lv_shadow);
+ kmem_free(lip->li_lv_shadow);
+- kmem_free(qfs);
+ kmem_free(qfe);
+ return (xfs_lsn_t)-1;
+ }
+@@ -337,6 +329,25 @@ static const struct xfs_item_ops xfs_qm_
+ };
+
+ /*
++ * Delete the quotaoff intent from the AIL and free it. On success,
++ * this should only be called for the start item. It can be used for
++ * either on shutdown or abort.
++ */
++void
++xfs_qm_qoff_logitem_relse(
++ struct xfs_qoff_logitem *qoff)
++{
++ struct xfs_log_item *lip = &qoff->qql_item;
++
++ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) ||
++ test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
++ XFS_FORCED_SHUTDOWN(lip->li_mountp));
++ xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
++ kmem_free(lip->li_lv_shadow);
++ kmem_free(qoff);
++}
++
++/*
+ * Allocate and initialize an quotaoff item of the correct quota type(s).
+ */
+ struct xfs_qoff_logitem *
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -28,6 +28,7 @@ void xfs_qm_dquot_logitem_init(struct xf
+ struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
+ struct xfs_qoff_logitem *start,
+ uint flags);
++void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *);
+ struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp,
+ struct xfs_qoff_logitem *startqoff,
+ uint flags);
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:23 +0530
+Subject: xfs: fix buffer corruption reporting when xfs_dir3_free_header_check fails
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-7-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit ce99494c9699df58b31d0a839e957f86cd58c755 upstream.
+
+xfs_verifier_error is supposed to be called on a corrupt metadata buffer
+from within a buffer verifier function, whereas xfs_buf_mark_corrupt
+is the function to be called when a piece of code has read a buffer and
+catches something that a read verifier cannot. The first function sets
+b_error anticipating that the low level buffer handling code will see
+the nonzero b_error and clear XBF_DONE on the buffer, whereas the second
+function does not.
+
+Since xfs_dir3_free_header_check examines fields in the dir free block
+header that require more context than can be provided to read verifiers,
+we must call xfs_buf_mark_corrupt when it finds a problem.
+
+Switching the calls has a secondary effect that we no longer corrupt the
+buffer state by setting b_error and leaving XBF_DONE set. When /that/
+happens, we'll trip over various state assertions (most commonly the
+b_error check in xfs_buf_reverify) on a subsequent attempt to read the
+buffer.
+
+Fixes: bc1a09b8e334bf5f ("xfs: refactor verifier callers to print address of failing check")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dir2_node.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_dir2_node.c
++++ b/fs/xfs/libxfs/xfs_dir2_node.c
+@@ -208,7 +208,7 @@ __xfs_dir3_free_read(
+ /* Check things that we can't do in the verifier. */
+ fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+ if (fa) {
+- xfs_verifier_error(*bpp, -EFSCORRUPTED, fa);
++ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ return -EFSCORRUPTED;
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:32 +0530
+Subject: xfs: fix unmount hang and memory leak on shutdown during quotaoff
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-16-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 8a62714313391b9b2297d67c341b35edbf46c279 upstream.
+
+AIL removal of the quotaoff start intent and free of both quotaoff
+intents is currently limited to the ->iop_committed() handler of the
+end intent. This executes when the end intent is committed to the
+on-disk log and marks the completion of the operation. The problem
+with this is it assumes the success of the operation. If a shutdown
+or other error occurs during the quotaoff, it's possible for the
+quotaoff task to exit without removing the start intent from the
+AIL. This results in an unmount hang as the AIL cannot be emptied.
+Further, no other codepath frees the intents and so this is also a
+memory leak vector.
+
+First, update the high level quotaoff error path to directly remove
+and free the quotaoff start intent if it still exists in the AIL at
+the time of the error. Next, update both of the start and end
+quotaoff intents with an ->iop_release() callback to properly handle
+transaction abort.
+
+This means that if the quotaoff start transaction aborts, it frees
+the start intent in the transaction commit path. If the filesystem
+shuts down before the end transaction allocates, the quotaoff
+sequence removes and frees the start intent. If the end transaction
+aborts, it removes the start intent and frees both. This ensures
+that a shutdown does not result in a hung unmount and that memory is
+not leaked regardless of when a quotaoff error occurs.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot_item.c | 15 +++++++++++++++
+ fs/xfs/xfs_qm_syscalls.c | 13 +++++++------
+ 2 files changed, 22 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -315,17 +315,32 @@ xfs_qm_qoffend_logitem_committed(
+ return (xfs_lsn_t)-1;
+ }
+
++STATIC void
++xfs_qm_qoff_logitem_release(
++ struct xfs_log_item *lip)
++{
++ struct xfs_qoff_logitem *qoff = QOFF_ITEM(lip);
++
++ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
++ if (qoff->qql_start_lip)
++ xfs_qm_qoff_logitem_relse(qoff->qql_start_lip);
++ xfs_qm_qoff_logitem_relse(qoff);
++ }
++}
++
+ static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+ .iop_size = xfs_qm_qoff_logitem_size,
+ .iop_format = xfs_qm_qoff_logitem_format,
+ .iop_committed = xfs_qm_qoffend_logitem_committed,
+ .iop_push = xfs_qm_qoff_logitem_push,
++ .iop_release = xfs_qm_qoff_logitem_release,
+ };
+
+ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+ .iop_size = xfs_qm_qoff_logitem_size,
+ .iop_format = xfs_qm_qoff_logitem_format,
+ .iop_push = xfs_qm_qoff_logitem_push,
++ .iop_release = xfs_qm_qoff_logitem_release,
+ };
+
+ /*
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -29,8 +29,6 @@ xfs_qm_log_quotaoff(
+ int error;
+ struct xfs_qoff_logitem *qoffi;
+
+- *qoffstartp = NULL;
+-
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+ if (error)
+ goto out;
+@@ -62,7 +60,7 @@ out:
+ STATIC int
+ xfs_qm_log_quotaoff_end(
+ struct xfs_mount *mp,
+- struct xfs_qoff_logitem *startqoff,
++ struct xfs_qoff_logitem **startqoff,
+ uint flags)
+ {
+ struct xfs_trans *tp;
+@@ -73,9 +71,10 @@ xfs_qm_log_quotaoff_end(
+ if (error)
+ return error;
+
+- qoffi = xfs_trans_get_qoff_item(tp, startqoff,
++ qoffi = xfs_trans_get_qoff_item(tp, *startqoff,
+ flags & XFS_ALL_QUOTA_ACCT);
+ xfs_trans_log_quotaoff_item(tp, qoffi);
++ *startqoff = NULL;
+
+ /*
+ * We have to make sure that the transaction is secure on disk before we
+@@ -103,7 +102,7 @@ xfs_qm_scall_quotaoff(
+ uint dqtype;
+ int error;
+ uint inactivate_flags;
+- struct xfs_qoff_logitem *qoffstart;
++ struct xfs_qoff_logitem *qoffstart = NULL;
+
+ /*
+ * No file system can have quotas enabled on disk but not in core.
+@@ -228,7 +227,7 @@ xfs_qm_scall_quotaoff(
+ * So, we have QUOTAOFF start and end logitems; the start
+ * logitem won't get overwritten until the end logitem appears...
+ */
+- error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
++ error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags);
+ if (error) {
+ /* We're screwed now. Shutdown is the only option. */
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+@@ -261,6 +260,8 @@ xfs_qm_scall_quotaoff(
+ }
+
+ out_unlock:
++ if (error && qoffstart)
++ xfs_qm_qoff_logitem_relse(qoffstart);
+ mutex_unlock(&q->qi_quotaofflock);
+ return error;
+ }
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:43 +0530
+Subject: xfs: fix use-after-free on CIL context on shutdown
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-27-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit c7f87f3984cfa1e6d32806a715f35c5947ad9c09 upstream.
+
+xlog_wait() on the CIL context can reference a freed context if the
+waiter doesn't get scheduled before the CIL context is freed. This
+can happen when a task is on the hard throttle and the CIL push
+aborts due to a shutdown. This was detected by generic/019:
+
+thread 1 thread 2
+
+__xfs_trans_commit
+ xfs_log_commit_cil
+ <CIL size over hard throttle limit>
+ xlog_wait
+ schedule
+ xlog_cil_push_work
+ wake_up_all
+ <shutdown aborts commit>
+ xlog_cil_committed
+ kmem_free
+
+ remove_wait_queue
+ spin_lock_irqsave --> UAF
+
+Fix it by moving the wait queue to the CIL rather than keeping it in
+the CIL context that gets freed on push completion. Because the
+wait queue is now independent of the CIL context and we might have
+multiple contexts in flight at once, only wake the waiters on the
+push throttle when the context we are pushing is over the hard
+throttle size threshold.
+
+Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push")
+Reported-by: Yu Kuai <yukuai3@huawei.com>
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c | 10 +++++-----
+ fs/xfs/xfs_log_priv.h | 2 +-
+ 2 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -673,7 +673,8 @@ xlog_cil_push(
+ /*
+ * Wake up any background push waiters now this context is being pushed.
+ */
+- wake_up_all(&ctx->push_wait);
++ if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
++ wake_up_all(&cil->xc_push_wait);
+
+ /*
+ * Check if we've anything to push. If there is nothing, then we don't
+@@ -745,13 +746,12 @@ xlog_cil_push(
+
+ /*
+ * initialise the new context and attach it to the CIL. Then attach
+- * the current context to the CIL committing lsit so it can be found
++ * the current context to the CIL committing list so it can be found
+ * during log forces to extract the commit lsn of the sequence that
+ * needs to be forced.
+ */
+ INIT_LIST_HEAD(&new_ctx->committing);
+ INIT_LIST_HEAD(&new_ctx->busy_extents);
+- init_waitqueue_head(&new_ctx->push_wait);
+ new_ctx->sequence = ctx->sequence + 1;
+ new_ctx->cil = cil;
+ cil->xc_ctx = new_ctx;
+@@ -946,7 +946,7 @@ xlog_cil_push_background(
+ if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+ trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+ ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+- xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
++ xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
+ return;
+ }
+
+@@ -1222,12 +1222,12 @@ xlog_cil_init(
+ INIT_LIST_HEAD(&cil->xc_committing);
+ spin_lock_init(&cil->xc_cil_lock);
+ spin_lock_init(&cil->xc_push_lock);
++ init_waitqueue_head(&cil->xc_push_wait);
+ init_rwsem(&cil->xc_ctx_lock);
+ init_waitqueue_head(&cil->xc_commit_wait);
+
+ INIT_LIST_HEAD(&ctx->committing);
+ INIT_LIST_HEAD(&ctx->busy_extents);
+- init_waitqueue_head(&ctx->push_wait);
+ ctx->sequence = 1;
+ ctx->cil = cil;
+ cil->xc_ctx = ctx;
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -247,7 +247,6 @@ struct xfs_cil_ctx {
+ struct xfs_log_vec *lv_chain; /* logvecs being pushed */
+ struct list_head iclog_entry;
+ struct list_head committing; /* ctx committing list */
+- wait_queue_head_t push_wait; /* background push throttle */
+ struct work_struct discard_endio_work;
+ };
+
+@@ -281,6 +280,7 @@ struct xfs_cil {
+ wait_queue_head_t xc_commit_wait;
+ xfs_lsn_t xc_current_sequence;
+ struct work_struct xc_push_work;
++ wait_queue_head_t xc_push_wait; /* background push throttle */
+ } ____cacheline_aligned_in_smp;
+
+ /*
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:34 +0530
+Subject: xfs: Lower CIL flush limit for large logs
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-18-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 108a42358a05312b2128533c6462a3fdeb410bdf upstream.
+
+The current CIL size aggregation limit is 1/8th the log size. This
+means for large logs we might be aggregating at least 250MB of dirty objects
+in memory before the CIL is flushed to the journal. With CIL shadow
+buffers sitting around, this means the CIL is often consuming >500MB
+of temporary memory that is all allocated under GFP_NOFS conditions.
+
+Flushing the CIL can take some time to do if there is other IO
+ongoing, and can introduce substantial log force latency by itself.
+It also pins the memory until the objects are in the AIL and can be
+written back and reclaimed by shrinkers. Hence this threshold also
+tends to determine the minimum amount of memory XFS can operate in
+under heavy modification without triggering the OOM killer.
+
+Modify the CIL space limit to prevent such huge amounts of pinned
+metadata from aggregating. We can have 2MB of log IO in flight at
+once, so limit aggregation to 16x this size. This threshold was
+chosen as it has little impact on performance (on 16-way fsmark) or log
+traffic but pins a lot less memory on large logs especially under
+heavy memory pressure. An aggregation limit of 8x had 5-10%
+performance degradation and a 50% increase in log throughput for
+the same workload, so clearly that was too small for highly
+concurrent workloads on large logs.
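+
+As a worked example (assuming a v2 log, i.e. 8 iclogs of 256kB for the
+2MB of in-flight log IO): a 2GB log previously allowed 1/8th = 256MB of
+CIL aggregation, while the new limit of min(logsize / 8, 16 x 2MB) caps
+it at 32MB. Logs of 256MB or less are unaffected.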
+
+This was found via trace analysis of AIL behaviour. e.g. insertion
+from a single CIL flush:
+
+xfs_ail_insert: old lsn 0/0 new lsn 1/3033090 type XFS_LI_INODE flags IN_AIL
+
+$ grep xfs_ail_insert /mnt/scratch/s.t |grep "new lsn 1/3033090" |wc -l
+1721823
+$
+
+So there were 1.7 million objects inserted into the AIL from this
+CIL checkpoint, the first at 2323.392108, the last at 2325.667566 which
+was the end of the trace (i.e. it hadn't finished). Clearly a major
+problem.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_priv.h | 29 +++++++++++++++++++++++------
+ 1 file changed, 23 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -323,13 +323,30 @@ struct xfs_cil {
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+- * In order to keep background CIL push efficient, we will set a lower
+- * threshold at which background pushing is attempted without blocking current
+- * transaction commits. A separate, higher bound defines when CIL pushes are
+- * enforced to ensure we stay within our maximum checkpoint size bounds.
+- * threshold, yet give us plenty of space for aggregation on large logs.
++ * In order to keep background CIL push efficient, we only need to ensure the
++ * CIL is large enough to maintain sufficient in-memory relogging to avoid
++ * repeated physical writes of frequently modified metadata. If we allow the CIL
++ * to grow to a substantial fraction of the log, then we may be pinning hundreds
++ * of megabytes of metadata in memory until the CIL flushes. This can cause
++ * issues when we are running low on memory - pinned memory cannot be reclaimed,
++ * and the CIL consumes a lot of memory. Hence we need to set an upper physical
++ * size limit for the CIL that limits the maximum amount of memory pinned by the
++ * CIL but does not limit performance by reducing relogging efficiency
++ * significantly.
++ *
++ * As such, the CIL push threshold ends up being the smaller of two thresholds:
++ * - a threshold large enough that it allows CIL to be pushed and progress to be
++ * made without excessive blocking of incoming transaction commits. This is
++ * defined to be 12.5% of the log space - half the 25% push threshold of the
++ * AIL.
++ * - small enough that it doesn't pin excessive amounts of memory but maintains
++ * close to peak relogging efficiency. This is defined to be 16x the iclog
++ * buffer window (32MB) as measurements have shown this to be roughly the
++ * point of diminishing performance increases under highly concurrent
++ * modification workloads.
+ */
+-#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
++#define XLOG_CIL_SPACE_LIMIT(log) \
++ min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
+
+ /*
+ * ticket grant locks, queues and accounting have their own cachlines
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:42 +0530
+Subject: xfs: move inode flush to the sync workqueue
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-26-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit f0f7a674d4df1510d8ca050a669e1420cf7d7fab upstream.
+
+[ Modify fs/xfs/xfs_super.c to include the changes at locations suitable for
+ 5.4-lts kernel ]
+
+Move the inode dirty data flushing to a workqueue so that multiple
+threads can take advantage of a single thread's flushing work. The
+ratelimiting technique used in bdd4ee4 was not successful, because
+threads that skipped the inode flush scan due to ratelimiting would
+ENOSPC early, which caused occasional (but noticeable) changes in
+behavior and sporadic fstest regressions.
+
+Therefore, make all the writer threads wait on a single inode flush,
+which eliminates both the stampeding hordes of flushers and the small
+window in which a write could fail with ENOSPC because it lost the
+ratelimit race after even another thread freed space.
+
+Fixes: c6425702f21e ("xfs: ratelimit inode flush on buffered write ENOSPC")
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_mount.h | 5 +++++
+ fs/xfs/xfs_super.c | 28 +++++++++++++++++++++++-----
+ 2 files changed, 28 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_mount.h
++++ b/fs/xfs/xfs_mount.h
+@@ -179,6 +179,11 @@ typedef struct xfs_mount {
+ struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
+ struct xstats m_stats; /* per-fs stats */
+
++ /*
++ * Workqueue item so that we can coalesce multiple inode flush attempts
++ * into a single flush.
++ */
++ struct work_struct m_flush_inodes_work;
+ struct workqueue_struct *m_buf_workqueue;
+ struct workqueue_struct *m_unwritten_workqueue;
+ struct workqueue_struct *m_cil_workqueue;
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -840,6 +840,20 @@ xfs_destroy_mount_workqueues(
+ destroy_workqueue(mp->m_buf_workqueue);
+ }
+
++static void
++xfs_flush_inodes_worker(
++ struct work_struct *work)
++{
++ struct xfs_mount *mp = container_of(work, struct xfs_mount,
++ m_flush_inodes_work);
++ struct super_block *sb = mp->m_super;
++
++ if (down_read_trylock(&sb->s_umount)) {
++ sync_inodes_sb(sb);
++ up_read(&sb->s_umount);
++ }
++}
++
+ /*
+ * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
+ * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
+@@ -850,12 +864,15 @@ void
+ xfs_flush_inodes(
+ struct xfs_mount *mp)
+ {
+- struct super_block *sb = mp->m_super;
++ /*
++ * If flush_work() returns true then that means we waited for a flush
++ * which was already in progress. Don't bother running another scan.
++ */
++ if (flush_work(&mp->m_flush_inodes_work))
++ return;
+
+- if (down_read_trylock(&sb->s_umount)) {
+- sync_inodes_sb(sb);
+- up_read(&sb->s_umount);
+- }
++ queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
++ flush_work(&mp->m_flush_inodes_work);
+ }
+
+ /* Catch misguided souls that try to use this interface on XFS */
+@@ -1532,6 +1549,7 @@ xfs_mount_alloc(
+ spin_lock_init(&mp->m_perag_lock);
+ mutex_init(&mp->m_growlock);
+ atomic_set(&mp->m_active_trans, 0);
++ INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
+ INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
+ INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:18 +0530
+Subject: xfs: open code insert range extent split helper
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-2-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit b73df17e4c5ba977205253fb7ef54267717a3cba upstream.
+
+The insert range operation currently splits the extent at the target
+offset in a separate transaction and lock cycle from the one that
+shifts extents. In preparation for reworking insert range into an
+atomic operation, lift the code into the caller so it can be easily
+condensed to a single rolling transaction and lock cycle and
+eliminate the helper. No functional changes.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_bmap.c | 32 ++------------------------------
+ fs/xfs/libxfs/xfs_bmap.h | 3 ++-
+ fs/xfs/xfs_bmap_util.c | 14 +++++++++++++-
+ 3 files changed, 17 insertions(+), 32 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_bmap.c
++++ b/fs/xfs/libxfs/xfs_bmap.c
+@@ -5925,8 +5925,8 @@ del_cursor:
+ * @split_fsb is a block where the extents is split. If split_fsb lies in a
+ * hole or the first block of extents, just return 0.
+ */
+-STATIC int
+-xfs_bmap_split_extent_at(
++int
++xfs_bmap_split_extent(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_fileoff_t split_fsb)
+@@ -6037,34 +6037,6 @@ del_cursor:
+ return error;
+ }
+
+-int
+-xfs_bmap_split_extent(
+- struct xfs_inode *ip,
+- xfs_fileoff_t split_fsb)
+-{
+- struct xfs_mount *mp = ip->i_mount;
+- struct xfs_trans *tp;
+- int error;
+-
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+- if (error)
+- return error;
+-
+- xfs_ilock(ip, XFS_ILOCK_EXCL);
+- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+-
+- error = xfs_bmap_split_extent_at(tp, ip, split_fsb);
+- if (error)
+- goto out;
+-
+- return xfs_trans_commit(tp);
+-
+-out:
+- xfs_trans_cancel(tp);
+- return error;
+-}
+-
+ /* Deferred mapping is only for real extents in the data fork. */
+ static bool
+ xfs_bmap_is_update_needed(
+--- a/fs/xfs/libxfs/xfs_bmap.h
++++ b/fs/xfs/libxfs/xfs_bmap.h
+@@ -222,7 +222,8 @@ int xfs_bmap_can_insert_extents(struct x
+ int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+ bool *done, xfs_fileoff_t stop_fsb);
+-int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
++int xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip,
++ xfs_fileoff_t split_offset);
+ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
+ xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc,
+ struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1326,7 +1326,19 @@ xfs_insert_file_space(
+ * is not the starting block of extent, we need to split the extent at
+ * stop_fsb.
+ */
+- error = xfs_bmap_split_extent(ip, stop_fsb);
++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
++ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
++ if (error)
++ return error;
++
++ xfs_ilock(ip, XFS_ILOCK_EXCL);
++ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++
++ error = xfs_bmap_split_extent(tp, ip, stop_fsb);
++ if (error)
++ goto out_trans_cancel;
++
++ error = xfs_trans_commit(tp);
+ if (error)
+ return error;
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:33 +0530
+Subject: xfs: preserve default grace interval during quotacheck
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-17-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 5885539f0af371024d07afd14974bfdc3fff84c5 upstream.
+
+When quotacheck runs, it zeroes all the timer fields in every dquot.
+Unfortunately, it also does this to the root dquot, which erases any
+preconfigured grace intervals and warning limits that the administrator
+may have set. Worse yet, the incore copies of those variables remain
+set. This cache coherence problem manifests itself as the grace
+interval mysteriously being reset back to the defaults at the /next/
+mount.
+
+Fix it by not resetting the root disk dquot's timer and warning fields.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -875,12 +875,20 @@ xfs_qm_reset_dqcounts(
+ ddq->d_bcount = 0;
+ ddq->d_icount = 0;
+ ddq->d_rtbcount = 0;
+- ddq->d_btimer = 0;
+- ddq->d_itimer = 0;
+- ddq->d_rtbtimer = 0;
+- ddq->d_bwarns = 0;
+- ddq->d_iwarns = 0;
+- ddq->d_rtbwarns = 0;
++
++ /*
++ * dquot id 0 stores the default grace period and the maximum
++ * warning limit that were set by the administrator, so we
++ * should not reset them.
++ */
++ if (ddq->d_id != 0) {
++ ddq->d_btimer = 0;
++ ddq->d_itimer = 0;
++ ddq->d_rtbtimer = 0;
++ ddq->d_bwarns = 0;
++ ddq->d_iwarns = 0;
++ ddq->d_rtbwarns = 0;
++ }
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)&dqb[j],
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:41 +0530
+Subject: xfs: reflink should force the log out if mounted with wsync
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-25-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 5833112df7e9a306af9af09c60127b92ed723962 upstream.
+
+Reflink should force the log out to disk if the filesystem was mounted
+with wsync, the same as most other operations in xfs.
+
+[Note: XFS_MOUNT_WSYNC is set when the admin mounts the filesystem
+with either the 'wsync' or 'sync' mount options, which effectively means
+that we're classifying reflink/dedupe as IO operations and making them
+synchronous when required.]
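+
+[In simplified form, the fix makes the remap path force the log for the
+destination inode once the remap updates have succeeded, condensed from
+the hunk below:]
+
+	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+			remap_flags);
+	if (ret)
+		goto out_unlock;
+
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_log_force_inode(dest);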
+
+Fixes: 3fc9f5e409319 ("xfs: remove xfs_reflink_remap_range")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+[darrick: add more to the changelog]
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_file.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -1044,7 +1044,11 @@ xfs_file_remap_range(
+
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ remap_flags);
++ if (ret)
++ goto out_unlock;
+
++ if (mp->m_flags & XFS_MOUNT_WSYNC)
++ xfs_log_force_inode(dest);
+ out_unlock:
+ xfs_reflink_remap_unlock(file_in, file_out);
+ if (ret)
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:27 +0530
+Subject: xfs: remove the xfs_disk_dquot_t and xfs_dquot_t
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-11-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit aefe69a45d84901c702f87672ec1e93de1d03f73 upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix some of the comments]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_dquot_buf.c | 8 +--
+ fs/xfs/libxfs/xfs_format.h | 10 ++--
+ fs/xfs/libxfs/xfs_trans_resv.c | 2
+ fs/xfs/xfs_dquot.c | 18 +++----
+ fs/xfs/xfs_dquot.h | 98 ++++++++++++++++++++---------------------
+ fs/xfs/xfs_log_recover.c | 5 +-
+ fs/xfs/xfs_qm.c | 30 ++++++------
+ fs/xfs/xfs_qm_bhv.c | 6 +-
+ fs/xfs/xfs_trans_dquot.c | 44 +++++++++---------
+ 9 files changed, 112 insertions(+), 109 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_dquot_buf.c
++++ b/fs/xfs/libxfs/xfs_dquot_buf.c
+@@ -35,10 +35,10 @@ xfs_calc_dquots_per_chunk(
+
+ xfs_failaddr_t
+ xfs_dquot_verify(
+- struct xfs_mount *mp,
+- xfs_disk_dquot_t *ddq,
+- xfs_dqid_t id,
+- uint type) /* used only during quotacheck */
++ struct xfs_mount *mp,
++ struct xfs_disk_dquot *ddq,
++ xfs_dqid_t id,
++ uint type) /* used only during quotacheck */
+ {
+ /*
+ * We can encounter an uninitialized dquot buffer for 2 reasons:
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -1144,11 +1144,11 @@ static inline void xfs_dinode_put_rdev(s
+
+ /*
+ * This is the main portion of the on-disk representation of quota
+- * information for a user. This is the q_core of the xfs_dquot_t that
++ * information for a user. This is the q_core of the struct xfs_dquot that
+ * is kept in kernel memory. We pad this with some more expansion room
+ * to construct the on disk structure.
+ */
+-typedef struct xfs_disk_dquot {
++struct xfs_disk_dquot {
+ __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */
+ __u8 d_version; /* dquot version */
+ __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */
+@@ -1171,15 +1171,15 @@ typedef struct xfs_disk_dquot {
+ __be32 d_rtbtimer; /* similar to above; for RT disk blocks */
+ __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */
+ __be16 d_pad;
+-} xfs_disk_dquot_t;
++};
+
+ /*
+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
+ * carrying the unnecessary padding would be a waste of memory.
+ */
+ typedef struct xfs_dqblk {
+- xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
+- char dd_fill[4]; /* filling for posterity */
++ struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */
++ char dd_fill[4];/* filling for posterity */
+
+ /*
+ * These two are only present on filesystems with the CRC bits set.
+--- a/fs/xfs/libxfs/xfs_trans_resv.c
++++ b/fs/xfs/libxfs/xfs_trans_resv.c
+@@ -776,7 +776,7 @@ xfs_calc_clear_agi_bucket_reservation(
+
+ /*
+ * Adjusting quota limits.
+- * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
++ * the disk quota buffer: sizeof(struct xfs_disk_dquot)
+ */
+ STATIC uint
+ xfs_calc_qm_setqlim_reservation(void)
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -48,7 +48,7 @@ static struct lock_class_key xfs_dquot_p
+ */
+ void
+ xfs_qm_dqdestroy(
+- xfs_dquot_t *dqp)
++ struct xfs_dquot *dqp)
+ {
+ ASSERT(list_empty(&dqp->q_lru));
+
+@@ -113,8 +113,8 @@ xfs_qm_adjust_dqlimits(
+ */
+ void
+ xfs_qm_adjust_dqtimers(
+- xfs_mount_t *mp,
+- xfs_disk_dquot_t *d)
++ struct xfs_mount *mp,
++ struct xfs_disk_dquot *d)
+ {
+ ASSERT(d->d_id);
+
+@@ -497,7 +497,7 @@ xfs_dquot_from_disk(
+ struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset;
+
+ /* copy everything from disk dquot to the incore dquot */
+- memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
++ memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot));
+
+ /*
+ * Reservation counters are defined as reservation plus current usage
+@@ -989,7 +989,7 @@ xfs_qm_dqput(
+ */
+ void
+ xfs_qm_dqrele(
+- xfs_dquot_t *dqp)
++ struct xfs_dquot *dqp)
+ {
+ if (!dqp)
+ return;
+@@ -1019,7 +1019,7 @@ xfs_qm_dqflush_done(
+ struct xfs_log_item *lip)
+ {
+ xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
+- xfs_dquot_t *dqp = qip->qli_dquot;
++ struct xfs_dquot *dqp = qip->qli_dquot;
+ struct xfs_ail *ailp = lip->li_ailp;
+
+ /*
+@@ -1129,7 +1129,7 @@ xfs_qm_dqflush(
+ }
+
+ /* This is the only portion of data that needs to persist */
+- memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
++ memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot));
+
+ /*
+ * Clear the dirty field and remember the flush lsn for later use.
+@@ -1187,8 +1187,8 @@ out_unlock:
+ */
+ void
+ xfs_dqlock2(
+- xfs_dquot_t *d1,
+- xfs_dquot_t *d2)
++ struct xfs_dquot *d1,
++ struct xfs_dquot *d2)
+ {
+ if (d1 && d2) {
+ ASSERT(d1 != d2);
+--- a/fs/xfs/xfs_dquot.h
++++ b/fs/xfs/xfs_dquot.h
+@@ -30,33 +30,36 @@ enum {
+ /*
+ * The incore dquot structure
+ */
+-typedef struct xfs_dquot {
+- uint dq_flags; /* various flags (XFS_DQ_*) */
+- struct list_head q_lru; /* global free list of dquots */
+- struct xfs_mount*q_mount; /* filesystem this relates to */
+- uint q_nrefs; /* # active refs from inodes */
+- xfs_daddr_t q_blkno; /* blkno of dquot buffer */
+- int q_bufoffset; /* off of dq in buffer (# dquots) */
+- xfs_fileoff_t q_fileoffset; /* offset in quotas file */
+-
+- xfs_disk_dquot_t q_core; /* actual usage & quotas */
+- xfs_dq_logitem_t q_logitem; /* dquot log item */
+- xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
+- xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
+- xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
+- xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */
+- xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */
+- int64_t q_low_space[XFS_QLOWSP_MAX];
+- struct mutex q_qlock; /* quota lock */
+- struct completion q_flush; /* flush completion queue */
+- atomic_t q_pincount; /* dquot pin count */
+- wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
+-} xfs_dquot_t;
++struct xfs_dquot {
++ uint dq_flags;
++ struct list_head q_lru;
++ struct xfs_mount *q_mount;
++ uint q_nrefs;
++ xfs_daddr_t q_blkno;
++ int q_bufoffset;
++ xfs_fileoff_t q_fileoffset;
++
++ struct xfs_disk_dquot q_core;
++ xfs_dq_logitem_t q_logitem;
++ /* total regular nblks used+reserved */
++ xfs_qcnt_t q_res_bcount;
++ /* total inos allocd+reserved */
++ xfs_qcnt_t q_res_icount;
++ /* total realtime blks used+reserved */
++ xfs_qcnt_t q_res_rtbcount;
++ xfs_qcnt_t q_prealloc_lo_wmark;
++ xfs_qcnt_t q_prealloc_hi_wmark;
++ int64_t q_low_space[XFS_QLOWSP_MAX];
++ struct mutex q_qlock;
++ struct completion q_flush;
++ atomic_t q_pincount;
++ struct wait_queue_head q_pinwait;
++};
+
+ /*
+ * Lock hierarchy for q_qlock:
+ * XFS_QLOCK_NORMAL is the implicit default,
+- * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
++ * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ */
+ enum {
+ XFS_QLOCK_NORMAL = 0,
+@@ -64,21 +67,21 @@ enum {
+ };
+
+ /*
+- * Manage the q_flush completion queue embedded in the dquot. This completion
++ * Manage the q_flush completion queue embedded in the dquot. This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
+ */
+-static inline void xfs_dqflock(xfs_dquot_t *dqp)
++static inline void xfs_dqflock(struct xfs_dquot *dqp)
+ {
+ wait_for_completion(&dqp->q_flush);
+ }
+
+-static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp)
++static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp)
+ {
+ return try_wait_for_completion(&dqp->q_flush);
+ }
+
+-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
++static inline void xfs_dqfunlock(struct xfs_dquot *dqp)
+ {
+ complete(&dqp->q_flush);
+ }
+@@ -112,7 +115,7 @@ static inline int xfs_this_quota_on(stru
+ }
+ }
+
+-static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
++static inline struct xfs_dquot *xfs_inode_dquot(struct xfs_inode *ip, int type)
+ {
+ switch (type & XFS_DQ_ALLTYPES) {
+ case XFS_DQ_USER:
+@@ -147,31 +150,30 @@ static inline bool xfs_dquot_lowsp(struc
+ #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
+ #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP)
+
+-extern void xfs_qm_dqdestroy(xfs_dquot_t *);
+-extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
+-extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
+-extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
+- xfs_disk_dquot_t *);
+-extern void xfs_qm_adjust_dqlimits(struct xfs_mount *,
+- struct xfs_dquot *);
+-extern xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip,
+- uint type);
+-extern int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
++void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
++int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
++void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
++void xfs_qm_adjust_dqtimers(struct xfs_mount *mp,
++ struct xfs_disk_dquot *d);
++void xfs_qm_adjust_dqlimits(struct xfs_mount *mp,
++ struct xfs_dquot *d);
++xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type);
++int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
+ uint type, bool can_alloc,
+ struct xfs_dquot **dqpp);
+-extern int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
+- bool can_alloc,
+- struct xfs_dquot **dqpp);
+-extern int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
++int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
++ bool can_alloc,
++ struct xfs_dquot **dqpp);
++int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
+ uint type, struct xfs_dquot **dqpp);
+-extern int xfs_qm_dqget_uncached(struct xfs_mount *mp,
+- xfs_dqid_t id, uint type,
+- struct xfs_dquot **dqpp);
+-extern void xfs_qm_dqput(xfs_dquot_t *);
++int xfs_qm_dqget_uncached(struct xfs_mount *mp,
++ xfs_dqid_t id, uint type,
++ struct xfs_dquot **dqpp);
++void xfs_qm_dqput(struct xfs_dquot *dqp);
+
+-extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
++void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+
+-extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
++void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
+
+ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
+ {
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2577,6 +2577,7 @@ xlog_recover_do_reg_buffer(
+ int bit;
+ int nbits;
+ xfs_failaddr_t fa;
++ const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
+
+ trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
+@@ -2619,7 +2620,7 @@ xlog_recover_do_reg_buffer(
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+- if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
++ if (item->ri_buf[i].i_len < size_disk_dquot) {
+ xfs_alert(mp,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+@@ -3250,7 +3251,7 @@ xlog_recover_dquot_pass2(
+ xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
+ return -EFSCORRUPTED;
+ }
+- if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
++ if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
+ xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return -EFSCORRUPTED;
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -244,14 +244,14 @@ xfs_qm_unmount_quotas(
+
+ STATIC int
+ xfs_qm_dqattach_one(
+- xfs_inode_t *ip,
+- xfs_dqid_t id,
+- uint type,
+- bool doalloc,
+- xfs_dquot_t **IO_idqpp)
++ struct xfs_inode *ip,
++ xfs_dqid_t id,
++ uint type,
++ bool doalloc,
++ struct xfs_dquot **IO_idqpp)
+ {
+- xfs_dquot_t *dqp;
+- int error;
++ struct xfs_dquot *dqp;
++ int error;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ error = 0;
+@@ -544,8 +544,8 @@ xfs_qm_set_defquota(
+ uint type,
+ xfs_quotainfo_t *qinf)
+ {
+- xfs_dquot_t *dqp;
+- struct xfs_def_quota *defq;
++ struct xfs_dquot *dqp;
++ struct xfs_def_quota *defq;
+ struct xfs_disk_dquot *ddqp;
+ int error;
+
+@@ -1746,14 +1746,14 @@ error_rele:
+ * Actually transfer ownership, and do dquot modifications.
+ * These were already reserved.
+ */
+-xfs_dquot_t *
++struct xfs_dquot *
+ xfs_qm_vop_chown(
+- xfs_trans_t *tp,
+- xfs_inode_t *ip,
+- xfs_dquot_t **IO_olddq,
+- xfs_dquot_t *newdq)
++ struct xfs_trans *tp,
++ struct xfs_inode *ip,
++ struct xfs_dquot **IO_olddq,
++ struct xfs_dquot *newdq)
+ {
+- xfs_dquot_t *prevdq;
++ struct xfs_dquot *prevdq;
+ uint bfield = XFS_IS_REALTIME_INODE(ip) ?
+ XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
+--- a/fs/xfs/xfs_qm_bhv.c
++++ b/fs/xfs/xfs_qm_bhv.c
+@@ -54,11 +54,11 @@ xfs_fill_statvfs_from_dquot(
+ */
+ void
+ xfs_qm_statvfs(
+- xfs_inode_t *ip,
++ struct xfs_inode *ip,
+ struct kstatfs *statp)
+ {
+- xfs_mount_t *mp = ip->i_mount;
+- xfs_dquot_t *dqp;
++ struct xfs_mount *mp = ip->i_mount;
++ struct xfs_dquot *dqp;
+
+ if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) {
+ xfs_fill_statvfs_from_dquot(statp, dqp);
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -25,8 +25,8 @@ STATIC void xfs_trans_alloc_dqinfo(xfs_t
+ */
+ void
+ xfs_trans_dqjoin(
+- xfs_trans_t *tp,
+- xfs_dquot_t *dqp)
++ struct xfs_trans *tp,
++ struct xfs_dquot *dqp)
+ {
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ ASSERT(dqp->q_logitem.qli_dquot == dqp);
+@@ -49,8 +49,8 @@ xfs_trans_dqjoin(
+ */
+ void
+ xfs_trans_log_dquot(
+- xfs_trans_t *tp,
+- xfs_dquot_t *dqp)
++ struct xfs_trans *tp,
++ struct xfs_dquot *dqp)
+ {
+ ASSERT(XFS_DQ_IS_LOCKED(dqp));
+
+@@ -486,12 +486,12 @@ xfs_trans_apply_dquot_deltas(
+ */
+ void
+ xfs_trans_unreserve_and_mod_dquots(
+- xfs_trans_t *tp)
++ struct xfs_trans *tp)
+ {
+ int i, j;
+- xfs_dquot_t *dqp;
++ struct xfs_dquot *dqp;
+ struct xfs_dqtrx *qtrx, *qa;
+- bool locked;
++ bool locked;
+
+ if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ return;
+@@ -571,21 +571,21 @@ xfs_quota_warn(
+ */
+ STATIC int
+ xfs_trans_dqresv(
+- xfs_trans_t *tp,
+- xfs_mount_t *mp,
+- xfs_dquot_t *dqp,
+- int64_t nblks,
+- long ninos,
+- uint flags)
+-{
+- xfs_qcnt_t hardlimit;
+- xfs_qcnt_t softlimit;
+- time_t timer;
+- xfs_qwarncnt_t warns;
+- xfs_qwarncnt_t warnlimit;
+- xfs_qcnt_t total_count;
+- xfs_qcnt_t *resbcountp;
+- xfs_quotainfo_t *q = mp->m_quotainfo;
++ struct xfs_trans *tp,
++ struct xfs_mount *mp,
++ struct xfs_dquot *dqp,
++ int64_t nblks,
++ long ninos,
++ uint flags)
++{
++ xfs_qcnt_t hardlimit;
++ xfs_qcnt_t softlimit;
++ time_t timer;
++ xfs_qwarncnt_t warns;
++ xfs_qwarncnt_t warnlimit;
++ xfs_qcnt_t total_count;
++ xfs_qcnt_t *resbcountp;
++ xfs_quotainfo_t *q = mp->m_quotainfo;
+ struct xfs_def_quota *defq;
+
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:28 +0530
+Subject: xfs: remove the xfs_dq_logitem_t typedef
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-12-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit fd8b81dbbb23d4a3508cfac83256b4f5e770941c upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c | 2 +-
+ fs/xfs/xfs_dquot.h | 2 +-
+ fs/xfs/xfs_dquot_item.h | 10 +++++-----
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -1018,7 +1018,7 @@ xfs_qm_dqflush_done(
+ struct xfs_buf *bp,
+ struct xfs_log_item *lip)
+ {
+- xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
++ struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
+ struct xfs_dquot *dqp = qip->qli_dquot;
+ struct xfs_ail *ailp = lip->li_ailp;
+
+--- a/fs/xfs/xfs_dquot.h
++++ b/fs/xfs/xfs_dquot.h
+@@ -40,7 +40,7 @@ struct xfs_dquot {
+ xfs_fileoff_t q_fileoffset;
+
+ struct xfs_disk_dquot q_core;
+- xfs_dq_logitem_t q_logitem;
++ struct xfs_dq_logitem q_logitem;
+ /* total regular nblks used+reserved */
+ xfs_qcnt_t q_res_bcount;
+ /* total inos allocd+reserved */
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -11,11 +11,11 @@ struct xfs_trans;
+ struct xfs_mount;
+ struct xfs_qoff_logitem;
+
+-typedef struct xfs_dq_logitem {
+- struct xfs_log_item qli_item; /* common portion */
+- struct xfs_dquot *qli_dquot; /* dquot ptr */
+- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+-} xfs_dq_logitem_t;
++struct xfs_dq_logitem {
++ struct xfs_log_item qli_item; /* common portion */
++ struct xfs_dquot *qli_dquot; /* dquot ptr */
++ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
++};
+
+ typedef struct xfs_qoff_logitem {
+ struct xfs_log_item qql_item; /* common portion */
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:29 +0530
+Subject: xfs: remove the xfs_qoff_logitem_t typedef
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-13-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit d0bdfb106907e4a3ef4f25f6d27e392abf41f3a0 upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix a comment]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_trans_resv.c | 4 ++--
+ fs/xfs/xfs_dquot_item.h | 28 +++++++++++++++-------------
+ fs/xfs/xfs_qm_syscalls.c | 29 ++++++++++++++++-------------
+ fs/xfs/xfs_trans_dquot.c | 12 ++++++------
+ 4 files changed, 39 insertions(+), 34 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_trans_resv.c
++++ b/fs/xfs/libxfs/xfs_trans_resv.c
+@@ -800,7 +800,7 @@ xfs_calc_qm_dqalloc_reservation(
+
+ /*
+ * Turning off quotas.
+- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
++ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
+ * the superblock for the quota flags: sector size
+ */
+ STATIC uint
+@@ -813,7 +813,7 @@ xfs_calc_qm_quotaoff_reservation(
+
+ /*
+ * End of turning off quotas.
+- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
++ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
+ */
+ STATIC uint
+ xfs_calc_qm_quotaoff_end_reservation(void)
+--- a/fs/xfs/xfs_dquot_item.h
++++ b/fs/xfs/xfs_dquot_item.h
+@@ -12,24 +12,26 @@ struct xfs_mount;
+ struct xfs_qoff_logitem;
+
+ struct xfs_dq_logitem {
+- struct xfs_log_item qli_item; /* common portion */
++ struct xfs_log_item qli_item; /* common portion */
+ struct xfs_dquot *qli_dquot; /* dquot ptr */
+- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
++ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+ };
+
+-typedef struct xfs_qoff_logitem {
+- struct xfs_log_item qql_item; /* common portion */
+- struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
++struct xfs_qoff_logitem {
++ struct xfs_log_item qql_item; /* common portion */
++ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+ unsigned int qql_flags;
+-} xfs_qoff_logitem_t;
++};
+
+
+-extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *);
+-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
+- struct xfs_qoff_logitem *, uint);
+-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
+- struct xfs_qoff_logitem *, uint);
+-extern void xfs_trans_log_quotaoff_item(struct xfs_trans *,
+- struct xfs_qoff_logitem *);
++void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
++struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
++ struct xfs_qoff_logitem *start,
++ uint flags);
++struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp,
++ struct xfs_qoff_logitem *startqoff,
++ uint flags);
++void xfs_trans_log_quotaoff_item(struct xfs_trans *tp,
++ struct xfs_qoff_logitem *qlp);
+
+ #endif /* __XFS_DQUOT_ITEM_H__ */
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -19,9 +19,12 @@
+ #include "xfs_qm.h"
+ #include "xfs_icache.h"
+
+-STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
+-STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
+- uint);
++STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp,
++ struct xfs_qoff_logitem **qoffstartp,
++ uint flags);
++STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp,
++ struct xfs_qoff_logitem *startqoff,
++ uint flags);
+
+ /*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+@@ -40,7 +43,7 @@ xfs_qm_scall_quotaoff(
+ uint dqtype;
+ int error;
+ uint inactivate_flags;
+- xfs_qoff_logitem_t *qoffstart;
++ struct xfs_qoff_logitem *qoffstart;
+
+ /*
+ * No file system can have quotas enabled on disk but not in core.
+@@ -540,13 +543,13 @@ out_unlock:
+
+ STATIC int
+ xfs_qm_log_quotaoff_end(
+- xfs_mount_t *mp,
+- xfs_qoff_logitem_t *startqoff,
++ struct xfs_mount *mp,
++ struct xfs_qoff_logitem *startqoff,
+ uint flags)
+ {
+- xfs_trans_t *tp;
++ struct xfs_trans *tp;
+ int error;
+- xfs_qoff_logitem_t *qoffi;
++ struct xfs_qoff_logitem *qoffi;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+ if (error)
+@@ -568,13 +571,13 @@ xfs_qm_log_quotaoff_end(
+
+ STATIC int
+ xfs_qm_log_quotaoff(
+- xfs_mount_t *mp,
+- xfs_qoff_logitem_t **qoffstartp,
+- uint flags)
++ struct xfs_mount *mp,
++ struct xfs_qoff_logitem **qoffstartp,
++ uint flags)
+ {
+- xfs_trans_t *tp;
++ struct xfs_trans *tp;
+ int error;
+- xfs_qoff_logitem_t *qoffi;
++ struct xfs_qoff_logitem *qoffi;
+
+ *qoffstartp = NULL;
+
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -824,13 +824,13 @@ xfs_trans_reserve_quota_nblks(
+ /*
+ * This routine is called to allocate a quotaoff log item.
+ */
+-xfs_qoff_logitem_t *
++struct xfs_qoff_logitem *
+ xfs_trans_get_qoff_item(
+- xfs_trans_t *tp,
+- xfs_qoff_logitem_t *startqoff,
++ struct xfs_trans *tp,
++ struct xfs_qoff_logitem *startqoff,
+ uint flags)
+ {
+- xfs_qoff_logitem_t *q;
++ struct xfs_qoff_logitem *q;
+
+ ASSERT(tp != NULL);
+
+@@ -852,8 +852,8 @@ xfs_trans_get_qoff_item(
+ */
+ void
+ xfs_trans_log_quotaoff_item(
+- xfs_trans_t *tp,
+- xfs_qoff_logitem_t *qlp)
++ struct xfs_trans *tp,
++ struct xfs_qoff_logitem *qlp)
+ {
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags);
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:30 +0530
+Subject: xfs: Replace function declaration by actual definition
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-14-chandan.babu@oracle.com>
+
+From: Pavel Reichl <preichl@redhat.com>
+
+commit 1cc95e6f0d7cfd61c9d3c5cdd4e7345b173f764f upstream.
+
+Signed-off-by: Pavel Reichl <preichl@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+[darrick: fix typo in subject line]
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm_syscalls.c | 140 ++++++++++++++++++++++-------------------------
+ 1 file changed, 66 insertions(+), 74 deletions(-)
+
+--- a/fs/xfs/xfs_qm_syscalls.c
++++ b/fs/xfs/xfs_qm_syscalls.c
+@@ -19,12 +19,72 @@
+ #include "xfs_qm.h"
+ #include "xfs_icache.h"
+
+-STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp,
+- struct xfs_qoff_logitem **qoffstartp,
+- uint flags);
+-STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp,
+- struct xfs_qoff_logitem *startqoff,
+- uint flags);
++STATIC int
++xfs_qm_log_quotaoff(
++ struct xfs_mount *mp,
++ struct xfs_qoff_logitem **qoffstartp,
++ uint flags)
++{
++ struct xfs_trans *tp;
++ int error;
++ struct xfs_qoff_logitem *qoffi;
++
++ *qoffstartp = NULL;
++
++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
++ if (error)
++ goto out;
++
++ qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
++ xfs_trans_log_quotaoff_item(tp, qoffi);
++
++ spin_lock(&mp->m_sb_lock);
++ mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
++ spin_unlock(&mp->m_sb_lock);
++
++ xfs_log_sb(tp);
++
++ /*
++ * We have to make sure that the transaction is secure on disk before we
++ * return and actually stop quota accounting. So, make it synchronous.
++ * We don't care about quotoff's performance.
++ */
++ xfs_trans_set_sync(tp);
++ error = xfs_trans_commit(tp);
++ if (error)
++ goto out;
++
++ *qoffstartp = qoffi;
++out:
++ return error;
++}
++
++STATIC int
++xfs_qm_log_quotaoff_end(
++ struct xfs_mount *mp,
++ struct xfs_qoff_logitem *startqoff,
++ uint flags)
++{
++ struct xfs_trans *tp;
++ int error;
++ struct xfs_qoff_logitem *qoffi;
++
++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
++ if (error)
++ return error;
++
++ qoffi = xfs_trans_get_qoff_item(tp, startqoff,
++ flags & XFS_ALL_QUOTA_ACCT);
++ xfs_trans_log_quotaoff_item(tp, qoffi);
++
++ /*
++ * We have to make sure that the transaction is secure on disk before we
++ * return and actually stop quota accounting. So, make it synchronous.
++ * We don't care about quotoff's performance.
++ */
++ xfs_trans_set_sync(tp);
++ return xfs_trans_commit(tp);
++}
+
+ /*
+ * Turn off quota accounting and/or enforcement for all udquots and/or
+@@ -541,74 +601,6 @@ out_unlock:
+ return error;
+ }
+
+-STATIC int
+-xfs_qm_log_quotaoff_end(
+- struct xfs_mount *mp,
+- struct xfs_qoff_logitem *startqoff,
+- uint flags)
+-{
+- struct xfs_trans *tp;
+- int error;
+- struct xfs_qoff_logitem *qoffi;
+-
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+- if (error)
+- return error;
+-
+- qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+- flags & XFS_ALL_QUOTA_ACCT);
+- xfs_trans_log_quotaoff_item(tp, qoffi);
+-
+- /*
+- * We have to make sure that the transaction is secure on disk before we
+- * return and actually stop quota accounting. So, make it synchronous.
+- * We don't care about quotoff's performance.
+- */
+- xfs_trans_set_sync(tp);
+- return xfs_trans_commit(tp);
+-}
+-
+-
+-STATIC int
+-xfs_qm_log_quotaoff(
+- struct xfs_mount *mp,
+- struct xfs_qoff_logitem **qoffstartp,
+- uint flags)
+-{
+- struct xfs_trans *tp;
+- int error;
+- struct xfs_qoff_logitem *qoffi;
+-
+- *qoffstartp = NULL;
+-
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+- if (error)
+- goto out;
+-
+- qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+- xfs_trans_log_quotaoff_item(tp, qoffi);
+-
+- spin_lock(&mp->m_sb_lock);
+- mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+- spin_unlock(&mp->m_sb_lock);
+-
+- xfs_log_sb(tp);
+-
+- /*
+- * We have to make sure that the transaction is secure on disk before we
+- * return and actually stop quota accounting. So, make it synchronous.
+- * We don't care about quotoff's performance.
+- */
+- xfs_trans_set_sync(tp);
+- error = xfs_trans_commit(tp);
+- if (error)
+- goto out;
+-
+- *qoffstartp = qoffi;
+-out:
+- return error;
+-}
+-
+ /* Fill out the quota context. */
+ static void
+ xfs_qm_scall_getquota_fill_qc(
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:20 +0530
+Subject: xfs: rework collapse range into an atomic operation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-4-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 211683b21de959a647de74faedfdd8a5d189327e upstream.
+
+The collapse range operation uses a unique transaction and ilock
+cycle for the hole punch and each extent shift iteration of the
+overall operation. While the hole punch is safe as a separate
+operation due to the iolock, cycling the ilock after each extent
+shift is risky w.r.t. concurrent operations, similar to insert range.
+
+To avoid this problem, make collapse range atomic with respect to
+ilock. Hold the ilock across the entire operation, replace the
+individual transactions with a single rolling transaction sequence
+and finish dfops on each iteration to perform pending frees and roll
+the transaction. Remove the unnecessary quota reservation as
+collapse range can only ever merge extents (and thus remove extent
+records and potentially free bmap blocks). The dfops call
+automatically relogs the inode to keep it moving in the log. This
+guarantees that nothing else can change the extent mapping of an
+inode while a collapse range operation is in progress.
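+
+[In simplified form, the old commit-per-iteration loop becomes a single
+rolling transaction under one ilock cycle, condensed from the hunks
+below:]
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	while (!done) {
+		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb,
+				shift_fsb, &done);
+		if (error)
+			goto out_trans_cancel;
+		if (done)
+			break;
+
+		/* finish deferred frees and roll; this also relogs the inode */
+		error = xfs_defer_finish(&tp);
+		if (error)
+			goto out_trans_cancel;
+	}
+
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);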
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_bmap_util.c | 29 +++++++++++++++--------------
+ 1 file changed, 15 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1237,7 +1237,6 @@ xfs_collapse_file_space(
+ int error;
+ xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len);
+ xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
+- uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+ bool done = false;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+@@ -1253,32 +1252,34 @@ xfs_collapse_file_space(
+ if (error)
+ return error;
+
+- while (!error && !done) {
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+- &tp);
+- if (error)
+- break;
++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
++ if (error)
++ return error;
+
+- xfs_ilock(ip, XFS_ILOCK_EXCL);
+- error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+- ip->i_gdquot, ip->i_pdquot, resblks, 0,
+- XFS_QMOPT_RES_REGBLKS);
+- if (error)
+- goto out_trans_cancel;
+- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++ xfs_ilock(ip, XFS_ILOCK_EXCL);
++ xfs_trans_ijoin(tp, ip, 0);
+
++ while (!done) {
+ error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
+ &done);
+ if (error)
+ goto out_trans_cancel;
++ if (done)
++ break;
+
+- error = xfs_trans_commit(tp);
++ /* finish any deferred frees and roll the transaction */
++ error = xfs_defer_finish(&tp);
++ if (error)
++ goto out_trans_cancel;
+ }
+
++ error = xfs_trans_commit(tp);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+
+ out_trans_cancel:
+ xfs_trans_cancel(tp);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+ }
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:19 +0530
+Subject: xfs: rework insert range into an atomic operation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-3-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit dd87f87d87fa4359a54e7b44549742f579e3e805 upstream.
+
+The insert range operation uses a unique transaction and ilock cycle
+for the extent split and each extent shift iteration of the overall
+operation. While this works, it risks racing with other
+operations in subtle ways such as COW writeback modifying an extent
+tree in the middle of a shift operation.
+
+To avoid this problem, make insert range atomic with respect to
+ilock. Hold the ilock across the entire operation, replace the
+individual transactions with a single rolling transaction sequence
+and relog the inode to keep it moving in the log. This guarantees
+that nothing else can change the extent mapping of an inode while
+an insert range operation is in progress.
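+
+[In simplified form, the extent split and all shift iterations now share
+one transaction that is rolled between iterations, condensed from the
+hunks below:]
+
+	error = xfs_bmap_split_extent(tp, ip, stop_fsb);
+	if (error)
+		goto out_trans_cancel;
+
+	do {
+		/* roll the transaction, keeping the inode joined and relogged */
+		error = xfs_trans_roll_inode(&tp, ip);
+		if (error)
+			goto out_trans_cancel;
+
+		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
+				&done, stop_fsb);
+		if (error)
+			goto out_trans_cancel;
+	} while (!done);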
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_bmap_util.c | 32 +++++++++++++-------------------
+ 1 file changed, 13 insertions(+), 19 deletions(-)
+
+--- a/fs/xfs/xfs_bmap_util.c
++++ b/fs/xfs/xfs_bmap_util.c
+@@ -1321,47 +1321,41 @@ xfs_insert_file_space(
+ if (error)
+ return error;
+
+- /*
+- * The extent shifting code works on extent granularity. So, if stop_fsb
+- * is not the starting block of extent, we need to split the extent at
+- * stop_fsb.
+- */
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
++ xfs_trans_ijoin(tp, ip, 0);
+
++ /*
++ * The extent shifting code works on extent granularity. So, if stop_fsb
++ * is not the starting block of extent, we need to split the extent at
++ * stop_fsb.
++ */
+ error = xfs_bmap_split_extent(tp, ip, stop_fsb);
+ if (error)
+ goto out_trans_cancel;
+
+- error = xfs_trans_commit(tp);
+- if (error)
+- return error;
+-
+- while (!error && !done) {
+- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0,
+- &tp);
++ do {
++ error = xfs_trans_roll_inode(&tp, ip);
+ if (error)
+- break;
++ goto out_trans_cancel;
+
+- xfs_ilock(ip, XFS_ILOCK_EXCL);
+- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
+ &done, stop_fsb);
+ if (error)
+ goto out_trans_cancel;
++ } while (!done);
+
+- error = xfs_trans_commit(tp);
+- }
+-
++ error = xfs_trans_commit(tp);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+
+ out_trans_cancel:
+ xfs_trans_cancel(tp);
++ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+ }
+
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:37 +0530
+Subject: xfs: tail updates only need to occur when LSN changes
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-21-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 8eb807bd839938b45bf7a97f0568d2a845ba6929 upstream.
+
+We currently wake anything waiting on the log tail to move whenever
+the log item at the tail of the log is removed. Historically this
+was fine behaviour because there were very few items at any given
+LSN. But with delayed logging, there may be thousands of items at
+any given LSN, and we can't move the tail until they are all gone.
+
+Hence if we are removing them in near tail-first order, we might be
+waking up processes waiting on the tail LSN to change (e.g. log
+space waiters) repeatedly without them being able to make progress.
+This also occurs with the new sync push waiters, and can result in
+thousands of spurious wakeups every second when under heavy direct
+reclaim pressure.
+
+To fix this, check that the tail LSN has actually changed on the
+AIL before triggering wakeups. This will reduce the number of
+spurious wakeups when doing bulk AIL removal and make this code much
+more efficient.
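+
+[In simplified form: xfs_ail_delete_one() now returns the item's LSN
+only when the removed item was at the tail, and xfs_ail_update_finish()
+skips the tail update and wakeups when the minimum LSN is unchanged; a
+condensed sketch of the hunks below:]
+
+	tail_lsn = xfs_ail_delete_one(ailp, lip);	/* 0 unless tail item */
+	xfs_ail_update_finish(ailp, tail_lsn);
+
+	/* in xfs_ail_update_finish(): */
+	if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+		spin_unlock(&ailp->ail_lock);
+		return;			/* tail did not move, no wakeups */
+	}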
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode_item.c | 18 ++++++++++++----
+ fs/xfs/xfs_trans_ail.c | 52 +++++++++++++++++++++++++++++++++---------------
+ fs/xfs/xfs_trans_priv.h | 4 +--
+ 3 files changed, 51 insertions(+), 23 deletions(-)
+
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -732,19 +732,27 @@ xfs_iflush_done(
+ * holding the lock before removing the inode from the AIL.
+ */
+ if (need_ail) {
+- bool mlip_changed = false;
++ xfs_lsn_t tail_lsn = 0;
+
+ /* this is an opencoded batch version of xfs_trans_ail_delete */
+ spin_lock(&ailp->ail_lock);
+ list_for_each_entry(blip, &tmp, li_bio_list) {
+ if (INODE_ITEM(blip)->ili_logged &&
+- blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
+- mlip_changed |= xfs_ail_delete_one(ailp, blip);
+- else {
++ blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) {
++ /*
++ * xfs_ail_update_finish() only cares about the
++ * lsn of the first tail item removed, any
++ * others will be at the same or higher lsn so
++ * we just ignore them.
++ */
++ xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip);
++ if (!tail_lsn && lsn)
++ tail_lsn = lsn;
++ } else {
+ xfs_clear_li_failed(blip);
+ }
+ }
+- xfs_ail_update_finish(ailp, mlip_changed);
++ xfs_ail_update_finish(ailp, tail_lsn);
+ }
+
+ /*
+--- a/fs/xfs/xfs_trans_ail.c
++++ b/fs/xfs/xfs_trans_ail.c
+@@ -108,17 +108,25 @@ xfs_ail_next(
+ * We need the AIL lock in order to get a coherent read of the lsn of the last
+ * item in the AIL.
+ */
++static xfs_lsn_t
++__xfs_ail_min_lsn(
++ struct xfs_ail *ailp)
++{
++ struct xfs_log_item *lip = xfs_ail_min(ailp);
++
++ if (lip)
++ return lip->li_lsn;
++ return 0;
++}
++
+ xfs_lsn_t
+ xfs_ail_min_lsn(
+ struct xfs_ail *ailp)
+ {
+- xfs_lsn_t lsn = 0;
+- struct xfs_log_item *lip;
++ xfs_lsn_t lsn;
+
+ spin_lock(&ailp->ail_lock);
+- lip = xfs_ail_min(ailp);
+- if (lip)
+- lsn = lip->li_lsn;
++ lsn = __xfs_ail_min_lsn(ailp);
+ spin_unlock(&ailp->ail_lock);
+
+ return lsn;
+@@ -683,11 +691,12 @@ xfs_ail_push_all_sync(
+ void
+ xfs_ail_update_finish(
+ struct xfs_ail *ailp,
+- bool do_tail_update) __releases(ailp->ail_lock)
++ xfs_lsn_t old_lsn) __releases(ailp->ail_lock)
+ {
+ struct xfs_mount *mp = ailp->ail_mount;
+
+- if (!do_tail_update) {
++ /* if the tail lsn hasn't changed, don't do updates or wakeups. */
++ if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+ spin_unlock(&ailp->ail_lock);
+ return;
+ }
+@@ -732,7 +741,7 @@ xfs_trans_ail_update_bulk(
+ xfs_lsn_t lsn) __releases(ailp->ail_lock)
+ {
+ struct xfs_log_item *mlip;
+- int mlip_changed = 0;
++ xfs_lsn_t tail_lsn = 0;
+ int i;
+ LIST_HEAD(tmp);
+
+@@ -747,9 +756,10 @@ xfs_trans_ail_update_bulk(
+ continue;
+
+ trace_xfs_ail_move(lip, lip->li_lsn, lsn);
++ if (mlip == lip && !tail_lsn)
++ tail_lsn = lip->li_lsn;
++
+ xfs_ail_delete(ailp, lip);
+- if (mlip == lip)
+- mlip_changed = 1;
+ } else {
+ trace_xfs_ail_insert(lip, 0, lsn);
+ }
+@@ -760,15 +770,23 @@ xfs_trans_ail_update_bulk(
+ if (!list_empty(&tmp))
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
+
+- xfs_ail_update_finish(ailp, mlip_changed);
++ xfs_ail_update_finish(ailp, tail_lsn);
+ }
+
+-bool
++/*
++ * Delete one log item from the AIL.
++ *
++ * If this item was at the tail of the AIL, return the LSN of the log item so
++ * that we can use it to check if the LSN of the tail of the log has moved
++ * when finishing up the AIL delete process in xfs_ail_update_finish().
++ */
++xfs_lsn_t
+ xfs_ail_delete_one(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+ {
+ struct xfs_log_item *mlip = xfs_ail_min(ailp);
++ xfs_lsn_t lsn = lip->li_lsn;
+
+ trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
+ xfs_ail_delete(ailp, lip);
+@@ -776,7 +794,9 @@ xfs_ail_delete_one(
+ clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
+ lip->li_lsn = 0;
+
+- return mlip == lip;
++ if (mlip == lip)
++ return lsn;
++ return 0;
+ }
+
+ /**
+@@ -807,7 +827,7 @@ xfs_trans_ail_delete(
+ int shutdown_type)
+ {
+ struct xfs_mount *mp = ailp->ail_mount;
+- bool need_update;
++ xfs_lsn_t tail_lsn;
+
+ if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
+ spin_unlock(&ailp->ail_lock);
+@@ -820,8 +840,8 @@ xfs_trans_ail_delete(
+ return;
+ }
+
+- need_update = xfs_ail_delete_one(ailp, lip);
+- xfs_ail_update_finish(ailp, need_update);
++ tail_lsn = xfs_ail_delete_one(ailp, lip);
++ xfs_ail_update_finish(ailp, tail_lsn);
+ }
+
+ int
+--- a/fs/xfs/xfs_trans_priv.h
++++ b/fs/xfs/xfs_trans_priv.h
+@@ -91,8 +91,8 @@ xfs_trans_ail_update(
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
+ }
+
+-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+-void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update)
++xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
++void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
+ __releases(ailp->ail_lock);
+ void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
+ int shutdown_type);
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:35 +0530
+Subject: xfs: Throttle commits on delayed background CIL push
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-19-chandan.babu@oracle.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 0e7ab7efe77451cba4cbecb6c9f5ef83cf32b36b upstream.
+
+In certain situations the background CIL push can be indefinitely
+delayed. While we have workarounds for the obvious cases now, they
+don't solve the underlying issue: there is no
+upper limit on the CIL where we will either force or wait for
+a background push to start, hence allowing the CIL to grow without
+bound until it consumes all log space.
+
+To fix this, add a new wait queue to the CIL which allows background
+pushes to wait for the CIL context to be switched out. This happens
+when the push starts, so it will allow us to block incoming
+transaction commit completion until the push has started. This will
+only affect processes that are running modifications, and only when
+the CIL threshold has been significantly overrun.
+
+This has no apparent impact on performance, and didn't even trigger
+until over 45 million inodes had been created in a 16-way fsmark
+test on a 2GB log. That was limiting at 64MB of log space used, so
+the active CIL size is only about 3% of the total log in that case.
+The concurrent removal of those files did not trigger the background
+sleep at all.
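+
+[In simplified form, commit completion now sleeps on the new per-context
+wait queue once the CIL is past twice the background push threshold,
+condensed from the hunks below:]
+
+	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+		trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+		/* sleeps until xlog_cil_push() wakes push_wait for this ctx */
+		xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
+		return;
+	}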
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_cil.c | 37 +++++++++++++++++++++++++++++++++----
+ fs/xfs/xfs_log_priv.h | 24 ++++++++++++++++++++++++
+ fs/xfs/xfs_trace.h | 1 +
+ 3 files changed, 58 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -671,6 +671,11 @@ xlog_cil_push(
+ ASSERT(push_seq <= ctx->sequence);
+
+ /*
++ * Wake up any background push waiters now this context is being pushed.
++ */
++ wake_up_all(&ctx->push_wait);
++
++ /*
+ * Check if we've anything to push. If there is nothing, then we don't
+ * move on to a new sequence number and so we have to be able to push
+ * this sequence again later.
+@@ -746,6 +751,7 @@ xlog_cil_push(
+ */
+ INIT_LIST_HEAD(&new_ctx->committing);
+ INIT_LIST_HEAD(&new_ctx->busy_extents);
++ init_waitqueue_head(&new_ctx->push_wait);
+ new_ctx->sequence = ctx->sequence + 1;
+ new_ctx->cil = cil;
+ cil->xc_ctx = new_ctx;
+@@ -900,7 +906,7 @@ xlog_cil_push_work(
+ */
+ static void
+ xlog_cil_push_background(
+- struct xlog *log)
++ struct xlog *log) __releases(cil->xc_ctx_lock)
+ {
+ struct xfs_cil *cil = log->l_cilp;
+
+@@ -914,14 +920,36 @@ xlog_cil_push_background(
+ * don't do a background push if we haven't used up all the
+ * space available yet.
+ */
+- if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
++ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
++ up_read(&cil->xc_ctx_lock);
+ return;
++ }
+
+ spin_lock(&cil->xc_push_lock);
+ if (cil->xc_push_seq < cil->xc_current_sequence) {
+ cil->xc_push_seq = cil->xc_current_sequence;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+ }
++
++ /*
++ * Drop the context lock now, we can't hold that if we need to sleep
++ * because we are over the blocking threshold. The push_lock is still
++ * held, so blocking threshold sleep/wakeup is still correctly
++ * serialised here.
++ */
++ up_read(&cil->xc_ctx_lock);
++
++ /*
++ * If we are well over the space limit, throttle the work that is being
++ * done until the push work on this context has begun.
++ */
++ if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
++ trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
++ ASSERT(cil->xc_ctx->space_used < log->l_logsize);
++ xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
++ return;
++ }
++
+ spin_unlock(&cil->xc_push_lock);
+
+ }
+@@ -1038,9 +1066,9 @@ xfs_log_commit_cil(
+ if (lip->li_ops->iop_committing)
+ lip->li_ops->iop_committing(lip, xc_commit_lsn);
+ }
+- xlog_cil_push_background(log);
+
+- up_read(&cil->xc_ctx_lock);
++ /* xlog_cil_push_background() releases cil->xc_ctx_lock */
++ xlog_cil_push_background(log);
+ }
+
+ /*
+@@ -1199,6 +1227,7 @@ xlog_cil_init(
+
+ INIT_LIST_HEAD(&ctx->committing);
+ INIT_LIST_HEAD(&ctx->busy_extents);
++ init_waitqueue_head(&ctx->push_wait);
+ ctx->sequence = 1;
+ ctx->cil = cil;
+ cil->xc_ctx = ctx;
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -247,6 +247,7 @@ struct xfs_cil_ctx {
+ struct xfs_log_vec *lv_chain; /* logvecs being pushed */
+ struct list_head iclog_entry;
+ struct list_head committing; /* ctx committing list */
++ wait_queue_head_t push_wait; /* background push throttle */
+ struct work_struct discard_endio_work;
+ };
+
+@@ -344,10 +345,33 @@ struct xfs_cil {
+ * buffer window (32MB) as measurements have shown this to be roughly the
+ * point of diminishing performance increases under highly concurrent
+ * modification workloads.
++ *
++ * To prevent the CIL from overflowing upper commit size bounds, we introduce a
++ * new threshold at which we block committing transactions until the background
++ * CIL commit commences and switches to a new context. While this is not a hard
++ * limit, it forces the process committing a transaction to the CIL to block and
++ * yeild the CPU, giving the CIL push work a chance to be scheduled and start
++ * work. This prevents a process running lots of transactions from overfilling
++ * the CIL because it is not yielding the CPU. We set the blocking limit at
++ * twice the background push space threshold so we keep in line with the AIL
++ * push thresholds.
++ *
++ * Note: this is not a -hard- limit as blocking is applied after the transaction
++ * is inserted into the CIL and the push has been triggered. It is largely a
++ * throttling mechanism that allows the CIL push to be scheduled and run. A hard
++ * limit will be difficult to implement without introducing global serialisation
++ * in the CIL commit fast path, and it's not at all clear that we actually need
++ * such hard limits given the ~7 years we've run without a hard limit before
++ * finding the first situation where a checkpoint size overflow actually
++ * occurred. Hence the simple throttle, and an ASSERT check to tell us that
++ * we've overrun the max size.
+ */
+ #define XLOG_CIL_SPACE_LIMIT(log) \
+ min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
+
++#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \
++ (XLOG_CIL_SPACE_LIMIT(log) * 2)
++
+ /*
+ * ticket grant locks, queues and accounting have their own cachlines
+ * as these are quite hot and can be operated on concurrently.
+--- a/fs/xfs/xfs_trace.h
++++ b/fs/xfs/xfs_trace.h
+@@ -1011,6 +1011,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_re
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
+ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
++DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
+
+ DECLARE_EVENT_CLASS(xfs_log_item_class,
+ TP_PROTO(struct xfs_log_item *lip),
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:39 +0530
+Subject: xfs: trylock underlying buffer on dquot flush
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-23-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 8d3d7e2b35ea7d91d6e085c93b5efecfb0fba307 upstream.
+
+A dquot flush currently blocks on the buffer lock for the underlying
+dquot buffer. In turn, this causes xfsaild to block rather than
+continue processing other items in the meantime. Update
+xfs_qm_dqflush() to trylock the buffer, similar to how inode buffers
+are handled, and return -EAGAIN if the lock fails. Fix up any
+callers that don't currently handle the error properly.
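+
+[In simplified form, the read of the underlying dquot buffer now uses
+XBF_TRYLOCK and hands -EAGAIN back to the caller, condensed from the
+hunks below:]
+
+	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+			mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
+			&bp, &xfs_dquot_buf_ops);
+	if (error)
+		goto out_unlock;	/* -EAGAIN: caller backs off and retries */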
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c | 6 +++---
+ fs/xfs/xfs_dquot_item.c | 3 ++-
+ fs/xfs/xfs_qm.c | 14 +++++++++-----
+ 3 files changed, 14 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -1105,8 +1105,8 @@ xfs_qm_dqflush(
+ * Get the buffer containing the on-disk dquot
+ */
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+- mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+- &xfs_dquot_buf_ops);
++ mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
++ &bp, &xfs_dquot_buf_ops);
+ if (error)
+ goto out_unlock;
+
+@@ -1176,7 +1176,7 @@ xfs_qm_dqflush(
+
+ out_unlock:
+ xfs_dqfunlock(dqp);
+- return -EIO;
++ return error;
+ }
+
+ /*
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push(
+ if (!xfs_buf_delwri_queue(bp, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+ xfs_buf_relse(bp);
+- }
++ } else if (error == -EAGAIN)
++ rval = XFS_ITEM_LOCKED;
+
+ spin_lock(&lip->li_ailp->ail_lock);
+ out_unlock:
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -121,12 +121,11 @@ xfs_qm_dqpurge(
+ {
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
++ int error = -EAGAIN;
+
+ xfs_dqlock(dqp);
+- if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
+- xfs_dqunlock(dqp);
+- return -EAGAIN;
+- }
++ if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0)
++ goto out_unlock;
+
+ dqp->dq_flags |= XFS_DQ_FREEING;
+
+@@ -139,7 +138,6 @@ xfs_qm_dqpurge(
+ */
+ if (XFS_DQ_IS_DIRTY(dqp)) {
+ struct xfs_buf *bp = NULL;
+- int error;
+
+ /*
+ * We don't care about getting disk errors here. We need
+@@ -149,6 +147,8 @@ xfs_qm_dqpurge(
+ if (!error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
++ } else if (error == -EAGAIN) {
++ goto out_unlock;
+ }
+ xfs_dqflock(dqp);
+ }
+@@ -174,6 +174,10 @@ xfs_qm_dqpurge(
+
+ xfs_qm_dqdestroy(dqp);
+ return 0;
++
++out_unlock:
++ xfs_dqunlock(dqp);
++ return error;
+ }
+
+ /*
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:26 +0530
+Subject: xfs: Use scnprintf() for avoiding potential buffer overflow
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-10-chandan.babu@oracle.com>
+
+From: Takashi Iwai <tiwai@suse.de>
+
+commit 17bb60b74124e9491d593e2601e3afe14daa2f57 upstream.
+
+Since snprintf() returns the would-be-output size instead of the
+actual output size, the succeeding calls may go beyond the given
+buffer limit. Fix it by replacing with scnprintf().
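+
+[As a standalone illustration (userspace sketch, not part of the patch):
+snprintf() returns the length the output would have had, so using its
+return value to advance a write offset can step past the buffer, while
+the kernel's scnprintf() returns the number of characters actually
+stored:]
+
+	char buf[8];
+	int len = 0;
+
+	len += snprintf(buf + len, sizeof(buf) - len, "0123456789");
+	/* len is now 10: past the 8-byte buffer. The next call computes
+	 * buf + 10 and a bogus size, risking an out-of-bounds write.
+	 * With scnprintf(), len would be capped at 7 and stay in bounds. */
+	len += snprintf(buf + len, sizeof(buf) - len, "!");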
+
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_stats.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/fs/xfs/xfs_stats.c
++++ b/fs/xfs/xfs_stats.c
+@@ -57,13 +57,13 @@ int xfs_stats_format(struct xfsstats __p
+ /* Loop over all stats groups */
+
+ for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
+- len += snprintf(buf + len, PATH_MAX - len, "%s",
++ len += scnprintf(buf + len, PATH_MAX - len, "%s",
+ xstats[i].desc);
+ /* inner loop does each group */
+ for (; j < xstats[i].endpoint; j++)
+- len += snprintf(buf + len, PATH_MAX - len, " %u",
++ len += scnprintf(buf + len, PATH_MAX - len, " %u",
+ counter_val(stats, j));
+- len += snprintf(buf + len, PATH_MAX - len, "\n");
++ len += scnprintf(buf + len, PATH_MAX - len, "\n");
+ }
+ /* extra precision counters */
+ for_each_possible_cpu(i) {
+@@ -72,9 +72,9 @@ int xfs_stats_format(struct xfsstats __p
+ xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes;
+ }
+
+- len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
++ len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
+ xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
+- len += snprintf(buf + len, PATH_MAX-len, "debug %u\n",
++ len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n",
+ #if defined(DEBUG)
+ 1);
+ #else
--- /dev/null
+From foo@baz Wed Oct 26 04:49:40 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed, 26 Oct 2022 11:58:22 +0530
+Subject: xfs: xfs_buf_corruption_error should take __this_address
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221026062843.927600-6-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit e83cf875d67a6cb9ddfaa8b45d2fa93d12b5c66f upstream.
+
+Add an xfs_failaddr_t parameter to this function so that callers can
+potentially pass in (and therefore report) the exact point in the code
+where we decided that a metadata buffer was corrupt. This enables us to
+wire it up to checking functions that have to run outside of verifiers.
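+
+As a sketch of how such a caller might look: the helper name and the
+block-number check below are invented for illustration, but
+xfs_buf_corruption_error() is called with the new signature from this
+patch, and __this_address is the existing XFS macro that captures the
+current code address:
+
+	/* Hypothetical post-verifier check; only the reporting call
+	 * reflects the interface added by this patch. */
+	STATIC int
+	xfs_example_check_sibling(
+		struct xfs_buf		*bp,
+		xfs_daddr_t		expected)
+	{
+		if (bp->b_bn != expected) {
+			/* report this spot in the caller, not the
+			 * __return_address inside the error helper */
+			xfs_buf_corruption_error(bp, __this_address);
+			return -EFSCORRUPTED;
+		}
+		return 0;
+	}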
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_buf.c | 2 +-
+ fs/xfs/xfs_error.c | 5 +++--
+ fs/xfs/xfs_error.h | 2 +-
+ 3 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/xfs/xfs_buf.c
++++ b/fs/xfs/xfs_buf.c
+@@ -1564,7 +1564,7 @@ __xfs_buf_mark_corrupt(
+ {
+ ASSERT(bp->b_flags & XBF_DONE);
+
+- xfs_buf_corruption_error(bp);
++ xfs_buf_corruption_error(bp, fa);
+ xfs_buf_stale(bp);
+ }
+
+--- a/fs/xfs/xfs_error.c
++++ b/fs/xfs/xfs_error.c
+@@ -350,13 +350,14 @@ xfs_corruption_error(
+ */
+ void
+ xfs_buf_corruption_error(
+- struct xfs_buf *bp)
++ struct xfs_buf *bp,
++ xfs_failaddr_t fa)
+ {
+ struct xfs_mount *mp = bp->b_mount;
+
+ xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR,
+ "Metadata corruption detected at %pS, %s block 0x%llx",
+- __return_address, bp->b_ops->name, bp->b_bn);
++ fa, bp->b_ops->name, bp->b_bn);
+
+ xfs_alert(mp, "Unmount and run xfs_repair");
+
+--- a/fs/xfs/xfs_error.h
++++ b/fs/xfs/xfs_error.h
+@@ -15,7 +15,7 @@ extern void xfs_corruption_error(const c
+ struct xfs_mount *mp, const void *buf, size_t bufsize,
+ const char *filename, int linenum,
+ xfs_failaddr_t failaddr);
+-void xfs_buf_corruption_error(struct xfs_buf *bp);
++void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa);
+ extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
+ const char *name, const void *buf, size_t bufsz,
+ xfs_failaddr_t failaddr);