From: Greg Kroah-Hartman Date: Wed, 26 Oct 2022 14:52:12 +0000 (+0200) Subject: 5.4-stable patches X-Git-Tag: v5.10.151~59 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7e875dce2ea5aa1135d4a48f8a12773bf732f1cd;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch xfs-check-owner-of-dir3-blocks.patch xfs-check-owner-of-dir3-data-blocks.patch xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch xfs-factor-common-ail-item-deletion-code.patch xfs-factor-out-a-new-xfs_log_force_inode-helper.patch xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch xfs-fix-use-after-free-on-cil-context-on-shutdown.patch xfs-lower-cil-flush-limit-for-large-logs.patch xfs-move-inode-flush-to-the-sync-workqueue.patch xfs-open-code-insert-range-extent-split-helper.patch xfs-preserve-default-grace-interval-during-quotacheck.patch xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch xfs-remove-the-xfs_dq_logitem_t-typedef.patch xfs-remove-the-xfs_qoff_logitem_t-typedef.patch xfs-replace-function-declaration-by-actual-definition.patch xfs-rework-collapse-range-into-an-atomic-operation.patch xfs-rework-insert-range-into-an-atomic-operation.patch xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch xfs-throttle-commits-on-delayed-background-cil-push.patch xfs-trylock-underlying-buffer-on-dquot-flush.patch xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch xfs-xfs_buf_corruption_error-should-take-__this_address.patch --- diff --git a/queue-4.14/series b/queue-4.14/series new file mode 100644 index 00000000000..e69de29bb2d diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..b8be03f5971 --- 
/dev/null +++ b/queue-5.4/series @@ -0,0 +1,26 @@ +xfs-open-code-insert-range-extent-split-helper.patch +xfs-rework-insert-range-into-an-atomic-operation.patch +xfs-rework-collapse-range-into-an-atomic-operation.patch +xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch +xfs-xfs_buf_corruption_error-should-take-__this_address.patch +xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch +xfs-check-owner-of-dir3-data-blocks.patch +xfs-check-owner-of-dir3-blocks.patch +xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch +xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch +xfs-remove-the-xfs_dq_logitem_t-typedef.patch +xfs-remove-the-xfs_qoff_logitem_t-typedef.patch +xfs-replace-function-declaration-by-actual-definition.patch +xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch +xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch +xfs-preserve-default-grace-interval-during-quotacheck.patch +xfs-lower-cil-flush-limit-for-large-logs.patch +xfs-throttle-commits-on-delayed-background-cil-push.patch +xfs-factor-common-ail-item-deletion-code.patch +xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch +xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch +xfs-trylock-underlying-buffer-on-dquot-flush.patch +xfs-factor-out-a-new-xfs_log_force_inode-helper.patch +xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch +xfs-move-inode-flush-to-the-sync-workqueue.patch +xfs-fix-use-after-free-on-cil-context-on-shutdown.patch diff --git a/queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch b/queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch new file mode 100644 index 00000000000..9e75a46e3f9 --- /dev/null +++ b/queue-5.4/xfs-add-a-function-to-deal-with-corrupt-buffers-post-verifiers.patch @@ -0,0 +1,286 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 
11:58:21 +0530 +Subject: xfs: add a function to deal with corrupt buffers post-verifiers +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-5-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 8d57c21600a514d7a9237327c2496ae159bab5bb upstream. + +Add a helper function to get rid of buffers that we have decided are +corrupt after the verifiers have run. This function is intended to +handle metadata checks that can't happen in the verifiers, such as +inter-block relationship checking. Note that we now mark the buffer +stale so that it will not end up on any LRU and will be purged on +release. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 2 +- + fs/xfs/libxfs/xfs_attr_leaf.c | 6 +++--- + fs/xfs/libxfs/xfs_btree.c | 2 +- + fs/xfs/libxfs/xfs_da_btree.c | 10 +++++----- + fs/xfs/libxfs/xfs_dir2_leaf.c | 2 +- + fs/xfs/libxfs/xfs_dir2_node.c | 6 +++--- + fs/xfs/xfs_attr_inactive.c | 6 +++--- + fs/xfs/xfs_attr_list.c | 2 +- + fs/xfs/xfs_buf.c | 22 ++++++++++++++++++++++ + fs/xfs/xfs_buf.h | 2 ++ + fs/xfs/xfs_error.c | 2 ++ + fs/xfs/xfs_inode.c | 4 ++-- + 12 files changed, 46 insertions(+), 20 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -685,7 +685,7 @@ xfs_alloc_update_counters( + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > + be32_to_cpu(agf->agf_length))) { +- xfs_buf_corruption_error(agbp); ++ xfs_buf_mark_corrupt(agbp); + return -EFSCORRUPTED; + } + +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -2288,7 +2288,7 @@ xfs_attr3_leaf_lookup_int( + xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); + entries = 
xfs_attr3_leaf_entryp(leaf); + if (ichdr.count >= args->geo->blksize / 8) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + return -EFSCORRUPTED; + } + +@@ -2307,11 +2307,11 @@ xfs_attr3_leaf_lookup_int( + break; + } + if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + return -EFSCORRUPTED; + } + if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + return -EFSCORRUPTED; + } + +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -1820,7 +1820,7 @@ xfs_btree_lookup_get_block( + + out_bad: + *blkp = NULL; +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; + } +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -504,7 +504,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.forw) { + if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { +- xfs_buf_corruption_error(oldblk->bp); ++ xfs_buf_mark_corrupt(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -517,7 +517,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.back) { + if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { +- xfs_buf_corruption_error(oldblk->bp); ++ xfs_buf_mark_corrupt(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -1544,7 +1544,7 @@ xfs_da3_node_lookup_int( + } + + if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { +- xfs_buf_corruption_error(blk->bp); ++ xfs_buf_mark_corrupt(blk->bp); + return -EFSCORRUPTED; + } + +@@ -1559,7 +1559,7 @@ xfs_da3_node_lookup_int( + + /* Tree taller than we can handle; bail out! 
*/ + if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { +- xfs_buf_corruption_error(blk->bp); ++ xfs_buf_mark_corrupt(blk->bp); + return -EFSCORRUPTED; + } + +@@ -1567,7 +1567,7 @@ xfs_da3_node_lookup_int( + if (blkno == args->geo->leafblk) + expected_level = nodehdr.level - 1; + else if (expected_level != nodehdr.level) { +- xfs_buf_corruption_error(blk->bp); ++ xfs_buf_mark_corrupt(blk->bp); + return -EFSCORRUPTED; + } else + expected_level--; +--- a/fs/xfs/libxfs/xfs_dir2_leaf.c ++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c +@@ -1344,7 +1344,7 @@ xfs_dir2_leaf_removename( + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); + if (be16_to_cpu(bestsp[db]) != oldbest) { +- xfs_buf_corruption_error(lbp); ++ xfs_buf_mark_corrupt(lbp); + return -EFSCORRUPTED; + } + /* +--- a/fs/xfs/libxfs/xfs_dir2_node.c ++++ b/fs/xfs/libxfs/xfs_dir2_node.c +@@ -375,7 +375,7 @@ xfs_dir2_leaf_to_node( + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + if (be32_to_cpu(ltp->bestcount) > + (uint)dp->i_d.di_size / args->geo->blksize) { +- xfs_buf_corruption_error(lbp); ++ xfs_buf_mark_corrupt(lbp); + return -EFSCORRUPTED; + } + +@@ -449,7 +449,7 @@ xfs_dir2_leafn_add( + * into other peoples memory + */ + if (index < 0) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + return -EFSCORRUPTED; + } + +@@ -745,7 +745,7 @@ xfs_dir2_leafn_lookup_for_entry( + + xfs_dir3_leaf_check(dp, bp); + if (leafhdr.count <= 0) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + return -EFSCORRUPTED; + } + +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -145,7 +145,7 @@ xfs_attr3_node_inactive( + * Since this code is recursive (gasp!) we must protect ourselves. 
+ */ + if (level > XFS_DA_NODE_MAXDEPTH) { +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ + return -EFSCORRUPTED; + } +@@ -196,7 +196,7 @@ xfs_attr3_node_inactive( + error = xfs_attr3_leaf_inactive(trans, dp, child_bp); + break; + default: +- xfs_buf_corruption_error(child_bp); ++ xfs_buf_mark_corrupt(child_bp); + xfs_trans_brelse(*trans, child_bp); + error = -EFSCORRUPTED; + break; +@@ -281,7 +281,7 @@ xfs_attr3_root_inactive( + break; + default: + error = -EFSCORRUPTED; +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + xfs_trans_brelse(*trans, bp); + break; + } +--- a/fs/xfs/xfs_attr_list.c ++++ b/fs/xfs/xfs_attr_list.c +@@ -271,7 +271,7 @@ xfs_attr_node_list_lookup( + return 0; + + out_corruptbuf: +- xfs_buf_corruption_error(bp); ++ xfs_buf_mark_corrupt(bp); + xfs_trans_brelse(tp, bp); + return -EFSCORRUPTED; + } +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -1547,6 +1547,28 @@ xfs_buf_zero( + } + + /* ++ * Log a message about and stale a buffer that a caller has decided is corrupt. ++ * ++ * This function should be called for the kinds of metadata corruption that ++ * cannot be detected from a verifier, such as incorrect inter-block relationship ++ * data. Do /not/ call this function from a verifier function. ++ * ++ * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will ++ * be marked stale, but b_error will not be set. The caller is responsible for ++ * releasing the buffer or fixing it. ++ */ ++void ++__xfs_buf_mark_corrupt( ++ struct xfs_buf *bp, ++ xfs_failaddr_t fa) ++{ ++ ASSERT(bp->b_flags & XBF_DONE); ++ ++ xfs_buf_corruption_error(bp); ++ xfs_buf_stale(bp); ++} ++ ++/* + * Handling of buffer targets (buftargs). 
+ */ + +--- a/fs/xfs/xfs_buf.h ++++ b/fs/xfs/xfs_buf.h +@@ -270,6 +270,8 @@ static inline int xfs_buf_submit(struct + } + + void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); ++void __xfs_buf_mark_corrupt(struct xfs_buf *bp, xfs_failaddr_t fa); ++#define xfs_buf_mark_corrupt(bp) __xfs_buf_mark_corrupt((bp), __this_address) + + /* Buffer Utility Routines */ + extern void *xfs_buf_offset(struct xfs_buf *, size_t); +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -345,6 +345,8 @@ xfs_corruption_error( + * Complain about the kinds of metadata corruption that we can't detect from a + * verifier, such as incorrect inter-block relationship data. Does not set + * bp->b_error. ++ * ++ * Call xfs_buf_mark_corrupt, not this function. + */ + void + xfs_buf_corruption_error( +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2149,7 +2149,7 @@ xfs_iunlink_update_bucket( + * head of the list. + */ + if (old_value == new_agino) { +- xfs_buf_corruption_error(agibp); ++ xfs_buf_mark_corrupt(agibp); + return -EFSCORRUPTED; + } + +@@ -2283,7 +2283,7 @@ xfs_iunlink( + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || + !xfs_verify_agino_or_null(mp, agno, next_agino)) { +- xfs_buf_corruption_error(agibp); ++ xfs_buf_mark_corrupt(agibp); + return -EFSCORRUPTED; + } + diff --git a/queue-5.4/xfs-check-owner-of-dir3-blocks.patch b/queue-5.4/xfs-check-owner-of-dir3-blocks.patch new file mode 100644 index 00000000000..73ac4523fe9 --- /dev/null +++ b/queue-5.4/xfs-check-owner-of-dir3-blocks.patch @@ -0,0 +1,77 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:25 +0530 +Subject: xfs: check owner of dir3 blocks +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: 
<20221026062843.927600-9-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 1b2c1a63b678d63e9c98314d44413f5af79c9c80 upstream. + +Check the owner field of dir3 block headers. If it's corrupt, release +the buffer and return EFSCORRUPTED. All callers handle this properly. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2_block.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dir2_block.c ++++ b/fs/xfs/libxfs/xfs_dir2_block.c +@@ -114,6 +114,23 @@ const struct xfs_buf_ops xfs_dir3_block_ + .verify_struct = xfs_dir3_block_verify, + }; + ++static xfs_failaddr_t ++xfs_dir3_block_header_check( ++ struct xfs_inode *dp, ++ struct xfs_buf *bp) ++{ ++ struct xfs_mount *mp = dp->i_mount; ++ ++ if (xfs_sb_version_hascrc(&mp->m_sb)) { ++ struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; ++ ++ if (be64_to_cpu(hdr3->owner) != dp->i_ino) ++ return __this_address; ++ } ++ ++ return NULL; ++} ++ + int + xfs_dir3_block_read( + struct xfs_trans *tp, +@@ -121,12 +138,24 @@ xfs_dir3_block_read( + struct xfs_buf **bpp) + { + struct xfs_mount *mp = dp->i_mount; ++ xfs_failaddr_t fa; + int err; + + err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp, + XFS_DATA_FORK, &xfs_dir3_block_buf_ops); +- if (!err && tp && *bpp) +- xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); ++ if (err || !*bpp) ++ return err; ++ ++ /* Check things that we can't do in the verifier. 
*/ ++ fa = xfs_dir3_block_header_check(dp, *bpp); ++ if (fa) { ++ __xfs_buf_mark_corrupt(*bpp, fa); ++ xfs_trans_brelse(tp, *bpp); ++ *bpp = NULL; ++ return -EFSCORRUPTED; ++ } ++ ++ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); + return err; + } + diff --git a/queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch b/queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch new file mode 100644 index 00000000000..8445c7d522d --- /dev/null +++ b/queue-5.4/xfs-check-owner-of-dir3-data-blocks.patch @@ -0,0 +1,80 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:24 +0530 +Subject: xfs: check owner of dir3 data blocks +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-8-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit a10c21ed5d5241d11cf1d5a4556730840572900b upstream. + +[Slightly edit xfs_dir3_data_read() to work with existing mapped_bno argument instead +of flag values introduced in later kernels] + +Check the owner field of dir3 data block headers. If it's corrupt, +release the buffer and return EFSCORRUPTED. All callers handle this +properly. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2_data.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dir2_data.c ++++ b/fs/xfs/libxfs/xfs_dir2_data.c +@@ -348,6 +348,22 @@ static const struct xfs_buf_ops xfs_dir3 + .verify_write = xfs_dir3_data_write_verify, + }; + ++static xfs_failaddr_t ++xfs_dir3_data_header_check( ++ struct xfs_inode *dp, ++ struct xfs_buf *bp) ++{ ++ struct xfs_mount *mp = dp->i_mount; ++ ++ if (xfs_sb_version_hascrc(&mp->m_sb)) { ++ struct xfs_dir3_data_hdr *hdr3 = bp->b_addr; ++ ++ if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino) ++ return __this_address; ++ } ++ ++ return NULL; ++} + + int + xfs_dir3_data_read( +@@ -357,12 +373,24 @@ xfs_dir3_data_read( + xfs_daddr_t mapped_bno, + struct xfs_buf **bpp) + { ++ xfs_failaddr_t fa; + int err; + + err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp, + XFS_DATA_FORK, &xfs_dir3_data_buf_ops); +- if (!err && tp && *bpp) +- xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); ++ if (err || !*bpp) ++ return err; ++ ++ /* Check things that we can't do in the verifier. 
*/ ++ fa = xfs_dir3_data_header_check(dp, *bpp); ++ if (fa) { ++ __xfs_buf_mark_corrupt(*bpp, fa); ++ xfs_trans_brelse(tp, *bpp); ++ *bpp = NULL; ++ return -EFSCORRUPTED; ++ } ++ ++ xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF); + return err; + } + diff --git a/queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch b/queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch new file mode 100644 index 00000000000..6064a9bcfec --- /dev/null +++ b/queue-5.4/xfs-don-t-write-a-corrupt-unmount-record-to-force-summary-counter-recalc.patch @@ -0,0 +1,80 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:38 +0530 +Subject: xfs: don't write a corrupt unmount record to force summary counter recalc +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-22-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 5cc3c006eb45524860c4d1dd4dd7ad4a506bf3f5 upstream. + +[ Modify fs/xfs/xfs_log.c to include the changes at locations suitable for + 5.4-lts kernel ] + +In commit f467cad95f5e3, I added the ability to force a recalculation of +the filesystem summary counters if they seemed incorrect. This was done +(not entirely correctly) by tweaking the log code to write an unmount +record without the UMOUNT_TRANS flag set. At next mount, the log +recovery code will fail to find the unmount record and go into recovery, +which triggers the recalculation. + +What actually gets written to the log is what ought to be an unmount +record, but without any flags set to indicate what kind of record it +actually is. This worked to trigger the recalculation, but we shouldn't +write bogus log records when we could simply write nothing. 
+ +Fixes: f467cad95f5e3 ("xfs: force summary counter recalc at next mount") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -837,19 +837,6 @@ xfs_log_write_unmount_record( + if (error) + goto out_err; + +- /* +- * If we think the summary counters are bad, clear the unmount header +- * flag in the unmount record so that the summary counters will be +- * recalculated during log recovery at next mount. Refer to +- * xlog_check_unmount_rec for more details. +- */ +- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp, +- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { +- xfs_alert(mp, "%s: will fix summary counters at next mount", +- __func__); +- flags &= ~XLOG_UNMOUNT_TRANS; +- } +- + /* remove inited flag, and account for space used */ + tic->t_flags = 0; + tic->t_curr_res -= sizeof(magic); +@@ -932,6 +919,19 @@ xfs_log_unmount_write(xfs_mount_t *mp) + } while (iclog != first_iclog); + #endif + if (! (XLOG_FORCED_SHUTDOWN(log))) { ++ /* ++ * If we think the summary counters are bad, avoid writing the ++ * unmount record to force log recovery at next mount, after ++ * which the summary counters will be recalculated. Refer to ++ * xlog_check_unmount_rec for more details. 
++ */ ++ if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), ++ mp, XFS_ERRTAG_FORCE_SUMMARY_RECALC)) { ++ xfs_alert(mp, ++ "%s: will fix summary counters at next mount", ++ __func__); ++ return 0; ++ } + xfs_log_write_unmount_record(mp); + } else { + /* diff --git a/queue-5.4/xfs-factor-common-ail-item-deletion-code.patch b/queue-5.4/xfs-factor-common-ail-item-deletion-code.patch new file mode 100644 index 00000000000..9ff67aa6051 --- /dev/null +++ b/queue-5.4/xfs-factor-common-ail-item-deletion-code.patch @@ -0,0 +1,147 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:36 +0530 +Subject: xfs: factor common AIL item deletion code +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-20-chandan.babu@oracle.com> + +From: Dave Chinner + +commit 4165994ac9672d91134675caa6de3645a9ace6c8 upstream. + +Factor the common AIL deletion code that does all the wakeups into a +helper so we only have one copy of this somewhat tricky code to +interface with all the wakeups necessary when the LSN of the log +tail changes. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Allison Collins +Reviewed-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_inode_item.c | 12 +----------- + fs/xfs/xfs_trans_ail.c | 48 ++++++++++++++++++++++++++---------------------- + fs/xfs/xfs_trans_priv.h | 4 +++- + 3 files changed, 30 insertions(+), 34 deletions(-) + +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -744,17 +744,7 @@ xfs_iflush_done( + xfs_clear_li_failed(blip); + } + } +- +- if (mlip_changed) { +- if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) +- xlog_assign_tail_lsn_locked(ailp->ail_mount); +- if (list_empty(&ailp->ail_head)) +- wake_up_all(&ailp->ail_empty); +- } +- spin_unlock(&ailp->ail_lock); +- +- if (mlip_changed) +- xfs_log_space_wake(ailp->ail_mount); ++ xfs_ail_update_finish(ailp, mlip_changed); + } + + /* +--- a/fs/xfs/xfs_trans_ail.c ++++ b/fs/xfs/xfs_trans_ail.c +@@ -680,6 +680,27 @@ xfs_ail_push_all_sync( + finish_wait(&ailp->ail_empty, &wait); + } + ++void ++xfs_ail_update_finish( ++ struct xfs_ail *ailp, ++ bool do_tail_update) __releases(ailp->ail_lock) ++{ ++ struct xfs_mount *mp = ailp->ail_mount; ++ ++ if (!do_tail_update) { ++ spin_unlock(&ailp->ail_lock); ++ return; ++ } ++ ++ if (!XFS_FORCED_SHUTDOWN(mp)) ++ xlog_assign_tail_lsn_locked(mp); ++ ++ if (list_empty(&ailp->ail_head)) ++ wake_up_all(&ailp->ail_empty); ++ spin_unlock(&ailp->ail_lock); ++ xfs_log_space_wake(mp); ++} ++ + /* + * xfs_trans_ail_update - bulk AIL insertion operation. 
+ * +@@ -739,15 +760,7 @@ xfs_trans_ail_update_bulk( + if (!list_empty(&tmp)) + xfs_ail_splice(ailp, cur, &tmp, lsn); + +- if (mlip_changed) { +- if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount)) +- xlog_assign_tail_lsn_locked(ailp->ail_mount); +- spin_unlock(&ailp->ail_lock); +- +- xfs_log_space_wake(ailp->ail_mount); +- } else { +- spin_unlock(&ailp->ail_lock); +- } ++ xfs_ail_update_finish(ailp, mlip_changed); + } + + bool +@@ -791,10 +804,10 @@ void + xfs_trans_ail_delete( + struct xfs_ail *ailp, + struct xfs_log_item *lip, +- int shutdown_type) __releases(ailp->ail_lock) ++ int shutdown_type) + { + struct xfs_mount *mp = ailp->ail_mount; +- bool mlip_changed; ++ bool need_update; + + if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { + spin_unlock(&ailp->ail_lock); +@@ -807,17 +820,8 @@ xfs_trans_ail_delete( + return; + } + +- mlip_changed = xfs_ail_delete_one(ailp, lip); +- if (mlip_changed) { +- if (!XFS_FORCED_SHUTDOWN(mp)) +- xlog_assign_tail_lsn_locked(mp); +- if (list_empty(&ailp->ail_head)) +- wake_up_all(&ailp->ail_empty); +- } +- +- spin_unlock(&ailp->ail_lock); +- if (mlip_changed) +- xfs_log_space_wake(ailp->ail_mount); ++ need_update = xfs_ail_delete_one(ailp, lip); ++ xfs_ail_update_finish(ailp, need_update); + } + + int +--- a/fs/xfs/xfs_trans_priv.h ++++ b/fs/xfs/xfs_trans_priv.h +@@ -92,8 +92,10 @@ xfs_trans_ail_update( + } + + bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); ++void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update) ++ __releases(ailp->ail_lock); + void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, +- int shutdown_type) __releases(ailp->ail_lock); ++ int shutdown_type); + + static inline void + xfs_trans_ail_remove( diff --git a/queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch b/queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch new file mode 100644 index 00000000000..1fdab108b75 --- /dev/null +++ 
b/queue-5.4/xfs-factor-out-a-new-xfs_log_force_inode-helper.patch @@ -0,0 +1,117 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:40 +0530 +Subject: xfs: factor out a new xfs_log_force_inode helper +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-24-chandan.babu@oracle.com> + +From: Christoph Hellwig + +commit 54fbdd1035e3a4e4f4082c335b095426cdefd092 upstream. + +Create a new helper to force the log up to the last LSN touching an +inode. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_export.c | 14 +------------- + fs/xfs/xfs_file.c | 12 +----------- + fs/xfs/xfs_inode.c | 19 +++++++++++++++++++ + fs/xfs/xfs_inode.h | 1 + + 4 files changed, 22 insertions(+), 24 deletions(-) + +--- a/fs/xfs/xfs_export.c ++++ b/fs/xfs/xfs_export.c +@@ -15,7 +15,6 @@ + #include "xfs_trans.h" + #include "xfs_inode_item.h" + #include "xfs_icache.h" +-#include "xfs_log.h" + #include "xfs_pnfs.h" + + /* +@@ -221,18 +220,7 @@ STATIC int + xfs_fs_nfs_commit_metadata( + struct inode *inode) + { +- struct xfs_inode *ip = XFS_I(inode); +- struct xfs_mount *mp = ip->i_mount; +- xfs_lsn_t lsn = 0; +- +- xfs_ilock(ip, XFS_ILOCK_SHARED); +- if (xfs_ipincount(ip)) +- lsn = ip->i_itemp->ili_last_lsn; +- xfs_iunlock(ip, XFS_ILOCK_SHARED); +- +- if (!lsn) +- return 0; +- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); ++ return xfs_log_force_inode(XFS_I(inode)); + } + + const struct export_operations xfs_export_operations = { +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -80,19 +80,9 @@ xfs_dir_fsync( + int datasync) + { + struct xfs_inode *ip = 
XFS_I(file->f_mapping->host); +- struct xfs_mount *mp = ip->i_mount; +- xfs_lsn_t lsn = 0; + + trace_xfs_dir_fsync(ip); +- +- xfs_ilock(ip, XFS_ILOCK_SHARED); +- if (xfs_ipincount(ip)) +- lsn = ip->i_itemp->ili_last_lsn; +- xfs_iunlock(ip, XFS_ILOCK_SHARED); +- +- if (!lsn) +- return 0; +- return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); ++ return xfs_log_force_inode(ip); + } + + STATIC int +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3973,3 +3973,22 @@ xfs_irele( + trace_xfs_irele(ip, _RET_IP_); + iput(VFS_I(ip)); + } ++ ++/* ++ * Ensure all committed transactions touching the inode are written to the log. ++ */ ++int ++xfs_log_force_inode( ++ struct xfs_inode *ip) ++{ ++ xfs_lsn_t lsn = 0; ++ ++ xfs_ilock(ip, XFS_ILOCK_SHARED); ++ if (xfs_ipincount(ip)) ++ lsn = ip->i_itemp->ili_last_lsn; ++ xfs_iunlock(ip, XFS_ILOCK_SHARED); ++ ++ if (!lsn) ++ return 0; ++ return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); ++} +--- a/fs/xfs/xfs_inode.h ++++ b/fs/xfs/xfs_inode.h +@@ -441,6 +441,7 @@ int xfs_itruncate_extents_flags(struct + struct xfs_inode *, int, xfs_fsize_t, int); + void xfs_iext_realloc(xfs_inode_t *, int, int); + ++int xfs_log_force_inode(struct xfs_inode *ip); + void xfs_iunpin_wait(xfs_inode_t *); + #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) + diff --git a/queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch b/queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch new file mode 100644 index 00000000000..c6aa200e7d1 --- /dev/null +++ b/queue-5.4/xfs-factor-out-quotaoff-intent-ail-removal-and-memory-free.patch @@ -0,0 +1,90 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:31 +0530 +Subject: xfs: factor out quotaoff intent AIL removal and memory free +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, 
chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-15-chandan.babu@oracle.com> + +From: Brian Foster + +commit 854f82b1f6039a418b7d1407513f8640e05fd73f upstream. + +AIL removal of the quotaoff start intent and free of both intents is +hardcoded to the ->iop_committed() handler of the end intent. Factor +out the start intent handling code so it can be used in a future +patch to properly handle quotaoff errors. Use xfs_trans_ail_remove() +instead of the _delete() variant to acquire the AIL lock and also +handle cases where an intent might not reside in the AIL at the +time of a failure. + +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_dquot_item.c | 29 ++++++++++++++++++++--------- + fs/xfs/xfs_dquot_item.h | 1 + + 2 files changed, 21 insertions(+), 9 deletions(-) + +--- a/fs/xfs/xfs_dquot_item.c ++++ b/fs/xfs/xfs_dquot_item.c +@@ -307,18 +307,10 @@ xfs_qm_qoffend_logitem_committed( + { + struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); + struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; +- struct xfs_ail *ailp = qfs->qql_item.li_ailp; + +- /* +- * Delete the qoff-start logitem from the AIL. +- * xfs_trans_ail_delete() drops the AIL lock. +- */ +- spin_lock(&ailp->ail_lock); +- xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); ++ xfs_qm_qoff_logitem_relse(qfs); + +- kmem_free(qfs->qql_item.li_lv_shadow); + kmem_free(lip->li_lv_shadow); +- kmem_free(qfs); + kmem_free(qfe); + return (xfs_lsn_t)-1; + } +@@ -337,6 +329,25 @@ static const struct xfs_item_ops xfs_qm_ + }; + + /* ++ * Delete the quotaoff intent from the AIL and free it. On success, ++ * this should only be called for the start item. It can be used for ++ * either on shutdown or abort. 
++ */ ++void ++xfs_qm_qoff_logitem_relse( ++ struct xfs_qoff_logitem *qoff) ++{ ++ struct xfs_log_item *lip = &qoff->qql_item; ++ ++ ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags) || ++ test_bit(XFS_LI_ABORTED, &lip->li_flags) || ++ XFS_FORCED_SHUTDOWN(lip->li_mountp)); ++ xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); ++ kmem_free(lip->li_lv_shadow); ++ kmem_free(qoff); ++} ++ ++/* + * Allocate and initialize an quotaoff item of the correct quota type(s). + */ + struct xfs_qoff_logitem * +--- a/fs/xfs/xfs_dquot_item.h ++++ b/fs/xfs/xfs_dquot_item.h +@@ -28,6 +28,7 @@ void xfs_qm_dquot_logitem_init(struct xf + struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags); ++void xfs_qm_qoff_logitem_relse(struct xfs_qoff_logitem *); + struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp, + struct xfs_qoff_logitem *startqoff, + uint flags); diff --git a/queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch b/queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch new file mode 100644 index 00000000000..009582b76f6 --- /dev/null +++ b/queue-5.4/xfs-fix-buffer-corruption-reporting-when-xfs_dir3_free_header_check-fails.patch @@ -0,0 +1,51 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:23 +0530 +Subject: xfs: fix buffer corruption reporting when xfs_dir3_free_header_check fails +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-7-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit ce99494c9699df58b31d0a839e957f86cd58c755 upstream. 
+ +xfs_verifier_error is supposed to be called on a corrupt metadata buffer +from within a buffer verifier function, whereas xfs_buf_mark_corrupt +is the function to be called when a piece of code has read a buffer and +catches something that a read verifier cannot. The first function sets +b_error anticipating that the low level buffer handling code will see +the nonzero b_error and clear XBF_DONE on the buffer, whereas the second +function does not. + +Since xfs_dir3_free_header_check examines fields in the dir free block +header that require more context than can be provided to read verifiers, +we must call xfs_buf_mark_corrupt when it finds a problem. + +Switching the calls has a secondary effect that we no longer corrupt the +buffer state by setting b_error and leaving XBF_DONE set. When /that/ +happens, we'll trip over various state assertions (most commonly the +b_error check in xfs_buf_reverify) on a subsequent attempt to read the +buffer. + +Fixes: bc1a09b8e334bf5f ("xfs: refactor verifier callers to print address of failing check") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2_node.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_dir2_node.c ++++ b/fs/xfs/libxfs/xfs_dir2_node.c +@@ -208,7 +208,7 @@ __xfs_dir3_free_read( + /* Check things that we can't do in the verifier. 
*/ + fa = xfs_dir3_free_header_check(dp, fbno, *bpp); + if (fa) { +- xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); ++ __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + return -EFSCORRUPTED; diff --git a/queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch b/queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch new file mode 100644 index 00000000000..ff016349c44 --- /dev/null +++ b/queue-5.4/xfs-fix-unmount-hang-and-memory-leak-on-shutdown-during-quotaoff.patch @@ -0,0 +1,143 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:32 +0530 +Subject: xfs: fix unmount hang and memory leak on shutdown during quotaoff +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-16-chandan.babu@oracle.com> + +From: Brian Foster + +commit 8a62714313391b9b2297d67c341b35edbf46c279 upstream. + +AIL removal of the quotaoff start intent and free of both quotaoff +intents is currently limited to the ->iop_committed() handler of the +end intent. This executes when the end intent is committed to the +on-disk log and marks the completion of the operation. The problem +with this is it assumes the success of the operation. If a shutdown +or other error occurs during the quotaoff, it's possible for the +quotaoff task to exit without removing the start intent from the +AIL. This results in an unmount hang as the AIL cannot be emptied. +Further, no other codepath frees the intents and so this is also a +memory leak vector. + +First, update the high level quotaoff error path to directly remove +and free the quotaoff start intent if it still exists in the AIL at +the time of the error. 
Next, update both of the start and end +quotaoff intents with an ->iop_release() callback to properly handle +transaction abort. + +This means that if the quotaoff start transaction aborts, it frees +the start intent in the transaction commit path. If the filesystem +shuts down before the end transaction allocates, the quotaoff +sequence removes and frees the start intent. If the end transaction +aborts, it removes the start intent and frees both. This ensures +that a shutdown does not result in a hung unmount and that memory is +not leaked regardless of when a quotaoff error occurs. + +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_dquot_item.c | 15 +++++++++++++++ + fs/xfs/xfs_qm_syscalls.c | 13 +++++++------ + 2 files changed, 22 insertions(+), 6 deletions(-) + +--- a/fs/xfs/xfs_dquot_item.c ++++ b/fs/xfs/xfs_dquot_item.c +@@ -315,17 +315,32 @@ xfs_qm_qoffend_logitem_committed( + return (xfs_lsn_t)-1; + } + ++STATIC void ++xfs_qm_qoff_logitem_release( ++ struct xfs_log_item *lip) ++{ ++ struct xfs_qoff_logitem *qoff = QOFF_ITEM(lip); ++ ++ if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { ++ if (qoff->qql_start_lip) ++ xfs_qm_qoff_logitem_relse(qoff->qql_start_lip); ++ xfs_qm_qoff_logitem_relse(qoff); ++ } ++} ++ + static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_committed = xfs_qm_qoffend_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, ++ .iop_release = xfs_qm_qoff_logitem_release, + }; + + static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_push = xfs_qm_qoff_logitem_push, ++ .iop_release = xfs_qm_qoff_logitem_release, + }; + + /* +--- a/fs/xfs/xfs_qm_syscalls.c ++++
b/fs/xfs/xfs_qm_syscalls.c +@@ -29,8 +29,6 @@ xfs_qm_log_quotaoff( + int error; + struct xfs_qoff_logitem *qoffi; + +- *qoffstartp = NULL; +- + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); + if (error) + goto out; +@@ -62,7 +60,7 @@ out: + STATIC int + xfs_qm_log_quotaoff_end( + struct xfs_mount *mp, +- struct xfs_qoff_logitem *startqoff, ++ struct xfs_qoff_logitem **startqoff, + uint flags) + { + struct xfs_trans *tp; +@@ -73,9 +71,10 @@ xfs_qm_log_quotaoff_end( + if (error) + return error; + +- qoffi = xfs_trans_get_qoff_item(tp, startqoff, ++ qoffi = xfs_trans_get_qoff_item(tp, *startqoff, + flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); ++ *startqoff = NULL; + + /* + * We have to make sure that the transaction is secure on disk before we +@@ -103,7 +102,7 @@ xfs_qm_scall_quotaoff( + uint dqtype; + int error; + uint inactivate_flags; +- struct xfs_qoff_logitem *qoffstart; ++ struct xfs_qoff_logitem *qoffstart = NULL; + + /* + * No file system can have quotas enabled on disk but not in core. +@@ -228,7 +227,7 @@ xfs_qm_scall_quotaoff( + * So, we have QUOTAOFF start and end logitems; the start + * logitem won't get overwritten until the end logitem appears... + */ +- error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); ++ error = xfs_qm_log_quotaoff_end(mp, &qoffstart, flags); + if (error) { + /* We're screwed now. Shutdown is the only option. 
*/ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +@@ -261,6 +260,8 @@ xfs_qm_scall_quotaoff( + } + + out_unlock: ++ if (error && qoffstart) ++ xfs_qm_qoff_logitem_relse(qoffstart); + mutex_unlock(&q->qi_quotaofflock); + return error; + } diff --git a/queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch b/queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch new file mode 100644 index 00000000000..85aecc7ea21 --- /dev/null +++ b/queue-5.4/xfs-fix-use-after-free-on-cil-context-on-shutdown.patch @@ -0,0 +1,122 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:43 +0530 +Subject: xfs: fix use-after-free on CIL context on shutdown +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-27-chandan.babu@oracle.com> + +From: Dave Chinner + +commit c7f87f3984cfa1e6d32806a715f35c5947ad9c09 upstream. + +xlog_wait() on the CIL context can reference a freed context if the +waiter doesn't get scheduled before the CIL context is freed. This +can happen when a task is on the hard throttle and the CIL push +aborts due to a shutdown. This was detected by generic/019: + +thread 1 thread 2 + +__xfs_trans_commit + xfs_log_commit_cil + + xlog_wait + schedule + xlog_cil_push_work + wake_up_all + + xlog_cil_committed + kmem_free + + remove_wait_queue + spin_lock_irqsave --> UAF + +Fix it by moving the wait queue to the CIL rather than keeping it in +the CIL context that gets freed on push completion. Because the +wait queue is now independent of the CIL context and we might have +multiple contexts in flight at once, only wake the waiters on the +push throttle when the context we are pushing is over the hard +throttle size threshold.
+ +Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push") +Reported-by: Yu Kuai +Signed-off-by: Dave Chinner +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_cil.c | 10 +++++----- + fs/xfs/xfs_log_priv.h | 2 +- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -673,7 +673,8 @@ xlog_cil_push( + /* + * Wake up any background push waiters now this context is being pushed. + */ +- wake_up_all(&ctx->push_wait); ++ if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) ++ wake_up_all(&cil->xc_push_wait); + + /* + * Check if we've anything to push. If there is nothing, then we don't +@@ -745,13 +746,12 @@ xlog_cil_push( + + /* + * initialise the new context and attach it to the CIL. Then attach +- * the current context to the CIL committing lsit so it can be found ++ * the current context to the CIL committing list so it can be found + * during log forces to extract the commit lsn of the sequence that + * needs to be forced. 
+ */ + INIT_LIST_HEAD(&new_ctx->committing); + INIT_LIST_HEAD(&new_ctx->busy_extents); +- init_waitqueue_head(&new_ctx->push_wait); + new_ctx->sequence = ctx->sequence + 1; + new_ctx->cil = cil; + cil->xc_ctx = new_ctx; +@@ -946,7 +946,7 @@ xlog_cil_push_background( + if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { + trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); + ASSERT(cil->xc_ctx->space_used < log->l_logsize); +- xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); ++ xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); + return; + } + +@@ -1222,12 +1222,12 @@ xlog_cil_init( + INIT_LIST_HEAD(&cil->xc_committing); + spin_lock_init(&cil->xc_cil_lock); + spin_lock_init(&cil->xc_push_lock); ++ init_waitqueue_head(&cil->xc_push_wait); + init_rwsem(&cil->xc_ctx_lock); + init_waitqueue_head(&cil->xc_commit_wait); + + INIT_LIST_HEAD(&ctx->committing); + INIT_LIST_HEAD(&ctx->busy_extents); +- init_waitqueue_head(&ctx->push_wait); + ctx->sequence = 1; + ctx->cil = cil; + cil->xc_ctx = ctx; +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -247,7 +247,6 @@ struct xfs_cil_ctx { + struct xfs_log_vec *lv_chain; /* logvecs being pushed */ + struct list_head iclog_entry; + struct list_head committing; /* ctx committing list */ +- wait_queue_head_t push_wait; /* background push throttle */ + struct work_struct discard_endio_work; + }; + +@@ -281,6 +280,7 @@ struct xfs_cil { + wait_queue_head_t xc_commit_wait; + xfs_lsn_t xc_current_sequence; + struct work_struct xc_push_work; ++ wait_queue_head_t xc_push_wait; /* background push throttle */ + } ____cacheline_aligned_in_smp; + + /* diff --git a/queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch b/queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch new file mode 100644 index 00000000000..aff62334c19 --- /dev/null +++ b/queue-5.4/xfs-lower-cil-flush-limit-for-large-logs.patch @@ -0,0 +1,100 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 
2022 11:58:34 +0530 +Subject: xfs: Lower CIL flush limit for large logs +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-18-chandan.babu@oracle.com> + +From: Dave Chinner + +commit 108a42358a05312b2128533c6462a3fdeb410bdf upstream. + +The current CIL size aggregation limit is 1/8th the log size. This +means for large logs we might be aggregating at least 250MB of dirty objects +in memory before the CIL is flushed to the journal. With CIL shadow +buffers sitting around, this means the CIL is often consuming >500MB +of temporary memory that is all allocated under GFP_NOFS conditions. + +Flushing the CIL can take some time to do if there is other IO +ongoing, and can introduce substantial log force latency by itself. +It also pins the memory until the objects are in the AIL and can be +written back and reclaimed by shrinkers. Hence this threshold also +tends to determine the minimum amount of memory XFS can operate in +under heavy modification without triggering the OOM killer. + +Modify the CIL space limit to prevent such huge amounts of pinned +metadata from aggregating. We can have 2MB of log IO in flight at +once, so limit aggregation to 16x this size. This threshold was +chosen as it has little impact on performance (on 16-way fsmark) or log +traffic but pins a lot less memory on large logs especially under +heavy memory pressure. An aggregation limit of 8x had 5-10% +performance degradation and a 50% increase in log throughput for +the same workload, so clearly that was too small for highly +concurrent workloads on large logs. + +This was found via trace analysis of AIL behaviour. e.g.
insertion +from a single CIL flush: + +xfs_ail_insert: old lsn 0/0 new lsn 1/3033090 type XFS_LI_INODE flags IN_AIL + +$ grep xfs_ail_insert /mnt/scratch/s.t |grep "new lsn 1/3033090" |wc -l +1721823 +$ + +So there were 1.7 million objects inserted into the AIL from this +CIL checkpoint, the first at 2323.392108, the last at 2325.667566 which +was the end of the trace (i.e. it hadn't finished). Clearly a major +problem. + +Signed-off-by: Dave Chinner +Reviewed-by: Brian Foster +Reviewed-by: Allison Collins +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_priv.h | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -323,13 +323,30 @@ struct xfs_cil { + * tries to keep 25% of the log free, so we need to keep below that limit or we + * risk running out of free log space to start any new transactions. + * +- * In order to keep background CIL push efficient, we will set a lower +- * threshold at which background pushing is attempted without blocking current +- * transaction commits. A separate, higher bound defines when CIL pushes are +- * enforced to ensure we stay within our maximum checkpoint size bounds. +- * threshold, yet give us plenty of space for aggregation on large logs. ++ * In order to keep background CIL push efficient, we only need to ensure the ++ * CIL is large enough to maintain sufficient in-memory relogging to avoid ++ * repeated physical writes of frequently modified metadata. If we allow the CIL ++ * to grow to a substantial fraction of the log, then we may be pinning hundreds ++ * of megabytes of metadata in memory until the CIL flushes. This can cause ++ * issues when we are running low on memory - pinned memory cannot be reclaimed, ++ * and the CIL consumes a lot of memory. 
Hence we need to set an upper physical ++ * size limit for the CIL that limits the maximum amount of memory pinned by the ++ * CIL but does not limit performance by reducing relogging efficiency ++ * significantly. ++ * ++ * As such, the CIL push threshold ends up being the smaller of two thresholds: ++ * - a threshold large enough that it allows CIL to be pushed and progress to be ++ * made without excessive blocking of incoming transaction commits. This is ++ * defined to be 12.5% of the log space - half the 25% push threshold of the ++ * AIL. ++ * - small enough that it doesn't pin excessive amounts of memory but maintains ++ * close to peak relogging efficiency. This is defined to be 16x the iclog ++ * buffer window (32MB) as measurements have shown this to be roughly the ++ * point of diminishing performance increases under highly concurrent ++ * modification workloads. + */ +-#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) ++#define XLOG_CIL_SPACE_LIMIT(log) \ ++ min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) + + /* + * ticket grant locks, queues and accounting have their own cachlines diff --git a/queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch b/queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch new file mode 100644 index 00000000000..1f2d70868e3 --- /dev/null +++ b/queue-5.4/xfs-move-inode-flush-to-the-sync-workqueue.patch @@ -0,0 +1,104 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:42 +0530 +Subject: xfs: move inode flush to the sync workqueue +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-26-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit f0f7a674d4df1510d8ca050a669e1420cf7d7fab upstream. 
+ +[ Modify fs/xfs/xfs_super.c to include the changes at locations suitable for + 5.4-lts kernel ] + +Move the inode dirty data flushing to a workqueue so that multiple +threads can take advantage of a single thread's flushing work. The +ratelimiting technique used in bdd4ee4 was not successful, because +threads that skipped the inode flush scan due to ratelimiting would +ENOSPC early, which caused occasional (but noticeable) changes in +behavior and sporadic fstest regressions. + +Therefore, make all the writer threads wait on a single inode flush, +which eliminates both the stampeding hordes of flushers and the small +window in which a write could fail with ENOSPC because it lost the +ratelimit race even after another thread freed space. + +Fixes: c6425702f21e ("xfs: ratelimit inode flush on buffered write ENOSPC") +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_mount.h | 5 +++++ + fs/xfs/xfs_super.c | 28 +++++++++++++++++++++++----- + 2 files changed, 28 insertions(+), 5 deletions(-) + +--- a/fs/xfs/xfs_mount.h ++++ b/fs/xfs/xfs_mount.h +@@ -179,6 +179,11 @@ typedef struct xfs_mount { + struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; + struct xstats m_stats; /* per-fs stats */ + ++ /* ++ * Workqueue item so that we can coalesce multiple inode flush attempts ++ * into a single flush.
++ */ ++ struct work_struct m_flush_inodes_work; + struct workqueue_struct *m_buf_workqueue; + struct workqueue_struct *m_unwritten_workqueue; + struct workqueue_struct *m_cil_workqueue; +--- a/fs/xfs/xfs_super.c ++++ b/fs/xfs/xfs_super.c +@@ -840,6 +840,20 @@ xfs_destroy_mount_workqueues( + destroy_workqueue(mp->m_buf_workqueue); + } + ++static void ++xfs_flush_inodes_worker( ++ struct work_struct *work) ++{ ++ struct xfs_mount *mp = container_of(work, struct xfs_mount, ++ m_flush_inodes_work); ++ struct super_block *sb = mp->m_super; ++ ++ if (down_read_trylock(&sb->s_umount)) { ++ sync_inodes_sb(sb); ++ up_read(&sb->s_umount); ++ } ++} ++ + /* + * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK + * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting +@@ -850,12 +864,15 @@ void + xfs_flush_inodes( + struct xfs_mount *mp) + { +- struct super_block *sb = mp->m_super; ++ /* ++ * If flush_work() returns true then that means we waited for a flush ++ * which was already in progress. Don't bother running another scan. 
++ */ ++ if (flush_work(&mp->m_flush_inodes_work)) ++ return; + +- if (down_read_trylock(&sb->s_umount)) { +- sync_inodes_sb(sb); +- up_read(&sb->s_umount); +- } ++ queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); ++ flush_work(&mp->m_flush_inodes_work); + } + + /* Catch misguided souls that try to use this interface on XFS */ +@@ -1532,6 +1549,7 @@ xfs_mount_alloc( + spin_lock_init(&mp->m_perag_lock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); ++ INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); + INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); diff --git a/queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch b/queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch new file mode 100644 index 00000000000..b3de7317e0a --- /dev/null +++ b/queue-5.4/xfs-open-code-insert-range-extent-split-helper.patch @@ -0,0 +1,116 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:18 +0530 +Subject: xfs: open code insert range extent split helper +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-2-chandan.babu@oracle.com> + +From: Brian Foster + +commit b73df17e4c5ba977205253fb7ef54267717a3cba upstream. + +The insert range operation currently splits the extent at the target +offset in a separate transaction and lock cycle from the one that +shifts extents. In preparation for reworking insert range into an +atomic operation, lift the code into the caller so it can be easily +condensed to a single rolling transaction and lock cycle and +eliminate the helper. No functional changes. 
+ +Signed-off-by: Brian Foster +Reviewed-by: Allison Collins +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 32 ++------------------------------ + fs/xfs/libxfs/xfs_bmap.h | 3 ++- + fs/xfs/xfs_bmap_util.c | 14 +++++++++++++- + 3 files changed, 17 insertions(+), 32 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -5925,8 +5925,8 @@ del_cursor: + * @split_fsb is a block where the extents is split. If split_fsb lies in a + * hole or the first block of extents, just return 0. + */ +-STATIC int +-xfs_bmap_split_extent_at( ++int ++xfs_bmap_split_extent( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_fileoff_t split_fsb) +@@ -6037,34 +6037,6 @@ del_cursor: + return error; + } + +-int +-xfs_bmap_split_extent( +- struct xfs_inode *ip, +- xfs_fileoff_t split_fsb) +-{ +- struct xfs_mount *mp = ip->i_mount; +- struct xfs_trans *tp; +- int error; +- +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, +- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); +- if (error) +- return error; +- +- xfs_ilock(ip, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +- +- error = xfs_bmap_split_extent_at(tp, ip, split_fsb); +- if (error) +- goto out; +- +- return xfs_trans_commit(tp); +- +-out: +- xfs_trans_cancel(tp); +- return error; +-} +- + /* Deferred mapping is only for real extents in the data fork. 
*/ + static bool + xfs_bmap_is_update_needed( +--- a/fs/xfs/libxfs/xfs_bmap.h ++++ b/fs/xfs/libxfs/xfs_bmap.h +@@ -222,7 +222,8 @@ int xfs_bmap_can_insert_extents(struct x + int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, + bool *done, xfs_fileoff_t stop_fsb); +-int xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset); ++int xfs_bmap_split_extent(struct xfs_trans *tp, struct xfs_inode *ip, ++ xfs_fileoff_t split_offset); + int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork, + xfs_fileoff_t off, xfs_filblks_t len, xfs_filblks_t prealloc, + struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur, +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1326,7 +1326,19 @@ xfs_insert_file_space( + * is not the starting block of extent, we need to split the extent at + * stop_fsb. + */ +- error = xfs_bmap_split_extent(ip, stop_fsb); ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, ++ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); ++ if (error) ++ return error; ++ ++ xfs_ilock(ip, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ ++ error = xfs_bmap_split_extent(tp, ip, stop_fsb); ++ if (error) ++ goto out_trans_cancel; ++ ++ error = xfs_trans_commit(tp); + if (error) + return error; + diff --git a/queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch b/queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch new file mode 100644 index 00000000000..8798d5a706a --- /dev/null +++ b/queue-5.4/xfs-preserve-default-grace-interval-during-quotacheck.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:33 +0530 +Subject: xfs: preserve default grace interval during quotacheck +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, 
amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-17-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 5885539f0af371024d07afd14974bfdc3fff84c5 upstream. + +When quotacheck runs, it zeroes all the timer fields in every dquot. +Unfortunately, it also does this to the root dquot, which erases any +preconfigured grace intervals and warning limits that the administrator +may have set. Worse yet, the incore copies of those variables remain +set. This cache coherence problem manifests itself as the grace +interval mysteriously being reset back to the defaults at the /next/ +mount. + +Fix it by not resetting the root disk dquot's timer and warning fields. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_qm.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -875,12 +875,20 @@ xfs_qm_reset_dqcounts( + ddq->d_bcount = 0; + ddq->d_icount = 0; + ddq->d_rtbcount = 0; +- ddq->d_btimer = 0; +- ddq->d_itimer = 0; +- ddq->d_rtbtimer = 0; +- ddq->d_bwarns = 0; +- ddq->d_iwarns = 0; +- ddq->d_rtbwarns = 0; ++ ++ /* ++ * dquot id 0 stores the default grace period and the maximum ++ * warning limit that were set by the administrator, so we ++ * should not reset them. 
++ */ ++ if (ddq->d_id != 0) { ++ ddq->d_btimer = 0; ++ ddq->d_itimer = 0; ++ ddq->d_rtbtimer = 0; ++ ddq->d_bwarns = 0; ++ ddq->d_iwarns = 0; ++ ddq->d_rtbwarns = 0; ++ } + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_update_cksum((char *)&dqb[j], diff --git a/queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch b/queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch new file mode 100644 index 00000000000..f602de3f950 --- /dev/null +++ b/queue-5.4/xfs-reflink-should-force-the-log-out-if-mounted-with-wsync.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:41 +0530 +Subject: xfs: reflink should force the log out if mounted with wsync +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-25-chandan.babu@oracle.com> + +From: Christoph Hellwig + +commit 5833112df7e9a306af9af09c60127b92ed723962 upstream. + +Reflink should force the log out to disk if the filesystem was mounted +with wsync, the same as most other operations in xfs. + +[Note: XFS_MOUNT_WSYNC is set when the admin mounts the filesystem +with either the 'wsync' or 'sync' mount options, which effectively means +that we're classifying reflink/dedupe as IO operations and making them +synchronous when required.] + +Fixes: 3fc9f5e409319 ("xfs: remove xfs_reflink_remap_range") +Signed-off-by: Christoph Hellwig +Reviewed-by: Brian Foster +[darrick: add more to the changelog] +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_file.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -1044,7 +1044,11 @@ xfs_file_remap_range( + + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, + remap_flags); ++ if (ret) ++ goto out_unlock; + ++ if (mp->m_flags & XFS_MOUNT_WSYNC) ++ xfs_log_force_inode(dest); + out_unlock: + xfs_reflink_remap_unlock(file_in, file_out); + if (ret) diff --git a/queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch b/queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch new file mode 100644 index 00000000000..153d5412c25 --- /dev/null +++ b/queue-5.4/xfs-remove-the-xfs_disk_dquot_t-and-xfs_dquot_t.patch @@ -0,0 +1,489 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:27 +0530 +Subject: xfs: remove the xfs_disk_dquot_t and xfs_dquot_t +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-11-chandan.babu@oracle.com> + +From: Pavel Reichl + +commit aefe69a45d84901c702f87672ec1e93de1d03f73 upstream. + +Signed-off-by: Pavel Reichl +Reviewed-by: Darrick J. Wong +[darrick: fix some of the comments] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dquot_buf.c | 8 +-- + fs/xfs/libxfs/xfs_format.h | 10 ++-- + fs/xfs/libxfs/xfs_trans_resv.c | 2 + fs/xfs/xfs_dquot.c | 18 +++---- + fs/xfs/xfs_dquot.h | 98 ++++++++++++++++++++--------------------- + fs/xfs/xfs_log_recover.c | 5 +- + fs/xfs/xfs_qm.c | 30 ++++++------ + fs/xfs/xfs_qm_bhv.c | 6 +- + fs/xfs/xfs_trans_dquot.c | 44 +++++++++--------- + 9 files changed, 112 insertions(+), 109 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dquot_buf.c ++++ b/fs/xfs/libxfs/xfs_dquot_buf.c +@@ -35,10 +35,10 @@ xfs_calc_dquots_per_chunk( + + xfs_failaddr_t + xfs_dquot_verify( +- struct xfs_mount *mp, +- xfs_disk_dquot_t *ddq, +- xfs_dqid_t id, +- uint type) /* used only during quotacheck */ ++ struct xfs_mount *mp, ++ struct xfs_disk_dquot *ddq, ++ xfs_dqid_t id, ++ uint type) /* used only during quotacheck */ + { + /* + * We can encounter an uninitialized dquot buffer for 2 reasons: +--- a/fs/xfs/libxfs/xfs_format.h ++++ b/fs/xfs/libxfs/xfs_format.h +@@ -1144,11 +1144,11 @@ static inline void xfs_dinode_put_rdev(s + + /* + * This is the main portion of the on-disk representation of quota +- * information for a user. This is the q_core of the xfs_dquot_t that ++ * information for a user. This is the q_core of the struct xfs_dquot that + * is kept in kernel memory. We pad this with some more expansion room + * to construct the on disk structure. + */ +-typedef struct xfs_disk_dquot { ++struct xfs_disk_dquot { + __be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */ + __u8 d_version; /* dquot version */ + __u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */ +@@ -1171,15 +1171,15 @@ typedef struct xfs_disk_dquot { + __be32 d_rtbtimer; /* similar to above; for RT disk blocks */ + __be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */ + __be16 d_pad; +-} xfs_disk_dquot_t; ++}; + + /* + * This is what goes on disk. 
This is separated from the xfs_disk_dquot because + * carrying the unnecessary padding would be a waste of memory. + */ + typedef struct xfs_dqblk { +- xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */ +- char dd_fill[4]; /* filling for posterity */ ++ struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */ ++ char dd_fill[4];/* filling for posterity */ + + /* + * These two are only present on filesystems with the CRC bits set. +--- a/fs/xfs/libxfs/xfs_trans_resv.c ++++ b/fs/xfs/libxfs/xfs_trans_resv.c +@@ -776,7 +776,7 @@ xfs_calc_clear_agi_bucket_reservation( + + /* + * Adjusting quota limits. +- * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) ++ * the disk quota buffer: sizeof(struct xfs_disk_dquot) + */ + STATIC uint + xfs_calc_qm_setqlim_reservation(void) +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -48,7 +48,7 @@ static struct lock_class_key xfs_dquot_p + */ + void + xfs_qm_dqdestroy( +- xfs_dquot_t *dqp) ++ struct xfs_dquot *dqp) + { + ASSERT(list_empty(&dqp->q_lru)); + +@@ -113,8 +113,8 @@ xfs_qm_adjust_dqlimits( + */ + void + xfs_qm_adjust_dqtimers( +- xfs_mount_t *mp, +- xfs_disk_dquot_t *d) ++ struct xfs_mount *mp, ++ struct xfs_disk_dquot *d) + { + ASSERT(d->d_id); + +@@ -497,7 +497,7 @@ xfs_dquot_from_disk( + struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; + + /* copy everything from disk dquot to the incore dquot */ +- memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); ++ memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot)); + + /* + * Reservation counters are defined as reservation plus current usage +@@ -989,7 +989,7 @@ xfs_qm_dqput( + */ + void + xfs_qm_dqrele( +- xfs_dquot_t *dqp) ++ struct xfs_dquot *dqp) + { + if (!dqp) + return; +@@ -1019,7 +1019,7 @@ xfs_qm_dqflush_done( + struct xfs_log_item *lip) + { + xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; +- xfs_dquot_t *dqp = qip->qli_dquot; ++ struct xfs_dquot *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; 
+ + /* +@@ -1129,7 +1129,7 @@ xfs_qm_dqflush( + } + + /* This is the only portion of data that needs to persist */ +- memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); ++ memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot)); + + /* + * Clear the dirty field and remember the flush lsn for later use. +@@ -1187,8 +1187,8 @@ out_unlock: + */ + void + xfs_dqlock2( +- xfs_dquot_t *d1, +- xfs_dquot_t *d2) ++ struct xfs_dquot *d1, ++ struct xfs_dquot *d2) + { + if (d1 && d2) { + ASSERT(d1 != d2); +--- a/fs/xfs/xfs_dquot.h ++++ b/fs/xfs/xfs_dquot.h +@@ -30,33 +30,36 @@ enum { + /* + * The incore dquot structure + */ +-typedef struct xfs_dquot { +- uint dq_flags; /* various flags (XFS_DQ_*) */ +- struct list_head q_lru; /* global free list of dquots */ +- struct xfs_mount*q_mount; /* filesystem this relates to */ +- uint q_nrefs; /* # active refs from inodes */ +- xfs_daddr_t q_blkno; /* blkno of dquot buffer */ +- int q_bufoffset; /* off of dq in buffer (# dquots) */ +- xfs_fileoff_t q_fileoffset; /* offset in quotas file */ +- +- xfs_disk_dquot_t q_core; /* actual usage & quotas */ +- xfs_dq_logitem_t q_logitem; /* dquot log item */ +- xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ +- xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ +- xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ +- xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */ +- xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */ +- int64_t q_low_space[XFS_QLOWSP_MAX]; +- struct mutex q_qlock; /* quota lock */ +- struct completion q_flush; /* flush completion queue */ +- atomic_t q_pincount; /* dquot pin count */ +- wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ +-} xfs_dquot_t; ++struct xfs_dquot { ++ uint dq_flags; ++ struct list_head q_lru; ++ struct xfs_mount *q_mount; ++ uint q_nrefs; ++ xfs_daddr_t q_blkno; ++ int q_bufoffset; ++ xfs_fileoff_t q_fileoffset; ++ ++ struct xfs_disk_dquot q_core; ++ xfs_dq_logitem_t q_logitem; 
++ /* total regular nblks used+reserved */ ++ xfs_qcnt_t q_res_bcount; ++ /* total inos allocd+reserved */ ++ xfs_qcnt_t q_res_icount; ++ /* total realtime blks used+reserved */ ++ xfs_qcnt_t q_res_rtbcount; ++ xfs_qcnt_t q_prealloc_lo_wmark; ++ xfs_qcnt_t q_prealloc_hi_wmark; ++ int64_t q_low_space[XFS_QLOWSP_MAX]; ++ struct mutex q_qlock; ++ struct completion q_flush; ++ atomic_t q_pincount; ++ struct wait_queue_head q_pinwait; ++}; + + /* + * Lock hierarchy for q_qlock: + * XFS_QLOCK_NORMAL is the implicit default, +- * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 ++ * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 + */ + enum { + XFS_QLOCK_NORMAL = 0, +@@ -64,21 +67,21 @@ enum { + }; + + /* +- * Manage the q_flush completion queue embedded in the dquot. This completion ++ * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. + */ +-static inline void xfs_dqflock(xfs_dquot_t *dqp) ++static inline void xfs_dqflock(struct xfs_dquot *dqp) + { + wait_for_completion(&dqp->q_flush); + } + +-static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp) ++static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp) + { + return try_wait_for_completion(&dqp->q_flush); + } + +-static inline void xfs_dqfunlock(xfs_dquot_t *dqp) ++static inline void xfs_dqfunlock(struct xfs_dquot *dqp) + { + complete(&dqp->q_flush); + } +@@ -112,7 +115,7 @@ static inline int xfs_this_quota_on(stru + } + } + +-static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) ++static inline struct xfs_dquot *xfs_inode_dquot(struct xfs_inode *ip, int type) + { + switch (type & XFS_DQ_ALLTYPES) { + case XFS_DQ_USER: +@@ -147,31 +150,30 @@ static inline bool xfs_dquot_lowsp(struc + #define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) + #define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) + +-extern void xfs_qm_dqdestroy(xfs_dquot_t *); 
+-extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **); +-extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); +-extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, +- xfs_disk_dquot_t *); +-extern void xfs_qm_adjust_dqlimits(struct xfs_mount *, +- struct xfs_dquot *); +-extern xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, +- uint type); +-extern int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id, ++void xfs_qm_dqdestroy(struct xfs_dquot *dqp); ++int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp); ++void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp); ++void xfs_qm_adjust_dqtimers(struct xfs_mount *mp, ++ struct xfs_disk_dquot *d); ++void xfs_qm_adjust_dqlimits(struct xfs_mount *mp, ++ struct xfs_dquot *d); ++xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type); ++int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id, + uint type, bool can_alloc, + struct xfs_dquot **dqpp); +-extern int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type, +- bool can_alloc, +- struct xfs_dquot **dqpp); +-extern int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id, ++int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type, ++ bool can_alloc, ++ struct xfs_dquot **dqpp); ++int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id, + uint type, struct xfs_dquot **dqpp); +-extern int xfs_qm_dqget_uncached(struct xfs_mount *mp, +- xfs_dqid_t id, uint type, +- struct xfs_dquot **dqpp); +-extern void xfs_qm_dqput(xfs_dquot_t *); ++int xfs_qm_dqget_uncached(struct xfs_mount *mp, ++ xfs_dqid_t id, uint type, ++ struct xfs_dquot **dqpp); ++void xfs_qm_dqput(struct xfs_dquot *dqp); + +-extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); ++void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *); + +-extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); ++void xfs_dquot_set_prealloc_limits(struct xfs_dquot *); + + static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) + { +--- a/fs/xfs/xfs_log_recover.c ++++ 
b/fs/xfs/xfs_log_recover.c +@@ -2577,6 +2577,7 @@ xlog_recover_do_reg_buffer( + int bit; + int nbits; + xfs_failaddr_t fa; ++ const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot); + + trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f); + +@@ -2619,7 +2620,7 @@ xlog_recover_do_reg_buffer( + "XFS: NULL dquot in %s.", __func__); + goto next; + } +- if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { ++ if (item->ri_buf[i].i_len < size_disk_dquot) { + xfs_alert(mp, + "XFS: dquot too small (%d) in %s.", + item->ri_buf[i].i_len, __func__); +@@ -3250,7 +3251,7 @@ xlog_recover_dquot_pass2( + xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); + return -EFSCORRUPTED; + } +- if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { ++ if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) { + xfs_alert(log->l_mp, "dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); + return -EFSCORRUPTED; +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -244,14 +244,14 @@ xfs_qm_unmount_quotas( + + STATIC int + xfs_qm_dqattach_one( +- xfs_inode_t *ip, +- xfs_dqid_t id, +- uint type, +- bool doalloc, +- xfs_dquot_t **IO_idqpp) ++ struct xfs_inode *ip, ++ xfs_dqid_t id, ++ uint type, ++ bool doalloc, ++ struct xfs_dquot **IO_idqpp) + { +- xfs_dquot_t *dqp; +- int error; ++ struct xfs_dquot *dqp; ++ int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + error = 0; +@@ -544,8 +544,8 @@ xfs_qm_set_defquota( + uint type, + xfs_quotainfo_t *qinf) + { +- xfs_dquot_t *dqp; +- struct xfs_def_quota *defq; ++ struct xfs_dquot *dqp; ++ struct xfs_def_quota *defq; + struct xfs_disk_dquot *ddqp; + int error; + +@@ -1746,14 +1746,14 @@ error_rele: + * Actually transfer ownership, and do dquot modifications. + * These were already reserved. 
+ */ +-xfs_dquot_t * ++struct xfs_dquot * + xfs_qm_vop_chown( +- xfs_trans_t *tp, +- xfs_inode_t *ip, +- xfs_dquot_t **IO_olddq, +- xfs_dquot_t *newdq) ++ struct xfs_trans *tp, ++ struct xfs_inode *ip, ++ struct xfs_dquot **IO_olddq, ++ struct xfs_dquot *newdq) + { +- xfs_dquot_t *prevdq; ++ struct xfs_dquot *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + +--- a/fs/xfs/xfs_qm_bhv.c ++++ b/fs/xfs/xfs_qm_bhv.c +@@ -54,11 +54,11 @@ xfs_fill_statvfs_from_dquot( + */ + void + xfs_qm_statvfs( +- xfs_inode_t *ip, ++ struct xfs_inode *ip, + struct kstatfs *statp) + { +- xfs_mount_t *mp = ip->i_mount; +- xfs_dquot_t *dqp; ++ struct xfs_mount *mp = ip->i_mount; ++ struct xfs_dquot *dqp; + + if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) { + xfs_fill_statvfs_from_dquot(statp, dqp); +--- a/fs/xfs/xfs_trans_dquot.c ++++ b/fs/xfs/xfs_trans_dquot.c +@@ -25,8 +25,8 @@ STATIC void xfs_trans_alloc_dqinfo(xfs_t + */ + void + xfs_trans_dqjoin( +- xfs_trans_t *tp, +- xfs_dquot_t *dqp) ++ struct xfs_trans *tp, ++ struct xfs_dquot *dqp) + { + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(dqp->q_logitem.qli_dquot == dqp); +@@ -49,8 +49,8 @@ xfs_trans_dqjoin( + */ + void + xfs_trans_log_dquot( +- xfs_trans_t *tp, +- xfs_dquot_t *dqp) ++ struct xfs_trans *tp, ++ struct xfs_dquot *dqp) + { + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + +@@ -486,12 +486,12 @@ xfs_trans_apply_dquot_deltas( + */ + void + xfs_trans_unreserve_and_mod_dquots( +- xfs_trans_t *tp) ++ struct xfs_trans *tp) + { + int i, j; +- xfs_dquot_t *dqp; ++ struct xfs_dquot *dqp; + struct xfs_dqtrx *qtrx, *qa; +- bool locked; ++ bool locked; + + if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; +@@ -571,21 +571,21 @@ xfs_quota_warn( + */ + STATIC int + xfs_trans_dqresv( +- xfs_trans_t *tp, +- xfs_mount_t *mp, +- xfs_dquot_t *dqp, +- int64_t nblks, +- long ninos, +- uint flags) +-{ +- xfs_qcnt_t hardlimit; +- xfs_qcnt_t softlimit; +- time_t timer; +- 
xfs_qwarncnt_t warns; +- xfs_qwarncnt_t warnlimit; +- xfs_qcnt_t total_count; +- xfs_qcnt_t *resbcountp; +- xfs_quotainfo_t *q = mp->m_quotainfo; ++ struct xfs_trans *tp, ++ struct xfs_mount *mp, ++ struct xfs_dquot *dqp, ++ int64_t nblks, ++ long ninos, ++ uint flags) ++{ ++ xfs_qcnt_t hardlimit; ++ xfs_qcnt_t softlimit; ++ time_t timer; ++ xfs_qwarncnt_t warns; ++ xfs_qwarncnt_t warnlimit; ++ xfs_qcnt_t total_count; ++ xfs_qcnt_t *resbcountp; ++ xfs_quotainfo_t *q = mp->m_quotainfo; + struct xfs_def_quota *defq; + + diff --git a/queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch b/queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch new file mode 100644 index 00000000000..bd86591f789 --- /dev/null +++ b/queue-5.4/xfs-remove-the-xfs_dq_logitem_t-typedef.patch @@ -0,0 +1,65 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:28 +0530 +Subject: xfs: remove the xfs_dq_logitem_t typedef +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-12-chandan.babu@oracle.com> + +From: Pavel Reichl + +commit fd8b81dbbb23d4a3508cfac83256b4f5e770941c upstream. + +Signed-off-by: Pavel Reichl +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_dquot.c | 2 +- + fs/xfs/xfs_dquot.h | 2 +- + fs/xfs/xfs_dquot_item.h | 10 +++++----- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -1018,7 +1018,7 @@ xfs_qm_dqflush_done( + struct xfs_buf *bp, + struct xfs_log_item *lip) + { +- xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; ++ struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; + struct xfs_dquot *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; + +--- a/fs/xfs/xfs_dquot.h ++++ b/fs/xfs/xfs_dquot.h +@@ -40,7 +40,7 @@ struct xfs_dquot { + xfs_fileoff_t q_fileoffset; + + struct xfs_disk_dquot q_core; +- xfs_dq_logitem_t q_logitem; ++ struct xfs_dq_logitem q_logitem; + /* total regular nblks used+reserved */ + xfs_qcnt_t q_res_bcount; + /* total inos allocd+reserved */ +--- a/fs/xfs/xfs_dquot_item.h ++++ b/fs/xfs/xfs_dquot_item.h +@@ -11,11 +11,11 @@ struct xfs_trans; + struct xfs_mount; + struct xfs_qoff_logitem; + +-typedef struct xfs_dq_logitem { +- struct xfs_log_item qli_item; /* common portion */ +- struct xfs_dquot *qli_dquot; /* dquot ptr */ +- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ +-} xfs_dq_logitem_t; ++struct xfs_dq_logitem { ++ struct xfs_log_item qli_item; /* common portion */ ++ struct xfs_dquot *qli_dquot; /* dquot ptr */ ++ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ ++}; + + typedef struct xfs_qoff_logitem { + struct xfs_log_item qql_item; /* common portion */ diff --git a/queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch b/queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch new file mode 100644 index 00000000000..e506edb5574 --- /dev/null +++ b/queue-5.4/xfs-remove-the-xfs_qoff_logitem_t-typedef.patch @@ -0,0 +1,183 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:29 +0530 +Subject: xfs: remove the xfs_qoff_logitem_t typedef +To: 
gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-13-chandan.babu@oracle.com> + +From: Pavel Reichl + +commit d0bdfb106907e4a3ef4f25f6d27e392abf41f3a0 upstream. + +Signed-off-by: Pavel Reichl +Reviewed-by: Darrick J. Wong +[darrick: fix a comment] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_trans_resv.c | 4 ++-- + fs/xfs/xfs_dquot_item.h | 28 +++++++++++++++------------- + fs/xfs/xfs_qm_syscalls.c | 29 ++++++++++++++++------------- + fs/xfs/xfs_trans_dquot.c | 12 ++++++------ + 4 files changed, 39 insertions(+), 34 deletions(-) + +--- a/fs/xfs/libxfs/xfs_trans_resv.c ++++ b/fs/xfs/libxfs/xfs_trans_resv.c +@@ -800,7 +800,7 @@ xfs_calc_qm_dqalloc_reservation( + + /* + * Turning off quotas. +- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 ++ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 + * the superblock for the quota flags: sector size + */ + STATIC uint +@@ -813,7 +813,7 @@ xfs_calc_qm_quotaoff_reservation( + + /* + * End of turning off quotas. 
+- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 ++ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2 + */ + STATIC uint + xfs_calc_qm_quotaoff_end_reservation(void) +--- a/fs/xfs/xfs_dquot_item.h ++++ b/fs/xfs/xfs_dquot_item.h +@@ -12,24 +12,26 @@ struct xfs_mount; + struct xfs_qoff_logitem; + + struct xfs_dq_logitem { +- struct xfs_log_item qli_item; /* common portion */ ++ struct xfs_log_item qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ +- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ ++ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + }; + +-typedef struct xfs_qoff_logitem { +- struct xfs_log_item qql_item; /* common portion */ +- struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ ++struct xfs_qoff_logitem { ++ struct xfs_log_item qql_item; /* common portion */ ++ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ + unsigned int qql_flags; +-} xfs_qoff_logitem_t; ++}; + + +-extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); +-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, +- struct xfs_qoff_logitem *, uint); +-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, +- struct xfs_qoff_logitem *, uint); +-extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, +- struct xfs_qoff_logitem *); ++void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp); ++struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp, ++ struct xfs_qoff_logitem *start, ++ uint flags); ++struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp, ++ struct xfs_qoff_logitem *startqoff, ++ uint flags); ++void xfs_trans_log_quotaoff_item(struct xfs_trans *tp, ++ struct xfs_qoff_logitem *qlp); + + #endif /* __XFS_DQUOT_ITEM_H__ */ +--- a/fs/xfs/xfs_qm_syscalls.c ++++ b/fs/xfs/xfs_qm_syscalls.c +@@ -19,9 +19,12 @@ + #include "xfs_qm.h" + #include "xfs_icache.h" + +-STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, 
xfs_qoff_logitem_t **, uint); +-STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, +- uint); ++STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp, ++ struct xfs_qoff_logitem **qoffstartp, ++ uint flags); ++STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp, ++ struct xfs_qoff_logitem *startqoff, ++ uint flags); + + /* + * Turn off quota accounting and/or enforcement for all udquots and/or +@@ -40,7 +43,7 @@ xfs_qm_scall_quotaoff( + uint dqtype; + int error; + uint inactivate_flags; +- xfs_qoff_logitem_t *qoffstart; ++ struct xfs_qoff_logitem *qoffstart; + + /* + * No file system can have quotas enabled on disk but not in core. +@@ -540,13 +543,13 @@ out_unlock: + + STATIC int + xfs_qm_log_quotaoff_end( +- xfs_mount_t *mp, +- xfs_qoff_logitem_t *startqoff, ++ struct xfs_mount *mp, ++ struct xfs_qoff_logitem *startqoff, + uint flags) + { +- xfs_trans_t *tp; ++ struct xfs_trans *tp; + int error; +- xfs_qoff_logitem_t *qoffi; ++ struct xfs_qoff_logitem *qoffi; + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); + if (error) +@@ -568,13 +571,13 @@ xfs_qm_log_quotaoff_end( + + STATIC int + xfs_qm_log_quotaoff( +- xfs_mount_t *mp, +- xfs_qoff_logitem_t **qoffstartp, +- uint flags) ++ struct xfs_mount *mp, ++ struct xfs_qoff_logitem **qoffstartp, ++ uint flags) + { +- xfs_trans_t *tp; ++ struct xfs_trans *tp; + int error; +- xfs_qoff_logitem_t *qoffi; ++ struct xfs_qoff_logitem *qoffi; + + *qoffstartp = NULL; + +--- a/fs/xfs/xfs_trans_dquot.c ++++ b/fs/xfs/xfs_trans_dquot.c +@@ -824,13 +824,13 @@ xfs_trans_reserve_quota_nblks( + /* + * This routine is called to allocate a quotaoff log item. 
+ */ +-xfs_qoff_logitem_t * ++struct xfs_qoff_logitem * + xfs_trans_get_qoff_item( +- xfs_trans_t *tp, +- xfs_qoff_logitem_t *startqoff, ++ struct xfs_trans *tp, ++ struct xfs_qoff_logitem *startqoff, + uint flags) + { +- xfs_qoff_logitem_t *q; ++ struct xfs_qoff_logitem *q; + + ASSERT(tp != NULL); + +@@ -852,8 +852,8 @@ xfs_trans_get_qoff_item( + */ + void + xfs_trans_log_quotaoff_item( +- xfs_trans_t *tp, +- xfs_qoff_logitem_t *qlp) ++ struct xfs_trans *tp, ++ struct xfs_qoff_logitem *qlp) + { + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags); diff --git a/queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch b/queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch new file mode 100644 index 00000000000..794eee6bb58 --- /dev/null +++ b/queue-5.4/xfs-replace-function-declaration-by-actual-definition.patch @@ -0,0 +1,179 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:30 +0530 +Subject: xfs: Replace function declaration by actual definition +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-14-chandan.babu@oracle.com> + +From: Pavel Reichl + +commit 1cc95e6f0d7cfd61c9d3c5cdd4e7345b173f764f upstream. + +Signed-off-by: Pavel Reichl +Reviewed-by: Darrick J. Wong +[darrick: fix typo in subject line] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_qm_syscalls.c | 140 ++++++++++++++++++++++------------------------- + 1 file changed, 66 insertions(+), 74 deletions(-) + +--- a/fs/xfs/xfs_qm_syscalls.c ++++ b/fs/xfs/xfs_qm_syscalls.c +@@ -19,12 +19,72 @@ + #include "xfs_qm.h" + #include "xfs_icache.h" + +-STATIC int xfs_qm_log_quotaoff(struct xfs_mount *mp, +- struct xfs_qoff_logitem **qoffstartp, +- uint flags); +-STATIC int xfs_qm_log_quotaoff_end(struct xfs_mount *mp, +- struct xfs_qoff_logitem *startqoff, +- uint flags); ++STATIC int ++xfs_qm_log_quotaoff( ++ struct xfs_mount *mp, ++ struct xfs_qoff_logitem **qoffstartp, ++ uint flags) ++{ ++ struct xfs_trans *tp; ++ int error; ++ struct xfs_qoff_logitem *qoffi; ++ ++ *qoffstartp = NULL; ++ ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); ++ if (error) ++ goto out; ++ ++ qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); ++ xfs_trans_log_quotaoff_item(tp, qoffi); ++ ++ spin_lock(&mp->m_sb_lock); ++ mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; ++ spin_unlock(&mp->m_sb_lock); ++ ++ xfs_log_sb(tp); ++ ++ /* ++ * We have to make sure that the transaction is secure on disk before we ++ * return and actually stop quota accounting. So, make it synchronous. ++ * We don't care about quotoff's performance. 
++ */ ++ xfs_trans_set_sync(tp); ++ error = xfs_trans_commit(tp); ++ if (error) ++ goto out; ++ ++ *qoffstartp = qoffi; ++out: ++ return error; ++} ++ ++STATIC int ++xfs_qm_log_quotaoff_end( ++ struct xfs_mount *mp, ++ struct xfs_qoff_logitem *startqoff, ++ uint flags) ++{ ++ struct xfs_trans *tp; ++ int error; ++ struct xfs_qoff_logitem *qoffi; ++ ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); ++ if (error) ++ return error; ++ ++ qoffi = xfs_trans_get_qoff_item(tp, startqoff, ++ flags & XFS_ALL_QUOTA_ACCT); ++ xfs_trans_log_quotaoff_item(tp, qoffi); ++ ++ /* ++ * We have to make sure that the transaction is secure on disk before we ++ * return and actually stop quota accounting. So, make it synchronous. ++ * We don't care about quotoff's performance. ++ */ ++ xfs_trans_set_sync(tp); ++ return xfs_trans_commit(tp); ++} + + /* + * Turn off quota accounting and/or enforcement for all udquots and/or +@@ -541,74 +601,6 @@ out_unlock: + return error; + } + +-STATIC int +-xfs_qm_log_quotaoff_end( +- struct xfs_mount *mp, +- struct xfs_qoff_logitem *startqoff, +- uint flags) +-{ +- struct xfs_trans *tp; +- int error; +- struct xfs_qoff_logitem *qoffi; +- +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp); +- if (error) +- return error; +- +- qoffi = xfs_trans_get_qoff_item(tp, startqoff, +- flags & XFS_ALL_QUOTA_ACCT); +- xfs_trans_log_quotaoff_item(tp, qoffi); +- +- /* +- * We have to make sure that the transaction is secure on disk before we +- * return and actually stop quota accounting. So, make it synchronous. +- * We don't care about quotoff's performance. 
+- */ +- xfs_trans_set_sync(tp); +- return xfs_trans_commit(tp); +-} +- +- +-STATIC int +-xfs_qm_log_quotaoff( +- struct xfs_mount *mp, +- struct xfs_qoff_logitem **qoffstartp, +- uint flags) +-{ +- struct xfs_trans *tp; +- int error; +- struct xfs_qoff_logitem *qoffi; +- +- *qoffstartp = NULL; +- +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp); +- if (error) +- goto out; +- +- qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); +- xfs_trans_log_quotaoff_item(tp, qoffi); +- +- spin_lock(&mp->m_sb_lock); +- mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; +- spin_unlock(&mp->m_sb_lock); +- +- xfs_log_sb(tp); +- +- /* +- * We have to make sure that the transaction is secure on disk before we +- * return and actually stop quota accounting. So, make it synchronous. +- * We don't care about quotoff's performance. +- */ +- xfs_trans_set_sync(tp); +- error = xfs_trans_commit(tp); +- if (error) +- goto out; +- +- *qoffstartp = qoffi; +-out: +- return error; +-} +- + /* Fill out the quota context. */ + static void + xfs_qm_scall_getquota_fill_qc( diff --git a/queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch b/queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch new file mode 100644 index 00000000000..0404a4436cc --- /dev/null +++ b/queue-5.4/xfs-rework-collapse-range-into-an-atomic-operation.patch @@ -0,0 +1,98 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:20 +0530 +Subject: xfs: rework collapse range into an atomic operation +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-4-chandan.babu@oracle.com> + +From: Brian Foster + +commit 211683b21de959a647de74faedfdd8a5d189327e upstream. 
+ +The collapse range operation uses a unique transaction and ilock +cycle for the hole punch and each extent shift iteration of the +overall operation. While the hole punch is safe as a separate +operation due to the iolock, cycling the ilock after each extent +shift is risky w.r.t. concurrent operations, similar to insert range. + +To avoid this problem, make collapse range atomic with respect to +ilock. Hold the ilock across the entire operation, replace the +individual transactions with a single rolling transaction sequence +and finish dfops on each iteration to perform pending frees and roll +the transaction. Remove the unnecessary quota reservation as +collapse range can only ever merge extents (and thus remove extent +records and potentially free bmap blocks). The dfops call +automatically relogs the inode to keep it moving in the log. This +guarantees that nothing else can change the extent mapping of an +inode while a collapse range operation is in progress. + +Signed-off-by: Brian Foster +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_bmap_util.c | 29 +++++++++++++++-------------- + 1 file changed, 15 insertions(+), 14 deletions(-) + +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1237,7 +1237,6 @@ xfs_collapse_file_space( + int error; + xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len); + xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len); +- uint resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); + bool done = false; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); +@@ -1253,32 +1252,34 @@ xfs_collapse_file_space( + if (error) + return error; + +- while (!error && !done) { +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, +- &tp); +- if (error) +- break; ++ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); ++ if (error) ++ return error; + +- xfs_ilock(ip, XFS_ILOCK_EXCL); +- error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, +- ip->i_gdquot, ip->i_pdquot, resblks, 0, +- XFS_QMOPT_RES_REGBLKS); +- if (error) +- goto out_trans_cancel; +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ xfs_ilock(ip, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, 0); + ++ while (!done) { + error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb, + &done); + if (error) + goto out_trans_cancel; ++ if (done) ++ break; + +- error = xfs_trans_commit(tp); ++ /* finish any deferred frees and roll the transaction */ ++ error = xfs_defer_finish(&tp); ++ if (error) ++ goto out_trans_cancel; + } + ++ error = xfs_trans_commit(tp); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + + out_trans_cancel: + xfs_trans_cancel(tp); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + } + diff --git a/queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch b/queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch new file mode 100644 index 00000000000..d60dd758303 --- /dev/null +++ b/queue-5.4/xfs-rework-insert-range-into-an-atomic-operation.patch @@ -0,0 +1,100 @@ +From foo@baz 
Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:19 +0530 +Subject: xfs: rework insert range into an atomic operation +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-3-chandan.babu@oracle.com> + +From: Brian Foster + +commit dd87f87d87fa4359a54e7b44549742f579e3e805 upstream. + +The insert range operation uses a unique transaction and ilock cycle +for the extent split and each extent shift iteration of the overall +operation. While this works, it risks racing with other +operations in subtle ways such as COW writeback modifying an extent +tree in the middle of a shift operation. + +To avoid this problem, make insert range atomic with respect to +ilock. Hold the ilock across the entire operation, replace the +individual transactions with a single rolling transaction sequence +and relog the inode to keep it moving in the log. This guarantees +that nothing else can change the extent mapping of an inode while +an insert range operation is in progress. + +Signed-off-by: Brian Foster +Reviewed-by: Allison Collins +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_bmap_util.c | 32 +++++++++++++------------------- + 1 file changed, 13 insertions(+), 19 deletions(-) + +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1321,47 +1321,41 @@ xfs_insert_file_space( + if (error) + return error; + +- /* +- * The extent shifting code works on extent granularity. So, if stop_fsb +- * is not the starting block of extent, we need to split the extent at +- * stop_fsb. 
+- */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ++ xfs_trans_ijoin(tp, ip, 0); + ++ /* ++ * The extent shifting code works on extent granularity. So, if stop_fsb ++ * is not the starting block of extent, we need to split the extent at ++ * stop_fsb. ++ */ + error = xfs_bmap_split_extent(tp, ip, stop_fsb); + if (error) + goto out_trans_cancel; + +- error = xfs_trans_commit(tp); +- if (error) +- return error; +- +- while (!error && !done) { +- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, +- &tp); ++ do { ++ error = xfs_trans_roll_inode(&tp, ip); + if (error) +- break; ++ goto out_trans_cancel; + +- xfs_ilock(ip, XFS_ILOCK_EXCL); +- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb, + &done, stop_fsb); + if (error) + goto out_trans_cancel; ++ } while (!done); + +- error = xfs_trans_commit(tp); +- } +- ++ error = xfs_trans_commit(tp); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + + out_trans_cancel: + xfs_trans_cancel(tp); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + } + diff --git a/queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch b/queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch new file mode 100644 index 00000000000..81f18a4c58f --- /dev/null +++ b/queue-5.4/xfs-tail-updates-only-need-to-occur-when-lsn-changes.patch @@ -0,0 +1,220 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:37 +0530 +Subject: xfs: tail updates only need to occur when LSN changes +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-21-chandan.babu@oracle.com> + 
+From: Dave Chinner + +commit 8eb807bd839938b45bf7a97f0568d2a845ba6929 upstream. + +We currently wake anything waiting on the log tail to move whenever +the log item at the tail of the log is removed. Historically this +was fine behaviour because there were very few items at any given +LSN. But with delayed logging, there may be thousands of items at +any given LSN, and we can't move the tail until they are all gone. + +Hence if we are removing them in near tail-first order, we might be +waking up processes waiting on the tail LSN to change (e.g. log +space waiters) repeatedly without them being able to make progress. +This also occurs with the new sync push waiters, and can result in +thousands of spurious wakeups every second when under heavy direct +reclaim pressure. + +To fix this, check that the tail LSN has actually changed on the +AIL before triggering wakeups. This will reduce the number of +spurious wakeups when doing bulk AIL removal and make this code much +more efficient. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Reviewed-by: Allison Collins +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_inode_item.c | 18 ++++++++++++---- + fs/xfs/xfs_trans_ail.c | 52 +++++++++++++++++++++++++++++++++--------------- + fs/xfs/xfs_trans_priv.h | 4 +-- + 3 files changed, 51 insertions(+), 23 deletions(-) + +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -732,19 +732,27 @@ xfs_iflush_done( + * holding the lock before removing the inode from the AIL. 
+ */ + if (need_ail) { +- bool mlip_changed = false; ++ xfs_lsn_t tail_lsn = 0; + + /* this is an opencoded batch version of xfs_trans_ail_delete */ + spin_lock(&ailp->ail_lock); + list_for_each_entry(blip, &tmp, li_bio_list) { + if (INODE_ITEM(blip)->ili_logged && +- blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) +- mlip_changed |= xfs_ail_delete_one(ailp, blip); +- else { ++ blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) { ++ /* ++ * xfs_ail_update_finish() only cares about the ++ * lsn of the first tail item removed, any ++ * others will be at the same or higher lsn so ++ * we just ignore them. ++ */ ++ xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip); ++ if (!tail_lsn && lsn) ++ tail_lsn = lsn; ++ } else { + xfs_clear_li_failed(blip); + } + } +- xfs_ail_update_finish(ailp, mlip_changed); ++ xfs_ail_update_finish(ailp, tail_lsn); + } + + /* +--- a/fs/xfs/xfs_trans_ail.c ++++ b/fs/xfs/xfs_trans_ail.c +@@ -108,17 +108,25 @@ xfs_ail_next( + * We need the AIL lock in order to get a coherent read of the lsn of the last + * item in the AIL. + */ ++static xfs_lsn_t ++__xfs_ail_min_lsn( ++ struct xfs_ail *ailp) ++{ ++ struct xfs_log_item *lip = xfs_ail_min(ailp); ++ ++ if (lip) ++ return lip->li_lsn; ++ return 0; ++} ++ + xfs_lsn_t + xfs_ail_min_lsn( + struct xfs_ail *ailp) + { +- xfs_lsn_t lsn = 0; +- struct xfs_log_item *lip; ++ xfs_lsn_t lsn; + + spin_lock(&ailp->ail_lock); +- lip = xfs_ail_min(ailp); +- if (lip) +- lsn = lip->li_lsn; ++ lsn = __xfs_ail_min_lsn(ailp); + spin_unlock(&ailp->ail_lock); + + return lsn; +@@ -683,11 +691,12 @@ xfs_ail_push_all_sync( + void + xfs_ail_update_finish( + struct xfs_ail *ailp, +- bool do_tail_update) __releases(ailp->ail_lock) ++ xfs_lsn_t old_lsn) __releases(ailp->ail_lock) + { + struct xfs_mount *mp = ailp->ail_mount; + +- if (!do_tail_update) { ++ /* if the tail lsn hasn't changed, don't do updates or wakeups. 
*/ ++ if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) { + spin_unlock(&ailp->ail_lock); + return; + } +@@ -732,7 +741,7 @@ xfs_trans_ail_update_bulk( + xfs_lsn_t lsn) __releases(ailp->ail_lock) + { + struct xfs_log_item *mlip; +- int mlip_changed = 0; ++ xfs_lsn_t tail_lsn = 0; + int i; + LIST_HEAD(tmp); + +@@ -747,9 +756,10 @@ xfs_trans_ail_update_bulk( + continue; + + trace_xfs_ail_move(lip, lip->li_lsn, lsn); ++ if (mlip == lip && !tail_lsn) ++ tail_lsn = lip->li_lsn; ++ + xfs_ail_delete(ailp, lip); +- if (mlip == lip) +- mlip_changed = 1; + } else { + trace_xfs_ail_insert(lip, 0, lsn); + } +@@ -760,15 +770,23 @@ xfs_trans_ail_update_bulk( + if (!list_empty(&tmp)) + xfs_ail_splice(ailp, cur, &tmp, lsn); + +- xfs_ail_update_finish(ailp, mlip_changed); ++ xfs_ail_update_finish(ailp, tail_lsn); + } + +-bool ++/* ++ * Delete one log item from the AIL. ++ * ++ * If this item was at the tail of the AIL, return the LSN of the log item so ++ * that we can use it to check if the LSN of the tail of the log has moved ++ * when finishing up the AIL delete process in xfs_ail_update_finish(). 
++ */ ++xfs_lsn_t + xfs_ail_delete_one( + struct xfs_ail *ailp, + struct xfs_log_item *lip) + { + struct xfs_log_item *mlip = xfs_ail_min(ailp); ++ xfs_lsn_t lsn = lip->li_lsn; + + trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); + xfs_ail_delete(ailp, lip); +@@ -776,7 +794,9 @@ xfs_ail_delete_one( + clear_bit(XFS_LI_IN_AIL, &lip->li_flags); + lip->li_lsn = 0; + +- return mlip == lip; ++ if (mlip == lip) ++ return lsn; ++ return 0; + } + + /** +@@ -807,7 +827,7 @@ xfs_trans_ail_delete( + int shutdown_type) + { + struct xfs_mount *mp = ailp->ail_mount; +- bool need_update; ++ xfs_lsn_t tail_lsn; + + if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { + spin_unlock(&ailp->ail_lock); +@@ -820,8 +840,8 @@ xfs_trans_ail_delete( + return; + } + +- need_update = xfs_ail_delete_one(ailp, lip); +- xfs_ail_update_finish(ailp, need_update); ++ tail_lsn = xfs_ail_delete_one(ailp, lip); ++ xfs_ail_update_finish(ailp, tail_lsn); + } + + int +--- a/fs/xfs/xfs_trans_priv.h ++++ b/fs/xfs/xfs_trans_priv.h +@@ -91,8 +91,8 @@ xfs_trans_ail_update( + xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn); + } + +-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); +-void xfs_ail_update_finish(struct xfs_ail *ailp, bool do_tail_update) ++xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip); ++void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn) + __releases(ailp->ail_lock); + void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip, + int shutdown_type); diff --git a/queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch b/queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch new file mode 100644 index 00000000000..43b7b919e60 --- /dev/null +++ b/queue-5.4/xfs-throttle-commits-on-delayed-background-cil-push.patch @@ -0,0 +1,189 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:35 +0530 +Subject: xfs: Throttle commits on delayed 
background CIL push +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-19-chandan.babu@oracle.com> + +From: Dave Chinner + +commit 0e7ab7efe77451cba4cbecb6c9f5ef83cf32b36b upstream. + +In certain situations the background CIL push can be indefinitely +delayed. While we have workarounds from the obvious cases now, it +doesn't solve the underlying issue. This issue is that there is no +upper limit on the CIL where we will either force or wait for +a background push to start, hence allowing the CIL to grow without +bound until it consumes all log space. + +To fix this, add a new wait queue to the CIL which allows background +pushes to wait for the CIL context to be switched out. This happens +when the push starts, so it will allow us to block incoming +transaction commit completion until the push has started. This will +only affect processes that are running modifications, and only when +the CIL threshold has been significantly overrun. + +This has no apparent impact on performance, and doesn't even trigger +until over 45 million inodes had been created in a 16-way fsmark +test on a 2GB log. That was limiting at 64MB of log space used, so +the active CIL size is only about 3% of the total log in that case. +The concurrent removal of those files did not trigger the background +sleep at all. + +Signed-off-by: Dave Chinner +Reviewed-by: Allison Collins +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_cil.c | 37 +++++++++++++++++++++++++++++++++---- + fs/xfs/xfs_log_priv.h | 24 ++++++++++++++++++++++++ + fs/xfs/xfs_trace.h | 1 + + 3 files changed, 58 insertions(+), 4 deletions(-) + +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -671,6 +671,11 @@ xlog_cil_push( + ASSERT(push_seq <= ctx->sequence); + + /* ++ * Wake up any background push waiters now this context is being pushed. ++ */ ++ wake_up_all(&ctx->push_wait); ++ ++ /* + * Check if we've anything to push. If there is nothing, then we don't + * move on to a new sequence number and so we have to be able to push + * this sequence again later. +@@ -746,6 +751,7 @@ xlog_cil_push( + */ + INIT_LIST_HEAD(&new_ctx->committing); + INIT_LIST_HEAD(&new_ctx->busy_extents); ++ init_waitqueue_head(&new_ctx->push_wait); + new_ctx->sequence = ctx->sequence + 1; + new_ctx->cil = cil; + cil->xc_ctx = new_ctx; +@@ -900,7 +906,7 @@ xlog_cil_push_work( + */ + static void + xlog_cil_push_background( +- struct xlog *log) ++ struct xlog *log) __releases(cil->xc_ctx_lock) + { + struct xfs_cil *cil = log->l_cilp; + +@@ -914,14 +920,36 @@ xlog_cil_push_background( + * don't do a background push if we haven't used up all the + * space available yet. + */ +- if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) ++ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { ++ up_read(&cil->xc_ctx_lock); + return; ++ } + + spin_lock(&cil->xc_push_lock); + if (cil->xc_push_seq < cil->xc_current_sequence) { + cil->xc_push_seq = cil->xc_current_sequence; + queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); + } ++ ++ /* ++ * Drop the context lock now, we can't hold that if we need to sleep ++ * because we are over the blocking threshold. The push_lock is still ++ * held, so blocking threshold sleep/wakeup is still correctly ++ * serialised here. 
++ */ ++ up_read(&cil->xc_ctx_lock); ++ ++ /* ++ * If we are well over the space limit, throttle the work that is being ++ * done until the push work on this context has begun. ++ */ ++ if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { ++ trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ++ ASSERT(cil->xc_ctx->space_used < log->l_logsize); ++ xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); ++ return; ++ } ++ + spin_unlock(&cil->xc_push_lock); + + } +@@ -1038,9 +1066,9 @@ xfs_log_commit_cil( + if (lip->li_ops->iop_committing) + lip->li_ops->iop_committing(lip, xc_commit_lsn); + } +- xlog_cil_push_background(log); + +- up_read(&cil->xc_ctx_lock); ++ /* xlog_cil_push_background() releases cil->xc_ctx_lock */ ++ xlog_cil_push_background(log); + } + + /* +@@ -1199,6 +1227,7 @@ xlog_cil_init( + + INIT_LIST_HEAD(&ctx->committing); + INIT_LIST_HEAD(&ctx->busy_extents); ++ init_waitqueue_head(&ctx->push_wait); + ctx->sequence = 1; + ctx->cil = cil; + cil->xc_ctx = ctx; +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -247,6 +247,7 @@ struct xfs_cil_ctx { + struct xfs_log_vec *lv_chain; /* logvecs being pushed */ + struct list_head iclog_entry; + struct list_head committing; /* ctx committing list */ ++ wait_queue_head_t push_wait; /* background push throttle */ + struct work_struct discard_endio_work; + }; + +@@ -344,10 +345,33 @@ struct xfs_cil { + * buffer window (32MB) as measurements have shown this to be roughly the + * point of diminishing performance increases under highly concurrent + * modification workloads. ++ * ++ * To prevent the CIL from overflowing upper commit size bounds, we introduce a ++ * new threshold at which we block committing transactions until the background ++ * CIL commit commences and switches to a new context. While this is not a hard ++ * limit, it forces the process committing a transaction to the CIL to block and ++ * yeild the CPU, giving the CIL push work a chance to be scheduled and start ++ * work. 
This prevents a process running lots of transactions from overfilling ++ * the CIL because it is not yielding the CPU. We set the blocking limit at ++ * twice the background push space threshold so we keep in line with the AIL ++ * push thresholds. ++ * ++ * Note: this is not a -hard- limit as blocking is applied after the transaction ++ * is inserted into the CIL and the push has been triggered. It is largely a ++ * throttling mechanism that allows the CIL push to be scheduled and run. A hard ++ * limit will be difficult to implement without introducing global serialisation ++ * in the CIL commit fast path, and it's not at all clear that we actually need ++ * such hard limits given the ~7 years we've run without a hard limit before ++ * finding the first situation where a checkpoint size overflow actually ++ * occurred. Hence the simple throttle, and an ASSERT check to tell us that ++ * we've overrun the max size. + */ + #define XLOG_CIL_SPACE_LIMIT(log) \ + min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4) + ++#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log) \ ++ (XLOG_CIL_SPACE_LIMIT(log) * 2) ++ + /* + * ticket grant locks, queues and accounting have their own cachlines + * as these are quite hot and can be operated on concurrently. 
+--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -1011,6 +1011,7 @@ DEFINE_LOGGRANT_EVENT(xfs_log_regrant_re + DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); + DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); + DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); ++DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait); + + DECLARE_EVENT_CLASS(xfs_log_item_class, + TP_PROTO(struct xfs_log_item *lip), diff --git a/queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch b/queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch new file mode 100644 index 00000000000..7c03609329a --- /dev/null +++ b/queue-5.4/xfs-trylock-underlying-buffer-on-dquot-flush.patch @@ -0,0 +1,112 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:39 +0530 +Subject: xfs: trylock underlying buffer on dquot flush +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-23-chandan.babu@oracle.com> + +From: Brian Foster + +commit 8d3d7e2b35ea7d91d6e085c93b5efecfb0fba307 upstream. + +A dquot flush currently blocks on the buffer lock for the underlying +dquot buffer. In turn, this causes xfsaild to block rather than +continue processing other items in the meantime. Update +xfs_qm_dqflush() to trylock the buffer, similar to how inode buffers +are handled, and return -EAGAIN if the lock fails. Fix up any +callers that don't currently handle the error properly. + +Signed-off-by: Brian Foster +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_dquot.c | 6 +++--- + fs/xfs/xfs_dquot_item.c | 3 ++- + fs/xfs/xfs_qm.c | 14 +++++++++----- + 3 files changed, 14 insertions(+), 9 deletions(-) + +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -1105,8 +1105,8 @@ xfs_qm_dqflush( + * Get the buffer containing the on-disk dquot + */ + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, +- mp->m_quotainfo->qi_dqchunklen, 0, &bp, +- &xfs_dquot_buf_ops); ++ mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, ++ &bp, &xfs_dquot_buf_ops); + if (error) + goto out_unlock; + +@@ -1176,7 +1176,7 @@ xfs_qm_dqflush( + + out_unlock: + xfs_dqfunlock(dqp); +- return -EIO; ++ return error; + } + + /* +--- a/fs/xfs/xfs_dquot_item.c ++++ b/fs/xfs/xfs_dquot_item.c +@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push( + if (!xfs_buf_delwri_queue(bp, buffer_list)) + rval = XFS_ITEM_FLUSHING; + xfs_buf_relse(bp); +- } ++ } else if (error == -EAGAIN) ++ rval = XFS_ITEM_LOCKED; + + spin_lock(&lip->li_ailp->ail_lock); + out_unlock: +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -121,12 +121,11 @@ xfs_qm_dqpurge( + { + struct xfs_mount *mp = dqp->q_mount; + struct xfs_quotainfo *qi = mp->m_quotainfo; ++ int error = -EAGAIN; + + xfs_dqlock(dqp); +- if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { +- xfs_dqunlock(dqp); +- return -EAGAIN; +- } ++ if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) ++ goto out_unlock; + + dqp->dq_flags |= XFS_DQ_FREEING; + +@@ -139,7 +138,6 @@ xfs_qm_dqpurge( + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + struct xfs_buf *bp = NULL; +- int error; + + /* + * We don't care about getting disk errors here. 
We need +@@ -149,6 +147,8 @@ xfs_qm_dqpurge( + if (!error) { + error = xfs_bwrite(bp); + xfs_buf_relse(bp); ++ } else if (error == -EAGAIN) { ++ goto out_unlock; + } + xfs_dqflock(dqp); + } +@@ -174,6 +174,10 @@ xfs_qm_dqpurge( + + xfs_qm_dqdestroy(dqp); + return 0; ++ ++out_unlock: ++ xfs_dqunlock(dqp); ++ return error; + } + + /* diff --git a/queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch b/queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch new file mode 100644 index 00000000000..3dc8b69e344 --- /dev/null +++ b/queue-5.4/xfs-use-scnprintf-for-avoiding-potential-buffer-overflow.patch @@ -0,0 +1,57 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:26 +0530 +Subject: xfs: Use scnprintf() for avoiding potential buffer overflow +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-10-chandan.babu@oracle.com> + +From: Takashi Iwai + +commit 17bb60b74124e9491d593e2601e3afe14daa2f57 upstream. + +Since snprintf() returns the would-be-output size instead of the +actual output size, the succeeding calls may go beyond the given +buffer limit. Fix it by replacing with scnprintf(). + +Signed-off-by: Takashi Iwai +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_stats.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/xfs/xfs_stats.c ++++ b/fs/xfs/xfs_stats.c +@@ -57,13 +57,13 @@ int xfs_stats_format(struct xfsstats __p + /* Loop over all stats groups */ + + for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { +- len += snprintf(buf + len, PATH_MAX - len, "%s", ++ len += scnprintf(buf + len, PATH_MAX - len, "%s", + xstats[i].desc); + /* inner loop does each group */ + for (; j < xstats[i].endpoint; j++) +- len += snprintf(buf + len, PATH_MAX - len, " %u", ++ len += scnprintf(buf + len, PATH_MAX - len, " %u", + counter_val(stats, j)); +- len += snprintf(buf + len, PATH_MAX - len, "\n"); ++ len += scnprintf(buf + len, PATH_MAX - len, "\n"); + } + /* extra precision counters */ + for_each_possible_cpu(i) { +@@ -72,9 +72,9 @@ int xfs_stats_format(struct xfsstats __p + xs_read_bytes += per_cpu_ptr(stats, i)->s.xs_read_bytes; + } + +- len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", ++ len += scnprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n", + xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); +- len += snprintf(buf + len, PATH_MAX-len, "debug %u\n", ++ len += scnprintf(buf + len, PATH_MAX-len, "debug %u\n", + #if defined(DEBUG) + 1); + #else diff --git a/queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch b/queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch new file mode 100644 index 00000000000..7e19e084cc6 --- /dev/null +++ b/queue-5.4/xfs-xfs_buf_corruption_error-should-take-__this_address.patch @@ -0,0 +1,69 @@ +From foo@baz Wed Oct 26 04:49:40 PM CEST 2022 +From: Chandan Babu R +Date: Wed, 26 Oct 2022 11:58:22 +0530 +Subject: xfs: xfs_buf_corruption_error should take __this_address +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, 
amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221026062843.927600-6-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit e83cf875d67a6cb9ddfaa8b45d2fa93d12b5c66f upstream. + +Add a xfs_failaddr_t parameter to this function so that callers can +potentially pass in (and therefore report) the exact point in the code +where we decided that a metadata buffer was corrupt. This enables us to +wire it up to checking functions that have to run outside of verifiers. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_buf.c | 2 +- + fs/xfs/xfs_error.c | 5 +++-- + fs/xfs/xfs_error.h | 2 +- + 3 files changed, 5 insertions(+), 4 deletions(-) + +--- a/fs/xfs/xfs_buf.c ++++ b/fs/xfs/xfs_buf.c +@@ -1564,7 +1564,7 @@ __xfs_buf_mark_corrupt( + { + ASSERT(bp->b_flags & XBF_DONE); + +- xfs_buf_corruption_error(bp); ++ xfs_buf_corruption_error(bp, fa); + xfs_buf_stale(bp); + } + +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -350,13 +350,14 @@ xfs_corruption_error( + */ + void + xfs_buf_corruption_error( +- struct xfs_buf *bp) ++ struct xfs_buf *bp, ++ xfs_failaddr_t fa) + { + struct xfs_mount *mp = bp->b_mount; + + xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, + "Metadata corruption detected at %pS, %s block 0x%llx", +- __return_address, bp->b_ops->name, bp->b_bn); ++ fa, bp->b_ops->name, bp->b_bn); + + xfs_alert(mp, "Unmount and run xfs_repair"); + +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -15,7 +15,7 @@ extern void xfs_corruption_error(const c + struct xfs_mount *mp, const void *buf, size_t bufsize, + const char *filename, int linenum, + xfs_failaddr_t failaddr); +-void xfs_buf_corruption_error(struct xfs_buf *bp); ++void xfs_buf_corruption_error(struct xfs_buf *bp, xfs_failaddr_t fa); + extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, + const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t 
failaddr);