From 6cee36365e2bced1b42c7020502a602219350a59 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 21 Sep 2022 11:01:24 +0200 Subject: [PATCH] 5.4-stable patches added patches: iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch xfs-always-log-corruption-errors.patch xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch xfs-fix-some-memory-leaks-in-log-recovery.patch xfs-range-check-ri_cnt-when-recovering-log-items.patch xfs-refactor-agfl-length-computation-function.patch xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch xfs-split-the-sunit-parameter-update-into-two-parts.patch xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch --- ...ds-beyond-eof-should-be-marked-dirty.patch | 69 ++ ...-chandan-as-xfs-maintainer-for-5.4.y.patch | 37 + queue-5.4/series | 17 + ...-assert-in-xfs_fsmap_owner_from_rmap.patch | 34 + .../xfs-always-log-corruption-errors.patch | 834 ++++++++++++++++++ ...a-blocks-during-unwritten-conversion.patch | 57 ++ ...pointer-arguments-to-error-functions.patch | 102 +++ ...rupted-when-log-contents-are-invalid.patch | 222 +++++ ...-if-that-would-cause-repair-failures.patch | 230 +++++ ...-when-target_ip-exists-in-xfs_rename.patch | 133 +++ ...ix-some-memory-leaks-in-log-recovery.patch | 50 ++ ...eck-ri_cnt-when-recovering-log-items.patch | 47 + ...tor-agfl-length-computation-function.patch | 65 ++ ...th-efscorrupted-for-corrupt-metadata.patch | 
96 ++ ...tweak-an-assert-in-xfs_fs_map_blocks.patch | 41 + ...unit-parameter-update-into-two-parts.patch | 190 ++++ ...boundary-to-avoid-cow-writeback-race.patch | 96 ++ ...face-for-buf-log-item-ail-flag-check.patch | 41 + 18 files changed, 2361 insertions(+) create mode 100644 queue-5.4/iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch create mode 100644 queue-5.4/maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch create mode 100644 queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch create mode 100644 queue-5.4/xfs-always-log-corruption-errors.patch create mode 100644 queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch create mode 100644 queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch create mode 100644 queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch create mode 100644 queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch create mode 100644 queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch create mode 100644 queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch create mode 100644 queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch create mode 100644 queue-5.4/xfs-refactor-agfl-length-computation-function.patch create mode 100644 queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch create mode 100644 queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch create mode 100644 queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch create mode 100644 queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch create mode 100644 queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch diff --git a/queue-5.4/iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch b/queue-5.4/iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch new file mode 
100644 index 00000000000..5288946aefa --- /dev/null +++ b/queue-5.4/iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch @@ -0,0 +1,69 @@ +From foo@baz Wed Sep 21 10:59:33 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:37 +0530 +Subject: iomap: iomap that extends beyond EOF should be marked dirty +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-3-chandan.babu@oracle.com> + +From: Chandan Babu R + +From: Dave Chinner + +commit 7684e2c4384d5d1f884b01ab8bff2369e4db0bff upstream. + +When doing a direct IO that spans the current EOF, and there are +written blocks beyond EOF that extend beyond the current write, the +only metadata update that needs to be done is a file size extension. + +However, we don't mark such iomaps as IOMAP_F_DIRTY to indicate that +there is IO completion metadata updates required, and hence we may +fail to correctly sync file size extensions made in IO completion +when O_DSYNC writes are being used and the hardware supports FUA. + +Hence when setting IOMAP_F_DIRTY, we need to also take into account +whether the iomap spans the current EOF. If it does, then we need to +mark it dirty so that IO completion will call generic_write_sync() +to flush the inode size update to stable storage correctly. + +Fixes: 3460cac1ca76 ("iomap: Use FUA for pure data O_DSYNC DIO writes") +Signed-off-by: Dave Chinner +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +[darrick: removed the ext4 part; they'll handle it separately] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_iomap.c | 7 +++++++ + include/linux/iomap.h | 2 ++ + 2 files changed, 9 insertions(+) + +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -1055,6 +1055,13 @@ xfs_file_iomap_begin( + trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); + + out_finish: ++ /* ++ * Writes that span EOF might trigger an IO size update on completion, ++ * so consider them to be dirty for the purposes of O_DSYNC even if ++ * there is no other metadata changes pending or have been made here. ++ */ ++ if ((flags & IOMAP_WRITE) && offset + length > i_size_read(inode)) ++ iomap->flags |= IOMAP_F_DIRTY; + return xfs_bmbt_to_iomap(ip, iomap, &imap, shared); + + out_found: +--- a/include/linux/iomap.h ++++ b/include/linux/iomap.h +@@ -32,6 +32,8 @@ struct vm_fault; + * + * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access + * written data and requires fdatasync to commit them to persistent storage. ++ * This needs to take into account metadata changes that *may* be made at IO ++ * completion, such as file size updates from direct IO. 
+ */ + #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ + #define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ diff --git a/queue-5.4/maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch b/queue-5.4/maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch new file mode 100644 index 00000000000..67882f9d8c3 --- /dev/null +++ b/queue-5.4/maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch @@ -0,0 +1,37 @@ +From foo@baz Wed Sep 21 10:59:33 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:36 +0530 +Subject: MAINTAINERS: add Chandan as xfs maintainer for 5.4.y +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-2-chandan.babu@oracle.com> + +From: Chandan Babu R + +This is an attempt to direct the bots and humans that are testing +LTS 5.4.y towards the maintainer of xfs in the 5.4.y tree. + +Update Darrick's email address from upstream and add Chandan as xfs +maintainer for the 5.4.y tree. + +Suggested-by: Darrick J. Wong +Link: https://lore.kernel.org/linux-xfs/Yrx6%2F0UmYyuBPjEr@magnolia/ +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + MAINTAINERS | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -17864,7 +17864,8 @@ S: Supported + F: sound/xen/* + + XFS FILESYSTEM +-M: Darrick J. Wong ++M: Chandan Babu R ++M: Darrick J.
Wong + M: linux-xfs@vger.kernel.org + L: linux-xfs@vger.kernel.org + W: http://xfs.org/ diff --git a/queue-5.4/series b/queue-5.4/series index cdbd5aee88d..71d204d4914 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -11,6 +11,23 @@ task_stack-x86-cea-force-inline-stack-helpers.patch tracing-hold-caller_addr-to-hardirq_-enable-disable-.patch cifs-revalidate-mapping-when-doing-direct-writes.patch cifs-don-t-send-down-the-destination-address-to-sendmsg-for-a-sock_stream.patch +maintainers-add-chandan-as-xfs-maintainer-for-5.4.y.patch +iomap-iomap-that-extends-beyond-eof-should-be-marked-dirty.patch +xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch +xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch +xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch +xfs-range-check-ri_cnt-when-recovering-log-items.patch +xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch +xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch +xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch +xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch +xfs-always-log-corruption-errors.patch +xfs-fix-some-memory-leaks-in-log-recovery.patch +xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch +xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch +xfs-refactor-agfl-length-computation-function.patch +xfs-split-the-sunit-parameter-update-into-two-parts.patch +xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch asoc-nau8824-fix-semaphore-unbalance-at-error-paths.patch regulator-pfuze100-fix-the-global-out-of-bounds-acce.patch rxrpc-fix-local-destruction-being-repeated.patch diff --git a/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch b/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch new file mode 100644 index 00000000000..cd2a33575cc --- /dev/null +++ 
b/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch @@ -0,0 +1,34 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:40 +0530 +Subject: xfs: add missing assert in xfs_fsmap_owner_from_rmap +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-6-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 110f09cb705af8c53f2a457baf771d2935ed62d4 upstream. + +The fsmap handler shouldn't fail silently if the rmap code ever feeds it +a special owner number that isn't known to the fsmap handler. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_fsmap.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -146,6 +146,7 @@ xfs_fsmap_owner_from_rmap( + dest->fmr_owner = XFS_FMR_OWN_FREE; + break; + default: ++ ASSERT(0); + return -EFSCORRUPTED; + } + return 0; diff --git a/queue-5.4/xfs-always-log-corruption-errors.patch b/queue-5.4/xfs-always-log-corruption-errors.patch new file mode 100644 index 00000000000..a585d4359b3 --- /dev/null +++ b/queue-5.4/xfs-always-log-corruption-errors.patch @@ -0,0 +1,834 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:46 +0530 +Subject: xfs: always log corruption errors +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-12-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit a5155b870d687de1a5f07e774b49b1e8ef0f6f50 upstream. 
+ +Make sure we log something to dmesg whenever we return -EFSCORRUPTED up +the call stack. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Carlos Maiolino +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 9 +++++++-- + fs/xfs/libxfs/xfs_attr_leaf.c | 12 +++++++++--- + fs/xfs/libxfs/xfs_bmap.c | 8 +++++++- + fs/xfs/libxfs/xfs_btree.c | 5 ++++- + fs/xfs/libxfs/xfs_da_btree.c | 24 ++++++++++++++++++------ + fs/xfs/libxfs/xfs_dir2.c | 4 +++- + fs/xfs/libxfs/xfs_dir2_leaf.c | 4 +++- + fs/xfs/libxfs/xfs_dir2_node.c | 12 +++++++++--- + fs/xfs/libxfs/xfs_inode_fork.c | 6 ++++++ + fs/xfs/libxfs/xfs_refcount.c | 4 +++- + fs/xfs/libxfs/xfs_rtbitmap.c | 6 ++++-- + fs/xfs/xfs_acl.c | 15 ++++++++++++--- + fs/xfs/xfs_attr_inactive.c | 6 +++++- + fs/xfs/xfs_attr_list.c | 5 ++++- + fs/xfs/xfs_bmap_item.c | 3 ++- + fs/xfs/xfs_error.c | 21 +++++++++++++++++++++ + fs/xfs/xfs_error.h | 1 + + fs/xfs/xfs_extfree_item.c | 3 ++- + fs/xfs/xfs_inode.c | 15 ++++++++++++--- + fs/xfs/xfs_inode_item.c | 5 ++++- + fs/xfs/xfs_iops.c | 10 +++++++--- + fs/xfs/xfs_log_recover.c | 23 ++++++++++++++++++----- + fs/xfs/xfs_qm.c | 13 +++++++++++-- + fs/xfs/xfs_refcount_item.c | 3 ++- + fs/xfs/xfs_rmap_item.c | 7 +++++-- + 25 files changed, 179 insertions(+), 45 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -684,8 +684,10 @@ xfs_alloc_update_counters( + + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > +- be32_to_cpu(agf->agf_length))) ++ be32_to_cpu(agf->agf_length))) { ++ xfs_buf_corruption_error(agbp); + return -EFSCORRUPTED; ++ } + + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; +@@ -751,6 +753,7 @@ xfs_alloc_ag_vextent_small( + + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); + if (!bp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, args->mp); + error = -EFSCORRUPTED; + goto error; + } 
+@@ -2087,8 +2090,10 @@ xfs_free_agfl_block( + return error; + + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); +- if (!bp) ++ if (!bp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp); + return -EFSCORRUPTED; ++ } + xfs_trans_binval(tp, bp); + + return 0; +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -2287,8 +2287,10 @@ xfs_attr3_leaf_lookup_int( + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); + entries = xfs_attr3_leaf_entryp(leaf); +- if (ichdr.count >= args->geo->blksize / 8) ++ if (ichdr.count >= args->geo->blksize / 8) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Binary search. (note: small blocks will skip this loop) +@@ -2304,10 +2306,14 @@ xfs_attr3_leaf_lookup_int( + else + break; + } +- if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) ++ if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; +- if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) ++ } ++ if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Since we may have duplicate hashval's, find the first matching +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -729,6 +729,7 @@ xfs_bmap_extents_to_btree( + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); + if (!abp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto out_unreserve_dquot; + } +@@ -1084,6 +1085,7 @@ xfs_bmap_add_attrfork( + if (XFS_IFORK_Q(ip)) + goto trans_cancel; + if (ip->i_d.di_anextents != 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto trans_cancel; + } +@@ -1374,6 +1376,7 @@ xfs_bmap_last_before( + case XFS_DINODE_FMT_EXTENTS: + break; + default: ++ ASSERT(0); + return -EFSCORRUPTED; + } + +@@ -1474,8 
+1477,10 @@ xfs_bmap_last_offset( + return 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && +- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) ++ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) { ++ ASSERT(0); + return -EFSCORRUPTED; ++ } + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) +@@ -5872,6 +5877,7 @@ xfs_bmap_insert_extents( + del_cursor); + + if (stop_fsb >= got.br_startoff + got.br_blockcount) { ++ ASSERT(0); + error = -EFSCORRUPTED; + goto del_cursor; + } +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -1820,6 +1820,7 @@ xfs_btree_lookup_get_block( + + out_bad: + *blkp = NULL; ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; + } +@@ -1867,8 +1868,10 @@ xfs_btree_lookup( + XFS_BTREE_STATS_INC(cur, lookup); + + /* No such thing as a zero-level tree. */ +- if (cur->bc_nlevels == 0) ++ if (cur->bc_nlevels == 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, cur->bc_mp); + return -EFSCORRUPTED; ++ } + + block = NULL; + keyno = 0; +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -504,6 +504,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.forw) { + if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { ++ xfs_buf_corruption_error(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -516,6 +517,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.back) { + if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { ++ xfs_buf_corruption_error(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -1541,8 +1543,10 @@ xfs_da3_node_lookup_int( + break; + } + +- if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) ++ if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; ++ } + + blk->magic = XFS_DA_NODE_MAGIC; + +@@ -1554,15 +1558,18 @@ 
xfs_da3_node_lookup_int( + btree = dp->d_ops->node_tree_p(node); + + /* Tree taller than we can handle; bail out! */ +- if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) ++ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; ++ } + + /* Check the level from the root. */ + if (blkno == args->geo->leafblk) + expected_level = nodehdr.level - 1; +- else if (expected_level != nodehdr.level) ++ else if (expected_level != nodehdr.level) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; +- else ++ } else + expected_level--; + + max = nodehdr.count; +@@ -1612,12 +1619,17 @@ xfs_da3_node_lookup_int( + } + + /* We can't point back to the root. */ +- if (blkno == args->geo->leafblk) ++ if (blkno == args->geo->leafblk) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ dp->i_mount); + return -EFSCORRUPTED; ++ } + } + +- if (expected_level != 0) ++ if (expected_level != 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, dp->i_mount); + return -EFSCORRUPTED; ++ } + + /* + * A leaf block that ends in the hashval that we are interested in +--- a/fs/xfs/libxfs/xfs_dir2.c ++++ b/fs/xfs/libxfs/xfs_dir2.c +@@ -600,8 +600,10 @@ xfs_dir2_isblock( + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) + return rval; + rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; +- if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) ++ if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount); + return -EFSCORRUPTED; ++ } + *vp = rval; + return 0; + } +--- a/fs/xfs/libxfs/xfs_dir2_leaf.c ++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c +@@ -1343,8 +1343,10 @@ xfs_dir2_leaf_removename( + oldbest = be16_to_cpu(bf[0].length); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); +- if (be16_to_cpu(bestsp[db]) != oldbest) ++ if (be16_to_cpu(bestsp[db]) != oldbest) { ++ xfs_buf_corruption_error(lbp); + 
return -EFSCORRUPTED; ++ } + /* + * Mark the former data entry unused. + */ +--- a/fs/xfs/libxfs/xfs_dir2_node.c ++++ b/fs/xfs/libxfs/xfs_dir2_node.c +@@ -374,8 +374,10 @@ xfs_dir2_leaf_to_node( + leaf = lbp->b_addr; + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + if (be32_to_cpu(ltp->bestcount) > +- (uint)dp->i_d.di_size / args->geo->blksize) ++ (uint)dp->i_d.di_size / args->geo->blksize) { ++ xfs_buf_corruption_error(lbp); + return -EFSCORRUPTED; ++ } + + /* + * Copy freespace entries from the leaf block to the new block. +@@ -446,8 +448,10 @@ xfs_dir2_leafn_add( + * Quick check just to make sure we are not going to index + * into other peoples memory + */ +- if (index < 0) ++ if (index < 0) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * If there are already the maximum number of leaf entries in +@@ -740,8 +744,10 @@ xfs_dir2_leafn_lookup_for_entry( + ents = dp->d_ops->leaf_ents_p(leaf); + + xfs_dir3_leaf_check(dp, bp); +- if (leafhdr.count <= 0) ++ if (leafhdr.count <= 0) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Look up the hash value in the leaf entries. 
+--- a/fs/xfs/libxfs/xfs_inode_fork.c ++++ b/fs/xfs/libxfs/xfs_inode_fork.c +@@ -75,11 +75,15 @@ xfs_iformat_fork( + error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + break; + default: ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, ++ dip, sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } + break; + + default: ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, ++ sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } + if (error) +@@ -110,6 +114,8 @@ xfs_iformat_fork( + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: ++ xfs_inode_verifier_error(ip, error, __func__, dip, ++ sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + break; + } +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1591,8 +1591,10 @@ xfs_refcount_recover_extent( + struct list_head *debris = priv; + struct xfs_refcount_recovery *rr; + +- if (be32_to_cpu(rec->refc.rc_refcount) != 1) ++ if (be32_to_cpu(rec->refc.rc_refcount) != 1) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, cur->bc_mp); + return -EFSCORRUPTED; ++ } + + rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -15,7 +15,7 @@ + #include "xfs_bmap.h" + #include "xfs_trans.h" + #include "xfs_rtalloc.h" +- ++#include "xfs_error.h" + + /* + * Realtime allocator bitmap functions shared with userspace. 
+@@ -70,8 +70,10 @@ xfs_rtbuf_get( + if (error) + return error; + +- if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) ++ if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; ++ } + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, +--- a/fs/xfs/xfs_acl.c ++++ b/fs/xfs/xfs_acl.c +@@ -12,6 +12,7 @@ + #include "xfs_inode.h" + #include "xfs_attr.h" + #include "xfs_trace.h" ++#include "xfs_error.h" + #include + + +@@ -23,6 +24,7 @@ + + STATIC struct posix_acl * + xfs_acl_from_disk( ++ struct xfs_mount *mp, + const struct xfs_acl *aclp, + int len, + int max_entries) +@@ -32,11 +34,18 @@ xfs_acl_from_disk( + const struct xfs_acl_entry *ace; + unsigned int count, i; + +- if (len < sizeof(*aclp)) ++ if (len < sizeof(*aclp)) { ++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, ++ len); + return ERR_PTR(-EFSCORRUPTED); ++ } ++ + count = be32_to_cpu(aclp->acl_cnt); +- if (count > max_entries || XFS_ACL_SIZE(count) != len) ++ if (count > max_entries || XFS_ACL_SIZE(count) != len) { ++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, ++ len); + return ERR_PTR(-EFSCORRUPTED); ++ } + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) +@@ -145,7 +154,7 @@ xfs_get_acl(struct inode *inode, int typ + if (error != -ENOATTR) + acl = ERR_PTR(error); + } else { +- acl = xfs_acl_from_disk(xfs_acl, len, ++ acl = xfs_acl_from_disk(ip->i_mount, xfs_acl, len, + XFS_ACL_MAX_ENTRIES(ip->i_mount)); + kmem_free(xfs_acl); + } +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -22,6 +22,7 @@ + #include "xfs_attr_leaf.h" + #include "xfs_quota.h" + #include "xfs_dir2.h" ++#include "xfs_error.h" + + /* + * Look at all the extents for this logical region, +@@ -209,6 +210,7 @@ xfs_attr3_node_inactive( + */ + if (level > XFS_DA_NODE_MAXDEPTH) { + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ ++ 
xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; + } + +@@ -258,8 +260,9 @@ xfs_attr3_node_inactive( + error = xfs_attr3_leaf_inactive(trans, dp, child_bp); + break; + default: +- error = -EFSCORRUPTED; ++ xfs_buf_corruption_error(child_bp); + xfs_trans_brelse(*trans, child_bp); ++ error = -EFSCORRUPTED; + break; + } + if (error) +@@ -342,6 +345,7 @@ xfs_attr3_root_inactive( + break; + default: + error = -EFSCORRUPTED; ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(*trans, bp); + break; + } +--- a/fs/xfs/xfs_attr_list.c ++++ b/fs/xfs/xfs_attr_list.c +@@ -258,8 +258,10 @@ xfs_attr_node_list_lookup( + return 0; + + /* We can't point back to the root. */ +- if (cursor->blkno == 0) ++ if (cursor->blkno == 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; ++ } + } + + if (expected_level != 0) +@@ -269,6 +271,7 @@ xfs_attr_node_list_lookup( + return 0; + + out_corruptbuf: ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(tp, bp); + return -EFSCORRUPTED; + } +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -21,7 +21,7 @@ + #include "xfs_icache.h" + #include "xfs_bmap_btree.h" + #include "xfs_trans_space.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_bui_zone; + kmem_zone_t *xfs_bud_zone; +@@ -525,6 +525,7 @@ xfs_bui_recover( + type = bui_type; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto err_inode; + } +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -342,6 +342,27 @@ xfs_corruption_error( + } + + /* ++ * Complain about the kinds of metadata corruption that we can't detect from a ++ * verifier, such as incorrect inter-block relationship data. Does not set ++ * bp->b_error. 
++ */ ++void ++xfs_buf_corruption_error( ++ struct xfs_buf *bp) ++{ ++ struct xfs_mount *mp = bp->b_mount; ++ ++ xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, ++ "Metadata corruption detected at %pS, %s block 0x%llx", ++ __return_address, bp->b_ops->name, bp->b_bn); ++ ++ xfs_alert(mp, "Unmount and run xfs_repair"); ++ ++ if (xfs_error_level >= XFS_ERRLEVEL_HIGH) ++ xfs_stack_trace(); ++} ++ ++/* + * Warnings specifically for verifier errors. Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -15,6 +15,7 @@ extern void xfs_corruption_error(const c + struct xfs_mount *mp, const void *buf, size_t bufsize, + const char *filename, int linenum, + xfs_failaddr_t failaddr); ++void xfs_buf_corruption_error(struct xfs_buf *bp); + extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, + const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -21,7 +21,7 @@ + #include "xfs_alloc.h" + #include "xfs_bmap.h" + #include "xfs_trace.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_efi_zone; + kmem_zone_t *xfs_efd_zone; +@@ -228,6 +228,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf + } + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2149,8 +2149,10 @@ xfs_iunlink_update_bucket( + * passed in because either we're adding or removing ourselves from the + * head of the list. + */ +- if (old_value == new_agino) ++ if (old_value == new_agino) { ++ xfs_buf_corruption_error(agibp); + return -EFSCORRUPTED; ++ } + + agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); + offset = offsetof(struct xfs_agi, agi_unlinked) + +@@ -2213,6 +2215,8 @@ xfs_iunlink_update_inode( + /* Make sure the old pointer isn't garbage. 
*/ + old_value = be32_to_cpu(dip->di_next_unlinked); + if (!xfs_verify_agino_or_null(mp, agno, old_value)) { ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, ++ sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + goto out; + } +@@ -2224,8 +2228,11 @@ xfs_iunlink_update_inode( + */ + *old_next_agino = old_value; + if (old_value == next_agino) { +- if (next_agino != NULLAGINO) ++ if (next_agino != NULLAGINO) { ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, ++ dip, sizeof(*dip), __this_address); + error = -EFSCORRUPTED; ++ } + goto out; + } + +@@ -2276,8 +2283,10 @@ xfs_iunlink( + */ + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || +- !xfs_verify_agino_or_null(mp, agno, next_agino)) ++ !xfs_verify_agino_or_null(mp, agno, next_agino)) { ++ xfs_buf_corruption_error(agibp); + return -EFSCORRUPTED; ++ } + + if (next_agino != NULLAGINO) { + struct xfs_perag *pag; +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -17,6 +17,7 @@ + #include "xfs_trans_priv.h" + #include "xfs_buf_item.h" + #include "xfs_log.h" ++#include "xfs_error.h" + + #include + +@@ -828,8 +829,10 @@ xfs_inode_item_format_convert( + { + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + +- if (buf->i_len != sizeof(*in_f32)) ++ if (buf->i_len != sizeof(*in_f32)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; ++ } + + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -20,6 +20,7 @@ + #include "xfs_symlink.h" + #include "xfs_dir2.h" + #include "xfs_iomap.h" ++#include "xfs_error.h" + + #include + #include +@@ -470,17 +471,20 @@ xfs_vn_get_link_inline( + struct inode *inode, + struct delayed_call *done) + { ++ struct xfs_inode *ip = XFS_I(inode); + char *link; + +- ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); ++ ASSERT(ip->i_df.if_flags & XFS_IFINLINE); + + /* + * The VFS crashes on a NULL pointer, so 
return -EFSCORRUPTED if + * if_data is junk. + */ +- link = XFS_I(inode)->i_df.if_u1.if_data; +- if (!link) ++ link = ip->i_df.if_u1.if_data; ++ if (!link) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ip->i_mount); + return ERR_PTR(-EFSCORRUPTED); ++ } + return link; + } + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -3537,6 +3537,7 @@ xfs_cui_copy_format( + memcpy(dst_cui_fmt, src_cui_fmt, len); + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +@@ -3601,8 +3602,10 @@ xlog_recover_cud_pass2( + struct xfs_ail *ailp = log->l_ailp; + + cud_formatp = item->ri_buf[0].i_addr; +- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) ++ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + cui_id = cud_formatp->cud_cui_id; + + /* +@@ -3654,6 +3657,7 @@ xfs_bui_copy_format( + memcpy(dst_bui_fmt, src_bui_fmt, len); + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +@@ -3677,8 +3681,10 @@ xlog_recover_bui_pass2( + + bui_formatp = item->ri_buf[0].i_addr; + +- if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) ++ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + buip = xfs_bui_init(mp); + error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); + if (error) { +@@ -3720,8 +3726,10 @@ xlog_recover_bud_pass2( + struct xfs_ail *ailp = log->l_ailp; + + bud_formatp = item->ri_buf[0].i_addr; +- if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) ++ if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + bui_id = bud_formatp->bud_bui_id; + + /* +@@ -5181,8 +5189,10 @@ xlog_recover_process( + * If the filesystem is CRC 
enabled, this mismatch becomes a + * fatal log corruption failure. + */ +- if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) ++ if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + } + + xlog_unpack_data(rhead, dp, log); +@@ -5305,8 +5315,11 @@ xlog_do_recovery_pass( + "invalid iclog size (%d bytes), using lsunit (%d bytes)", + h_size, log->l_mp->m_logbsize); + h_size = log->l_mp->m_logbsize; +- } else ++ } else { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ log->l_mp); + return -EFSCORRUPTED; ++ } + } + + if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -22,6 +22,7 @@ + #include "xfs_qm.h" + #include "xfs_trace.h" + #include "xfs_icache.h" ++#include "xfs_error.h" + + /* + * The global quota manager. There is only one of these for the entire +@@ -754,11 +755,19 @@ xfs_qm_qino_alloc( + if ((flags & XFS_QMOPT_PQUOTA) && + (mp->m_sb.sb_gquotino != NULLFSINO)) { + ino = mp->m_sb.sb_gquotino; +- ASSERT(mp->m_sb.sb_pquotino == NULLFSINO); ++ if (mp->m_sb.sb_pquotino != NULLFSINO) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ mp); ++ return -EFSCORRUPTED; ++ } + } else if ((flags & XFS_QMOPT_GQUOTA) && + (mp->m_sb.sb_pquotino != NULLFSINO)) { + ino = mp->m_sb.sb_pquotino; +- ASSERT(mp->m_sb.sb_gquotino == NULLFSINO); ++ if (mp->m_sb.sb_gquotino != NULLFSINO) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ mp); ++ return -EFSCORRUPTED; ++ } + } + if (ino != NULLFSINO) { + error = xfs_iget(mp, NULL, ino, 0, 0, ip); +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -17,7 +17,7 @@ + #include "xfs_refcount_item.h" + #include "xfs_log.h" + #include "xfs_refcount.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_cui_zone; + kmem_zone_t *xfs_cud_zone; +@@ -536,6 +536,7 @@ xfs_cui_recover( + type = refc_type; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = 
-EFSCORRUPTED; + goto abort_error; + } +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -17,7 +17,7 @@ + #include "xfs_rmap_item.h" + #include "xfs_log.h" + #include "xfs_rmap.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_rui_zone; + kmem_zone_t *xfs_rud_zone; +@@ -171,8 +171,10 @@ xfs_rui_copy_format( + src_rui_fmt = buf->i_addr; + len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); + +- if (buf->i_len != len) ++ if (buf->i_len != len) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; ++ } + + memcpy(dst_rui_fmt, src_rui_fmt, len); + return 0; +@@ -581,6 +583,7 @@ xfs_rui_recover( + type = XFS_RMAP_FREE; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + error = -EFSCORRUPTED; + goto abort_error; + } diff --git a/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch b/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch new file mode 100644 index 00000000000..7583269ef66 --- /dev/null +++ b/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch @@ -0,0 +1,57 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:42 +0530 +Subject: xfs: attach dquots and reserve quota blocks during unwritten conversion +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-8-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 2815a16d7ff6230a8e37928829d221bb075aa160 upstream. + +In xfs_iomap_write_unwritten, we need to ensure that dquots are attached +to the inode and quota blocks reserved so that we capture in the quota +counters any blocks allocated to handle a bmbt split. 
This can happen +on the first unwritten extent conversion to a preallocated sparse file +on a fresh mount. + +This was found by running generic/311 with quotas enabled. The bug +seems to have been introduced in "[XFS] rework iocore infrastructure, +remove some code and make it more" from ~2002? + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_iomap.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -765,6 +765,11 @@ xfs_iomap_write_unwritten( + */ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; + ++ /* Attach dquots so that bmbt splits are accounted correctly. */ ++ error = xfs_qm_dqattach(ip); ++ if (error) ++ return error; ++ + do { + /* + * Set up a transaction to convert the range of extents +@@ -783,6 +788,11 @@ xfs_iomap_write_unwritten( + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + ++ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, ++ XFS_QMOPT_RES_REGBLKS); ++ if (error) ++ goto error_on_bmapi_transaction; ++ + /* + * Modify the unwritten extent state of the buffer. 
+ */ diff --git a/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch b/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch new file mode 100644 index 00000000000..d3bf73a14fb --- /dev/null +++ b/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch @@ -0,0 +1,102 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:45 +0530 +Subject: xfs: constify the buffer pointer arguments to error functions +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-11-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit d243b89a611e83dc97ce7102419360677a664076 upstream. + +Some of the xfs error message functions take a pointer to a buffer that +will be dumped to the system log. The logging functions don't change +the contents, so constify all the parameters. This enables the next +patch to ensure that we log bad metadata when we encounter it. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Carlos Maiolino +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_error.c | 6 +++--- + fs/xfs/xfs_error.h | 6 +++--- + fs/xfs/xfs_message.c | 2 +- + fs/xfs/xfs_message.h | 2 +- + 4 files changed, 8 insertions(+), 8 deletions(-) + +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -329,7 +329,7 @@ xfs_corruption_error( + const char *tag, + int level, + struct xfs_mount *mp, +- void *buf, ++ const void *buf, + size_t bufsize, + const char *filename, + int linenum, +@@ -350,7 +350,7 @@ xfs_buf_verifier_error( + struct xfs_buf *bp, + int error, + const char *name, +- void *buf, ++ const void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) + { +@@ -402,7 +402,7 @@ xfs_inode_verifier_error( + struct xfs_inode *ip, + int error, + const char *name, +- void *buf, ++ const void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) + { +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -12,16 +12,16 @@ extern void xfs_error_report(const char + const char *filename, int linenum, + xfs_failaddr_t failaddr); + extern void xfs_corruption_error(const char *tag, int level, +- struct xfs_mount *mp, void *buf, size_t bufsize, ++ struct xfs_mount *mp, const void *buf, size_t bufsize, + const char *filename, int linenum, + xfs_failaddr_t failaddr); + extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, +- const char *name, void *buf, size_t bufsz, ++ const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); + extern void xfs_verifier_error(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); + extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, +- const char *name, void *buf, size_t bufsz, ++ const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); + + #define XFS_ERROR_REPORT(e, lvl, mp) \ +--- a/fs/xfs/xfs_message.c ++++ b/fs/xfs/xfs_message.c +@@ -105,7 +105,7 @@ assfail(char *expr, char *file, int line + } + + void +-xfs_hex_dump(void *p, int length) ++xfs_hex_dump(const void 
*p, int length) + { + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); + } +--- a/fs/xfs/xfs_message.h ++++ b/fs/xfs/xfs_message.h +@@ -60,6 +60,6 @@ do { \ + extern void assfail(char *expr, char *f, int l); + extern void asswarn(char *expr, char *f, int l); + +-extern void xfs_hex_dump(void *p, int length); ++extern void xfs_hex_dump(const void *p, int length); + + #endif /* __XFS_MESSAGE_H */ diff --git a/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch b/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch new file mode 100644 index 00000000000..5d0eab6a371 --- /dev/null +++ b/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch @@ -0,0 +1,222 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:44 +0530 +Subject: xfs: convert EIO to EFSCORRUPTED when log contents are invalid +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-10-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 895e196fb6f84402dcd0c1d3c3feb8a58049564e upstream. + +Convert EIO to EFSCORRUPTED in the logging code when we can determine +that the log contents are invalid. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_bmap_item.c | 4 ++-- + fs/xfs/xfs_extfree_item.c | 2 +- + fs/xfs/xfs_log_recover.c | 32 ++++++++++++++++---------------- + fs/xfs/xfs_refcount_item.c | 2 +- + fs/xfs/xfs_rmap_item.c | 2 +- + 5 files changed, 21 insertions(+), 21 deletions(-) + +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -456,7 +456,7 @@ xfs_bui_recover( + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { + set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); + xfs_bui_release(buip); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -490,7 +490,7 @@ xfs_bui_recover( + */ + set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); + xfs_bui_release(buip); +- return -EIO; ++ return -EFSCORRUPTED; + } + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -624,7 +624,7 @@ xfs_efi_recover( + */ + set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); + xfs_efi_release(efip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -471,7 +471,7 @@ xlog_find_verify_log_record( + xfs_warn(log->l_mp, + "Log inconsistent (didn't find previous header)"); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + goto out; + } + +@@ -1350,7 +1350,7 @@ xlog_find_tail( + return error; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); + +@@ -3166,7 +3166,7 @@ xlog_recover_inode_pass2( + default: + xfs_warn(log->l_mp, "%s: Invalid flag", __func__); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + goto out_release; + } + } +@@ -3247,12 +3247,12 @@ xlog_recover_dquot_pass2( + recddq = item->ri_buf[1].i_addr; + if (recddq == NULL) { + xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + if 
(item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { + xfs_alert(log->l_mp, "dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -3279,7 +3279,7 @@ xlog_recover_dquot_pass2( + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", + dq_f->qlf_id, fa); +- return -EIO; ++ return -EFSCORRUPTED; + } + ASSERT(dq_f->qlf_len == 1); + +@@ -4018,7 +4018,7 @@ xlog_recover_commit_pass1( + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +@@ -4066,7 +4066,7 @@ xlog_recover_commit_pass2( + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +@@ -4187,7 +4187,7 @@ xlog_recover_add_to_cont_trans( + ASSERT(len <= sizeof(struct xfs_trans_header)); + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + + xlog_recover_add_item(&trans->r_itemq); +@@ -4243,13 +4243,13 @@ xlog_recover_add_to_trans( + xfs_warn(log->l_mp, "%s: bad header magic number", + __func__); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -4285,7 +4285,7 @@ xlog_recover_add_to_trans( + in_f->ilf_size); + ASSERT(0); + kmem_free(ptr); +- return -EIO; ++ return -EFSCORRUPTED; + } + + item->ri_total = in_f->ilf_size; +@@ -4389,7 +4389,7 @@ xlog_recovery_process_trans( + default: + xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + break; + } + if (error || freeit) +@@ -4469,7 +4469,7 @@ xlog_recover_process_ophdr( + xfs_warn(log->l_mp, "%s: bad clientid 0x%x", + __func__, ohead->oh_clientid); + ASSERT(0); +- return 
-EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -4479,7 +4479,7 @@ xlog_recover_process_ophdr( + if (dp + len > end) { + xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); + WARN_ON(1); +- return -EIO; ++ return -EFSCORRUPTED; + } + + trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); +@@ -5209,7 +5209,7 @@ xlog_valid_rec_header( + (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { + xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", + __func__, be32_to_cpu(rhead->h_version)); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* LR body must have data or it wouldn't have been written */ +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -497,7 +497,7 @@ xfs_cui_recover( + */ + set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); + xfs_cui_release(cuip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -539,7 +539,7 @@ xfs_rui_recover( + */ + set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags); + xfs_rui_release(ruip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + diff --git a/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch b/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch new file mode 100644 index 00000000000..54c91b42a91 --- /dev/null +++ b/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch @@ -0,0 +1,230 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:52 +0530 +Subject: xfs: don't commit sunit/swidth updates to disk if that would cause repair failures +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-18-chandan.babu@oracle.com> + +From: "Darrick J. 
Wong" + +commit 13eaec4b2adf2657b8167b67e27c97cc7314d923 upstream. + +Alex Lyakas reported[1] that mounting an xfs filesystem with new sunit +and swidth values could cause xfs_repair to fail loudly. The problem +here is that repair calculates where mkfs should have allocated the +root inode, based on the superblock geometry. The allocation decisions +depend on sunit, which means that we really can't go updating sunit if +it would lead to a subsequent repair failure on an otherwise correct +filesystem. + +Port from xfs_repair some code that computes the location of the root +inode and teach mount to skip the ondisk update if it would cause +problems for repair. Along the way we'll update the documentation, +provide a function for computing the minimum AGFL size instead of +open-coding it, and cut down some indenting in the mount code. + +Note that we allow the mount to proceed (and new allocations will +reflect this new geometry) because we've never screened this kind of +thing before. We'll have to wait for a new future incompat feature to +enforce correct behavior, alas. + +Note that the geometry reporting always uses the superblock values, not +the incore ones, so that is what xfs_info and xfs_growfs will report. + +[1] https://lore.kernel.org/linux-xfs/20191125130744.GA44777@bfoster/T/#m00f9594b511e076e2fcdd489d78bc30216d72a7d + +Reported-by: Alex Lyakas +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ialloc.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ + fs/xfs/libxfs/xfs_ialloc.h | 1 + fs/xfs/xfs_mount.c | 45 ++++++++++++++++++++++++++++++- + fs/xfs/xfs_trace.h | 21 ++++++++++++++ + 4 files changed, 130 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -2854,3 +2854,67 @@ xfs_ialloc_setup_geometry( + else + igeo->ialloc_align = 0; + } ++ ++/* Compute the location of the root directory inode that is laid out by mkfs. */ ++xfs_ino_t ++xfs_ialloc_calc_rootino( ++ struct xfs_mount *mp, ++ int sunit) ++{ ++ struct xfs_ino_geometry *igeo = M_IGEO(mp); ++ xfs_agblock_t first_bno; ++ ++ /* ++ * Pre-calculate the geometry of AG 0. We know what it looks like ++ * because libxfs knows how to create allocation groups now. ++ * ++ * first_bno is the first block in which mkfs could possibly have ++ * allocated the root directory inode, once we factor in the metadata ++ * that mkfs formats before it. Namely, the four AG headers... ++ */ ++ first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize); ++ ++ /* ...the two free space btree roots... */ ++ first_bno += 2; ++ ++ /* ...the inode btree root... */ ++ first_bno += 1; ++ ++ /* ...the initial AGFL... */ ++ first_bno += xfs_alloc_min_freelist(mp, NULL); ++ ++ /* ...the free inode btree root... */ ++ if (xfs_sb_version_hasfinobt(&mp->m_sb)) ++ first_bno++; ++ ++ /* ...the reverse mapping btree root... */ ++ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) ++ first_bno++; ++ ++ /* ...the reference count btree... */ ++ if (xfs_sb_version_hasreflink(&mp->m_sb)) ++ first_bno++; ++ ++ /* ++ * ...and the log, if it is allocated in the first allocation group. ++ * ++ * This can happen with filesystems that only have a single ++ * allocation group, or very odd geometries created by old mkfs ++ * versions on very small filesystems. 
++ */ ++ if (mp->m_sb.sb_logstart && ++ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) ++ first_bno += mp->m_sb.sb_logblocks; ++ ++ /* ++ * Now round first_bno up to whatever allocation alignment is given ++ * by the filesystem or was passed in. ++ */ ++ if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0) ++ first_bno = roundup(first_bno, sunit); ++ else if (xfs_sb_version_hasalign(&mp->m_sb) && ++ mp->m_sb.sb_inoalignmt > 1) ++ first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt); ++ ++ return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); ++} +--- a/fs/xfs/libxfs/xfs_ialloc.h ++++ b/fs/xfs/libxfs/xfs_ialloc.h +@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btre + + int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); + void xfs_ialloc_setup_geometry(struct xfs_mount *mp); ++xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); + + #endif /* __XFS_IALLOC_H__ */ +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -31,7 +31,7 @@ + #include "xfs_reflink.h" + #include "xfs_extent_busy.h" + #include "xfs_health.h" +- ++#include "xfs_trace.h" + + static DEFINE_MUTEX(xfs_uuid_table_mutex); + static int xfs_uuid_table_size; +@@ -365,6 +365,42 @@ release_buf: + } + + /* ++ * If the sunit/swidth change would move the precomputed root inode value, we ++ * must reject the ondisk change because repair will stumble over that. ++ * However, we allow the mount to proceed because we never rejected this ++ * combination before. Returns true to update the sb, false otherwise. 
++ */ ++static inline int ++xfs_check_new_dalign( ++ struct xfs_mount *mp, ++ int new_dalign, ++ bool *update_sb) ++{ ++ struct xfs_sb *sbp = &mp->m_sb; ++ xfs_ino_t calc_ino; ++ ++ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign); ++ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino); ++ ++ if (sbp->sb_rootino == calc_ino) { ++ *update_sb = true; ++ return 0; ++ } ++ ++ xfs_warn(mp, ++"Cannot change stripe alignment; would require moving root inode."); ++ ++ /* ++ * XXX: Next time we add a new incompat feature, this should start ++ * returning -EINVAL to fail the mount. Until then, spit out a warning ++ * that we're ignoring the administrator's instructions. ++ */ ++ xfs_warn(mp, "Skipping superblock stripe alignment update."); ++ *update_sb = false; ++ return 0; ++} ++ ++/* + * If we were provided with new sunit/swidth values as mount options, make sure + * that they pass basic alignment and superblock feature checks, and convert + * them into the same units (FSB) that everything else expects. 
This step +@@ -424,10 +460,17 @@ xfs_update_alignment( + struct xfs_sb *sbp = &mp->m_sb; + + if (mp->m_dalign) { ++ bool update_sb; ++ int error; ++ + if (sbp->sb_unit == mp->m_dalign && + sbp->sb_width == mp->m_swidth) + return 0; + ++ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb); ++ if (error || !update_sb) ++ return error; ++ + sbp->sb_unit = mp->m_dalign; + sbp->sb_width = mp->m_swidth; + mp->m_update_sb = true; +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3609,6 +3609,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large); + DEFINE_KMEM_EVENT(kmem_realloc); + DEFINE_KMEM_EVENT(kmem_zone_alloc); + ++TRACE_EVENT(xfs_check_new_dalign, ++ TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), ++ TP_ARGS(mp, new_dalign, calc_rootino), ++ TP_STRUCT__entry( ++ __field(dev_t, dev) ++ __field(int, new_dalign) ++ __field(xfs_ino_t, sb_rootino) ++ __field(xfs_ino_t, calc_rootino) ++ ), ++ TP_fast_assign( ++ __entry->dev = mp->m_super->s_dev; ++ __entry->new_dalign = new_dalign; ++ __entry->sb_rootino = mp->m_sb.sb_rootino; ++ __entry->calc_rootino = calc_rootino; ++ ), ++ TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->new_dalign, __entry->sb_rootino, ++ __entry->calc_rootino) ++) ++ + #endif /* _TRACE_XFS_H */ + + #undef TRACE_INCLUDE_PATH diff --git a/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch b/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch new file mode 100644 index 00000000000..d051c0cb547 --- /dev/null +++ b/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch @@ -0,0 +1,133 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:43 +0530 +Subject: xfs: Fix deadlock between AGI and AGF when target_ip exists in xfs_rename() +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, 
mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-9-chandan.babu@oracle.com> + +From: kaixuxia + +commit 93597ae8dac0149b5c00b787cba6bf7ba213e666 upstream. + +When target_ip exists in xfs_rename(), the xfs_dir_replace() call may +need to hold the AGF lock to allocate more blocks, and then invoke +the xfs_droplink() call to hold the AGI lock to drop target_ip onto the +unlinked list, so we get the lock order AGF->AGI. This would break the +ordering constraint on AGI and AGF locking - inode allocation locks +the AGI, then can allocate a new extent for new inodes, locking the +AGF after the AGI. + +In this patch we first check whether the replace operation needs more +blocks. If so, acquire the AGI lock first to preserve +locking order (AGI/AGF). Actually, the locking order problem only +occurs when we are locking the AGI/AGF of the same AG. For multiple +AGs the AGI lock will be released after the transaction is committed. + +Signed-off-by: kaixuxia +Reviewed-by: Darrick J. Wong +[darrick: reword the comment] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2.h | 2 ++ + fs/xfs/libxfs/xfs_dir2_sf.c | 28 +++++++++++++++++++++++----- + fs/xfs/xfs_inode.c | 17 +++++++++++++++++ + 3 files changed, 42 insertions(+), 5 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dir2.h ++++ b/fs/xfs/libxfs/xfs_dir2.h +@@ -124,6 +124,8 @@ extern int xfs_dir_lookup(struct xfs_tra + extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t ino, + xfs_extlen_t tot); ++extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp, ++ xfs_ino_t inum); + extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t inum, + xfs_extlen_t tot); +--- a/fs/xfs/libxfs/xfs_dir2_sf.c ++++ b/fs/xfs/libxfs/xfs_dir2_sf.c +@@ -945,6 +945,27 @@ xfs_dir2_sf_removename( + } + + /* ++ * Check whether the sf dir replace operation need more blocks. ++ */ ++bool ++xfs_dir2_sf_replace_needblock( ++ struct xfs_inode *dp, ++ xfs_ino_t inum) ++{ ++ int newsize; ++ struct xfs_dir2_sf_hdr *sfp; ++ ++ if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL) ++ return false; ++ ++ sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; ++ newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; ++ ++ return inum > XFS_DIR2_MAX_SHORT_INUM && ++ sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp); ++} ++ ++/* + * Replace the inode number of an entry in a shortform directory. + */ + int /* error */ +@@ -980,17 +1001,14 @@ xfs_dir2_sf_replace( + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + int error; /* error return value */ +- int newsize; /* new inode size */ + +- newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; + /* + * Won't fit as shortform, convert to block then do replace. 
+ */ +- if (newsize > XFS_IFORK_DSIZE(dp)) { ++ if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) { + error = xfs_dir2_sf_to_block(args); +- if (error) { ++ if (error) + return error; +- } + return xfs_dir2_block_replace(args); + } + /* +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3215,6 +3215,7 @@ xfs_rename( + struct xfs_trans *tp; + struct xfs_inode *wip = NULL; /* whiteout inode */ + struct xfs_inode *inodes[__XFS_SORT_INODES]; ++ struct xfs_buf *agibp; + int num_inodes = __XFS_SORT_INODES; + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); +@@ -3379,6 +3380,22 @@ xfs_rename( + * In case there is already an entry with the same + * name at the destination directory, remove it first. + */ ++ ++ /* ++ * Check whether the replace operation will need to allocate ++ * blocks. This happens when the shortform directory lacks ++ * space and we have to convert it to a block format directory. ++ * When more blocks are necessary, we must lock the AGI first ++ * to preserve locking order (AGI -> AGF). 
++ */ ++ if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) { ++ error = xfs_read_agi(mp, tp, ++ XFS_INO_TO_AGNO(mp, target_ip->i_ino), ++ &agibp); ++ if (error) ++ goto out_trans_cancel; ++ } ++ + error = xfs_dir_replace(tp, target_dp, target_name, + src_ip->i_ino, spaceres); + if (error) diff --git a/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch b/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch new file mode 100644 index 00000000000..61669d85570 --- /dev/null +++ b/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch @@ -0,0 +1,50 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:47 +0530 +Subject: xfs: fix some memory leaks in log recovery +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-13-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 050552cbe06a3a9c3f977dcf11ff998ae1d5c2d5 upstream. + +Fix a few places where we xlog_alloc_buffer a buffer, hit an error, and +then bail out without freeing the buffer. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -1347,10 +1347,11 @@ xlog_find_tail( + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, + &rhead_blk, &rhead, &wrapped); + if (error < 0) +- return error; ++ goto done; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); +- return -EFSCORRUPTED; ++ error = -EFSCORRUPTED; ++ goto done; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); + +@@ -5318,7 +5319,8 @@ xlog_do_recovery_pass( + } else { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, + log->l_mp); +- return -EFSCORRUPTED; ++ error = -EFSCORRUPTED; ++ goto bread_err1; + } + } + diff --git a/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch b/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch new file mode 100644 index 00000000000..12bc1cc0fb2 --- /dev/null +++ b/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch @@ -0,0 +1,47 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:41 +0530 +Subject: xfs: range check ri_cnt when recovering log items +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-7-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit d6abecb82573fed5f7e4b595b5c0bd37707d2848 upstream. + +Range check the region counter when we're reassembling regions from log +items during log recovery. In the old days ASSERT would halt the +kernel, but this isn't true any more so we have to make an explicit +error return. + +Coverity-id: 1132508 +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -4293,7 +4293,16 @@ xlog_recover_add_to_trans( + kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), + 0); + } +- ASSERT(item->ri_total > item->ri_cnt); ++ ++ if (item->ri_total <= item->ri_cnt) { ++ xfs_warn(log->l_mp, ++ "log item region count (%d) overflowed size (%d)", ++ item->ri_cnt, item->ri_total); ++ ASSERT(0); ++ kmem_free(ptr); ++ return -EFSCORRUPTED; ++ } ++ + /* Description region is ri_buf[0] */ + item->ri_buf[item->ri_cnt].i_addr = ptr; + item->ri_buf[item->ri_cnt].i_len = len; diff --git a/queue-5.4/xfs-refactor-agfl-length-computation-function.patch b/queue-5.4/xfs-refactor-agfl-length-computation-function.patch new file mode 100644 index 00000000000..cc2217fb415 --- /dev/null +++ b/queue-5.4/xfs-refactor-agfl-length-computation-function.patch @@ -0,0 +1,65 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:50 +0530 +Subject: xfs: refactor agfl length computation function +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-16-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 1cac233cfe71f21e069705a4930c18e48d897be6 upstream. + +Refactor xfs_alloc_min_freelist to accept a NULL @pag argument, in which +case it returns the largest possible minimum length. This will be used +in an upcoming patch to compute the length of the AGFL at mkfs time. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -1998,24 +1998,32 @@ xfs_alloc_longest_free_extent( + return pag->pagf_flcount > 0 || pag->pagf_longest > 0; + } + ++/* ++ * Compute the minimum length of the AGFL in the given AG. If @pag is NULL, ++ * return the largest possible minimum length. ++ */ + unsigned int + xfs_alloc_min_freelist( + struct xfs_mount *mp, + struct xfs_perag *pag) + { ++ /* AG btrees have at least 1 level. */ ++ static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1}; ++ const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; + unsigned int min_free; + ++ ASSERT(mp->m_ag_maxlevels > 0); ++ + /* space needed by-bno freespace btree */ +- min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, ++ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, + mp->m_ag_maxlevels); + /* space needed by-size freespace btree */ +- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, ++ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, + mp->m_ag_maxlevels); + /* space needed reverse mapping used space btree */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) +- min_free += min_t(unsigned int, +- pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, +- mp->m_rmap_maxlevels); ++ min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, ++ mp->m_rmap_maxlevels); + + return min_free; + } diff --git a/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch b/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch new file mode 100644 index 00000000000..6c467db3150 --- /dev/null +++ b/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch @@ -0,0 +1,96 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:38 +0530 +Subject: xfs: 
replace -EIO with -EFSCORRUPTED for corrupt metadata +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-4-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit c2414ad6e66ab96b867309454498f7fb29b7e855 upstream. + +There are a few places where we return -EIO instead of -EFSCORRUPTED +when we find corrupt metadata. Fix those places. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 6 +++--- + fs/xfs/xfs_attr_inactive.c | 6 +++--- + fs/xfs/xfs_dquot.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -1374,7 +1374,7 @@ xfs_bmap_last_before( + case XFS_DINODE_FMT_EXTENTS: + break; + default: +- return -EIO; ++ return -EFSCORRUPTED; + } + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { +@@ -1475,7 +1475,7 @@ xfs_bmap_last_offset( + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) +- return -EIO; ++ return -EFSCORRUPTED; + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) +@@ -5872,7 +5872,7 @@ xfs_bmap_insert_extents( + del_cursor); + + if (stop_fsb >= got.br_startoff + got.br_blockcount) { +- error = -EIO; ++ error = -EFSCORRUPTED; + goto del_cursor; + } + +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -209,7 +209,7 @@ xfs_attr3_node_inactive( + */ + if (level > XFS_DA_NODE_MAXDEPTH) { + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ +- return -EIO; ++ return -EFSCORRUPTED; + } + + node = bp->b_addr; +@@ -258,7 +258,7 @@ xfs_attr3_node_inactive( + error = 
xfs_attr3_leaf_inactive(trans, dp, child_bp); + break; + default: +- error = -EIO; ++ error = -EFSCORRUPTED; + xfs_trans_brelse(*trans, child_bp); + break; + } +@@ -341,7 +341,7 @@ xfs_attr3_root_inactive( + error = xfs_attr3_leaf_inactive(trans, dp, bp); + break; + default: +- error = -EIO; ++ error = -EFSCORRUPTED; + xfs_trans_brelse(*trans, bp); + break; + } +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -1125,7 +1125,7 @@ xfs_qm_dqflush( + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* This is the only portion of data that needs to persist */ diff --git a/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch b/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch new file mode 100644 index 00000000000..0d4de923a3f --- /dev/null +++ b/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch @@ -0,0 +1,41 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:39 +0530 +Subject: xfs: slightly tweak an assert in xfs_fs_map_blocks +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-5-chandan.babu@oracle.com> + +From: Christoph Hellwig + +commit 88cdb7147b21b2d8b4bd3f3d95ce0bffd73e1ac3 upstream. + +We should never see delalloc blocks for a pNFS layout, write or not. +Adjust the assert to check for that. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_pnfs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/xfs/xfs_pnfs.c ++++ b/fs/xfs/xfs_pnfs.c +@@ -147,11 +147,11 @@ xfs_fs_map_blocks( + if (error) + goto out_unlock; + ++ ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); ++ + if (write) { + enum xfs_prealloc_flags flags = 0; + +- ASSERT(imap.br_startblock != DELAYSTARTBLOCK); +- + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { + /* + * xfs_iomap_write_direct() expects to take ownership of diff --git a/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch b/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch new file mode 100644 index 00000000000..309ae907bce --- /dev/null +++ b/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch @@ -0,0 +1,190 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:51 +0530 +Subject: xfs: split the sunit parameter update into two parts +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-17-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 4f5b1b3a8fa07dc8ecedfaf539b3deed8931a73e upstream. + +If the administrator provided a sunit= mount option, we need to validate +the raw parameter, convert the mount option units (512b blocks) into the +internal unit (fs blocks), and then validate that the (now cooked) +parameter doesn't screw anything up on disk. The incore inode geometry +computation can depend on the new sunit option, but a subsequent patch +will make validating the cooked value depend on the computed inode +geometry, so break the sunit update into two steps. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_mount.c | 123 +++++++++++++++++++++++++++++++---------------------- + 1 file changed, 72 insertions(+), 51 deletions(-) + +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -365,66 +365,76 @@ release_buf: + } + + /* +- * Update alignment values based on mount options and sb values ++ * If we were provided with new sunit/swidth values as mount options, make sure ++ * that they pass basic alignment and superblock feature checks, and convert ++ * them into the same units (FSB) that everything else expects. This step ++ * /must/ be done before computing the inode geometry. + */ + STATIC int +-xfs_update_alignment(xfs_mount_t *mp) ++xfs_validate_new_dalign( ++ struct xfs_mount *mp) + { +- xfs_sb_t *sbp = &(mp->m_sb); ++ if (mp->m_dalign == 0) ++ return 0; + +- if (mp->m_dalign) { ++ /* ++ * If stripe unit and stripe width are not multiples ++ * of the fs blocksize turn off alignment. ++ */ ++ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || ++ (BBTOB(mp->m_swidth) & mp->m_blockmask)) { ++ xfs_warn(mp, ++ "alignment check failed: sunit/swidth vs. blocksize(%d)", ++ mp->m_sb.sb_blocksize); ++ return -EINVAL; ++ } else { + /* +- * If stripe unit and stripe width are not multiples +- * of the fs blocksize turn off alignment. ++ * Convert the stripe unit and width to FSBs. + */ +- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || +- (BBTOB(mp->m_swidth) & mp->m_blockmask)) { ++ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); ++ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) { + xfs_warn(mp, +- "alignment check failed: sunit/swidth vs. blocksize(%d)", +- sbp->sb_blocksize); ++ "alignment check failed: sunit/swidth vs. agsize(%d)", ++ mp->m_sb.sb_agblocks); + return -EINVAL; +- } else { +- /* +- * Convert the stripe unit and width to FSBs. 
+- */ +- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); +- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { +- xfs_warn(mp, +- "alignment check failed: sunit/swidth vs. agsize(%d)", +- sbp->sb_agblocks); +- return -EINVAL; +- } else if (mp->m_dalign) { +- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); +- } else { +- xfs_warn(mp, +- "alignment check failed: sunit(%d) less than bsize(%d)", +- mp->m_dalign, sbp->sb_blocksize); +- return -EINVAL; +- } +- } +- +- /* +- * Update superblock with new values +- * and log changes +- */ +- if (xfs_sb_version_hasdalign(sbp)) { +- if (sbp->sb_unit != mp->m_dalign) { +- sbp->sb_unit = mp->m_dalign; +- mp->m_update_sb = true; +- } +- if (sbp->sb_width != mp->m_swidth) { +- sbp->sb_width = mp->m_swidth; +- mp->m_update_sb = true; +- } ++ } else if (mp->m_dalign) { ++ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); + } else { + xfs_warn(mp, +- "cannot change alignment: superblock does not support data alignment"); ++ "alignment check failed: sunit(%d) less than bsize(%d)", ++ mp->m_dalign, mp->m_sb.sb_blocksize); + return -EINVAL; + } ++ } ++ ++ if (!xfs_sb_version_hasdalign(&mp->m_sb)) { ++ xfs_warn(mp, ++"cannot change alignment: superblock does not support data alignment"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* Update alignment values based on mount options and sb values. 
*/ ++STATIC int ++xfs_update_alignment( ++ struct xfs_mount *mp) ++{ ++ struct xfs_sb *sbp = &mp->m_sb; ++ ++ if (mp->m_dalign) { ++ if (sbp->sb_unit == mp->m_dalign && ++ sbp->sb_width == mp->m_swidth) ++ return 0; ++ ++ sbp->sb_unit = mp->m_dalign; ++ sbp->sb_width = mp->m_swidth; ++ mp->m_update_sb = true; + } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && + xfs_sb_version_hasdalign(&mp->m_sb)) { +- mp->m_dalign = sbp->sb_unit; +- mp->m_swidth = sbp->sb_width; ++ mp->m_dalign = sbp->sb_unit; ++ mp->m_swidth = sbp->sb_width; + } + + return 0; +@@ -692,12 +702,12 @@ xfs_mountfs( + } + + /* +- * Check if sb_agblocks is aligned at stripe boundary +- * If sb_agblocks is NOT aligned turn off m_dalign since +- * allocator alignment is within an ag, therefore ag has +- * to be aligned at stripe boundary. ++ * If we were given new sunit/swidth options, do some basic validation ++ * checks and convert the incore dalign and swidth values to the ++ * same units (FSB) that everything else uses. This /must/ happen ++ * before computing the inode geometry. + */ +- error = xfs_update_alignment(mp); ++ error = xfs_validate_new_dalign(mp); + if (error) + goto out; + +@@ -708,6 +718,17 @@ xfs_mountfs( + xfs_rmapbt_compute_maxlevels(mp); + xfs_refcountbt_compute_maxlevels(mp); + ++ /* ++ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks ++ * is NOT aligned turn off m_dalign since allocator alignment is within ++ * an ag, therefore ag has to be aligned at stripe boundary. Note that ++ * we must compute the free space and rmap btree geometry before doing ++ * this. 
++ */ ++ error = xfs_update_alignment(mp); ++ if (error) ++ goto out; ++ + /* enable fail_at_unmount as default */ + mp->m_fail_unmount = true; + diff --git a/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch b/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch new file mode 100644 index 00000000000..85febc700bc --- /dev/null +++ b/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch @@ -0,0 +1,96 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:48 +0530 +Subject: xfs: stabilize insert range start boundary to avoid COW writeback race +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-14-chandan.babu@oracle.com> + +From: Brian Foster + +commit d0c2204135a0cdbc607c94c481cf1ccb2f659aa7 upstream. + +generic/522 (fsx) occasionally fails with a file corruption due to +an insert range operation. The primary characteristic of the +corruption is a misplaced insert range operation that differs from +the requested target offset. The reason for this behavior is a race +between the extent shift sequence of an insert range and a COW +writeback completion that causes a front merge with the first extent +in the shift. + +The shift preparation function flushes and unmaps from the target +offset of the operation to the end of the file to ensure no +modifications can be made and page cache is invalidated before file +data is shifted. An insert range operation then splits the extent at +the target offset, if necessary, and begins to shift the start +offset of each extent starting from the end of the file to the start +offset. 
The shift sequence operates at extent level and so depends +on the preparation sequence to guarantee no changes can be made to +the target range during the shift. If the block immediately prior to +the target offset was dirty and shared, however, it can undergo +writeback and move from the COW fork to the data fork at any point +during the shift. If the block is contiguous with the block at the +start offset of the insert range, it can front merge and alter the +start offset of the extent. Once the shift sequence reaches the +target offset, it shifts based on the latest start offset and +silently changes the target offset of the operation and corrupts the +file. + +To address this problem, update the shift preparation code to +stabilize the start boundary along with the full range of the +insert. Also update the existing corruption check to fail if any +extent is shifted with a start offset behind the target offset of +the insert range. This prevents insert from racing with COW +writeback completion and fails loudly in the event of an unexpected +extent shift. + +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 2 +- + fs/xfs/xfs_bmap_util.c | 12 ++++++++++++ + 2 files changed, 13 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -5876,7 +5876,7 @@ xfs_bmap_insert_extents( + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); + +- if (stop_fsb >= got.br_startoff + got.br_blockcount) { ++ if (stop_fsb > got.br_startoff) { + ASSERT(0); + error = -EFSCORRUPTED; + goto del_cursor; +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1167,6 +1167,7 @@ xfs_prepare_shift( + struct xfs_inode *ip, + loff_t offset) + { ++ struct xfs_mount *mp = ip->i_mount; + int error; + + /* +@@ -1180,6 +1181,17 @@ xfs_prepare_shift( + } + + /* ++ * Shift operations must stabilize the start block offset boundary along ++ * with the full range of the operation. If we don't, a COW writeback ++ * completion could race with an insert, front merge with the start ++ * extent (after split) during the shift and corrupt the file. Start ++ * with the block just prior to the start to stabilize the boundary. ++ */ ++ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog); ++ if (offset) ++ offset -= (1 << mp->m_sb.sb_blocklog); ++ ++ /* + * Writeback and invalidate cache for the remainder of the file as we're + * about to shift down every extent from offset to EOF. 
+ */ diff --git a/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch b/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch new file mode 100644 index 00000000000..20a1efbf383 --- /dev/null +++ b/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch @@ -0,0 +1,41 @@ +From foo@baz Wed Sep 21 10:59:34 AM CEST 2022 +From: Chandan Babu R +Date: Wed, 21 Sep 2022 08:53:49 +0530 +Subject: xfs: use bitops interface for buf log item AIL flag check +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220921032352.307699-15-chandan.babu@oracle.com> + +From: Brian Foster + +commit 826f7e34130a4ce756138540170cbe935c537a47 upstream. + +The xfs_log_item flags were converted to atomic bitops as of commit +22525c17ed ("xfs: log item flags are racy"). The assert check for +AIL presence in xfs_buf_item_relse() still uses the old value based +check. This likely went unnoticed as XFS_LI_IN_AIL evaluates to 0 +and causes the assert to unconditionally pass. Fix up the check. + +Signed-off-by: Brian Foster +Fixes: 22525c17ed ("xfs: log item flags are racy") +Reviewed-by: Eric Sandeen +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_buf_item.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -956,7 +956,7 @@ xfs_buf_item_relse( + struct xfs_buf_log_item *bip = bp->b_log_item; + + trace_xfs_buf_item_relse(bp, _RET_IP_); +- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); ++ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + + bp->b_log_item = NULL; + if (list_empty(&bp->b_li_list)) -- 2.47.3