From 174d6838f307da318d30fea616b673880ef2a990 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 Sep 2022 08:54:12 +0200 Subject: [PATCH] 5.4-stable patches added patches: ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch ext4-make-directory-inode-spreading-reflect-flexbg-size.patch xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch xfs-always-log-corruption-errors.patch xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch xfs-fix-an-abba-deadlock-in-xfs_rename.patch xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch xfs-fix-some-memory-leaks-in-log-recovery.patch xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch xfs-range-check-ri_cnt-when-recovering-log-items.patch xfs-refactor-agfl-length-computation-function.patch xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch xfs-split-the-sunit-parameter-update-into-two-parts.patch xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch --- ...ing-when-eh_entries-0-and-eh_depth-0.patch | 85 ++ ...-inode-spreading-reflect-flexbg-size.patch | 39 + queue-5.4/series | 19 + ...-assert-in-xfs_fsmap_owner_from_rmap.patch | 34 + .../xfs-always-log-corruption-errors.patch | 834 ++++++++++++++++++ ...a-blocks-during-unwritten-conversion.patch | 57 ++ ...pointer-arguments-to-error-functions.patch | 102 +++ ...rupted-when-log-contents-are-invalid.patch | 222 +++++ ...-if-that-would-cause-repair-failures.patch | 230 +++++ ...s-fix-an-abba-deadlock-in-xfs_rename.patch | 127 +++ ...-when-target_ip-exists-in-xfs_rename.patch | 133 +++ 
...ix-some-memory-leaks-in-log-recovery.patch | 50 ++ ...n-aborting-corrupt-attr-inactivation.patch | 37 + ...eck-ri_cnt-when-recovering-log-items.patch | 47 + ...tor-agfl-length-computation-function.patch | 65 ++ ...th-efscorrupted-for-corrupt-metadata.patch | 96 ++ ...tweak-an-assert-in-xfs_fs_map_blocks.patch | 41 + ...unit-parameter-update-into-two-parts.patch | 190 ++++ ...boundary-to-avoid-cow-writeback-race.patch | 96 ++ ...face-for-buf-log-item-ail-flag-check.patch | 41 + 20 files changed, 2545 insertions(+) create mode 100644 queue-5.4/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch create mode 100644 queue-5.4/ext4-make-directory-inode-spreading-reflect-flexbg-size.patch create mode 100644 queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch create mode 100644 queue-5.4/xfs-always-log-corruption-errors.patch create mode 100644 queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch create mode 100644 queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch create mode 100644 queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch create mode 100644 queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch create mode 100644 queue-5.4/xfs-fix-an-abba-deadlock-in-xfs_rename.patch create mode 100644 queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch create mode 100644 queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch create mode 100644 queue-5.4/xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch create mode 100644 queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch create mode 100644 queue-5.4/xfs-refactor-agfl-length-computation-function.patch create mode 100644 queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch create mode 100644 queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch create mode 100644 
queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch create mode 100644 queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch create mode 100644 queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch diff --git a/queue-5.4/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch b/queue-5.4/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch new file mode 100644 index 00000000000..d4cf69dfe58 --- /dev/null +++ b/queue-5.4/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch @@ -0,0 +1,85 @@ +From 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= +Date: Mon, 22 Aug 2022 10:42:35 +0100 +Subject: ext4: fix bug in extents parsing when eh_entries == 0 and eh_depth > 0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luís Henriques + +commit 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 upstream. + +When walking through an inode extents, the ext4_ext_binsearch_idx() function +assumes that the extent header has been previously validated. However, there +are no checks that verify that the number of entries (eh->eh_entries) is +non-zero when depth is > 0. And this will lead to problems because the +EXT_FIRST_INDEX() and EXT_LAST_INDEX() will return garbage and result in this: + +[ 135.245946] ------------[ cut here ]------------ +[ 135.247579] kernel BUG at fs/ext4/extents.c:2258! 
+[ 135.249045] invalid opcode: 0000 [#1] PREEMPT SMP +[ 135.250320] CPU: 2 PID: 238 Comm: tmp118 Not tainted 5.19.0-rc8+ #4 +[ 135.252067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 +[ 135.255065] RIP: 0010:ext4_ext_map_blocks+0xc20/0xcb0 +[ 135.256475] Code: +[ 135.261433] RSP: 0018:ffffc900005939f8 EFLAGS: 00010246 +[ 135.262847] RAX: 0000000000000024 RBX: ffffc90000593b70 RCX: 0000000000000023 +[ 135.264765] RDX: ffff8880038e5f10 RSI: 0000000000000003 RDI: ffff8880046e922c +[ 135.266670] RBP: ffff8880046e9348 R08: 0000000000000001 R09: ffff888002ca580c +[ 135.268576] R10: 0000000000002602 R11: 0000000000000000 R12: 0000000000000024 +[ 135.270477] R13: 0000000000000000 R14: 0000000000000024 R15: 0000000000000000 +[ 135.272394] FS: 00007fdabdc56740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 +[ 135.274510] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 135.276075] CR2: 00007ffc26bd4f00 CR3: 0000000006261004 CR4: 0000000000170ea0 +[ 135.277952] Call Trace: +[ 135.278635] +[ 135.279247] ? preempt_count_add+0x6d/0xa0 +[ 135.280358] ? percpu_counter_add_batch+0x55/0xb0 +[ 135.281612] ? _raw_read_unlock+0x18/0x30 +[ 135.282704] ext4_map_blocks+0x294/0x5a0 +[ 135.283745] ? xa_load+0x6f/0xa0 +[ 135.284562] ext4_mpage_readpages+0x3d6/0x770 +[ 135.285646] read_pages+0x67/0x1d0 +[ 135.286492] ? folio_add_lru+0x51/0x80 +[ 135.287441] page_cache_ra_unbounded+0x124/0x170 +[ 135.288510] filemap_get_pages+0x23d/0x5a0 +[ 135.289457] ? path_openat+0xa72/0xdd0 +[ 135.290332] filemap_read+0xbf/0x300 +[ 135.291158] ? _raw_spin_lock_irqsave+0x17/0x40 +[ 135.292192] new_sync_read+0x103/0x170 +[ 135.293014] vfs_read+0x15d/0x180 +[ 135.293745] ksys_read+0xa1/0xe0 +[ 135.294461] do_syscall_64+0x3c/0x80 +[ 135.295284] entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +This patch simply adds an extra check in __ext4_ext_check(), verifying that +eh_entries is not 0 when eh_depth is > 0. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=215941 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216283 +Cc: Baokun Li +Cc: stable@kernel.org +Signed-off-by: Luís Henriques +Reviewed-by: Jan Kara +Reviewed-by: Baokun Li +Link: https://lore.kernel.org/r/20220822094235.2690-1-lhenriques@suse.de +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -500,6 +500,10 @@ static int __ext4_ext_check(const char * + error_msg = "invalid eh_entries"; + goto corrupted; + } ++ if (unlikely((eh->eh_entries == 0) && (depth > 0))) { ++ error_msg = "eh_entries is 0 but eh_depth is > 0"; ++ goto corrupted; ++ } + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { + error_msg = "invalid extent entries"; + goto corrupted; diff --git a/queue-5.4/ext4-make-directory-inode-spreading-reflect-flexbg-size.patch b/queue-5.4/ext4-make-directory-inode-spreading-reflect-flexbg-size.patch new file mode 100644 index 00000000000..3c0682268f9 --- /dev/null +++ b/queue-5.4/ext4-make-directory-inode-spreading-reflect-flexbg-size.patch @@ -0,0 +1,39 @@ +From 613c5a85898d1cd44e68f28d65eccf64a8ace9cf Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Thu, 8 Sep 2022 11:21:26 +0200 +Subject: ext4: make directory inode spreading reflect flexbg size + +From: Jan Kara + +commit 613c5a85898d1cd44e68f28d65eccf64a8ace9cf upstream. + +Currently the Orlov inode allocator searches for free inodes for a +directory only in flex block groups with at most inodes_per_group/16 +more directory inodes than average per flex block group. However with +growing size of flex block group this becomes unnecessarily strict. +Scale allowed difference from average directory count per flex block +group with flex block group size as we do with other metrics. 
+ +Tested-by: Stefan Wahren +Tested-by: Ojaswin Mujoo +Cc: stable@kernel.org +Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/ +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220908092136.11770-3-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -500,7 +500,7 @@ static int find_group_orlov(struct super + goto fallback; + } + +- max_dirs = ndirs / ngroups + inodes_per_group / 16; ++ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16; + min_inodes = avefreei - inodes_per_group*flex_size / 4; + if (min_inodes < 1) + min_inodes = 1; diff --git a/queue-5.4/series b/queue-5.4/series index ebab4e10c01..3c12d93e941 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -99,3 +99,22 @@ drm-amdgpu-use-dirty-framebuffer-helper.patch drm-amd-display-limit-user-regamma-to-a-valid-value.patch drm-rockchip-fix-return-type-of-cdn_dp_connector_mod.patch workqueue-don-t-skip-lockdep-work-dependency-in-canc.patch +ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch +ext4-make-directory-inode-spreading-reflect-flexbg-size.patch +xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch +xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch +xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch +xfs-range-check-ri_cnt-when-recovering-log-items.patch +xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch +xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch +xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch +xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch +xfs-always-log-corruption-errors.patch +xfs-fix-some-memory-leaks-in-log-recovery.patch +xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch 
+xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch +xfs-refactor-agfl-length-computation-function.patch +xfs-split-the-sunit-parameter-update-into-two-parts.patch +xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch +xfs-fix-an-abba-deadlock-in-xfs_rename.patch +xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch diff --git a/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch b/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch new file mode 100644 index 00000000000..57ac740ca39 --- /dev/null +++ b/queue-5.4/xfs-add-missing-assert-in-xfs_fsmap_owner_from_rmap.patch @@ -0,0 +1,34 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:42 +0530 +Subject: xfs: add missing assert in xfs_fsmap_owner_from_rmap +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-6-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 110f09cb705af8c53f2a457baf771d2935ed62d4 upstream. + +The fsmap handler shouldn't fail silently if the rmap code ever feeds it +a special owner number that isn't known to the fsmap handler. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_fsmap.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -146,6 +146,7 @@ xfs_fsmap_owner_from_rmap( + dest->fmr_owner = XFS_FMR_OWN_FREE; + break; + default: ++ ASSERT(0); + return -EFSCORRUPTED; + } + return 0; diff --git a/queue-5.4/xfs-always-log-corruption-errors.patch b/queue-5.4/xfs-always-log-corruption-errors.patch new file mode 100644 index 00000000000..23dce60637b --- /dev/null +++ b/queue-5.4/xfs-always-log-corruption-errors.patch @@ -0,0 +1,834 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:48 +0530 +Subject: xfs: always log corruption errors +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-12-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit a5155b870d687de1a5f07e774b49b1e8ef0f6f50 upstream. + +Make sure we log something to dmesg whenever we return -EFSCORRUPTED up +the call stack. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Carlos Maiolino +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 9 +++++++-- + fs/xfs/libxfs/xfs_attr_leaf.c | 12 +++++++++--- + fs/xfs/libxfs/xfs_bmap.c | 8 +++++++- + fs/xfs/libxfs/xfs_btree.c | 5 ++++- + fs/xfs/libxfs/xfs_da_btree.c | 24 ++++++++++++++++++------ + fs/xfs/libxfs/xfs_dir2.c | 4 +++- + fs/xfs/libxfs/xfs_dir2_leaf.c | 4 +++- + fs/xfs/libxfs/xfs_dir2_node.c | 12 +++++++++--- + fs/xfs/libxfs/xfs_inode_fork.c | 6 ++++++ + fs/xfs/libxfs/xfs_refcount.c | 4 +++- + fs/xfs/libxfs/xfs_rtbitmap.c | 6 ++++-- + fs/xfs/xfs_acl.c | 15 ++++++++++++--- + fs/xfs/xfs_attr_inactive.c | 6 +++++- + fs/xfs/xfs_attr_list.c | 5 ++++- + fs/xfs/xfs_bmap_item.c | 3 ++- + fs/xfs/xfs_error.c | 21 +++++++++++++++++++++ + fs/xfs/xfs_error.h | 1 + + fs/xfs/xfs_extfree_item.c | 3 ++- + fs/xfs/xfs_inode.c | 15 ++++++++++++--- + fs/xfs/xfs_inode_item.c | 5 ++++- + fs/xfs/xfs_iops.c | 10 +++++++--- + fs/xfs/xfs_log_recover.c | 23 ++++++++++++++++++----- + fs/xfs/xfs_qm.c | 13 +++++++++++-- + fs/xfs/xfs_refcount_item.c | 3 ++- + fs/xfs/xfs_rmap_item.c | 7 +++++-- + 25 files changed, 179 insertions(+), 45 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -684,8 +684,10 @@ xfs_alloc_update_counters( + + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > +- be32_to_cpu(agf->agf_length))) ++ be32_to_cpu(agf->agf_length))) { ++ xfs_buf_corruption_error(agbp); + return -EFSCORRUPTED; ++ } + + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; +@@ -751,6 +753,7 @@ xfs_alloc_ag_vextent_small( + + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); + if (!bp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, args->mp); + error = -EFSCORRUPTED; + goto error; + } +@@ -2087,8 +2090,10 @@ xfs_free_agfl_block( + return error; + + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); +- if (!bp) ++ if (!bp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp); + 
return -EFSCORRUPTED; ++ } + xfs_trans_binval(tp, bp); + + return 0; +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -2287,8 +2287,10 @@ xfs_attr3_leaf_lookup_int( + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf); + entries = xfs_attr3_leaf_entryp(leaf); +- if (ichdr.count >= args->geo->blksize / 8) ++ if (ichdr.count >= args->geo->blksize / 8) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Binary search. (note: small blocks will skip this loop) +@@ -2304,10 +2306,14 @@ xfs_attr3_leaf_lookup_int( + else + break; + } +- if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) ++ if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; +- if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) ++ } ++ if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Since we may have duplicate hashval's, find the first matching +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -729,6 +729,7 @@ xfs_bmap_extents_to_btree( + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); + if (!abp) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto out_unreserve_dquot; + } +@@ -1084,6 +1085,7 @@ xfs_bmap_add_attrfork( + if (XFS_IFORK_Q(ip)) + goto trans_cancel; + if (ip->i_d.di_anextents != 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto trans_cancel; + } +@@ -1374,6 +1376,7 @@ xfs_bmap_last_before( + case XFS_DINODE_FMT_EXTENTS: + break; + default: ++ ASSERT(0); + return -EFSCORRUPTED; + } + +@@ -1474,8 +1477,10 @@ xfs_bmap_last_offset( + return 0; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && +- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) ++ XFS_IFORK_FORMAT(ip, whichfork) 
!= XFS_DINODE_FMT_EXTENTS) { ++ ASSERT(0); + return -EFSCORRUPTED; ++ } + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) +@@ -5872,6 +5877,7 @@ xfs_bmap_insert_extents( + del_cursor); + + if (stop_fsb >= got.br_startoff + got.br_blockcount) { ++ ASSERT(0); + error = -EFSCORRUPTED; + goto del_cursor; + } +--- a/fs/xfs/libxfs/xfs_btree.c ++++ b/fs/xfs/libxfs/xfs_btree.c +@@ -1820,6 +1820,7 @@ xfs_btree_lookup_get_block( + + out_bad: + *blkp = NULL; ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(cur->bc_tp, bp); + return -EFSCORRUPTED; + } +@@ -1867,8 +1868,10 @@ xfs_btree_lookup( + XFS_BTREE_STATS_INC(cur, lookup); + + /* No such thing as a zero-level tree. */ +- if (cur->bc_nlevels == 0) ++ if (cur->bc_nlevels == 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, cur->bc_mp); + return -EFSCORRUPTED; ++ } + + block = NULL; + keyno = 0; +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -504,6 +504,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.forw) { + if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { ++ xfs_buf_corruption_error(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -516,6 +517,7 @@ xfs_da3_split( + node = oldblk->bp->b_addr; + if (node->hdr.info.back) { + if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { ++ xfs_buf_corruption_error(oldblk->bp); + error = -EFSCORRUPTED; + goto out; + } +@@ -1541,8 +1543,10 @@ xfs_da3_node_lookup_int( + break; + } + +- if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) ++ if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; ++ } + + blk->magic = XFS_DA_NODE_MAGIC; + +@@ -1554,15 +1558,18 @@ xfs_da3_node_lookup_int( + btree = dp->d_ops->node_tree_p(node); + + /* Tree taller than we can handle; bail out! 
*/ +- if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) ++ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; ++ } + + /* Check the level from the root. */ + if (blkno == args->geo->leafblk) + expected_level = nodehdr.level - 1; +- else if (expected_level != nodehdr.level) ++ else if (expected_level != nodehdr.level) { ++ xfs_buf_corruption_error(blk->bp); + return -EFSCORRUPTED; +- else ++ } else + expected_level--; + + max = nodehdr.count; +@@ -1612,12 +1619,17 @@ xfs_da3_node_lookup_int( + } + + /* We can't point back to the root. */ +- if (blkno == args->geo->leafblk) ++ if (blkno == args->geo->leafblk) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ dp->i_mount); + return -EFSCORRUPTED; ++ } + } + +- if (expected_level != 0) ++ if (expected_level != 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, dp->i_mount); + return -EFSCORRUPTED; ++ } + + /* + * A leaf block that ends in the hashval that we are interested in +--- a/fs/xfs/libxfs/xfs_dir2.c ++++ b/fs/xfs/libxfs/xfs_dir2.c +@@ -600,8 +600,10 @@ xfs_dir2_isblock( + if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK))) + return rval; + rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; +- if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) ++ if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount); + return -EFSCORRUPTED; ++ } + *vp = rval; + return 0; + } +--- a/fs/xfs/libxfs/xfs_dir2_leaf.c ++++ b/fs/xfs/libxfs/xfs_dir2_leaf.c +@@ -1343,8 +1343,10 @@ xfs_dir2_leaf_removename( + oldbest = be16_to_cpu(bf[0].length); + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + bestsp = xfs_dir2_leaf_bests_p(ltp); +- if (be16_to_cpu(bestsp[db]) != oldbest) ++ if (be16_to_cpu(bestsp[db]) != oldbest) { ++ xfs_buf_corruption_error(lbp); + return -EFSCORRUPTED; ++ } + /* + * Mark the former data entry unused. 
+ */ +--- a/fs/xfs/libxfs/xfs_dir2_node.c ++++ b/fs/xfs/libxfs/xfs_dir2_node.c +@@ -374,8 +374,10 @@ xfs_dir2_leaf_to_node( + leaf = lbp->b_addr; + ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); + if (be32_to_cpu(ltp->bestcount) > +- (uint)dp->i_d.di_size / args->geo->blksize) ++ (uint)dp->i_d.di_size / args->geo->blksize) { ++ xfs_buf_corruption_error(lbp); + return -EFSCORRUPTED; ++ } + + /* + * Copy freespace entries from the leaf block to the new block. +@@ -446,8 +448,10 @@ xfs_dir2_leafn_add( + * Quick check just to make sure we are not going to index + * into other peoples memory + */ +- if (index < 0) ++ if (index < 0) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * If there are already the maximum number of leaf entries in +@@ -740,8 +744,10 @@ xfs_dir2_leafn_lookup_for_entry( + ents = dp->d_ops->leaf_ents_p(leaf); + + xfs_dir3_leaf_check(dp, bp); +- if (leafhdr.count <= 0) ++ if (leafhdr.count <= 0) { ++ xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; ++ } + + /* + * Look up the hash value in the leaf entries. 
+--- a/fs/xfs/libxfs/xfs_inode_fork.c ++++ b/fs/xfs/libxfs/xfs_inode_fork.c +@@ -75,11 +75,15 @@ xfs_iformat_fork( + error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK); + break; + default: ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, ++ dip, sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } + break; + + default: ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, ++ sizeof(*dip), __this_address); + return -EFSCORRUPTED; + } + if (error) +@@ -110,6 +114,8 @@ xfs_iformat_fork( + error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK); + break; + default: ++ xfs_inode_verifier_error(ip, error, __func__, dip, ++ sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + break; + } +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1591,8 +1591,10 @@ xfs_refcount_recover_extent( + struct list_head *debris = priv; + struct xfs_refcount_recovery *rr; + +- if (be32_to_cpu(rec->refc.rc_refcount) != 1) ++ if (be32_to_cpu(rec->refc.rc_refcount) != 1) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, cur->bc_mp); + return -EFSCORRUPTED; ++ } + + rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); + xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -15,7 +15,7 @@ + #include "xfs_bmap.h" + #include "xfs_trans.h" + #include "xfs_rtalloc.h" +- ++#include "xfs_error.h" + + /* + * Realtime allocator bitmap functions shared with userspace. 
+@@ -70,8 +70,10 @@ xfs_rtbuf_get( + if (error) + return error; + +- if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) ++ if (nmap == 0 || !xfs_bmap_is_real_extent(&map)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; ++ } + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, +--- a/fs/xfs/xfs_acl.c ++++ b/fs/xfs/xfs_acl.c +@@ -12,6 +12,7 @@ + #include "xfs_inode.h" + #include "xfs_attr.h" + #include "xfs_trace.h" ++#include "xfs_error.h" + #include + + +@@ -23,6 +24,7 @@ + + STATIC struct posix_acl * + xfs_acl_from_disk( ++ struct xfs_mount *mp, + const struct xfs_acl *aclp, + int len, + int max_entries) +@@ -32,11 +34,18 @@ xfs_acl_from_disk( + const struct xfs_acl_entry *ace; + unsigned int count, i; + +- if (len < sizeof(*aclp)) ++ if (len < sizeof(*aclp)) { ++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, ++ len); + return ERR_PTR(-EFSCORRUPTED); ++ } ++ + count = be32_to_cpu(aclp->acl_cnt); +- if (count > max_entries || XFS_ACL_SIZE(count) != len) ++ if (count > max_entries || XFS_ACL_SIZE(count) != len) { ++ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp, ++ len); + return ERR_PTR(-EFSCORRUPTED); ++ } + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) +@@ -145,7 +154,7 @@ xfs_get_acl(struct inode *inode, int typ + if (error != -ENOATTR) + acl = ERR_PTR(error); + } else { +- acl = xfs_acl_from_disk(xfs_acl, len, ++ acl = xfs_acl_from_disk(ip->i_mount, xfs_acl, len, + XFS_ACL_MAX_ENTRIES(ip->i_mount)); + kmem_free(xfs_acl); + } +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -22,6 +22,7 @@ + #include "xfs_attr_leaf.h" + #include "xfs_quota.h" + #include "xfs_dir2.h" ++#include "xfs_error.h" + + /* + * Look at all the extents for this logical region, +@@ -209,6 +210,7 @@ xfs_attr3_node_inactive( + */ + if (level > XFS_DA_NODE_MAXDEPTH) { + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ ++ 
xfs_buf_corruption_error(bp); + return -EFSCORRUPTED; + } + +@@ -258,8 +260,9 @@ xfs_attr3_node_inactive( + error = xfs_attr3_leaf_inactive(trans, dp, child_bp); + break; + default: +- error = -EFSCORRUPTED; ++ xfs_buf_corruption_error(child_bp); + xfs_trans_brelse(*trans, child_bp); ++ error = -EFSCORRUPTED; + break; + } + if (error) +@@ -342,6 +345,7 @@ xfs_attr3_root_inactive( + break; + default: + error = -EFSCORRUPTED; ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(*trans, bp); + break; + } +--- a/fs/xfs/xfs_attr_list.c ++++ b/fs/xfs/xfs_attr_list.c +@@ -258,8 +258,10 @@ xfs_attr_node_list_lookup( + return 0; + + /* We can't point back to the root. */ +- if (cursor->blkno == 0) ++ if (cursor->blkno == 0) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; ++ } + } + + if (expected_level != 0) +@@ -269,6 +271,7 @@ xfs_attr_node_list_lookup( + return 0; + + out_corruptbuf: ++ xfs_buf_corruption_error(bp); + xfs_trans_brelse(tp, bp); + return -EFSCORRUPTED; + } +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -21,7 +21,7 @@ + #include "xfs_icache.h" + #include "xfs_bmap_btree.h" + #include "xfs_trans_space.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_bui_zone; + kmem_zone_t *xfs_bud_zone; +@@ -525,6 +525,7 @@ xfs_bui_recover( + type = bui_type; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = -EFSCORRUPTED; + goto err_inode; + } +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -342,6 +342,27 @@ xfs_corruption_error( + } + + /* ++ * Complain about the kinds of metadata corruption that we can't detect from a ++ * verifier, such as incorrect inter-block relationship data. Does not set ++ * bp->b_error. 
++ */ ++void ++xfs_buf_corruption_error( ++ struct xfs_buf *bp) ++{ ++ struct xfs_mount *mp = bp->b_mount; ++ ++ xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR, ++ "Metadata corruption detected at %pS, %s block 0x%llx", ++ __return_address, bp->b_ops->name, bp->b_bn); ++ ++ xfs_alert(mp, "Unmount and run xfs_repair"); ++ ++ if (xfs_error_level >= XFS_ERRLEVEL_HIGH) ++ xfs_stack_trace(); ++} ++ ++/* + * Warnings specifically for verifier errors. Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -15,6 +15,7 @@ extern void xfs_corruption_error(const c + struct xfs_mount *mp, const void *buf, size_t bufsize, + const char *filename, int linenum, + xfs_failaddr_t failaddr); ++void xfs_buf_corruption_error(struct xfs_buf *bp); + extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, + const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -21,7 +21,7 @@ + #include "xfs_alloc.h" + #include "xfs_bmap.h" + #include "xfs_trace.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_efi_zone; + kmem_zone_t *xfs_efd_zone; +@@ -228,6 +228,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf + } + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2149,8 +2149,10 @@ xfs_iunlink_update_bucket( + * passed in because either we're adding or removing ourselves from the + * head of the list. + */ +- if (old_value == new_agino) ++ if (old_value == new_agino) { ++ xfs_buf_corruption_error(agibp); + return -EFSCORRUPTED; ++ } + + agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino); + offset = offsetof(struct xfs_agi, agi_unlinked) + +@@ -2213,6 +2215,8 @@ xfs_iunlink_update_inode( + /* Make sure the old pointer isn't garbage. 
*/ + old_value = be32_to_cpu(dip->di_next_unlinked); + if (!xfs_verify_agino_or_null(mp, agno, old_value)) { ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, ++ sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + goto out; + } +@@ -2224,8 +2228,11 @@ xfs_iunlink_update_inode( + */ + *old_next_agino = old_value; + if (old_value == next_agino) { +- if (next_agino != NULLAGINO) ++ if (next_agino != NULLAGINO) { ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, ++ dip, sizeof(*dip), __this_address); + error = -EFSCORRUPTED; ++ } + goto out; + } + +@@ -2276,8 +2283,10 @@ xfs_iunlink( + */ + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || +- !xfs_verify_agino_or_null(mp, agno, next_agino)) ++ !xfs_verify_agino_or_null(mp, agno, next_agino)) { ++ xfs_buf_corruption_error(agibp); + return -EFSCORRUPTED; ++ } + + if (next_agino != NULLAGINO) { + struct xfs_perag *pag; +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -17,6 +17,7 @@ + #include "xfs_trans_priv.h" + #include "xfs_buf_item.h" + #include "xfs_log.h" ++#include "xfs_error.h" + + #include + +@@ -828,8 +829,10 @@ xfs_inode_item_format_convert( + { + struct xfs_inode_log_format_32 *in_f32 = buf->i_addr; + +- if (buf->i_len != sizeof(*in_f32)) ++ if (buf->i_len != sizeof(*in_f32)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; ++ } + + in_f->ilf_type = in_f32->ilf_type; + in_f->ilf_size = in_f32->ilf_size; +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -20,6 +20,7 @@ + #include "xfs_symlink.h" + #include "xfs_dir2.h" + #include "xfs_iomap.h" ++#include "xfs_error.h" + + #include + #include +@@ -470,17 +471,20 @@ xfs_vn_get_link_inline( + struct inode *inode, + struct delayed_call *done) + { ++ struct xfs_inode *ip = XFS_I(inode); + char *link; + +- ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); ++ ASSERT(ip->i_df.if_flags & XFS_IFINLINE); + + /* + * The VFS crashes on a NULL pointer, so 
return -EFSCORRUPTED if + * if_data is junk. + */ +- link = XFS_I(inode)->i_df.if_u1.if_data; +- if (!link) ++ link = ip->i_df.if_u1.if_data; ++ if (!link) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ip->i_mount); + return ERR_PTR(-EFSCORRUPTED); ++ } + return link; + } + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -3537,6 +3537,7 @@ xfs_cui_copy_format( + memcpy(dst_cui_fmt, src_cui_fmt, len); + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +@@ -3601,8 +3602,10 @@ xlog_recover_cud_pass2( + struct xfs_ail *ailp = log->l_ailp; + + cud_formatp = item->ri_buf[0].i_addr; +- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) ++ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + cui_id = cud_formatp->cud_cui_id; + + /* +@@ -3654,6 +3657,7 @@ xfs_bui_copy_format( + memcpy(dst_bui_fmt, src_bui_fmt, len); + return 0; + } ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; + } + +@@ -3677,8 +3681,10 @@ xlog_recover_bui_pass2( + + bui_formatp = item->ri_buf[0].i_addr; + +- if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) ++ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + buip = xfs_bui_init(mp); + error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format); + if (error) { +@@ -3720,8 +3726,10 @@ xlog_recover_bud_pass2( + struct xfs_ail *ailp = log->l_ailp; + + bud_formatp = item->ri_buf[0].i_addr; +- if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) ++ if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + bui_id = bud_formatp->bud_bui_id; + + /* +@@ -5181,8 +5189,10 @@ xlog_recover_process( + * If the filesystem is CRC 
enabled, this mismatch becomes a + * fatal log corruption failure. + */ +- if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) ++ if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); + return -EFSCORRUPTED; ++ } + } + + xlog_unpack_data(rhead, dp, log); +@@ -5305,8 +5315,11 @@ xlog_do_recovery_pass( + "invalid iclog size (%d bytes), using lsunit (%d bytes)", + h_size, log->l_mp->m_logbsize); + h_size = log->l_mp->m_logbsize; +- } else ++ } else { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ log->l_mp); + return -EFSCORRUPTED; ++ } + } + + if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && +--- a/fs/xfs/xfs_qm.c ++++ b/fs/xfs/xfs_qm.c +@@ -22,6 +22,7 @@ + #include "xfs_qm.h" + #include "xfs_trace.h" + #include "xfs_icache.h" ++#include "xfs_error.h" + + /* + * The global quota manager. There is only one of these for the entire +@@ -754,11 +755,19 @@ xfs_qm_qino_alloc( + if ((flags & XFS_QMOPT_PQUOTA) && + (mp->m_sb.sb_gquotino != NULLFSINO)) { + ino = mp->m_sb.sb_gquotino; +- ASSERT(mp->m_sb.sb_pquotino == NULLFSINO); ++ if (mp->m_sb.sb_pquotino != NULLFSINO) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ mp); ++ return -EFSCORRUPTED; ++ } + } else if ((flags & XFS_QMOPT_GQUOTA) && + (mp->m_sb.sb_pquotino != NULLFSINO)) { + ino = mp->m_sb.sb_pquotino; +- ASSERT(mp->m_sb.sb_gquotino == NULLFSINO); ++ if (mp->m_sb.sb_gquotino != NULLFSINO) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, ++ mp); ++ return -EFSCORRUPTED; ++ } + } + if (ino != NULLFSINO) { + error = xfs_iget(mp, NULL, ino, 0, 0, ip); +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -17,7 +17,7 @@ + #include "xfs_refcount_item.h" + #include "xfs_log.h" + #include "xfs_refcount.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_cui_zone; + kmem_zone_t *xfs_cud_zone; +@@ -536,6 +536,7 @@ xfs_cui_recover( + type = refc_type; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + error = 
-EFSCORRUPTED; + goto abort_error; + } +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -17,7 +17,7 @@ + #include "xfs_rmap_item.h" + #include "xfs_log.h" + #include "xfs_rmap.h" +- ++#include "xfs_error.h" + + kmem_zone_t *xfs_rui_zone; + kmem_zone_t *xfs_rud_zone; +@@ -171,8 +171,10 @@ xfs_rui_copy_format( + src_rui_fmt = buf->i_addr; + len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); + +- if (buf->i_len != len) ++ if (buf->i_len != len) { ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + return -EFSCORRUPTED; ++ } + + memcpy(dst_rui_fmt, src_rui_fmt, len); + return 0; +@@ -581,6 +583,7 @@ xfs_rui_recover( + type = XFS_RMAP_FREE; + break; + default: ++ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL); + error = -EFSCORRUPTED; + goto abort_error; + } diff --git a/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch b/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch new file mode 100644 index 00000000000..ec3333caf6f --- /dev/null +++ b/queue-5.4/xfs-attach-dquots-and-reserve-quota-blocks-during-unwritten-conversion.patch @@ -0,0 +1,57 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:44 +0530 +Subject: xfs: attach dquots and reserve quota blocks during unwritten conversion +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-8-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 2815a16d7ff6230a8e37928829d221bb075aa160 upstream. + +In xfs_iomap_write_unwritten, we need to ensure that dquots are attached +to the inode and quota blocks reserved so that we capture in the quota +counters any blocks allocated to handle a bmbt split. 
This can happen +on the first unwritten extent conversion to a preallocated sparse file +on a fresh mount. + +This was found by running generic/311 with quotas enabled. The bug +seems to have been introduced in "[XFS] rework iocore infrastructure, +remove some code and make it more" from ~2002? + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_iomap.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -765,6 +765,11 @@ xfs_iomap_write_unwritten( + */ + resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; + ++ /* Attach dquots so that bmbt splits are accounted correctly. */ ++ error = xfs_qm_dqattach(ip); ++ if (error) ++ return error; ++ + do { + /* + * Set up a transaction to convert the range of extents +@@ -783,6 +788,11 @@ xfs_iomap_write_unwritten( + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + ++ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, ++ XFS_QMOPT_RES_REGBLKS); ++ if (error) ++ goto error_on_bmapi_transaction; ++ + /* + * Modify the unwritten extent state of the buffer. 
+ */ diff --git a/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch b/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch new file mode 100644 index 00000000000..0dc97a53a2c --- /dev/null +++ b/queue-5.4/xfs-constify-the-buffer-pointer-arguments-to-error-functions.patch @@ -0,0 +1,102 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:47 +0530 +Subject: xfs: constify the buffer pointer arguments to error functions +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-11-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit d243b89a611e83dc97ce7102419360677a664076 upstream. + +Some of the xfs error message functions take a pointer to a buffer that +will be dumped to the system log. The logging functions don't change +the contents, so constify all the parameters. This enables the next +patch to ensure that we log bad metadata when we encounter it. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Carlos Maiolino +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_error.c | 6 +++--- + fs/xfs/xfs_error.h | 6 +++--- + fs/xfs/xfs_message.c | 2 +- + fs/xfs/xfs_message.h | 2 +- + 4 files changed, 8 insertions(+), 8 deletions(-) + +--- a/fs/xfs/xfs_error.c ++++ b/fs/xfs/xfs_error.c +@@ -329,7 +329,7 @@ xfs_corruption_error( + const char *tag, + int level, + struct xfs_mount *mp, +- void *buf, ++ const void *buf, + size_t bufsize, + const char *filename, + int linenum, +@@ -350,7 +350,7 @@ xfs_buf_verifier_error( + struct xfs_buf *bp, + int error, + const char *name, +- void *buf, ++ const void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) + { +@@ -402,7 +402,7 @@ xfs_inode_verifier_error( + struct xfs_inode *ip, + int error, + const char *name, +- void *buf, ++ const void *buf, + size_t bufsz, + xfs_failaddr_t failaddr) + { +--- a/fs/xfs/xfs_error.h ++++ b/fs/xfs/xfs_error.h +@@ -12,16 +12,16 @@ extern void xfs_error_report(const char + const char *filename, int linenum, + xfs_failaddr_t failaddr); + extern void xfs_corruption_error(const char *tag, int level, +- struct xfs_mount *mp, void *buf, size_t bufsize, ++ struct xfs_mount *mp, const void *buf, size_t bufsize, + const char *filename, int linenum, + xfs_failaddr_t failaddr); + extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error, +- const char *name, void *buf, size_t bufsz, ++ const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); + extern void xfs_verifier_error(struct xfs_buf *bp, int error, + xfs_failaddr_t failaddr); + extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error, +- const char *name, void *buf, size_t bufsz, ++ const char *name, const void *buf, size_t bufsz, + xfs_failaddr_t failaddr); + + #define XFS_ERROR_REPORT(e, lvl, mp) \ +--- a/fs/xfs/xfs_message.c ++++ b/fs/xfs/xfs_message.c +@@ -105,7 +105,7 @@ assfail(char *expr, char *file, int line + } + + void +-xfs_hex_dump(void *p, int length) ++xfs_hex_dump(const void 
*p, int length) + { + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); + } +--- a/fs/xfs/xfs_message.h ++++ b/fs/xfs/xfs_message.h +@@ -60,6 +60,6 @@ do { \ + extern void assfail(char *expr, char *f, int l); + extern void asswarn(char *expr, char *f, int l); + +-extern void xfs_hex_dump(void *p, int length); ++extern void xfs_hex_dump(const void *p, int length); + + #endif /* __XFS_MESSAGE_H */ diff --git a/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch b/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch new file mode 100644 index 00000000000..eff8a28af19 --- /dev/null +++ b/queue-5.4/xfs-convert-eio-to-efscorrupted-when-log-contents-are-invalid.patch @@ -0,0 +1,222 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:46 +0530 +Subject: xfs: convert EIO to EFSCORRUPTED when log contents are invalid +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-10-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 895e196fb6f84402dcd0c1d3c3feb8a58049564e upstream. + +Convert EIO to EFSCORRUPTED in the logging code when we can determine +that the log contents are invalid. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_bmap_item.c | 4 ++-- + fs/xfs/xfs_extfree_item.c | 2 +- + fs/xfs/xfs_log_recover.c | 32 ++++++++++++++++---------------- + fs/xfs/xfs_refcount_item.c | 2 +- + fs/xfs/xfs_rmap_item.c | 2 +- + 5 files changed, 21 insertions(+), 21 deletions(-) + +--- a/fs/xfs/xfs_bmap_item.c ++++ b/fs/xfs/xfs_bmap_item.c +@@ -456,7 +456,7 @@ xfs_bui_recover( + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) { + set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); + xfs_bui_release(buip); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -490,7 +490,7 @@ xfs_bui_recover( + */ + set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); + xfs_bui_release(buip); +- return -EIO; ++ return -EFSCORRUPTED; + } + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, +--- a/fs/xfs/xfs_extfree_item.c ++++ b/fs/xfs/xfs_extfree_item.c +@@ -624,7 +624,7 @@ xfs_efi_recover( + */ + set_bit(XFS_EFI_RECOVERED, &efip->efi_flags); + xfs_efi_release(efip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -471,7 +471,7 @@ xlog_find_verify_log_record( + xfs_warn(log->l_mp, + "Log inconsistent (didn't find previous header)"); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + goto out; + } + +@@ -1350,7 +1350,7 @@ xlog_find_tail( + return error; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); + +@@ -3166,7 +3166,7 @@ xlog_recover_inode_pass2( + default: + xfs_warn(log->l_mp, "%s: Invalid flag", __func__); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + goto out_release; + } + } +@@ -3247,12 +3247,12 @@ xlog_recover_dquot_pass2( + recddq = item->ri_buf[1].i_addr; + if (recddq == NULL) { + xfs_alert(log->l_mp, "NULL dquot in %s.", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + if 
(item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { + xfs_alert(log->l_mp, "dquot too small (%d) in %s.", + item->ri_buf[1].i_len, __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -3279,7 +3279,7 @@ xlog_recover_dquot_pass2( + if (fa) { + xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS", + dq_f->qlf_id, fa); +- return -EIO; ++ return -EFSCORRUPTED; + } + ASSERT(dq_f->qlf_len == 1); + +@@ -4018,7 +4018,7 @@ xlog_recover_commit_pass1( + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +@@ -4066,7 +4066,7 @@ xlog_recover_commit_pass2( + xfs_warn(log->l_mp, "%s: invalid item type (%d)", + __func__, ITEM_TYPE(item)); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +@@ -4187,7 +4187,7 @@ xlog_recover_add_to_cont_trans( + ASSERT(len <= sizeof(struct xfs_trans_header)); + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); +- return -EIO; ++ return -EFSCORRUPTED; + } + + xlog_recover_add_item(&trans->r_itemq); +@@ -4243,13 +4243,13 @@ xlog_recover_add_to_trans( + xfs_warn(log->l_mp, "%s: bad header magic number", + __func__); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + + if (len > sizeof(struct xfs_trans_header)) { + xfs_warn(log->l_mp, "%s: bad header length", __func__); + ASSERT(0); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -4285,7 +4285,7 @@ xlog_recover_add_to_trans( + in_f->ilf_size); + ASSERT(0); + kmem_free(ptr); +- return -EIO; ++ return -EFSCORRUPTED; + } + + item->ri_total = in_f->ilf_size; +@@ -4389,7 +4389,7 @@ xlog_recovery_process_trans( + default: + xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); + ASSERT(0); +- error = -EIO; ++ error = -EFSCORRUPTED; + break; + } + if (error || freeit) +@@ -4469,7 +4469,7 @@ xlog_recover_process_ophdr( + xfs_warn(log->l_mp, "%s: bad clientid 0x%x", + __func__, ohead->oh_clientid); + ASSERT(0); +- return 
-EIO; ++ return -EFSCORRUPTED; + } + + /* +@@ -4479,7 +4479,7 @@ xlog_recover_process_ophdr( + if (dp + len > end) { + xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); + WARN_ON(1); +- return -EIO; ++ return -EFSCORRUPTED; + } + + trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); +@@ -5209,7 +5209,7 @@ xlog_valid_rec_header( + (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { + xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", + __func__, be32_to_cpu(rhead->h_version)); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* LR body must have data or it wouldn't have been written */ +--- a/fs/xfs/xfs_refcount_item.c ++++ b/fs/xfs/xfs_refcount_item.c +@@ -497,7 +497,7 @@ xfs_cui_recover( + */ + set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); + xfs_cui_release(cuip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + +--- a/fs/xfs/xfs_rmap_item.c ++++ b/fs/xfs/xfs_rmap_item.c +@@ -539,7 +539,7 @@ xfs_rui_recover( + */ + set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags); + xfs_rui_release(ruip); +- return -EIO; ++ return -EFSCORRUPTED; + } + } + diff --git a/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch b/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch new file mode 100644 index 00000000000..d1549fb32ed --- /dev/null +++ b/queue-5.4/xfs-don-t-commit-sunit-swidth-updates-to-disk-if-that-would-cause-repair-failures.patch @@ -0,0 +1,230 @@ +From foo@baz Mon Sep 26 08:48:29 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:54 +0530 +Subject: xfs: don't commit sunit/swidth updates to disk if that would cause repair failures +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-18-chandan.babu@oracle.com> + +From: "Darrick J. 
Wong" + +commit 13eaec4b2adf2657b8167b67e27c97cc7314d923 upstream. + +Alex Lyakas reported[1] that mounting an xfs filesystem with new sunit +and swidth values could cause xfs_repair to fail loudly. The problem +here is that repair calculates where mkfs should have allocated the +root inode, based on the superblock geometry. The allocation decisions +depend on sunit, which means that we really can't go updating sunit if +it would lead to a subsequent repair failure on an otherwise correct +filesystem. + +Port from xfs_repair some code that computes the location of the root +inode and teach mount to skip the ondisk update if it would cause +problems for repair. Along the way we'll update the documentation, +provide a function for computing the minimum AGFL size instead of +open-coding it, and cut down some indenting in the mount code. + +Note that we allow the mount to proceed (and new allocations will +reflect this new geometry) because we've never screened this kind of +thing before. We'll have to wait for a new future incompat feature to +enforce correct behavior, alas. + +Note that the geometry reporting always uses the superblock values, not +the incore ones, so that is what xfs_info and xfs_growfs will report. + +[1] https://lore.kernel.org/linux-xfs/20191125130744.GA44777@bfoster/T/#m00f9594b511e076e2fcdd489d78bc30216d72a7d + +Reported-by: Alex Lyakas +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_ialloc.c | 64 +++++++++++++++++++++++++++++++++++++++++++++ + fs/xfs/libxfs/xfs_ialloc.h | 1 + fs/xfs/xfs_mount.c | 45 ++++++++++++++++++++++++++++++- + fs/xfs/xfs_trace.h | 21 ++++++++++++++ + 4 files changed, 130 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_ialloc.c ++++ b/fs/xfs/libxfs/xfs_ialloc.c +@@ -2854,3 +2854,67 @@ xfs_ialloc_setup_geometry( + else + igeo->ialloc_align = 0; + } ++ ++/* Compute the location of the root directory inode that is laid out by mkfs. */ ++xfs_ino_t ++xfs_ialloc_calc_rootino( ++ struct xfs_mount *mp, ++ int sunit) ++{ ++ struct xfs_ino_geometry *igeo = M_IGEO(mp); ++ xfs_agblock_t first_bno; ++ ++ /* ++ * Pre-calculate the geometry of AG 0. We know what it looks like ++ * because libxfs knows how to create allocation groups now. ++ * ++ * first_bno is the first block in which mkfs could possibly have ++ * allocated the root directory inode, once we factor in the metadata ++ * that mkfs formats before it. Namely, the four AG headers... ++ */ ++ first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize); ++ ++ /* ...the two free space btree roots... */ ++ first_bno += 2; ++ ++ /* ...the inode btree root... */ ++ first_bno += 1; ++ ++ /* ...the initial AGFL... */ ++ first_bno += xfs_alloc_min_freelist(mp, NULL); ++ ++ /* ...the free inode btree root... */ ++ if (xfs_sb_version_hasfinobt(&mp->m_sb)) ++ first_bno++; ++ ++ /* ...the reverse mapping btree root... */ ++ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) ++ first_bno++; ++ ++ /* ...the reference count btree... */ ++ if (xfs_sb_version_hasreflink(&mp->m_sb)) ++ first_bno++; ++ ++ /* ++ * ...and the log, if it is allocated in the first allocation group. ++ * ++ * This can happen with filesystems that only have a single ++ * allocation group, or very odd geometries created by old mkfs ++ * versions on very small filesystems. 
++ */ ++ if (mp->m_sb.sb_logstart && ++ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) ++ first_bno += mp->m_sb.sb_logblocks; ++ ++ /* ++ * Now round first_bno up to whatever allocation alignment is given ++ * by the filesystem or was passed in. ++ */ ++ if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0) ++ first_bno = roundup(first_bno, sunit); ++ else if (xfs_sb_version_hasalign(&mp->m_sb) && ++ mp->m_sb.sb_inoalignmt > 1) ++ first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt); ++ ++ return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); ++} +--- a/fs/xfs/libxfs/xfs_ialloc.h ++++ b/fs/xfs/libxfs/xfs_ialloc.h +@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btre + + int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); + void xfs_ialloc_setup_geometry(struct xfs_mount *mp); ++xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); + + #endif /* __XFS_IALLOC_H__ */ +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -31,7 +31,7 @@ + #include "xfs_reflink.h" + #include "xfs_extent_busy.h" + #include "xfs_health.h" +- ++#include "xfs_trace.h" + + static DEFINE_MUTEX(xfs_uuid_table_mutex); + static int xfs_uuid_table_size; +@@ -365,6 +365,42 @@ release_buf: + } + + /* ++ * If the sunit/swidth change would move the precomputed root inode value, we ++ * must reject the ondisk change because repair will stumble over that. ++ * However, we allow the mount to proceed because we never rejected this ++ * combination before. Returns true to update the sb, false otherwise. 
++ */ ++static inline int ++xfs_check_new_dalign( ++ struct xfs_mount *mp, ++ int new_dalign, ++ bool *update_sb) ++{ ++ struct xfs_sb *sbp = &mp->m_sb; ++ xfs_ino_t calc_ino; ++ ++ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign); ++ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino); ++ ++ if (sbp->sb_rootino == calc_ino) { ++ *update_sb = true; ++ return 0; ++ } ++ ++ xfs_warn(mp, ++"Cannot change stripe alignment; would require moving root inode."); ++ ++ /* ++ * XXX: Next time we add a new incompat feature, this should start ++ * returning -EINVAL to fail the mount. Until then, spit out a warning ++ * that we're ignoring the administrator's instructions. ++ */ ++ xfs_warn(mp, "Skipping superblock stripe alignment update."); ++ *update_sb = false; ++ return 0; ++} ++ ++/* + * If we were provided with new sunit/swidth values as mount options, make sure + * that they pass basic alignment and superblock feature checks, and convert + * them into the same units (FSB) that everything else expects. 
This step +@@ -424,10 +460,17 @@ xfs_update_alignment( + struct xfs_sb *sbp = &mp->m_sb; + + if (mp->m_dalign) { ++ bool update_sb; ++ int error; ++ + if (sbp->sb_unit == mp->m_dalign && + sbp->sb_width == mp->m_swidth) + return 0; + ++ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb); ++ if (error || !update_sb) ++ return error; ++ + sbp->sb_unit = mp->m_dalign; + sbp->sb_width = mp->m_swidth; + mp->m_update_sb = true; +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3609,6 +3609,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large); + DEFINE_KMEM_EVENT(kmem_realloc); + DEFINE_KMEM_EVENT(kmem_zone_alloc); + ++TRACE_EVENT(xfs_check_new_dalign, ++ TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino), ++ TP_ARGS(mp, new_dalign, calc_rootino), ++ TP_STRUCT__entry( ++ __field(dev_t, dev) ++ __field(int, new_dalign) ++ __field(xfs_ino_t, sb_rootino) ++ __field(xfs_ino_t, calc_rootino) ++ ), ++ TP_fast_assign( ++ __entry->dev = mp->m_super->s_dev; ++ __entry->new_dalign = new_dalign; ++ __entry->sb_rootino = mp->m_sb.sb_rootino; ++ __entry->calc_rootino = calc_rootino; ++ ), ++ TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ __entry->new_dalign, __entry->sb_rootino, ++ __entry->calc_rootino) ++) ++ + #endif /* _TRACE_XFS_H */ + + #undef TRACE_INCLUDE_PATH diff --git a/queue-5.4/xfs-fix-an-abba-deadlock-in-xfs_rename.patch b/queue-5.4/xfs-fix-an-abba-deadlock-in-xfs_rename.patch new file mode 100644 index 00000000000..494b21137bd --- /dev/null +++ b/queue-5.4/xfs-fix-an-abba-deadlock-in-xfs_rename.patch @@ -0,0 +1,127 @@ +From foo@baz Mon Sep 26 08:48:29 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:55 +0530 +Subject: xfs: fix an ABBA deadlock in xfs_rename +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, 
leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-19-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 6da1b4b1ab36d80a3994fd4811c8381de10af604 upstream. + +When overlayfs is running on top of xfs and the user unlinks a file in +the overlay, overlayfs will create a whiteout inode and ask xfs to +"rename" the whiteout file atop the one being unlinked. If the file +being unlinked loses its one nlink, we then have to put the inode on the +unlinked list. + +This requires us to grab the AGI buffer of the whiteout inode to take it +off the unlinked list (which is where whiteouts are created) and to grab +the AGI buffer of the file being deleted. If the whiteout was created +in a higher numbered AG than the file being deleted, we'll lock the AGIs +in the wrong order and deadlock. + +Therefore, grab all the AGI locks we think we'll need ahead of time, and +in order of increasing AG number per the locking rules. + +Reported-by: wenli xie +Fixes: 93597ae8dac0 ("xfs: Fix deadlock between AGI and AGF when target_ip exists in xfs_rename()") +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2.h | 2 -- + fs/xfs/libxfs/xfs_dir2_sf.c | 2 +- + fs/xfs/xfs_inode.c | 42 +++++++++++++++++++++++++----------------- + 3 files changed, 26 insertions(+), 20 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dir2.h ++++ b/fs/xfs/libxfs/xfs_dir2.h +@@ -124,8 +124,6 @@ extern int xfs_dir_lookup(struct xfs_tra + extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t ino, + xfs_extlen_t tot); +-extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp, +- xfs_ino_t inum); + extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t inum, + xfs_extlen_t tot); +--- a/fs/xfs/libxfs/xfs_dir2_sf.c ++++ b/fs/xfs/libxfs/xfs_dir2_sf.c +@@ -947,7 +947,7 @@ xfs_dir2_sf_removename( + /* + * Check whether the sf dir replace operation need more blocks. + */ +-bool ++static bool + xfs_dir2_sf_replace_needblock( + struct xfs_inode *dp, + xfs_ino_t inum) +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3224,7 +3224,7 @@ xfs_rename( + struct xfs_trans *tp; + struct xfs_inode *wip = NULL; /* whiteout inode */ + struct xfs_inode *inodes[__XFS_SORT_INODES]; +- struct xfs_buf *agibp; ++ int i; + int num_inodes = __XFS_SORT_INODES; + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); +@@ -3337,6 +3337,30 @@ xfs_rename( + } + + /* ++ * Lock the AGI buffers we need to handle bumping the nlink of the ++ * whiteout inode off the unlinked list and to handle dropping the ++ * nlink of the target inode. Per locking order rules, do this in ++ * increasing AG order and before directory block allocation tries to ++ * grab AGFs because we grab AGIs before AGFs. ++ * ++ * The (vfs) caller must ensure that if src is a directory then ++ * target_ip is either null or an empty directory. 
++ */ ++ for (i = 0; i < num_inodes && inodes[i] != NULL; i++) { ++ if (inodes[i] == wip || ++ (inodes[i] == target_ip && ++ (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { ++ struct xfs_buf *bp; ++ xfs_agnumber_t agno; ++ ++ agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino); ++ error = xfs_read_agi(mp, tp, agno, &bp); ++ if (error) ++ goto out_trans_cancel; ++ } ++ } ++ ++ /* + * Directory entry creation below may acquire the AGF. Remove + * the whiteout from the unlinked list first to preserve correct + * AGI/AGF locking order. This dirties the transaction so failures +@@ -3389,22 +3413,6 @@ xfs_rename( + * In case there is already an entry with the same + * name at the destination directory, remove it first. + */ +- +- /* +- * Check whether the replace operation will need to allocate +- * blocks. This happens when the shortform directory lacks +- * space and we have to convert it to a block format directory. +- * When more blocks are necessary, we must lock the AGI first +- * to preserve locking order (AGI -> AGF). 
+- */ +- if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) { +- error = xfs_read_agi(mp, tp, +- XFS_INO_TO_AGNO(mp, target_ip->i_ino), +- &agibp); +- if (error) +- goto out_trans_cancel; +- } +- + error = xfs_dir_replace(tp, target_dp, target_name, + src_ip->i_ino, spaceres); + if (error) diff --git a/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch b/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch new file mode 100644 index 00000000000..cb9ddc5a845 --- /dev/null +++ b/queue-5.4/xfs-fix-deadlock-between-agi-and-agf-when-target_ip-exists-in-xfs_rename.patch @@ -0,0 +1,133 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:45 +0530 +Subject: xfs: Fix deadlock between AGI and AGF when target_ip exists in xfs_rename() +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-9-chandan.babu@oracle.com> + +From: kaixuxia + +commit 93597ae8dac0149b5c00b787cba6bf7ba213e666 upstream. + +When target_ip exists in xfs_rename(), the xfs_dir_replace() call may +need to hold the AGF lock to allocate more blocks, and then invoking +the xfs_droplink() call to hold AGI lock to drop target_ip onto the +unlinked list, so we get the lock order AGF->AGI. This would break the +ordering constraint on AGI and AGF locking - inode allocation locks +the AGI, then can allocate a new extent for new inodes, locking the +AGF after the AGI. + +In this patch we first check whether the replace operation needs more +blocks. If so, acquire the AGI lock first to preserve the +locking order (AGI/AGF). Actually, the locking order problem only +occurs when we are locking the AGI/AGF of the same AG. For multiple +AGs the AGI lock will be released after the transaction is committed. 
+ +Signed-off-by: kaixuxia +Reviewed-by: Darrick J. Wong +[darrick: reword the comment] +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_dir2.h | 2 ++ + fs/xfs/libxfs/xfs_dir2_sf.c | 28 +++++++++++++++++++++++----- + fs/xfs/xfs_inode.c | 17 +++++++++++++++++ + 3 files changed, 42 insertions(+), 5 deletions(-) + +--- a/fs/xfs/libxfs/xfs_dir2.h ++++ b/fs/xfs/libxfs/xfs_dir2.h +@@ -124,6 +124,8 @@ extern int xfs_dir_lookup(struct xfs_tra + extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t ino, + xfs_extlen_t tot); ++extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp, ++ xfs_ino_t inum); + extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, xfs_ino_t inum, + xfs_extlen_t tot); +--- a/fs/xfs/libxfs/xfs_dir2_sf.c ++++ b/fs/xfs/libxfs/xfs_dir2_sf.c +@@ -945,6 +945,27 @@ xfs_dir2_sf_removename( + } + + /* ++ * Check whether the sf dir replace operation need more blocks. ++ */ ++bool ++xfs_dir2_sf_replace_needblock( ++ struct xfs_inode *dp, ++ xfs_ino_t inum) ++{ ++ int newsize; ++ struct xfs_dir2_sf_hdr *sfp; ++ ++ if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL) ++ return false; ++ ++ sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; ++ newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; ++ ++ return inum > XFS_DIR2_MAX_SHORT_INUM && ++ sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp); ++} ++ ++/* + * Replace the inode number of an entry in a shortform directory. + */ + int /* error */ +@@ -980,17 +1001,14 @@ xfs_dir2_sf_replace( + */ + if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) { + int error; /* error return value */ +- int newsize; /* new inode size */ + +- newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; + /* + * Won't fit as shortform, convert to block then do replace. 
+ */ +- if (newsize > XFS_IFORK_DSIZE(dp)) { ++ if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) { + error = xfs_dir2_sf_to_block(args); +- if (error) { ++ if (error) + return error; +- } + return xfs_dir2_block_replace(args); + } + /* +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3215,6 +3215,7 @@ xfs_rename( + struct xfs_trans *tp; + struct xfs_inode *wip = NULL; /* whiteout inode */ + struct xfs_inode *inodes[__XFS_SORT_INODES]; ++ struct xfs_buf *agibp; + int num_inodes = __XFS_SORT_INODES; + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); +@@ -3379,6 +3380,22 @@ xfs_rename( + * In case there is already an entry with the same + * name at the destination directory, remove it first. + */ ++ ++ /* ++ * Check whether the replace operation will need to allocate ++ * blocks. This happens when the shortform directory lacks ++ * space and we have to convert it to a block format directory. ++ * When more blocks are necessary, we must lock the AGI first ++ * to preserve locking order (AGI -> AGF). 
++ */ ++ if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) { ++ error = xfs_read_agi(mp, tp, ++ XFS_INO_TO_AGNO(mp, target_ip->i_ino), ++ &agibp); ++ if (error) ++ goto out_trans_cancel; ++ } ++ + error = xfs_dir_replace(tp, target_dp, target_name, + src_ip->i_ino, spaceres); + if (error) diff --git a/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch b/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch new file mode 100644 index 00000000000..975068a3933 --- /dev/null +++ b/queue-5.4/xfs-fix-some-memory-leaks-in-log-recovery.patch @@ -0,0 +1,50 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:49 +0530 +Subject: xfs: fix some memory leaks in log recovery +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-13-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 050552cbe06a3a9c3f977dcf11ff998ae1d5c2d5 upstream. + +Fix a few places where we xlog_alloc_buffer a buffer, hit an error, and +then bail out without freeing the buffer. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -1347,10 +1347,11 @@ xlog_find_tail( + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, + &rhead_blk, &rhead, &wrapped); + if (error < 0) +- return error; ++ goto done; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); +- return -EFSCORRUPTED; ++ error = -EFSCORRUPTED; ++ goto done; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); + +@@ -5318,7 +5319,8 @@ xlog_do_recovery_pass( + } else { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, + log->l_mp); +- return -EFSCORRUPTED; ++ error = -EFSCORRUPTED; ++ goto bread_err1; + } + } + diff --git a/queue-5.4/xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch b/queue-5.4/xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch new file mode 100644 index 00000000000..2e987a8343d --- /dev/null +++ b/queue-5.4/xfs-fix-use-after-free-when-aborting-corrupt-attr-inactivation.patch @@ -0,0 +1,37 @@ +From foo@baz Mon Sep 26 08:48:29 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:56 +0530 +Subject: xfs: fix use-after-free when aborting corrupt attr inactivation +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-20-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 496b9bcd62b0b3a160be61e3265a086f97adcbd3 upstream. + +Log the corrupt buffer before we release the buffer. + +Fixes: a5155b870d687 ("xfs: always log corruption errors") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_attr_inactive.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -209,8 +209,8 @@ xfs_attr3_node_inactive( + * Since this code is recursive (gasp!) we must protect ourselves. + */ + if (level > XFS_DA_NODE_MAXDEPTH) { +- xfs_trans_brelse(*trans, bp); /* no locks for later trans */ + xfs_buf_corruption_error(bp); ++ xfs_trans_brelse(*trans, bp); /* no locks for later trans */ + return -EFSCORRUPTED; + } + diff --git a/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch b/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch new file mode 100644 index 00000000000..a33bf477b41 --- /dev/null +++ b/queue-5.4/xfs-range-check-ri_cnt-when-recovering-log-items.patch @@ -0,0 +1,47 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:43 +0530 +Subject: xfs: range check ri_cnt when recovering log items +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-7-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit d6abecb82573fed5f7e4b595b5c0bd37707d2848 upstream. + +Range check the region counter when we're reassembling regions from log +items during log recovery. In the old days ASSERT would halt the +kernel, but this isn't true any more so we have to make an explicit +error return. + +Coverity-id: 1132508 +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_log_recover.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -4293,7 +4293,16 @@ xlog_recover_add_to_trans( + kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), + 0); + } +- ASSERT(item->ri_total > item->ri_cnt); ++ ++ if (item->ri_total <= item->ri_cnt) { ++ xfs_warn(log->l_mp, ++ "log item region count (%d) overflowed size (%d)", ++ item->ri_cnt, item->ri_total); ++ ASSERT(0); ++ kmem_free(ptr); ++ return -EFSCORRUPTED; ++ } ++ + /* Description region is ri_buf[0] */ + item->ri_buf[item->ri_cnt].i_addr = ptr; + item->ri_buf[item->ri_cnt].i_len = len; diff --git a/queue-5.4/xfs-refactor-agfl-length-computation-function.patch b/queue-5.4/xfs-refactor-agfl-length-computation-function.patch new file mode 100644 index 00000000000..5131aba6449 --- /dev/null +++ b/queue-5.4/xfs-refactor-agfl-length-computation-function.patch @@ -0,0 +1,65 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:52 +0530 +Subject: xfs: refactor agfl length computation function +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-16-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 1cac233cfe71f21e069705a4930c18e48d897be6 upstream. + +Refactor xfs_alloc_min_freelist to accept a NULL @pag argument, in which +case it returns the largest possible minimum length. This will be used +in an upcoming patch to compute the length of the AGFL at mkfs time. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_alloc.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -1998,24 +1998,32 @@ xfs_alloc_longest_free_extent( + return pag->pagf_flcount > 0 || pag->pagf_longest > 0; + } + ++/* ++ * Compute the minimum length of the AGFL in the given AG. If @pag is NULL, ++ * return the largest possible minimum length. ++ */ + unsigned int + xfs_alloc_min_freelist( + struct xfs_mount *mp, + struct xfs_perag *pag) + { ++ /* AG btrees have at least 1 level. */ ++ static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1}; ++ const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; + unsigned int min_free; + ++ ASSERT(mp->m_ag_maxlevels > 0); ++ + /* space needed by-bno freespace btree */ +- min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, ++ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, + mp->m_ag_maxlevels); + /* space needed by-size freespace btree */ +- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, ++ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, + mp->m_ag_maxlevels); + /* space needed reverse mapping used space btree */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) +- min_free += min_t(unsigned int, +- pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, +- mp->m_rmap_maxlevels); ++ min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, ++ mp->m_rmap_maxlevels); + + return min_free; + } diff --git a/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch b/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch new file mode 100644 index 00000000000..37de649067f --- /dev/null +++ b/queue-5.4/xfs-replace-eio-with-efscorrupted-for-corrupt-metadata.patch @@ -0,0 +1,96 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:40 +0530 +Subject: xfs: 
replace -EIO with -EFSCORRUPTED for corrupt metadata +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-4-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit c2414ad6e66ab96b867309454498f7fb29b7e855 upstream. + +There are a few places where we return -EIO instead of -EFSCORRUPTED +when we find corrupt metadata. Fix those places. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 6 +++--- + fs/xfs/xfs_attr_inactive.c | 6 +++--- + fs/xfs/xfs_dquot.c | 2 +- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -1374,7 +1374,7 @@ xfs_bmap_last_before( + case XFS_DINODE_FMT_EXTENTS: + break; + default: +- return -EIO; ++ return -EFSCORRUPTED; + } + + if (!(ifp->if_flags & XFS_IFEXTENTS)) { +@@ -1475,7 +1475,7 @@ xfs_bmap_last_offset( + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) +- return -EIO; ++ return -EFSCORRUPTED; + + error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); + if (error || is_empty) +@@ -5872,7 +5872,7 @@ xfs_bmap_insert_extents( + del_cursor); + + if (stop_fsb >= got.br_startoff + got.br_blockcount) { +- error = -EIO; ++ error = -EFSCORRUPTED; + goto del_cursor; + } + +--- a/fs/xfs/xfs_attr_inactive.c ++++ b/fs/xfs/xfs_attr_inactive.c +@@ -209,7 +209,7 @@ xfs_attr3_node_inactive( + */ + if (level > XFS_DA_NODE_MAXDEPTH) { + xfs_trans_brelse(*trans, bp); /* no locks for later trans */ +- return -EIO; ++ return -EFSCORRUPTED; + } + + node = bp->b_addr; +@@ -258,7 +258,7 @@ xfs_attr3_node_inactive( + error = 
xfs_attr3_leaf_inactive(trans, dp, child_bp); + break; + default: +- error = -EIO; ++ error = -EFSCORRUPTED; + xfs_trans_brelse(*trans, child_bp); + break; + } +@@ -341,7 +341,7 @@ xfs_attr3_root_inactive( + error = xfs_attr3_leaf_inactive(trans, dp, bp); + break; + default: +- error = -EIO; ++ error = -EFSCORRUPTED; + xfs_trans_brelse(*trans, bp); + break; + } +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -1125,7 +1125,7 @@ xfs_qm_dqflush( + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); +- return -EIO; ++ return -EFSCORRUPTED; + } + + /* This is the only portion of data that needs to persist */ diff --git a/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch b/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch new file mode 100644 index 00000000000..0f994f70dc0 --- /dev/null +++ b/queue-5.4/xfs-slightly-tweak-an-assert-in-xfs_fs_map_blocks.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:41 +0530 +Subject: xfs: slightly tweak an assert in xfs_fs_map_blocks +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-5-chandan.babu@oracle.com> + +From: Christoph Hellwig + +commit 88cdb7147b21b2d8b4bd3f3d95ce0bffd73e1ac3 upstream. + +We should never see delalloc blocks for a pNFS layout, write or not. +Adjust the assert to check for that. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_pnfs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/xfs/xfs_pnfs.c ++++ b/fs/xfs/xfs_pnfs.c +@@ -147,11 +147,11 @@ xfs_fs_map_blocks( + if (error) + goto out_unlock; + ++ ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); ++ + if (write) { + enum xfs_prealloc_flags flags = 0; + +- ASSERT(imap.br_startblock != DELAYSTARTBLOCK); +- + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { + /* + * xfs_iomap_write_direct() expects to take ownership of diff --git a/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch b/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch new file mode 100644 index 00000000000..60dfad24f6b --- /dev/null +++ b/queue-5.4/xfs-split-the-sunit-parameter-update-into-two-parts.patch @@ -0,0 +1,190 @@ +From foo@baz Mon Sep 26 08:48:29 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:53 +0530 +Subject: xfs: split the sunit parameter update into two parts +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-17-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 4f5b1b3a8fa07dc8ecedfaf539b3deed8931a73e upstream. + +If the administrator provided a sunit= mount option, we need to validate +the raw parameter, convert the mount option units (512b blocks) into the +internal unit (fs blocks), and then validate that the (now cooked) +parameter doesn't screw anything up on disk. The incore inode geometry +computation can depend on the new sunit option, but a subsequent patch +will make validating the cooked value depends on the computed inode +geometry, so break the sunit update into two steps. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_mount.c | 123 +++++++++++++++++++++++++++++++---------------------- + 1 file changed, 72 insertions(+), 51 deletions(-) + +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -365,66 +365,76 @@ release_buf: + } + + /* +- * Update alignment values based on mount options and sb values ++ * If we were provided with new sunit/swidth values as mount options, make sure ++ * that they pass basic alignment and superblock feature checks, and convert ++ * them into the same units (FSB) that everything else expects. This step ++ * /must/ be done before computing the inode geometry. + */ + STATIC int +-xfs_update_alignment(xfs_mount_t *mp) ++xfs_validate_new_dalign( ++ struct xfs_mount *mp) + { +- xfs_sb_t *sbp = &(mp->m_sb); ++ if (mp->m_dalign == 0) ++ return 0; + +- if (mp->m_dalign) { ++ /* ++ * If stripe unit and stripe width are not multiples ++ * of the fs blocksize turn off alignment. ++ */ ++ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || ++ (BBTOB(mp->m_swidth) & mp->m_blockmask)) { ++ xfs_warn(mp, ++ "alignment check failed: sunit/swidth vs. blocksize(%d)", ++ mp->m_sb.sb_blocksize); ++ return -EINVAL; ++ } else { + /* +- * If stripe unit and stripe width are not multiples +- * of the fs blocksize turn off alignment. ++ * Convert the stripe unit and width to FSBs. + */ +- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || +- (BBTOB(mp->m_swidth) & mp->m_blockmask)) { ++ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); ++ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) { + xfs_warn(mp, +- "alignment check failed: sunit/swidth vs. blocksize(%d)", +- sbp->sb_blocksize); ++ "alignment check failed: sunit/swidth vs. agsize(%d)", ++ mp->m_sb.sb_agblocks); + return -EINVAL; +- } else { +- /* +- * Convert the stripe unit and width to FSBs. 
+- */ +- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); +- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { +- xfs_warn(mp, +- "alignment check failed: sunit/swidth vs. agsize(%d)", +- sbp->sb_agblocks); +- return -EINVAL; +- } else if (mp->m_dalign) { +- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); +- } else { +- xfs_warn(mp, +- "alignment check failed: sunit(%d) less than bsize(%d)", +- mp->m_dalign, sbp->sb_blocksize); +- return -EINVAL; +- } +- } +- +- /* +- * Update superblock with new values +- * and log changes +- */ +- if (xfs_sb_version_hasdalign(sbp)) { +- if (sbp->sb_unit != mp->m_dalign) { +- sbp->sb_unit = mp->m_dalign; +- mp->m_update_sb = true; +- } +- if (sbp->sb_width != mp->m_swidth) { +- sbp->sb_width = mp->m_swidth; +- mp->m_update_sb = true; +- } ++ } else if (mp->m_dalign) { ++ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); + } else { + xfs_warn(mp, +- "cannot change alignment: superblock does not support data alignment"); ++ "alignment check failed: sunit(%d) less than bsize(%d)", ++ mp->m_dalign, mp->m_sb.sb_blocksize); + return -EINVAL; + } ++ } ++ ++ if (!xfs_sb_version_hasdalign(&mp->m_sb)) { ++ xfs_warn(mp, ++"cannot change alignment: superblock does not support data alignment"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* Update alignment values based on mount options and sb values. 
*/ ++STATIC int ++xfs_update_alignment( ++ struct xfs_mount *mp) ++{ ++ struct xfs_sb *sbp = &mp->m_sb; ++ ++ if (mp->m_dalign) { ++ if (sbp->sb_unit == mp->m_dalign && ++ sbp->sb_width == mp->m_swidth) ++ return 0; ++ ++ sbp->sb_unit = mp->m_dalign; ++ sbp->sb_width = mp->m_swidth; ++ mp->m_update_sb = true; + } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && + xfs_sb_version_hasdalign(&mp->m_sb)) { +- mp->m_dalign = sbp->sb_unit; +- mp->m_swidth = sbp->sb_width; ++ mp->m_dalign = sbp->sb_unit; ++ mp->m_swidth = sbp->sb_width; + } + + return 0; +@@ -692,12 +702,12 @@ xfs_mountfs( + } + + /* +- * Check if sb_agblocks is aligned at stripe boundary +- * If sb_agblocks is NOT aligned turn off m_dalign since +- * allocator alignment is within an ag, therefore ag has +- * to be aligned at stripe boundary. ++ * If we were given new sunit/swidth options, do some basic validation ++ * checks and convert the incore dalign and swidth values to the ++ * same units (FSB) that everything else uses. This /must/ happen ++ * before computing the inode geometry. + */ +- error = xfs_update_alignment(mp); ++ error = xfs_validate_new_dalign(mp); + if (error) + goto out; + +@@ -708,6 +718,17 @@ xfs_mountfs( + xfs_rmapbt_compute_maxlevels(mp); + xfs_refcountbt_compute_maxlevels(mp); + ++ /* ++ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks ++ * is NOT aligned turn off m_dalign since allocator alignment is within ++ * an ag, therefore ag has to be aligned at stripe boundary. Note that ++ * we must compute the free space and rmap btree geometry before doing ++ * this. 
++ */ ++ error = xfs_update_alignment(mp); ++ if (error) ++ goto out; ++ + /* enable fail_at_unmount as default */ + mp->m_fail_unmount = true; + diff --git a/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch b/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch new file mode 100644 index 00000000000..c2e880ee966 --- /dev/null +++ b/queue-5.4/xfs-stabilize-insert-range-start-boundary-to-avoid-cow-writeback-race.patch @@ -0,0 +1,96 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:50 +0530 +Subject: xfs: stabilize insert range start boundary to avoid COW writeback race +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-14-chandan.babu@oracle.com> + +From: Brian Foster + +commit d0c2204135a0cdbc607c94c481cf1ccb2f659aa7 upstream. + +generic/522 (fsx) occasionally fails with a file corruption due to +an insert range operation. The primary characteristic of the +corruption is a misplaced insert range operation that differs from +the requested target offset. The reason for this behavior is a race +between the extent shift sequence of an insert range and a COW +writeback completion that causes a front merge with the first extent +in the shift. + +The shift preparation function flushes and unmaps from the target +offset of the operation to the end of the file to ensure no +modifications can be made and page cache is invalidated before file +data is shifted. An insert range operation then splits the extent at +the target offset, if necessary, and begins to shift the start +offset of each extent starting from the end of the file to the start +offset. 
The shift sequence operates at extent level and so depends +on the preparation sequence to guarantee no changes can be made to +the target range during the shift. If the block immediately prior to +the target offset was dirty and shared, however, it can undergo +writeback and move from the COW fork to the data fork at any point +during the shift. If the block is contiguous with the block at the +start offset of the insert range, it can front merge and alter the +start offset of the extent. Once the shift sequence reaches the +target offset, it shifts based on the latest start offset and +silently changes the target offset of the operation and corrupts the +file. + +To address this problem, update the shift preparation code to +stabilize the start boundary along with the full range of the +insert. Also update the existing corruption check to fail if any +extent is shifted with a start offset behind the target offset of +the insert range. This prevents insert from racing with COW +writeback completion and fails loudly in the event of an unexpected +extent shift. + +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. 
Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_bmap.c | 2 +- + fs/xfs/xfs_bmap_util.c | 12 ++++++++++++ + 2 files changed, 13 insertions(+), 1 deletion(-) + +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -5876,7 +5876,7 @@ xfs_bmap_insert_extents( + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); + +- if (stop_fsb >= got.br_startoff + got.br_blockcount) { ++ if (stop_fsb > got.br_startoff) { + ASSERT(0); + error = -EFSCORRUPTED; + goto del_cursor; +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1167,6 +1167,7 @@ xfs_prepare_shift( + struct xfs_inode *ip, + loff_t offset) + { ++ struct xfs_mount *mp = ip->i_mount; + int error; + + /* +@@ -1180,6 +1181,17 @@ xfs_prepare_shift( + } + + /* ++ * Shift operations must stabilize the start block offset boundary along ++ * with the full range of the operation. If we don't, a COW writeback ++ * completion could race with an insert, front merge with the start ++ * extent (after split) during the shift and corrupt the file. Start ++ * with the block just prior to the start to stabilize the boundary. ++ */ ++ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog); ++ if (offset) ++ offset -= (1 << mp->m_sb.sb_blocklog); ++ ++ /* + * Writeback and invalidate cache for the remainder of the file as we're + * about to shift down every extent from offset to EOF. 
+ */ diff --git a/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch b/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch new file mode 100644 index 00000000000..d4c49321875 --- /dev/null +++ b/queue-5.4/xfs-use-bitops-interface-for-buf-log-item-ail-flag-check.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Sep 26 08:48:28 AM CEST 2022 +From: Chandan Babu R +Date: Sat, 24 Sep 2022 18:26:51 +0530 +Subject: xfs: use bitops interface for buf log item AIL flag check +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20220924125656.101069-15-chandan.babu@oracle.com> + +From: Brian Foster + +commit 826f7e34130a4ce756138540170cbe935c537a47 upstream. + +The xfs_log_item flags were converted to atomic bitops as of commit +22525c17ed ("xfs: log item flags are racy"). The assert check for +AIL presence in xfs_buf_item_relse() still uses the old value based +check. This likely went unnoticed as XFS_LI_IN_AIL evaluates to 0 +and causes the assert to unconditionally pass. Fix up the check. + +Signed-off-by: Brian Foster +Fixes: 22525c17ed ("xfs: log item flags are racy") +Reviewed-by: Eric Sandeen +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_buf_item.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -956,7 +956,7 @@ xfs_buf_item_relse( + struct xfs_buf_log_item *bip = bp->b_log_item; + + trace_xfs_buf_item_relse(bp, _RET_IP_); +- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); ++ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + + bp->b_log_item = NULL; + if (list_empty(&bp->b_li_list)) -- 2.47.3