From fc70e0b4c30182e30c398fb9aa132838967ceed2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 9 Feb 2024 13:42:40 -0500 Subject: [PATCH] Fixes for 6.6 Signed-off-by: Sasha Levin --- ...catherine-as-xfs-maintainer-for-6.6..patch | 32 +++ queue-6.6/series | 21 ++ ...ent-items-when-recovery-intents-fail.patch | 128 ++++++++++ ...d-io-and-ficlone-to-run-concurrently.patch | 233 ++++++++++++++++++ .../xfs-bump-max-fsgeom-struct-version.patch | 41 +++ queue-6.6/xfs-clean-up-dqblk-extraction.patch | 92 +++++++ ...xflag_realtime-handling-in-xfs_ioctl.patch | 67 +++++ ...ry-does-not-validate-the-recovered-d.patch | 57 +++++ ...s-factor-out-xfs_defer_pending_abort.patch | 74 ++++++ ...lect-in-kconfig-xfs_online_scrub_sta.patch | 47 ++++ ...-internal-error-from-agfl-exhaustion.patch | 111 +++++++++ ...nversion-error-in-xfs_bmap_del_exten.patch | 43 ++++ ...s-0-from-xfs_bmapi_write-in-xfs_allo.patch | 100 ++++++++ ...eing-of-rt-data-fork-extent-mappings.patch | 140 +++++++++++ ...ry-does-not-validate-the-recovered-i.patch | 77 ++++++ ...-introduce-protection-for-drop-nlink.patch | 43 ++++ ...xlen-is-still-congruent-with-prod-wh.patch | 117 +++++++++ ...he-written-blocks-in-xfs_reflink_end.patch | 53 ++++ ...vent-rt-growfs-when-quota-is-enabled.patch | 42 ++++ ...-stable-writes-flag-on-the-rt-device.patch | 84 +++++++ ...uld-return-negative-errnos-when-rt-d.patch | 67 +++++ ...c_sema-if-flushing-data-device-fails.patch | 102 ++++++++ 22 files changed, 1771 insertions(+) create mode 100644 queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch create mode 100644 queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch create mode 100644 queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch create mode 100644 queue-6.6/xfs-bump-max-fsgeom-struct-version.patch create mode 100644 queue-6.6/xfs-clean-up-dqblk-extraction.patch create mode 100644 queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch create mode 100644 
queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch create mode 100644 queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch create mode 100644 queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch create mode 100644 queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch create mode 100644 queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch create mode 100644 queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch create mode 100644 queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch create mode 100644 queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch create mode 100644 queue-6.6/xfs-introduce-protection-for-drop-nlink.patch create mode 100644 queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch create mode 100644 queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch create mode 100644 queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch create mode 100644 queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch create mode 100644 queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch create mode 100644 queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch diff --git a/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch b/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch new file mode 100644 index 00000000000..e556f03924f --- /dev/null +++ b/queue-6.6/maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch @@ -0,0 +1,32 @@ +From edbd1fc15e333d71ba8a12534f21738eaf617869 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:34 -0800 +Subject: MAINTAINERS: add Catherine as xfs maintainer for 6.6.y + +From: Catherine Hoang + +This is an attempt to direct the bots and humans that are testing +LTS 6.6.y towards the maintainer of xfs in the 6.6.y tree. 
+ +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + MAINTAINERS | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/MAINTAINERS b/MAINTAINERS +index dd5de540ec0b..40312bb550f0 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -23630,6 +23630,7 @@ F: include/xen/arm/swiotlb-xen.h + F: include/xen/swiotlb-xen.h + + XFS FILESYSTEM ++M: Catherine Hoang + M: Chandan Babu R + R: Darrick J. Wong + L: linux-xfs@vger.kernel.org +-- +2.43.0 + diff --git a/queue-6.6/series b/queue-6.6/series index 9f21b1bd48c..2ccd55c87c8 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -14,3 +14,24 @@ rust-upgrade-to-rust-1.72.1.patch rust-task-remove-redundant-explicit-link.patch rust-print-use-explicit-link-in-documentation.patch rust-upgrade-to-rust-1.73.0.patch +maintainers-add-catherine-as-xfs-maintainer-for-6.6..patch +xfs-bump-max-fsgeom-struct-version.patch +xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch +xfs-prevent-rt-growfs-when-quota-is-enabled.patch +xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch +xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch +xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch +xfs-introduce-protection-for-drop-nlink.patch +xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch +xfs-allow-read-io-and-ficlone-to-run-concurrently.patch +xfs-factor-out-xfs_defer_pending_abort.patch +xfs-abort-intent-items-when-recovery-intents-fail.patch +xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch +xfs-up-ic_sema-if-flushing-data-device-fails.patch +xfs-fix-internal-error-from-agfl-exhaustion.patch +xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch +xfs-inode-recovery-does-not-validate-the-recovered-i.patch +xfs-clean-up-dqblk-extraction.patch +xfs-dquot-recovery-does-not-validate-the-recovered-d.patch +xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch +xfs-respect-the-stable-writes-flag-on-the-rt-device.patch diff --git 
a/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch b/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch new file mode 100644 index 00000000000..d27744e26e8 --- /dev/null +++ b/queue-6.6/xfs-abort-intent-items-when-recovery-intents-fail.patch @@ -0,0 +1,128 @@ +From dedf4b860cb59b0e44f55e778b02dc7196b7ac4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:45 -0800 +Subject: xfs: abort intent items when recovery intents fail + +From: Long Li + +commit f8f9d952e42dd49ae534f61f2fa7ca0876cb9848 upstream. + +When recovering intents, we capture newly created intent items as part of +committing recovered intent items. If intent recovery fails at a later +point, we forget to remove those newly created intent items from the AIL +and hang: + + [root@localhost ~]# cat /proc/539/stack + [<0>] xfs_ail_push_all_sync+0x174/0x230 + [<0>] xfs_unmount_flush_inodes+0x8d/0xd0 + [<0>] xfs_mountfs+0x15f7/0x1e70 + [<0>] xfs_fs_fill_super+0x10ec/0x1b20 + [<0>] get_tree_bdev+0x3c8/0x730 + [<0>] vfs_get_tree+0x89/0x2c0 + [<0>] path_mount+0xecf/0x1800 + [<0>] do_mount+0xf3/0x110 + [<0>] __x64_sys_mount+0x154/0x1f0 + [<0>] do_syscall_64+0x39/0x80 + [<0>] entry_SYSCALL_64_after_hwframe+0x63/0xcd + +When newly created intent items fail to commit via transaction, intent +recovery hasn't created done items for these newly created intent items, +so the capture structure is the sole owner of the captured intent items. +We must release them explicitly or else they leak: + +unreferenced object 0xffff888016719108 (size 432): + comm "mount", pid 529, jiffies 4294706839 (age 144.463s) + hex dump (first 32 bytes): + 08 91 71 16 80 88 ff ff 08 91 71 16 80 88 ff ff ..q.......q..... + 18 91 71 16 80 88 ff ff 18 91 71 16 80 88 ff ff ..q.......q..... 
+ backtrace: + [] xfs_efi_init+0x18f/0x1d0 + [] xfs_extent_free_create_intent+0x50/0x150 + [] xfs_defer_create_intents+0x16a/0x340 + [] xfs_defer_ops_capture_and_commit+0x8e/0xad0 + [] xfs_cui_item_recover+0x819/0x980 + [] xlog_recover_process_intents+0x246/0xb70 + [] xlog_recover_finish+0x8a/0x9a0 + [] xfs_log_mount_finish+0x2bb/0x4a0 + [] xfs_mountfs+0x14bf/0x1e70 + [] xfs_fs_fill_super+0x10d0/0x1b20 + [] get_tree_bdev+0x3d2/0x6d0 + [] vfs_get_tree+0x89/0x2c0 + [] path_mount+0xecf/0x1800 + [] do_mount+0xf3/0x110 + [] __x64_sys_mount+0x154/0x1f0 + [] do_syscall_64+0x39/0x80 + +Fix the problem above by aborting intent items that don't have a done item +when intent recovery fails. + +Fixes: e6fff81e4870 ("xfs: proper replay of deferred ops queued during log recovery") +Signed-off-by: Long Li +Reviewed-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_defer.c | 5 +++-- + fs/xfs/libxfs/xfs_defer.h | 2 +- + fs/xfs/xfs_log_recover.c | 2 +- + 3 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c +index 88388e12f8e7..f71679ce23b9 100644 +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -763,12 +763,13 @@ xfs_defer_ops_capture( + + /* Release all resources that we used to capture deferred ops. */ + void +-xfs_defer_ops_capture_free( ++xfs_defer_ops_capture_abort( + struct xfs_mount *mp, + struct xfs_defer_capture *dfc) + { + unsigned short i; + ++ xfs_defer_pending_abort(mp, &dfc->dfc_dfops); + xfs_defer_cancel_list(mp, &dfc->dfc_dfops); + + for (i = 0; i < dfc->dfc_held.dr_bufs; i++) +@@ -809,7 +810,7 @@ xfs_defer_ops_capture_and_commit( + /* Commit the transaction and add the capture structure to the list.
*/ + error = xfs_trans_commit(tp); + if (error) { +- xfs_defer_ops_capture_free(mp, dfc); ++ xfs_defer_ops_capture_abort(mp, dfc); + return error; + } + +diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h +index 114a3a4930a3..8788ad5f6a73 100644 +--- a/fs/xfs/libxfs/xfs_defer.h ++++ b/fs/xfs/libxfs/xfs_defer.h +@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, + struct list_head *capture_list); + void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, + struct xfs_defer_resources *dres); +-void xfs_defer_ops_capture_free(struct xfs_mount *mp, ++void xfs_defer_ops_capture_abort(struct xfs_mount *mp, + struct xfs_defer_capture *d); + void xfs_defer_resources_rele(struct xfs_defer_resources *dres); + +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 13b94d2e605b..a1e18b24971a 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops( + + list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { + list_del_init(&dfc->dfc_list); +- xfs_defer_ops_capture_free(mp, dfc); ++ xfs_defer_ops_capture_abort(mp, dfc); + } + } + +-- +2.43.0 + diff --git a/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch b/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch new file mode 100644 index 00000000000..704d4ab090b --- /dev/null +++ b/queue-6.6/xfs-allow-read-io-and-ficlone-to-run-concurrently.patch @@ -0,0 +1,233 @@ +From f7f5e8c270677efe0b56d4545a82bdacc0363ce4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:43 -0800 +Subject: xfs: allow read IO and FICLONE to run concurrently + +From: Catherine Hoang + +commit 14a537983b228cb050ceca3a5b743d01315dc4aa upstream. + +One of our VM cluster management products needs to snapshot KVM image +files so that they can be restored in case of failure. 
Snapshotting is +done by redirecting VM disk writes to a sidecar file and using reflink +on the disk image, specifically the FICLONE ioctl as used by +"cp --reflink". Reflink locks the source and destination files while it +operates, which means that reads from the main vm disk image are blocked, +causing the vm to stall. When an image file is heavily fragmented, the +copy process could take several minutes. Some of the vm image files have +50-100 million extent records, and duplicating that much metadata locks +the file for 30 minutes or more. Having activities suspended for such +a long time in a cluster node could result in node eviction. + +Clone operations and read IO do not change any data in the source file, +so they should be able to run concurrently. Demote the exclusive locks +taken by FICLONE to shared locks to allow reads while cloning. While a +clone is in progress, writes will take the IOLOCK_EXCL, so they block +until the clone completes. + +Link: https://lore.kernel.org/linux-xfs/8911B94D-DD29-4D6E-B5BC-32EAF1866245@oracle.com/ +Signed-off-by: Catherine Hoang +Reviewed-by: "Darrick J. 
Wong" +Reviewed-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_file.c | 63 +++++++++++++++++++++++++++++++++++--------- + fs/xfs/xfs_inode.c | 17 ++++++++++++ + fs/xfs/xfs_inode.h | 9 +++++++ + fs/xfs/xfs_reflink.c | 4 +++ + 4 files changed, 80 insertions(+), 13 deletions(-) + +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 203700278ddb..e33e5e13b95f 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -214,6 +214,43 @@ xfs_ilock_iocb( + return 0; + } + ++static int ++xfs_ilock_iocb_for_write( ++ struct kiocb *iocb, ++ unsigned int *lock_mode) ++{ ++ ssize_t ret; ++ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); ++ ++ ret = xfs_ilock_iocb(iocb, *lock_mode); ++ if (ret) ++ return ret; ++ ++ if (*lock_mode == XFS_IOLOCK_EXCL) ++ return 0; ++ if (!xfs_iflags_test(ip, XFS_IREMAPPING)) ++ return 0; ++ ++ xfs_iunlock(ip, *lock_mode); ++ *lock_mode = XFS_IOLOCK_EXCL; ++ return xfs_ilock_iocb(iocb, *lock_mode); ++} ++ ++static unsigned int ++xfs_ilock_for_write_fault( ++ struct xfs_inode *ip) ++{ ++ /* get a shared lock if no remapping in progress */ ++ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); ++ if (!xfs_iflags_test(ip, XFS_IREMAPPING)) ++ return XFS_MMAPLOCK_SHARED; ++ ++ /* wait for remapping to complete */ ++ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); ++ xfs_ilock(ip, XFS_MMAPLOCK_EXCL); ++ return XFS_MMAPLOCK_EXCL; ++} ++ + STATIC ssize_t + xfs_file_dio_read( + struct kiocb *iocb, +@@ -551,7 +588,7 @@ xfs_file_dio_write_aligned( + unsigned int iolock = XFS_IOLOCK_SHARED; + ssize_t ret; + +- ret = xfs_ilock_iocb(iocb, iolock); ++ ret = xfs_ilock_iocb_for_write(iocb, &iolock); + if (ret) + return ret; + ret = xfs_file_write_checks(iocb, from, &iolock); +@@ -618,7 +655,7 @@ xfs_file_dio_write_unaligned( + flags = IOMAP_DIO_FORCE_WAIT; + } + +- ret = xfs_ilock_iocb(iocb, iolock); ++ ret = xfs_ilock_iocb_for_write(iocb, &iolock); + if (ret) + return 
ret; + +@@ -1180,7 +1217,7 @@ xfs_file_remap_range( + if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out)) + xfs_log_force_inode(dest); + out_unlock: +- xfs_iunlock2_io_mmap(src, dest); ++ xfs_iunlock2_remapping(src, dest); + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return remapped > 0 ? remapped : ret; +@@ -1328,6 +1365,7 @@ __xfs_filemap_fault( + struct inode *inode = file_inode(vmf->vma->vm_file); + struct xfs_inode *ip = XFS_I(inode); + vm_fault_t ret; ++ unsigned int lock_mode = 0; + + trace_xfs_filemap_fault(ip, order, write_fault); + +@@ -1336,25 +1374,24 @@ __xfs_filemap_fault( + file_update_time(vmf->vma->vm_file); + } + ++ if (IS_DAX(inode) || write_fault) ++ lock_mode = xfs_ilock_for_write_fault(XFS_I(inode)); ++ + if (IS_DAX(inode)) { + pfn_t pfn; + +- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + ret = xfs_dax_fault(vmf, order, write_fault, &pfn); + if (ret & VM_FAULT_NEEDDSYNC) + ret = dax_finish_sync_fault(vmf, order, pfn); +- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ++ } else if (write_fault) { ++ ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops); + } else { +- if (write_fault) { +- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); +- ret = iomap_page_mkwrite(vmf, +- &xfs_page_mkwrite_iomap_ops); +- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); +- } else { +- ret = filemap_fault(vmf); +- } ++ ret = filemap_fault(vmf); + } + ++ if (lock_mode) ++ xfs_iunlock(XFS_I(inode), lock_mode); ++ + if (write_fault) + sb_end_pagefault(inode->i_sb); + return ret; +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index fb85c5c81745..f9d29acd72b9 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3628,6 +3628,23 @@ xfs_iunlock2_io_mmap( + inode_unlock(VFS_I(ip1)); + } + ++/* Drop the MMAPLOCK and the IOLOCK after a remap completes. 
*/ ++void ++xfs_iunlock2_remapping( ++ struct xfs_inode *ip1, ++ struct xfs_inode *ip2) ++{ ++ xfs_iflags_clear(ip1, XFS_IREMAPPING); ++ ++ if (ip1 != ip2) ++ xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED); ++ xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL); ++ ++ if (ip1 != ip2) ++ inode_unlock_shared(VFS_I(ip1)); ++ inode_unlock(VFS_I(ip2)); ++} ++ + /* + * Reload the incore inode list for this inode. Caller should ensure that + * the link count cannot change, either by taking ILOCK_SHARED or otherwise +diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h +index 0c5bdb91152e..3dc47937da5d 100644 +--- a/fs/xfs/xfs_inode.h ++++ b/fs/xfs/xfs_inode.h +@@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) + /* Quotacheck is running but inode has not been added to quota counts. */ + #define XFS_IQUOTAUNCHECKED (1 << 14) + ++/* ++ * Remap in progress. Callers that wish to update file data while ++ * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake ++ * the lock in exclusive mode. Relocking the file will block until ++ * IREMAPPING is cleared. ++ */ ++#define XFS_IREMAPPING (1U << 15) ++ + /* All inode state flags related to inode reclaim. 
*/ + #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \ + XFS_IRECLAIM | \ +@@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work); + + int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); + void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); ++void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2); + + static inline bool + xfs_inode_unlinked_incomplete( +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c +index eb9102453aff..658edee8381d 100644 +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep( + if (ret) + goto out_unlock; + ++ xfs_iflags_set(src, XFS_IREMAPPING); ++ if (inode_in != inode_out) ++ xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL); ++ + return 0; + out_unlock: + xfs_iunlock2_io_mmap(src, dest); +-- +2.43.0 + diff --git a/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch b/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch new file mode 100644 index 00000000000..cdd03ce59ad --- /dev/null +++ b/queue-6.6/xfs-bump-max-fsgeom-struct-version.patch @@ -0,0 +1,41 @@ +From a05bc3d408f3a79da838b165951bedb6a380a10c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:35 -0800 +Subject: xfs: bump max fsgeom struct version + +From: Darrick J. Wong + +commit 9488062805943c2d63350d3ef9e4dc093799789a upstream. + +The latest version of the fs geometry structure is v5. Bump this +constant so that xfs_db and mkfs calls to libxfs_fs_geometry will fill +out all the fields. + +IOWs, this commit is a no-op for the kernel, but will be useful for +userspace reporting in later changes. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_sb.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h +index a5e14740ec9a..19134b23c10b 100644 +--- a/fs/xfs/libxfs/xfs_sb.h ++++ b/fs/xfs/libxfs/xfs_sb.h +@@ -25,7 +25,7 @@ extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp); + + extern int xfs_update_secondary_sbs(struct xfs_mount *mp); + +-#define XFS_FS_GEOM_MAX_STRUCT_VER (4) ++#define XFS_FS_GEOM_MAX_STRUCT_VER (5) + extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo, + int struct_version); + extern int xfs_sb_read_secondary(struct xfs_mount *mp, +-- +2.43.0 + diff --git a/queue-6.6/xfs-clean-up-dqblk-extraction.patch b/queue-6.6/xfs-clean-up-dqblk-extraction.patch new file mode 100644 index 00000000000..8508f40de24 --- /dev/null +++ b/queue-6.6/xfs-clean-up-dqblk-extraction.patch @@ -0,0 +1,92 @@ +From 399cb61dfc6e9cb6b9015cb131eb6b8969011ac2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:51 -0800 +Subject: xfs: clean up dqblk extraction + +From: Darrick J. Wong + +commit ed17f7da5f0c8b65b7b5f7c98beb0aadbc0546ee upstream. + +Since the introduction of xfs_dqblk in V5, xfs really ought to find the +dqblk pointer from the dquot buffer, then compute the xfs_disk_dquot +pointer from the dqblk pointer. Fix the open-coded xfs_buf_offset calls +and do the type checking in the correct order. + +Note that this has made no practical difference since the start of the +xfs_disk_dquot is coincident with the start of the xfs_dqblk. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_dquot.c | 5 +++-- + fs/xfs/xfs_dquot_item_recover.c | 7 ++++--- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c +index ac6ba646624d..a013b87ab8d5 100644 +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -562,7 +562,8 @@ xfs_dquot_from_disk( + struct xfs_dquot *dqp, + struct xfs_buf *bp) + { +- struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; ++ struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); ++ struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; + + /* + * Ensure that we got the type and ID we were looking for. +@@ -1250,7 +1251,7 @@ xfs_qm_dqflush( + } + + /* Flush the incore dquot to the ondisk buffer. */ +- dqblk = bp->b_addr + dqp->q_bufoffset; ++ dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); + xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); + + /* +diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c +index 8966ba842395..db2cb5e4197b 100644 +--- a/fs/xfs/xfs_dquot_item_recover.c ++++ b/fs/xfs/xfs_dquot_item_recover.c +@@ -65,6 +65,7 @@ xlog_recover_dquot_commit_pass2( + { + struct xfs_mount *mp = log->l_mp; + struct xfs_buf *bp; ++ struct xfs_dqblk *dqb; + struct xfs_disk_dquot *ddq, *recddq; + struct xfs_dq_logformat *dq_f; + xfs_failaddr_t fa; +@@ -130,14 +131,14 @@ xlog_recover_dquot_commit_pass2( + return error; + + ASSERT(bp); +- ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); ++ dqb = xfs_buf_offset(bp, dq_f->qlf_boffset); ++ ddq = &dqb->dd_diskdq; + + /* + * If the dquot has an LSN in it, recover the dquot only if it's less + * than the lsn of the transaction we are replaying. 
+ */ + if (xfs_has_crc(mp)) { +- struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; + xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); + + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { +@@ -147,7 +148,7 @@ xlog_recover_dquot_commit_pass2( + + memcpy(ddq, recddq, item->ri_buf[1].i_len); + if (xfs_has_crc(mp)) { +- xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), ++ xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + +-- +2.43.0 + diff --git a/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch b/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch new file mode 100644 index 00000000000..c2dbfb48cb1 --- /dev/null +++ b/queue-6.6/xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch @@ -0,0 +1,67 @@ +From 1ce001494b6243c87b01e6d68b430c2d84cb2e39 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:53 -0800 +Subject: xfs: clean up FS_XFLAG_REALTIME handling in xfs_ioctl_setattr_xflags + +From: Christoph Hellwig + +commit c421df0b19430417a04f68919fc3d1943d20ac04 upstream. + +Introduce a local boolean variable for FS_XFLAG_REALTIME to make the +checks for it more obvious, and de-densify a few of the conditionals +using it to make them more readable while at it. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20231025141020.192413-4-hch@lst.de +Reviewed-by: Darrick J.
Wong +Signed-off-by: Christian Brauner +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_ioctl.c | 22 ++++++++++++---------- + 1 file changed, 12 insertions(+), 10 deletions(-) + +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 55bb01173cde..be69e7be713e 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -1120,23 +1120,25 @@ xfs_ioctl_setattr_xflags( + struct fileattr *fa) + { + struct xfs_mount *mp = ip->i_mount; ++ bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); + uint64_t i_flags2; + +- /* Can't change realtime flag if any extents are allocated. */ +- if ((ip->i_df.if_nextents || ip->i_delayed_blks) && +- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) +- return -EINVAL; ++ if (rtflag != XFS_IS_REALTIME_INODE(ip)) { ++ /* Can't change realtime flag if any extents are allocated. */ ++ if (ip->i_df.if_nextents || ip->i_delayed_blks) ++ return -EINVAL; ++ } + +- /* If realtime flag is set then must have realtime device */ +- if (fa->fsx_xflags & FS_XFLAG_REALTIME) { ++ if (rtflag) { ++ /* If realtime flag is set then must have realtime device */ + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + (ip->i_extsize % mp->m_sb.sb_rextsize)) + return -EINVAL; +- } + +- /* Clear reflink if we are actually able to set the rt flag. */ +- if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) +- ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; ++ /* Clear reflink if we are actually able to set the rt flag. */ ++ if (xfs_is_reflink_inode(ip)) ++ ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; ++ } + + /* diflags2 only valid for v3 inodes. 
*/ + i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); +-- +2.43.0 + diff --git a/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch b/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch new file mode 100644 index 00000000000..8056793526c --- /dev/null +++ b/queue-6.6/xfs-dquot-recovery-does-not-validate-the-recovered-d.patch @@ -0,0 +1,57 @@ +From 53a81473c11e37e0a80e352a91836bb2eb276235 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:52 -0800 +Subject: xfs: dquot recovery does not validate the recovered dquot + +From: Darrick J. Wong + +commit 9c235dfc3d3f901fe22acb20f2ab37ff39f2ce02 upstream. + +When we're recovering ondisk quota records from the log, we need to +validate the recovered buffer contents before writing them to disk. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_dquot_item_recover.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c +index db2cb5e4197b..2c2720ce6923 100644 +--- a/fs/xfs/xfs_dquot_item_recover.c ++++ b/fs/xfs/xfs_dquot_item_recover.c +@@ -19,6 +19,7 @@ + #include "xfs_log.h" + #include "xfs_log_priv.h" + #include "xfs_log_recover.h" ++#include "xfs_error.h" + + STATIC void + xlog_recover_dquot_ra_pass2( +@@ -152,6 +153,19 @@ xlog_recover_dquot_commit_pass2( + XFS_DQUOT_CRC_OFF); + } + ++ /* Validate the recovered dquot. 
*/ + fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id); + if (fa) { + XFS_CORRUPTION_ERROR("Bad dquot after recovery", + XFS_ERRLEVEL_LOW, mp, dqb, + sizeof(struct xfs_dqblk)); + xfs_alert(mp, + "Metadata corruption detected at %pS, dquot 0x%x", + fa, dq_f->qlf_id); + error = -EFSCORRUPTED; + goto out_release; + } + + ASSERT(dq_f->qlf_size == 2); + ASSERT(bp->b_mount == mp); + bp->b_flags |= _XBF_LOGRECOVERY; +-- +2.43.0 + diff --git a/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch b/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch new file mode 100644 index 00000000000..788145a977b --- /dev/null +++ b/queue-6.6/xfs-factor-out-xfs_defer_pending_abort.patch @@ -0,0 +1,74 @@ +From a3ae93d0aac98fb0e4dd6bc78511eef095eb8958 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:44 -0800 +Subject: xfs: factor out xfs_defer_pending_abort + +From: Long Li + +commit 2a5db859c6825b5d50377dda9c3cc729c20cad43 upstream. + +Factor out xfs_defer_pending_abort() from xfs_defer_trans_abort(). The new +helper does not use the transaction parameter, so it can be used after the +transaction life cycle has ended. + +Signed-off-by: Long Li +Reviewed-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_defer.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c +index bcfb6a4203cd..88388e12f8e7 100644 +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -245,21 +245,18 @@ xfs_defer_create_intents( + return ret; + } + +-/* Abort all the intents that were committed.
*/ + STATIC void +-xfs_defer_trans_abort( +- struct xfs_trans *tp, +- struct list_head *dop_pending) ++xfs_defer_pending_abort( ++ struct xfs_mount *mp, ++ struct list_head *dop_list) + { + struct xfs_defer_pending *dfp; + const struct xfs_defer_op_type *ops; + +- trace_xfs_defer_trans_abort(tp, _RET_IP_); +- + /* Abort intent items that don't have a done item. */ +- list_for_each_entry(dfp, dop_pending, dfp_list) { ++ list_for_each_entry(dfp, dop_list, dfp_list) { + ops = defer_op_types[dfp->dfp_type]; +- trace_xfs_defer_pending_abort(tp->t_mountp, dfp); ++ trace_xfs_defer_pending_abort(mp, dfp); + if (dfp->dfp_intent && !dfp->dfp_done) { + ops->abort_intent(dfp->dfp_intent); + dfp->dfp_intent = NULL; +@@ -267,6 +264,16 @@ xfs_defer_trans_abort( + } + } + ++/* Abort all the intents that were committed. */ ++STATIC void ++xfs_defer_trans_abort( ++ struct xfs_trans *tp, ++ struct list_head *dop_pending) ++{ ++ trace_xfs_defer_trans_abort(tp, _RET_IP_); ++ xfs_defer_pending_abort(tp->t_mountp, dop_pending); ++} ++ + /* + * Capture resources that the caller said not to release ("held") when the + * transaction commits. Caller is responsible for zero-initializing @dres. +-- +2.43.0 + diff --git a/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch b/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch new file mode 100644 index 00000000000..8e0a9c64be1 --- /dev/null +++ b/queue-6.6/xfs-fix-again-select-in-kconfig-xfs_online_scrub_sta.patch @@ -0,0 +1,47 @@ +From 156bdf447fef047ee91b002cf23c47cb0fadae19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:49 -0800 +Subject: xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Anthony Iliopoulos + +commit a2e4388adfa44684c7c428a5a5980efe0d75e13e upstream. 
+ +Commit 57c0f4a8ea3a attempted to fix the select in the kconfig entry +XFS_ONLINE_SCRUB_STATS by selecting XFS_DEBUG, but the original +intention was to select DEBUG_FS, since the feature relies on debugfs to +export the related scrub statistics. + +Fixes: 57c0f4a8ea3a ("xfs: fix select in config XFS_ONLINE_SCRUB_STATS") + +Reported-by: Holger Hoffstätte +Signed-off-by: Anthony Iliopoulos +Reviewed-by: Dave Chinner +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig +index ed0bc8cbc703..567fb37274d3 100644 +--- a/fs/xfs/Kconfig ++++ b/fs/xfs/Kconfig +@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS + bool "XFS online metadata check usage data collection" + default y + depends on XFS_ONLINE_SCRUB +- select XFS_DEBUG ++ select DEBUG_FS + help + If you say Y here, the kernel will gather usage data about + the online metadata check subsystem. This includes the number +-- +2.43.0 + diff --git a/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch b/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch new file mode 100644 index 00000000000..4a48d824e74 --- /dev/null +++ b/queue-6.6/xfs-fix-internal-error-from-agfl-exhaustion.patch @@ -0,0 +1,111 @@ +From 47fde25b235763386afbfdedbabd59953cc38047 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:48 -0800 +Subject: xfs: fix internal error from AGFL exhaustion + +From: Omar Sandoval + +commit f63a5b3769ad7659da4c0420751d78958ab97675 upstream. + +We've been seeing XFS errors like the following: + +XFS: Internal error i != 1 at line 3526 of file fs/xfs/libxfs/xfs_btree.c. Caller xfs_btree_insert+0x1ec/0x280 +... 
+Call Trace: + xfs_corruption_error+0x94/0xa0 + xfs_btree_insert+0x221/0x280 + xfs_alloc_fixup_trees+0x104/0x3e0 + xfs_alloc_ag_vextent_size+0x667/0x820 + xfs_alloc_fix_freelist+0x5d9/0x750 + xfs_free_extent_fix_freelist+0x65/0xa0 + __xfs_free_extent+0x57/0x180 +... + +This is the XFS_IS_CORRUPT() check in xfs_btree_insert() when +xfs_btree_insrec() fails. + +After converting this into a panic and dissecting the core dump, I found +that xfs_btree_insrec() is failing because it's trying to split a leaf +node in the cntbt when the AG free list is empty. In particular, it's +failing to get a block from the AGFL _while trying to refill the AGFL_. + +If a single operation splits every level of the bnobt and the cntbt (and +the rmapbt if it is enabled) at once, the free list will be empty. Then, +when the next operation tries to refill the free list, it allocates +space. If the allocation does not use a full extent, it will need to +insert records for the remaining space in the bnobt and cntbt. And if +those new records go in full leaves, the leaves (and potentially more +nodes up to the old root) need to be split. + +Fix it by accounting for the additional splits that may be required to +refill the free list in the calculation for the minimum free list size. + +P.S. As far as I can tell, this bug has existed for a long time -- maybe +back to xfs-history commit afdf80ae7405 ("Add XFS_AG_MAXLEVELS macros +...") in April 1994! It requires a very unlucky sequence of events, and +in fact we didn't hit it until a particular sparse mmap workload updated +from 5.12 to 5.19. But this bug existed in 5.12, so it must've been +exposed by some other change in allocation or writeback patterns. It's +also much less likely to be hit with the rmapbt enabled, since that +increases the minimum free list size and is unlikely to split at the +same time as the bnobt and cntbt. + +Reviewed-by: "Darrick J. 
Wong" +Reviewed-by: Dave Chinner +Signed-off-by: Omar Sandoval +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_alloc.c | 27 ++++++++++++++++++++++++--- + 1 file changed, 24 insertions(+), 3 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c +index 3069194527dd..100ab5931b31 100644 +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist( + + ASSERT(mp->m_alloc_maxlevels > 0); + ++ /* ++ * For a btree shorter than the maximum height, the worst case is that ++ * every level gets split and a new level is added, then while inserting ++ * another entry to refill the AGFL, every level under the old root gets ++ * split again. This is: ++ * ++ * (full height split reservation) + (AGFL refill split height) ++ * = (current height + 1) + (current height - 1) ++ * = (new height) + (new height - 2) ++ * = 2 * new height - 2 ++ * ++ * For a btree of maximum height, the worst case is that every level ++ * under the root gets split, then while inserting another entry to ++ * refill the AGFL, every level under the root gets split again. 
This is ++ * also: ++ * ++ * 2 * (current height - 1) ++ * = 2 * (new height - 1) ++ * = 2 * new height - 2 ++ */ ++ + /* space needed by-bno freespace btree */ + min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, +- mp->m_alloc_maxlevels); ++ mp->m_alloc_maxlevels) * 2 - 2; + /* space needed by-size freespace btree */ + min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, +- mp->m_alloc_maxlevels); ++ mp->m_alloc_maxlevels) * 2 - 2; + /* space needed reverse mapping used space btree */ + if (xfs_has_rmapbt(mp)) + min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, +- mp->m_rmap_maxlevels); ++ mp->m_rmap_maxlevels) * 2 - 2; + + return min_free; + } +-- +2.43.0 + diff --git a/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch b/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch new file mode 100644 index 00000000000..d7adc41daa6 --- /dev/null +++ b/queue-6.6/xfs-fix-units-conversion-error-in-xfs_bmap_del_exten.patch @@ -0,0 +1,43 @@ +From c38e81be2bc8b64c942cba29de80ecc0c44af64d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:39 -0800 +Subject: xfs: fix units conversion error in xfs_bmap_del_extent_delay + +From: Darrick J. Wong + +commit ddd98076d5c075c8a6c49d9e6e8ee12844137f23 upstream. + +The unit conversions in this function do not make sense. First we +convert a block count to bytes, then divide that bytes value by +rextsize, which is in blocks, to get an rt extent count. You can't +divide bytes by blocks to get a (possibly multiblock) extent value. + +Fortunately nobody uses delalloc on the rt volume so this hasn't +mattered. + +Fixes: fa5c836ca8eb5 ("xfs: refactor xfs_bunmapi_cow") +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_bmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c +index 26bfa34b4bbf..617cc7e78e38 100644 +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -4827,7 +4827,7 @@ xfs_bmap_del_extent_delay( + ASSERT(got_endoff >= del_endoff); + + if (isrt) { +- uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); ++ uint64_t rtexts = del->br_blockcount; + + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, rtexts); +-- +2.43.0 + diff --git a/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch b/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch new file mode 100644 index 00000000000..5cfd6ef895e --- /dev/null +++ b/queue-6.6/xfs-handle-nimaps-0-from-xfs_bmapi_write-in-xfs_allo.patch @@ -0,0 +1,100 @@ +From b126c3855bd639237cc1e848c082f3499c425bfa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:42 -0800 +Subject: xfs: handle nimaps=0 from xfs_bmapi_write in xfs_alloc_file_space + +From: Christoph Hellwig + +commit 35dc55b9e80cb9ec4bcb969302000b002b2ed850 upstream. + +If xfs_bmapi_write finds a delalloc extent at the requested range, it +tries to convert the entire delalloc extent to a real allocation. + +But if the allocator cannot find a single free extent large enough to +cover the start block of the requested range, xfs_bmapi_write will +return 0 but leave *nimaps set to 0. + +In that case we simply need to keep looping with the same startoffset_fsb +so that one of the following allocations will eventually reach the +requested range. + +Note that this could affect any caller of xfs_bmapi_write that covers +an existing delayed allocation. 
As far as I can tell we do not have +any other such caller, though - the regular writeback path uses +xfs_bmapi_convert_delalloc to convert delayed allocations to real ones, +and direct I/O invalidates the page cache first. + +Signed-off-by: Christoph Hellwig +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_bmap_util.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index fcefab687285..ad4aba5002c1 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -780,12 +780,10 @@ xfs_alloc_file_space( + { + xfs_mount_t *mp = ip->i_mount; + xfs_off_t count; +- xfs_filblks_t allocated_fsb; + xfs_filblks_t allocatesize_fsb; + xfs_extlen_t extsz, temp; + xfs_fileoff_t startoffset_fsb; + xfs_fileoff_t endoffset_fsb; +- int nimaps; + int rt; + xfs_trans_t *tp; + xfs_bmbt_irec_t imaps[1], *imapp; +@@ -808,7 +806,6 @@ xfs_alloc_file_space( + + count = len; + imapp = &imaps[0]; +- nimaps = 1; + startoffset_fsb = XFS_B_TO_FSBT(mp, offset); + endoffset_fsb = XFS_B_TO_FSB(mp, offset + count); + allocatesize_fsb = endoffset_fsb - startoffset_fsb; +@@ -819,6 +816,7 @@ xfs_alloc_file_space( + while (allocatesize_fsb && !error) { + xfs_fileoff_t s, e; + unsigned int dblocks, rblocks, resblks; ++ int nimaps = 1; + + /* + * Determine space reservations for data/realtime. +@@ -884,15 +882,19 @@ xfs_alloc_file_space( + if (error) + break; + +- allocated_fsb = imapp->br_blockcount; +- +- if (nimaps == 0) { +- error = -ENOSPC; +- break; ++ /* ++ * If the allocator cannot find a single free extent large ++ * enough to cover the start block of the requested range, ++ * xfs_bmapi_write will return 0 but leave *nimaps set to 0. 
++ * ++ * In that case we simply need to keep looping with the same ++ * startoffset_fsb so that one of the following allocations ++ * will eventually reach the requested range. ++ */ ++ if (nimaps) { ++ startoffset_fsb += imapp->br_blockcount; ++ allocatesize_fsb -= imapp->br_blockcount; + } +- +- startoffset_fsb += allocated_fsb; +- allocatesize_fsb -= allocated_fsb; + } + + return error; +-- +2.43.0 + diff --git a/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch b/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch new file mode 100644 index 00000000000..e5b276b16a7 --- /dev/null +++ b/queue-6.6/xfs-hoist-freeing-of-rt-data-fork-extent-mappings.patch @@ -0,0 +1,140 @@ +From d4aa355a46f147a2f392f925bd6b037dac0f1117 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:36 -0800 +Subject: xfs: hoist freeing of rt data fork extent mappings + +From: Darrick J. Wong + +commit 6c664484337b37fa0cf6e958f4019623e30d40f7 upstream. + +Currently, xfs_bmap_del_extent_real contains a bunch of code to convert +the physical extent of a data fork mapping for a realtime file into rt +extents and pass that to the rt extent freeing function. Since the +details of this aren't needed when CONFIG_XFS_REALTIME=n, move it to +xfs_rtbitmap.c to reduce code size when realtime isn't enabled. + +This will (one day) enable realtime EFIs to reuse the same +unit-converting call with less code duplication. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_bmap.c | 19 +++---------------- + fs/xfs/libxfs/xfs_rtbitmap.c | 33 +++++++++++++++++++++++++++++++++ + fs/xfs/xfs_rtalloc.h | 5 +++++ + 3 files changed, 41 insertions(+), 16 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c +index 30c931b38853..26bfa34b4bbf 100644 +--- a/fs/xfs/libxfs/xfs_bmap.c ++++ b/fs/xfs/libxfs/xfs_bmap.c +@@ -5057,33 +5057,20 @@ xfs_bmap_del_extent_real( + + flags = XFS_ILOG_CORE; + if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { +- xfs_filblks_t len; +- xfs_extlen_t mod; +- +- len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, +- &mod); +- ASSERT(mod == 0); +- + if (!(bflags & XFS_BMAPI_REMAP)) { +- xfs_fsblock_t bno; +- +- bno = div_u64_rem(del->br_startblock, +- mp->m_sb.sb_rextsize, &mod); +- ASSERT(mod == 0); +- +- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); ++ error = xfs_rtfree_blocks(tp, del->br_startblock, ++ del->br_blockcount); + if (error) + goto done; + } + + do_fx = 0; +- nblks = len * mp->m_sb.sb_rextsize; + qfield = XFS_TRANS_DQ_RTBCOUNT; + } else { + do_fx = 1; +- nblks = del->br_blockcount; + qfield = XFS_TRANS_DQ_BCOUNT; + } ++ nblks = del->br_blockcount; + + del_endblock = del->br_startblock + del->br_blockcount; + if (cur) { +diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c +index fa180ab66b73..655108a4cd05 100644 +--- a/fs/xfs/libxfs/xfs_rtbitmap.c ++++ b/fs/xfs/libxfs/xfs_rtbitmap.c +@@ -1005,6 +1005,39 @@ xfs_rtfree_extent( + return 0; + } + ++/* ++ * Free some blocks in the realtime subvolume. rtbno and rtlen are in units of ++ * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen ++ * cannot exceed XFS_MAX_BMBT_EXTLEN. 
++ */ ++int ++xfs_rtfree_blocks( ++ struct xfs_trans *tp, ++ xfs_fsblock_t rtbno, ++ xfs_filblks_t rtlen) ++{ ++ struct xfs_mount *mp = tp->t_mountp; ++ xfs_rtblock_t bno; ++ xfs_filblks_t len; ++ xfs_extlen_t mod; ++ ++ ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN); ++ ++ len = div_u64_rem(rtlen, mp->m_sb.sb_rextsize, &mod); ++ if (mod) { ++ ASSERT(mod == 0); ++ return -EIO; ++ } ++ ++ bno = div_u64_rem(rtbno, mp->m_sb.sb_rextsize, &mod); ++ if (mod) { ++ ASSERT(mod == 0); ++ return -EIO; ++ } ++ ++ return xfs_rtfree_extent(tp, bno, len); ++} ++ + /* Find all the free records within a given range. */ + int + xfs_rtalloc_query_range( +diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h +index 62c7ad79cbb6..3b2f1b499a11 100644 +--- a/fs/xfs/xfs_rtalloc.h ++++ b/fs/xfs/xfs_rtalloc.h +@@ -58,6 +58,10 @@ xfs_rtfree_extent( + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len); /* length of extent freed */ + ++/* Same as above, but in units of rt blocks. */ ++int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, ++ xfs_filblks_t rtlen); ++ + /* + * Initialize realtime fields in the mount structure. 
+ */ +@@ -139,6 +143,7 @@ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); + #else + # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) + # define xfs_rtfree_extent(t,b,l) (ENOSYS) ++# define xfs_rtfree_blocks(t,rb,rl) (ENOSYS) + # define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) + # define xfs_growfs_rt(mp,in) (ENOSYS) + # define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) +-- +2.43.0 + diff --git a/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch b/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch new file mode 100644 index 00000000000..570765bed2e --- /dev/null +++ b/queue-6.6/xfs-inode-recovery-does-not-validate-the-recovered-i.patch @@ -0,0 +1,77 @@ +From b10f08cb42bde45627cccd214db68c83b3c7cbf6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:50 -0800 +Subject: xfs: inode recovery does not validate the recovered inode + +From: Dave Chinner + +commit 038ca189c0d2c1570b4d922f25b524007c85cf94 upstream. + +Discovered when trying to track down a weird recovery corruption +issue that wasn't detected at recovery time. + +The specific corruption was a zero extent count field when big +extent counts are in use, and it turns out the dinode verifier +doesn't detect that specific corruption case, either. So fix it too. + +Signed-off-by: Dave Chinner +Reviewed-by: "Darrick J. 
Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_inode_buf.c | 3 +++ + fs/xfs/xfs_inode_item_recover.c | 14 +++++++++++++- + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c +index a35781577cad..0f970a0b3382 100644 +--- a/fs/xfs/libxfs/xfs_inode_buf.c ++++ b/fs/xfs/libxfs/xfs_inode_buf.c +@@ -508,6 +508,9 @@ xfs_dinode_verify( + if (mode && nextents + naextents > nblocks) + return __this_address; + ++ if (nextents + naextents == 0 && nblocks != 0) ++ return __this_address; ++ + if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) + return __this_address; + +diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c +index e6609067ef26..144198a6b270 100644 +--- a/fs/xfs/xfs_inode_item_recover.c ++++ b/fs/xfs/xfs_inode_item_recover.c +@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2( + struct xfs_log_dinode *ldip; + uint isize; + int need_free = 0; ++ xfs_failaddr_t fa; + + if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { + in_f = item->ri_buf[0].i_addr; +@@ -530,8 +531,19 @@ xlog_recover_inode_commit_pass2( + (dip->di_mode != 0)) + error = xfs_recover_inode_owner_change(mp, dip, in_f, + buffer_list); +- /* re-generate the checksum. */ ++ /* re-generate the checksum and validate the recovered inode. 
*/ + xfs_dinode_calc_crc(log->l_mp, dip); ++ fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip); ++ if (fa) { ++ XFS_CORRUPTION_ERROR( ++ "Bad dinode after recovery", ++ XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip)); ++ xfs_alert(mp, ++ "Metadata corruption detected at %pS, inode 0x%llx", ++ fa, in_f->ilf_ino); ++ error = -EFSCORRUPTED; ++ goto out_release; ++ } + + ASSERT(bp->b_mount == mp); + bp->b_flags |= _XBF_LOGRECOVERY; +-- +2.43.0 + diff --git a/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch b/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch new file mode 100644 index 00000000000..7c94f56b214 --- /dev/null +++ b/queue-6.6/xfs-introduce-protection-for-drop-nlink.patch @@ -0,0 +1,43 @@ +From 0554272f8e9dd7e5a94b88a96446cf4e6c8c83fe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:41 -0800 +Subject: xfs: introduce protection for drop nlink + +From: Cheng Lin + +commit 2b99e410b28f5a75ae417e6389e767c7745d6fce upstream. + +When abnormal drop_nlink are detected on the inode, +return error, to avoid corruption propagation. + +Signed-off-by: Cheng Lin +Reviewed-by: "Darrick J. 
Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_inode.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index 4d55f58d99b7..fb85c5c81745 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -918,6 +918,13 @@ xfs_droplink( + xfs_trans_t *tp, + xfs_inode_t *ip) + { ++ if (VFS_I(ip)->i_nlink == 0) { ++ xfs_alert(ip->i_mount, ++ "%s: Attempt to drop inode (%llu) with nlink zero.", ++ __func__, ip->i_ino); ++ return -EFSCORRUPTED; ++ } ++ + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + + drop_nlink(VFS_I(ip)); +-- +2.43.0 + diff --git a/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch b/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch new file mode 100644 index 00000000000..a015379eeff --- /dev/null +++ b/queue-6.6/xfs-make-sure-maxlen-is-still-congruent-with-prod-wh.patch @@ -0,0 +1,117 @@ +From 8401165f28dd5709d239433e47b26d6bafcbddfc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:40 -0800 +Subject: xfs: make sure maxlen is still congruent with prod when rounding down + +From: Darrick J. Wong + +commit f6a2dae2a1f52ea23f649c02615d073beba4cc35 upstream. + +In commit 2a6ca4baed62, we tried to fix an overflow problem in the +realtime allocator that was caused by an overly large maxlen value +causing xfs_rtcheck_range to run off the end of the realtime bitmap. +Unfortunately, there is a subtle bug here -- maxlen (and minlen) both +have to be aligned with @prod, but @prod can be larger than 1 if the +user has set an extent size hint on the file, and that extent size hint +is larger than the realtime extent size. 
+ +If the rt free space extents are not aligned to this file's extszhint +because other files without extent size hints allocated space (or the +number of rt extents is similarly not aligned), then it's possible that +maxlen after clamping to sb_rextents will no longer be aligned to prod. +The allocation will succeed just fine, but we still trip the assertion. + +Fix the problem by reducing maxlen by any misalignment with prod. While +we're at it, split the assertions into two so that we can tell which +value had the bad alignment. + +Fixes: 2a6ca4baed62 ("xfs: make sure the rt allocator doesn't run off the end") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_rtalloc.c | 31 ++++++++++++++++++++++++++----- + 1 file changed, 26 insertions(+), 5 deletions(-) + +diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c +index 31fd65b3aaa9..0e4e2df08aed 100644 +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -211,6 +211,23 @@ xfs_rtallocate_range( + return error; + } + ++/* ++ * Make sure we don't run off the end of the rt volume. Be careful that ++ * adjusting maxlen downwards doesn't cause us to fail the alignment checks. ++ */ ++static inline xfs_extlen_t ++xfs_rtallocate_clamp_len( ++ struct xfs_mount *mp, ++ xfs_rtblock_t startrtx, ++ xfs_extlen_t rtxlen, ++ xfs_extlen_t prod) ++{ ++ xfs_extlen_t ret; ++ ++ ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx; ++ return rounddown(ret, prod); ++} ++ + /* + * Attempt to allocate an extent minlen<=len<=maxlen starting from + * bitmap block bbno. If we don't get maxlen then use prod to trim +@@ -248,7 +265,7 @@ xfs_rtallocate_extent_block( + i <= end; + i++) { + /* Make sure we don't scan off the end of the rt volume. 
*/ +- maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i; ++ maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod); + + /* + * See if there's a free extent of maxlen starting at i. +@@ -355,7 +372,8 @@ xfs_rtallocate_extent_exact( + int isfree; /* extent is free */ + xfs_rtblock_t next; /* next block to try (dummy) */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); + /* + * Check if the range in question (for maxlen) is free. + */ +@@ -438,7 +456,9 @@ xfs_rtallocate_extent_near( + xfs_rtblock_t n; /* next block to try */ + xfs_rtblock_t r; /* result block */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); ++ + /* + * If the block number given is off the end, silently set it to + * the last block. +@@ -447,7 +467,7 @@ xfs_rtallocate_extent_near( + bno = mp->m_sb.sb_rextents - 1; + + /* Make sure we don't run off the end of the rt volume. */ +- maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno; ++ maxlen = xfs_rtallocate_clamp_len(mp, bno, maxlen, prod); + if (maxlen < minlen) { + *rtblock = NULLRTBLOCK; + return 0; +@@ -638,7 +658,8 @@ xfs_rtallocate_extent_size( + xfs_rtblock_t r; /* result block number */ + xfs_suminfo_t sum; /* summary information for extents */ + +- ASSERT(minlen % prod == 0 && maxlen % prod == 0); ++ ASSERT(minlen % prod == 0); ++ ASSERT(maxlen % prod == 0); + ASSERT(maxlen != 0); + + /* +-- +2.43.0 + diff --git a/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch b/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch new file mode 100644 index 00000000000..33b997ab081 --- /dev/null +++ b/queue-6.6/xfs-only-remap-the-written-blocks-in-xfs_reflink_end.patch @@ -0,0 +1,53 @@ +From 88759c80d3e673e75ed46cd283b090db16d0564a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:46 -0800 +Subject: xfs: only remap the written blocks in 
xfs_reflink_end_cow_extent + +From: Christoph Hellwig + +commit 55f669f34184ecb25b8353f29c7f6f1ae5b313d1 upstream. + +xfs_reflink_end_cow_extent looks up the COW extent and the data fork +extent at offset_fsb, and then proceeds to remap the common subset +between the two. + +It does however not limit the remapped extent to the passed in +[*offset_fsb, end_fsb] range and thus potentially remaps more blocks than +the one handled by the current I/O completion. This means that with +sufficiently large data and COW extents we could be remapping COW fork +mappings that have not been written to, leading to a stale data exposure +on a powerfail event. + +We used to have a xfs_trim_range to make the remap fit the I/O completion +range, but that got (apparently accidentally) removed in commit +df2fd88f8ac7 ("xfs: rewrite xfs_reflink_end_cow to use intents"). + +Note that I've only found this by code inspection, and a test case would +probably require very specific delay and error injection. + +Fixes: df2fd88f8ac7 ("xfs: rewrite xfs_reflink_end_cow to use intents") +Signed-off-by: Christoph Hellwig +Reviewed-by: "Darrick J. Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_reflink.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c +index 658edee8381d..e5b62dc28466 100644 +--- a/fs/xfs/xfs_reflink.c ++++ b/fs/xfs/xfs_reflink.c +@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent( + } + } + del = got; ++ xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); + + /* Grab the corresponding mapping in the data fork.
*/ + nmaps = 1; +-- +2.43.0 + diff --git a/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch b/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch new file mode 100644 index 00000000000..ab85faf0383 --- /dev/null +++ b/queue-6.6/xfs-prevent-rt-growfs-when-quota-is-enabled.patch @@ -0,0 +1,42 @@ +From 6c15f39f53a79bf881765aba6bbba165bc2b8318 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:37 -0800 +Subject: xfs: prevent rt growfs when quota is enabled + +From: Darrick J. Wong + +commit b73494fa9a304ab95b59f07845e8d7d36e4d23e0 upstream. + +Quotas aren't (yet) supported with realtime, so we shouldn't allow +userspace to set up a realtime section when quotas are enabled, even if +they attached one via mount options. IOWS, you shouldn't be able to do: + +# mkfs.xfs -f /dev/sda +# mount /dev/sda /mnt -o rtdev=/dev/sdb,usrquota +# xfs_growfs -r /mnt + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_rtalloc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c +index 16534e9873f6..31fd65b3aaa9 100644 +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -954,7 +954,7 @@ xfs_growfs_rt( + return -EINVAL; + + /* Unsupported realtime features. 
*/ +- if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)) ++ if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) + return -EOPNOTSUPP; + + nrblocks = in->newblocks; +-- +2.43.0 + diff --git a/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch b/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch new file mode 100644 index 00000000000..1e3973dff86 --- /dev/null +++ b/queue-6.6/xfs-respect-the-stable-writes-flag-on-the-rt-device.patch @@ -0,0 +1,84 @@ +From f70972235a9432ab34b98435eb6433ba3a3c2437 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:54 -0800 +Subject: xfs: respect the stable writes flag on the RT device + +From: Christoph Hellwig + +commit 9c04138414c00ae61421f36ada002712c4bac94a upstream. + +Update the per-folio stable writes flag depending on which device an +inode resides on. + +Signed-off-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20231025141020.192413-5-hch@lst.de +Reviewed-by: Darrick J. Wong +Signed-off-by: Christian Brauner +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_inode.h | 8 ++++++++ + fs/xfs/xfs_ioctl.c | 8 ++++++++ + fs/xfs/xfs_iops.c | 7 +++++++ + 3 files changed, 23 insertions(+) + +diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h +index 3dc47937da5d..3beb470f1892 100644 +--- a/fs/xfs/xfs_inode.h ++++ b/fs/xfs/xfs_inode.h +@@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip); + extern void xfs_setup_iops(struct xfs_inode *ip); + extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); + ++static inline void xfs_update_stable_writes(struct xfs_inode *ip) ++{ ++ if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) ++ mapping_set_stable_writes(VFS_I(ip)->i_mapping); ++ else ++ mapping_clear_stable_writes(VFS_I(ip)->i_mapping); ++} ++ + /* + * When setting up a newly allocated inode, we need to call + * xfs_finish_inode_setup() once the inode is fully instantiated at
+diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index be69e7be713e..535f6d38cdb5 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -1149,6 +1149,14 @@ xfs_ioctl_setattr_xflags( + ip->i_diflags2 = i_flags2; + + xfs_diflags_to_iflags(ip, false); ++ ++ /* ++ * Make the stable writes flag match that of the device the inode ++ * resides on when flipping the RT flag. ++ */ ++ if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode)) ++ xfs_update_stable_writes(ip); ++ + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_STATS_INC(mp, xs_ig_attrchg); +diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c +index 2b3b05c28e9e..b8ec045708c3 100644 +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -1298,6 +1298,13 @@ xfs_setup_inode( + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + ++ /* ++ * For real-time inodes update the stable write flags to that of the RT ++ * device instead of the data device. ++ */ ++ if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) ++ xfs_update_stable_writes(ip); ++ + /* + * If there is no attribute fork no ACL can exist on this inode, + * and it can't have any file capabilities attached to it either. +-- +2.43.0 + diff --git a/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch b/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch new file mode 100644 index 00000000000..686151568e4 --- /dev/null +++ b/queue-6.6/xfs-rt-stubs-should-return-negative-errnos-when-rt-d.patch @@ -0,0 +1,67 @@ +From e75c773a18b424a062d93af81ad3c4dd92fd7b78 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:38 -0800 +Subject: xfs: rt stubs should return negative errnos when rt disabled + +From: Darrick J. Wong + +commit c2988eb5cff75c02bc57e02c323154aa08f55b78 upstream. 
+ +When realtime support is not compiled into the kernel, these functions +should return negative errnos, not positive errnos. While we're at it, +fix a broken macro declaration. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_rtalloc.h | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h +index 3b2f1b499a11..65c284e9d33e 100644 +--- a/fs/xfs/xfs_rtalloc.h ++++ b/fs/xfs/xfs_rtalloc.h +@@ -141,17 +141,17 @@ int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + bool *is_free); + int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); + #else +-# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS) +-# define xfs_rtfree_extent(t,b,l) (ENOSYS) +-# define xfs_rtfree_blocks(t,rb,rl) (ENOSYS) +-# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS) +-# define xfs_growfs_rt(mp,in) (ENOSYS) +-# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS) +-# define xfs_rtalloc_query_all(m,t,f,p) (ENOSYS) +-# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS) +-# define xfs_verify_rtbno(m, r) (false) +-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS) +-# define xfs_rtalloc_reinit_frextents(m) (0) ++# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) ++# define xfs_rtfree_extent(t,b,l) (-ENOSYS) ++# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) ++# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) ++# define xfs_growfs_rt(mp,in) (-ENOSYS) ++# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) ++# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) ++# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) ++# define xfs_verify_rtbno(m, r) (false) ++# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) ++# define xfs_rtalloc_reinit_frextents(m) (0) + static inline int /* error */ + xfs_rtmount_init( + xfs_mount_t *mp) /* file system mount structure */ +@@ -162,7 +162,7 
@@ xfs_rtmount_init( + xfs_warn(mp, "Not built with CONFIG_XFS_RT"); + return -ENOSYS; + } +-# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) ++# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (-ENOSYS)) + # define xfs_rtunmount_inodes(m) + #endif /* CONFIG_XFS_RT */ + +-- +2.43.0 + diff --git a/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch b/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch new file mode 100644 index 00000000000..60fdbe10281 --- /dev/null +++ b/queue-6.6/xfs-up-ic_sema-if-flushing-data-device-fails.patch @@ -0,0 +1,102 @@ +From 69c011d2a20e37461246216ade0501eff863e090 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 15:20:47 -0800 +Subject: xfs: up(ic_sema) if flushing data device fails + +From: Leah Rumancik + +commit 471de20303dda0b67981e06d59cc6c4a83fd2a3c upstream. + +We flush the data device cache before we issue external log IO. If +the flush fails, we shut down the log immediately and return. However, +the iclog->ic_sema is left in a decremented state so let's add an up(). +Prior to this patch, xfs/438 would fail consistently when running with +an external log device: + +sync + -> xfs_log_force + -> xlog_write_iclog + -> down(&iclog->ic_sema) + -> blkdev_issue_flush (fail causes us to initiate shutdown) + -> xlog_force_shutdown + -> return + +unmount + -> xfs_log_umount + -> xlog_wait_iclog_completion + -> down(&iclog->ic_sema) --------> HANG + +There is a second early return / shutdown. Make sure the up() happens +for it as well. Also make sure we clean up the iclog state, +xlog_state_done_syncing, before dropping the iclog lock. + +Fixes: b5d721eaae47 ("xfs: external logs need to flush data device") +Fixes: 842a42d126b4 ("xfs: shutdown on failure to add page to log bio") +Fixes: 7d839e325af2 ("xfs: check return codes when flushing block devices") +Signed-off-by: Leah Rumancik +Reviewed-by: "Darrick J.
Wong" +Signed-off-by: Chandan Babu R +Signed-off-by: Catherine Hoang +Acked-by: Chandan Babu R +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_log.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index 51c100c86177..ee206facf0dc 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -1893,9 +1893,7 @@ xlog_write_iclog( + * the buffer manually, the code needs to be kept in sync + * with the I/O completion path. + */ +- xlog_state_done_syncing(iclog); +- up(&iclog->ic_sema); +- return; ++ goto sync; + } + + /* +@@ -1925,20 +1923,17 @@ xlog_write_iclog( + * avoid shutdown re-entering this path and erroring out again. + */ + if (log->l_targ != log->l_mp->m_ddev_targp && +- blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { +- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +- return; +- } ++ blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) ++ goto shutdown; + } + if (iclog->ic_flags & XLOG_ICL_NEED_FUA) + iclog->ic_bio.bi_opf |= REQ_FUA; + + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); + +- if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { +- xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +- return; +- } ++ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) ++ goto shutdown; ++ + if (is_vmalloc_addr(iclog->ic_data)) + flush_kernel_vmap_range(iclog->ic_data, count); + +@@ -1959,6 +1954,12 @@ xlog_write_iclog( + } + + submit_bio(&iclog->ic_bio); ++ return; ++shutdown: ++ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); ++sync: ++ xlog_state_done_syncing(iclog); ++ up(&iclog->ic_sema); + } + + /* +-- +2.43.0 + -- 2.47.3