From 0e2f80afcfa699ce722c01afc9286a942bd57211 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 28 Feb 2025 14:31:02 +1100 Subject: [PATCH] fs/dax: ensure all pages are idle prior to filesystem unmount File systems call dax_break_layout() prior to reallocating file system blocks to ensure the page is not undergoing any DMA or other accesses. Generally this is needed when a file is truncated to ensure that if a block is reallocated nothing is writing to it. However, filesystems currently don't call this when an FS DAX inode is evicted. This can cause problems when the file system is unmounted as a page can continue to be undergoing DMA or other remote access after unmount. This means that if the file system is remounted, any truncate or other operation which requires the underlying file system block to be freed will not wait for the remote access to complete. Therefore, a busy block may be reallocated to a new file leading to corruption. Link: https://lkml.kernel.org/r/2d3cf575bbd095084993154be2f0aa7442e5cd28.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Tested-by: Alison Schofield Cc: Alexander Gordeev Cc: Asahi Lina Cc: Balbir Singh Cc: Bjorn Helgaas Cc: Catalin Marinas Cc: Christian Borntraeger Cc: Christoph Hellwig Cc: Chunyan Zhang Cc: Dan Williams Cc: "Darrick J. 
Wong" Cc: Dave Chinner Cc: Dave Hansen Cc: Dave Jiang Cc: David Hildenbrand Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Huacai Chen Cc: Ira Weiny Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: John Hubbard Cc: linmiaohe Cc: Logan Gunthorpe Cc: Matthew Wilcox (Oracle) Cc: Michael "Camp Drill Sergeant" Ellerman Cc: Nicholas Piggin Cc: Peter Xu Cc: Sven Schnelle Cc: Ted Ts'o Cc: Vasily Gorbik Cc: Vishal Verma Cc: Vivek Goyal Cc: WANG Xuerui Cc: Will Deacon Signed-off-by: Andrew Morton --- fs/dax.c | 27 +++++++++++++++++++++++++++ fs/ext4/inode.c | 2 ++ fs/xfs/xfs_super.c | 12 ++++++++++++ include/linux/dax.h | 5 +++++ 4 files changed, 46 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 14fbe51630371..bc538ba560580 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -884,6 +884,13 @@ static int wait_page_idle(struct page *page, TASK_INTERRUPTIBLE, 0, 0, cb(inode)); } +static void wait_page_idle_uninterruptible(struct page *page, + struct inode *inode) +{ + ___wait_var_event(page, dax_page_is_idle(page), + TASK_UNINTERRUPTIBLE, 0, 0, schedule()); +} + /* * Unmaps the inode and waits for any DMA to complete prior to deleting the * DAX mapping entries for the range. @@ -919,6 +926,26 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end, } EXPORT_SYMBOL_GPL(dax_break_layout); +void dax_break_layout_final(struct inode *inode) +{ + struct page *page; + + if (!dax_mapping(inode->i_mapping)) + return; + + do { + page = dax_layout_busy_page_range(inode->i_mapping, 0, + LLONG_MAX); + if (!page) + break; + + wait_page_idle_uninterruptible(page, inode); + } while (true); + + dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX); +} +EXPORT_SYMBOL_GPL(dax_break_layout_final); + /* * Invalidate DAX entry if it is clean. 
*/ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2342bac14a9e8..3cc8da6357aa0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -181,6 +181,8 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); + dax_break_layout_final(inode); + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ext4_evict_ea_inode(inode); if (inode->i_nlink) { diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0055066fb1d98..37898f89b3ea0 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -751,6 +751,17 @@ xfs_fs_drop_inode( return generic_drop_inode(inode); } +STATIC void +xfs_fs_evict_inode( + struct inode *inode) +{ + if (IS_DAX(inode)) + dax_break_layout_final(inode); + + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); +} + static void xfs_mount_free( struct xfs_mount *mp) @@ -1215,6 +1226,7 @@ static const struct super_operations xfs_super_operations = { .destroy_inode = xfs_fs_destroy_inode, .dirty_inode = xfs_fs_dirty_inode, .drop_inode = xfs_fs_drop_inode, + .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, diff --git a/include/linux/dax.h b/include/linux/dax.h index 2fbb262092cac..2333c30f6d368 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -232,6 +232,10 @@ static inline int __must_check dax_break_layout(struct inode *inode, { return 0; } + +static inline void dax_break_layout_final(struct inode *inode) +{ +} #endif bool dax_alive(struct dax_device *dax_dev); @@ -266,6 +270,7 @@ static inline int __must_check dax_break_layout_inode(struct inode *inode, { return dax_break_layout(inode, 0, LLONG_MAX, cb); } +void dax_break_layout_final(struct inode *inode); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, -- 2.39.5