]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
fs/dax: ensure all pages are idle prior to filesystem unmount
authorAlistair Popple <apopple@nvidia.com>
Fri, 28 Feb 2025 03:31:02 +0000 (14:31 +1100)
committerAndrew Morton <akpm@linux-foundation.org>
Tue, 18 Mar 2025 05:06:38 +0000 (22:06 -0700)
File systems call dax_break_mapping() prior to reallocating file system
blocks to ensure the page is not undergoing any DMA or other accesses.
Generally this is needed when a file is truncated to ensure that if a
block is reallocated nothing is writing to it.  However filesystems
currently don't call this when an FS DAX inode is evicted.

This can cause problems when the file system is unmounted as a page can
continue to be under going DMA or other remote access after unmount.  This
means if the file system is remounted any truncate or other operation
which requires the underlying file system block to be freed will not wait
for the remote access to complete.  Therefore a busy block may be
reallocated to a new file leading to corruption.

Link: https://lkml.kernel.org/r/2d3cf575bbd095084993154be2f0aa7442e5cd28.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Dan Wiliams <dan.j.williams@intel.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcow (Oracle) <willy@infradead.org>
Cc: Michael "Camp Drill Sergeant" Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/dax.c
fs/ext4/inode.c
fs/xfs/xfs_super.c
include/linux/dax.h

index 14fbe516303713dcae1aeffafbab78f9c6444340..bc538ba56058088308d5d433be303810c552922d 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -884,6 +884,13 @@ static int wait_page_idle(struct page *page,
                                TASK_INTERRUPTIBLE, 0, 0, cb(inode));
 }
 
+static void wait_page_idle_uninterruptible(struct page *page,
+                                       struct inode *inode)
+{
+       ___wait_var_event(page, dax_page_is_idle(page),
+                       TASK_UNINTERRUPTIBLE, 0, 0, schedule());
+}
+
 /*
  * Unmaps the inode and waits for any DMA to complete prior to deleting the
  * DAX mapping entries for the range.
@@ -919,6 +926,26 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
 }
 EXPORT_SYMBOL_GPL(dax_break_layout);
 
+void dax_break_layout_final(struct inode *inode)
+{
+       struct page *page;
+
+       if (!dax_mapping(inode->i_mapping))
+               return;
+
+       do {
+               page = dax_layout_busy_page_range(inode->i_mapping, 0,
+                                               LLONG_MAX);
+               if (!page)
+                       break;
+
+               wait_page_idle_uninterruptible(page, inode);
+       } while (true);
+
+       dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
+}
+EXPORT_SYMBOL_GPL(dax_break_layout_final);
+
 /*
  * Invalidate DAX entry if it is clean.
  */
index 2342bac14a9e88153c4890d6b653fd38850d91b2..3cc8da6357aa0e50b01f5d4f0e7caf565c944d89 100644 (file)
@@ -181,6 +181,8 @@ void ext4_evict_inode(struct inode *inode)
 
        trace_ext4_evict_inode(inode);
 
+       dax_break_layout_final(inode);
+
        if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
                ext4_evict_ea_inode(inode);
        if (inode->i_nlink) {
index 0055066fb1d98b51969fbafd971c04e20fce48fe..37898f89b3ea03eefa09bf2d6cab98d37c837cea 100644 (file)
@@ -751,6 +751,17 @@ xfs_fs_drop_inode(
        return generic_drop_inode(inode);
 }
 
+STATIC void
+xfs_fs_evict_inode(
+       struct inode            *inode)
+{
+       if (IS_DAX(inode))
+               dax_break_layout_final(inode);
+
+       truncate_inode_pages_final(&inode->i_data);
+       clear_inode(inode);
+}
+
 static void
 xfs_mount_free(
        struct xfs_mount        *mp)
@@ -1215,6 +1226,7 @@ static const struct super_operations xfs_super_operations = {
        .destroy_inode          = xfs_fs_destroy_inode,
        .dirty_inode            = xfs_fs_dirty_inode,
        .drop_inode             = xfs_fs_drop_inode,
+       .evict_inode            = xfs_fs_evict_inode,
        .put_super              = xfs_fs_put_super,
        .sync_fs                = xfs_fs_sync_fs,
        .freeze_fs              = xfs_fs_freeze,
index 2fbb262092caca519c43fc165baf134358663c94..2333c30f6d36838e8a77745babdc5eba4212783a 100644 (file)
@@ -232,6 +232,10 @@ static inline int __must_check dax_break_layout(struct inode *inode,
 {
        return 0;
 }
+
+static inline void dax_break_layout_final(struct inode *inode)
+{
+}
 #endif
 
 bool dax_alive(struct dax_device *dax_dev);
@@ -266,6 +270,7 @@ static inline int __must_check dax_break_layout_inode(struct inode *inode,
 {
        return dax_break_layout(inode, 0, LLONG_MAX, cb);
 }
+void dax_break_layout_final(struct inode *inode);
 int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
                                  struct inode *dest, loff_t destoff,
                                  loff_t len, bool *is_same,