btrfs: reloc: unconditionally invalidate the page cache for each cluster

author Qu Wenruo <wqu@suse.com>

Tue, 15 Jul 2025 03:48:39 +0000 (13:18 +0930)

committer David Sterba <dsterba@suse.com>

Mon, 21 Jul 2025 23:13:03 +0000 (01:13 +0200)
author Qu Wenruo <wqu@suse.com>
Tue, 15 Jul 2025 03:48:39 +0000 (13:18 +0930)
committer David Sterba <dsterba@suse.com>
Mon, 21 Jul 2025 23:13:03 +0000 (01:13 +0200)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c

index 8a71cffb4dfbf40d6f9bb6f6fd9fdfab195cccb2..e2197d10a1d63476194724bdfdec261a4307d849 100644 (file)
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2666,66 +2666,24 @@ static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control
         u64 num_bytes;
         int nr;
         int ret = 0;
-       u64 i_size = i_size_read(&inode->vfs_inode);
         u64 prealloc_start = cluster->start - offset;
         u64 prealloc_end = cluster->end - offset;
         u64 cur_offset = prealloc_start;
  
         /*
-        * For subpage case, previous i_size may not be aligned to PAGE_SIZE.
-        * This means the range [i_size, PAGE_END + 1) is filled with zeros by
-        * btrfs_do_readpage() call of previously relocated file cluster.
+        * For the blocksize < folio size case (either bs < page size or large folios),
+        * all blocks in the folio beyond i_size are filled with zeros.
          *
-        * If the current cluster starts in the above range, btrfs_do_readpage()
+        * If the current cluster covers the above range, btrfs_do_readpage()
          * will skip the read, and relocate_one_folio() will later writeback
          * the padding zeros as new data, causing data corruption.
          *
-        * Here we have to manually invalidate the range (i_size, PAGE_END + 1).
+        * Here we have to invalidate the cache covering our cluster.
          */
-       if (!PAGE_ALIGNED(i_size)) {
-               struct address_space *mapping = inode->vfs_inode.i_mapping;
-               struct btrfs_fs_info *fs_info = inode->root->fs_info;
-               const u32 sectorsize = fs_info->sectorsize;
-               struct folio *folio;
-
-               ASSERT(sectorsize < PAGE_SIZE);
-               ASSERT(IS_ALIGNED(i_size, sectorsize));
-
-               /*
-                * Subpage can't handle page with DIRTY but without UPTODATE
-                * bit as it can lead to the following deadlock:
-                *
-                * btrfs_read_folio()
-                * | Page already *locked*
-                * |- btrfs_lock_and_flush_ordered_range()
-                *    |- btrfs_start_ordered_extent()
-                *       |- extent_write_cache_pages()
-                *          |- lock_page()
-                *             We try to lock the page we already hold.
-                *
-                * Here we just writeback the whole data reloc inode, so that
-                * we will be ensured to have no dirty range in the page, and
-                * are safe to clear the uptodate bits.
-                *
-                * This shouldn't cause too much overhead, as we need to write
-                * the data back anyway.
-                */
-               ret = filemap_write_and_wait(mapping);
-               if (ret < 0)
-                       return ret;
-
-               folio = filemap_lock_folio(mapping, i_size >> PAGE_SHIFT);
-               /*
-                * If page is freed we don't need to do anything then, as we
-                * will re-read the whole page anyway.
-                */
-               if (!IS_ERR(folio)) {
-                       btrfs_subpage_clear_uptodate(fs_info, folio, i_size,
-                                       round_up(i_size, PAGE_SIZE) - i_size);
-                       folio_unlock(folio);
-                       folio_put(folio);
-               }
-       }
+       ret = filemap_invalidate_inode(&inode->vfs_inode, true, prealloc_start,
+                                      prealloc_end);
+       if (ret < 0)
+               return ret;
  
         BUG_ON(cluster->start != cluster->boundary[0]);
         ret = btrfs_alloc_data_chunk_ondemand(inode,
author	Qu Wenruo <wqu@suse.com>
	Tue, 15 Jul 2025 03:48:39 +0000 (13:18 +0930)
committer	David Sterba <dsterba@suse.com>
	Mon, 21 Jul 2025 23:13:03 +0000 (01:13 +0200)