btrfs: fix incorrect buffered IO fallback for append direct writes

author Qu Wenruo <wqu@suse.com>

Thu, 4 Jun 2026 00:29:47 +0000 (09:59 +0930)

committer Johannes Thumshirn <johannes.thumshirn@wdc.com>

Tue, 9 Jun 2026 16:22:46 +0000 (18:22 +0200)
author Qu Wenruo <wqu@suse.com>
Thu, 4 Jun 2026 00:29:47 +0000 (09:59 +0930)
committer Johannes Thumshirn <johannes.thumshirn@wdc.com>
Tue, 9 Jun 2026 16:22:46 +0000 (18:22 +0200)
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c

index 88cb2e82a50768221ae4399c83925e90280df06c..412309825d6fa36bd18ceb0172bd30d919996848 100644 (file)
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -15,10 +15,12 @@
  
  struct btrfs_dio_data {
         ssize_t submitted;
+       loff_t old_isize;
         struct extent_changeset *data_reserved;
         struct btrfs_ordered_extent *ordered;
         bool data_space_reserved;
         bool nocow_done;
+       bool updated_isize;
  };
  
  struct btrfs_dio_private {
@@ -228,6 +230,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
         bool space_reserved = false;
         u64 len = *lenp;
         u64 prev_len;
+       loff_t old_isize;
         int ret = 0;
  
         /*
@@ -341,8 +344,14 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
          * Need to update the i_size under the extent lock so buffered
          * readers will get the updated i_size when we unlock.
          */
-       if (start + len > i_size_read(inode))
+       old_isize = i_size_read(inode);
+       if (start + len > old_isize) {
+               if (!dio_data->updated_isize) {
+                       dio_data->old_isize = old_isize;
+                       dio_data->updated_isize = true;
+               }
                 i_size_write(inode, start + len);
+       }
  out:
         if (ret && space_reserved) {
                 btrfs_delalloc_release_extents(BTRFS_I(inode), len);
@@ -625,6 +634,38 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                 pos += submitted;
                 length -= submitted;
                 if (write) {
+                       /*
+                        * Got a short write and have updated the isize, need to
+                        * revert the isize change.
+                        *
+                        * Normally we need to update isize with extent lock hold,
+                        * but we're safe due to the following factors:
+                        *
+                        * - Only a single writer can be enlarging isize
+                        *   Enlarging isize will take the exclusive inode lock.
+                        *
+                        * - Buffered readers need to wait for the OE we're holding
+                        *   Buffered readers will lock extent and wait for OE
+                        *   of the folio range, and since page cache is invalidated
+                        *   the OE wait can not be skipped.
+                        *
+                        * So here we are safe to revert the isize before
+                        * finishing the OE, and no reader of the remaining range
+                        * can see the enlarged size.
+                        *
+                        * TODO: Extend the DIO_LOCKED lifespan for direct writes,
+                        * and only enlarge isize after a successful write.
+                        */
+                       if (dio_data->updated_isize) {
+                               u64 new_isize;
+
+                               if (submitted == 0)
+                                       new_isize = dio_data->old_isize;
+                               else
+                                       new_isize = max(dio_data->old_isize, pos);
+                               i_size_write(inode, new_isize);
+                               dio_data->updated_isize = false;
+                       }
                         /*
                          * We have a short write, if there is any range
                          * that is submitted properly, that part will have
author	Qu Wenruo <wqu@suse.com>
	Thu, 4 Jun 2026 00:29:47 +0000 (09:59 +0930)
committer	Johannes Thumshirn <johannes.thumshirn@wdc.com>
	Tue, 9 Jun 2026 16:22:46 +0000 (18:22 +0200)