]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
btrfs: fix -ENOSPC mmap write failure on NOCOW files/extents
authorFilipe Manana <fdmanana@suse.com>
Tue, 8 Jul 2025 15:01:19 +0000 (16:01 +0100)
committerDavid Sterba <dsterba@suse.com>
Mon, 21 Jul 2025 22:09:22 +0000 (00:09 +0200)
If we attempt a mmap write into a NOCOW file or a prealloc extent when
there is no more available data space (or unallocated space to allocate a
new data block group) and we can do a NOCOW write (there are no reflinks
for the target extent or snapshots), we always fail due to -ENOSPC, unlike
for the regular buffered write and direct IO paths where we check that we
can do a NOCOW write in case we can't reserve data space.

Simple reproducer:

  $ cat test.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  umount $DEV &> /dev/null
  mkfs.btrfs -f -b $((512 * 1024 * 1024)) $DEV
  mount $DEV $MNT

  touch $MNT/foobar
  # Make it a NOCOW file.
  chattr +C $MNT/foobar

  # Add initial data to file.
  xfs_io -c "pwrite -S 0xab 0 1M" $MNT/foobar

  # Fill all the remaining data space and unallocated space with data.
  dd if=/dev/zero of=$MNT/filler bs=4K &> /dev/null

  # Overwrite the file with a mmap write. Should succeed.
  xfs_io -c "mmap -w 0 1M"        \
         -c "mwrite -S 0xcd 0 1M" \
         -c "munmap"              \
         $MNT/foobar

  # Unmount, mount again and verify the new data was persisted.
  umount $MNT
  mount $DEV $MNT

  od -A d -t x1 $MNT/foobar

  umount $MNT

Running this:

  $ ./test.sh
  (...)
  wrote 1048576/1048576 bytes at offset 0
  1 MiB, 256 ops; 0.0008 sec (1.188 GiB/sec and 311435.5231 ops/sec)
  ./test.sh: line 24: 234865 Bus error               xfs_io -c "mmap -w 0 1M" -c "mwrite -S 0xcd 0 1M" -c "munmap" $MNT/foobar
  0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab
  *
  1048576

Fix this by not failing in case we can't allocate data space and we can
NOCOW into the target extent - reserving only metadata space in this case.

After this change the test passes:

  $ ./test.sh
  (...)
  wrote 1048576/1048576 bytes at offset 0
  1 MiB, 256 ops; 0.0007 sec (1.262 GiB/sec and 330749.3540 ops/sec)
  0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
  *
  1048576

A test case for fstests will be added soon.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/file.c

index 05b046c6806fe595865e6b2a23ee28222f2bf227..cb07274d4126ace9c9de5badaf93030ca7be952d 100644 (file)
@@ -1841,6 +1841,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
        loff_t size;
        size_t fsize = folio_size(folio);
        int ret;
+       bool only_release_metadata = false;
        u64 reserved_space;
        u64 page_start;
        u64 page_end;
@@ -1861,10 +1862,34 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
         * end up waiting indefinitely to get a lock on the page currently
         * being processed by btrfs_page_mkwrite() function.
         */
-       ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
-                                          page_start, reserved_space);
-       if (ret < 0)
+       ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
+                                         page_start, reserved_space, false);
+       if (ret < 0) {
+               size_t write_bytes = reserved_space;
+
+               if (btrfs_check_nocow_lock(BTRFS_I(inode), page_start,
+                                          &write_bytes, false) <= 0)
+                       goto out_noreserve;
+
+               only_release_metadata = true;
+
+               /*
+                * Can't write the whole range, there may be shared extents or
+                * holes in the range, bail out with @only_release_metadata set
+                * to true so that we unlock the nocow lock before returning the
+                * error.
+                */
+               if (write_bytes < reserved_space)
+                       goto out_noreserve;
+       }
+       ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserved_space,
+                                             reserved_space, false);
+       if (ret < 0) {
+               if (!only_release_metadata)
+                       btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
+                                                      page_start, reserved_space);
                goto out_noreserve;
+       }
 
        ret = file_update_time(vmf->vma->vm_file);
        if (ret < 0)
@@ -1905,10 +1930,16 @@ again:
        if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
                reserved_space = round_up(size - page_start, fs_info->sectorsize);
                if (reserved_space < fsize) {
+                       const u64 to_free = fsize - reserved_space;
+
                        end = page_start + reserved_space - 1;
-                       btrfs_delalloc_release_space(BTRFS_I(inode),
-                                       data_reserved, end + 1,
-                                       fsize - reserved_space, true);
+                       if (only_release_metadata)
+                               btrfs_delalloc_release_metadata(BTRFS_I(inode),
+                                                               to_free, true);
+                       else
+                               btrfs_delalloc_release_space(BTRFS_I(inode),
+                                                            data_reserved, end + 1,
+                                                            to_free, true);
                }
        }
 
@@ -1945,10 +1976,16 @@ again:
 
        btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
 
+       if (only_release_metadata)
+               btrfs_set_extent_bit(io_tree, page_start, end, EXTENT_NORESERVE,
+                                    &cached_state);
+
        btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
        up_read(&BTRFS_I(inode)->i_mmap_lock);
 
        btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
+       if (only_release_metadata)
+               btrfs_check_nocow_unlock(BTRFS_I(inode));
        sb_end_pagefault(inode->i_sb);
        extent_changeset_free(data_reserved);
        return VM_FAULT_LOCKED;
@@ -1958,10 +1995,16 @@ out_unlock:
        up_read(&BTRFS_I(inode)->i_mmap_lock);
 out:
        btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
-       btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
-                                    reserved_space, true);
+       if (only_release_metadata)
+               btrfs_delalloc_release_metadata(BTRFS_I(inode), reserved_space, true);
+       else
+               btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+                                            page_start, reserved_space, true);
        extent_changeset_free(data_reserved);
 out_noreserve:
+       if (only_release_metadata)
+               btrfs_check_nocow_unlock(BTRFS_I(inode));
+
        sb_end_pagefault(inode->i_sb);
 
        if (ret < 0)