6.12-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)
diff --git a/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch

new file mode 100644 (file)

index 0000000..7aae615
--- /dev/null
+++ b/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
@@ -0,0 +1,34 @@
+From 2c8507c63f5498d4ee4af404a8e44ceae4345056 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 9 Dec 2024 16:43:44 +0000
+Subject: btrfs: avoid monopolizing a core when activating a swap file
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 2c8507c63f5498d4ee4af404a8e44ceae4345056 upstream.
+
+During swap activation we iterate over the extents of a file and we can
+have many thousands of them, so we can end up in a busy loop monopolizing
+a core. Avoid this by doing a voluntary reschedule after processing each
+extent.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -7117,6 +7117,8 @@ noinline int can_nocow_extent(struct ino
+                       ret = -EAGAIN;
+                       goto out;
+               }
++
++              cond_resched();
+       }
+ 
+       if (file_extent)
diff --git a/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch

new file mode 100644 (file)

index 0000000..8dbd669
--- /dev/null
+++ b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch
@@ -0,0 +1,44 @@
+From 0fba7be1ca6df2881e68386e5575fe096f33c4ca Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 13 Dec 2024 12:33:22 -0800
+Subject: btrfs: check folio mapping after unlock in put_file_data()
+
+From: Boris Burkov <boris@bur.io>
+
+commit 0fba7be1ca6df2881e68386e5575fe096f33c4ca upstream.
+
+When we call btrfs_read_folio() we get an unlocked folio, so it is possible
+for a different thread to concurrently modify folio->mapping. We must
+check that this hasn't happened once we do have the lock.
+
+CC: stable@vger.kernel.org # 6.12+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/send.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -5291,6 +5291,7 @@ static int put_file_data(struct send_ctx
+               unsigned cur_len = min_t(unsigned, len,
+                                        PAGE_SIZE - pg_offset);
+ 
++again:
+               folio = filemap_lock_folio(mapping, index);
+               if (IS_ERR(folio)) {
+                       page_cache_sync_readahead(mapping,
+@@ -5323,6 +5324,11 @@ static int put_file_data(struct send_ctx
+                               ret = -EIO;
+                               break;
+                       }
++                      if (folio->mapping != mapping) {
++                              folio_unlock(folio);
++                              folio_put(folio);
++                              goto again;
++                      }
+               }
+ 
+               memcpy_from_folio(sctx->send_buf + sctx->send_size, folio,
diff --git a/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch

new file mode 100644 (file)

index 0000000..9c9c158
--- /dev/null
+++ b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch
@@ -0,0 +1,102 @@
+From 3e74859ee35edc33a022c3f3971df066ea0ca6b9 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 13 Dec 2024 12:22:32 -0800
+Subject: btrfs: check folio mapping after unlock in relocate_one_folio()
+
+From: Boris Burkov <boris@bur.io>
+
+commit 3e74859ee35edc33a022c3f3971df066ea0ca6b9 upstream.
+
+When we call btrfs_read_folio() to bring a folio uptodate, we unlock the
+folio. The result of that is that a different thread can modify the
+mapping (like remove it with invalidate) before we call folio_lock().
+This results in an invalid page and we need to try again.
+
+In particular, if we are relocating concurrently with aborting a
+transaction, this can result in a crash like the following:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  PGD 0 P4D 0
+  Oops: 0000 [#1] SMP
+  CPU: 76 PID: 1411631 Comm: kworker/u322:5
+  Workqueue: events_unbound btrfs_reclaim_bgs_work
+  RIP: 0010:set_page_extent_mapped+0x20/0xb0
+  RSP: 0018:ffffc900516a7be8 EFLAGS: 00010246
+  RAX: ffffea009e851d08 RBX: ffffea009e0b1880 RCX: 0000000000000000
+  RDX: 0000000000000000 RSI: ffffc900516a7b90 RDI: ffffea009e0b1880
+  RBP: 0000000003573000 R08: 0000000000000001 R09: ffff88c07fd2f3f0
+  R10: 0000000000000000 R11: 0000194754b575be R12: 0000000003572000
+  R13: 0000000003572fff R14: 0000000000100cca R15: 0000000005582fff
+  FS:  0000000000000000(0000) GS:ffff88c07fd00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000000 CR3: 000000407d00f002 CR4: 00000000007706f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  PKRU: 55555554
+  Call Trace:
+  <TASK>
+  ? __die+0x78/0xc0
+  ? page_fault_oops+0x2a8/0x3a0
+  ? __switch_to+0x133/0x530
+  ? wq_worker_running+0xa/0x40
+  ? exc_page_fault+0x63/0x130
+  ? asm_exc_page_fault+0x22/0x30
+  ? set_page_extent_mapped+0x20/0xb0
+  relocate_file_extent_cluster+0x1a7/0x940
+  relocate_data_extent+0xaf/0x120
+  relocate_block_group+0x20f/0x480
+  btrfs_relocate_block_group+0x152/0x320
+  btrfs_relocate_chunk+0x3d/0x120
+  btrfs_reclaim_bgs_work+0x2ae/0x4e0
+  process_scheduled_works+0x184/0x370
+  worker_thread+0xc6/0x3e0
+  ? blk_add_timer+0xb0/0xb0
+  kthread+0xae/0xe0
+  ? flush_tlb_kernel_range+0x90/0x90
+  ret_from_fork+0x2f/0x40
+  ? flush_tlb_kernel_range+0x90/0x90
+  ret_from_fork_asm+0x11/0x20
+  </TASK>
+
+This occurs because cleanup_one_transaction() calls
+destroy_delalloc_inodes() which calls invalidate_inode_pages2() which
+takes the folio_lock before setting mapping to NULL. We fail to check
+this, and subsequently call set_page_extent_mapped(), which assumes that
+mapping != NULL (in fact it asserts that in debug mode).
+
+Note that the "fixes" patch here is not the one that introduced the
+race (the very first iteration of this code from 2009) but a more recent
+change that made this particular crash happen in practice.
+
+Fixes: e7f1326cc24e ("btrfs: set page extent mapped after read_folio in relocate_one_page")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2902,6 +2902,7 @@ static int relocate_one_folio(struct rel
+       const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags);
+ 
+       ASSERT(index <= last_index);
++again:
+       folio = filemap_lock_folio(inode->i_mapping, index);
+       if (IS_ERR(folio)) {
+ 
+@@ -2937,6 +2938,11 @@ static int relocate_one_folio(struct rel
+                       ret = -EIO;
+                       goto release_folio;
+               }
++              if (folio->mapping != inode->i_mapping) {
++                      folio_unlock(folio);
++                      folio_put(folio);
++                      goto again;
++              }
+       }
+ 
+       /*
diff --git a/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch b/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch

new file mode 100644 (file)

index 0000000..f2907aa
--- /dev/null
+++ b/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch
@@ -0,0 +1,333 @@
+From 03018e5d8508254534511d40fb57bc150e6a87f2 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Mon, 9 Dec 2024 12:54:14 +0000
+Subject: btrfs: fix swap file activation failure due to extents that used to be shared
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 03018e5d8508254534511d40fb57bc150e6a87f2 upstream.
+
+When activating a swap file, to determine if an extent is shared we use
+can_nocow_extent(), which ends up at btrfs_cross_ref_exist(). That helper
+is meant to be quick because it's used in the NOCOW write path, when
+flushing delalloc and when doing a direct IO write, however it does return
+some false positives, meaning it may indicate that an extent is shared
+even if it's no longer the case. For the write path this is fine, we just
+do an unnecessary COW operation instead of doing a more rigorous check
+which would be too heavy (calling btrfs_is_data_extent_shared()).
+
+However when activating a swap file, the false positives simply result
+in a failure, which is confusing for users/applications. One particular
+case where this happens is when a data extent only has 1 reference but
+that reference is not inlined in the extent item located in the extent
+tree - this happens when we create more than 33 references for an extent
+and then delete those 33 references plus every other non-inline reference
+except one. The function check_committed_ref() assumes that if the size
+of an extent item doesn't match the size of struct btrfs_extent_item
+plus the size of an inline reference (plus an owner reference in case
+simple quotas are enabled), then the extent is shared - that is not the
+case however, we can have a single reference but it's not inlined - the
+reason we do this is to be fast and avoid inspecting non-inline references
+which may be located in another leaf of the extent tree, slowing down
+write paths.
+
+The following test script reproduces the bug:
+
+   $ cat test.sh
+   #!/bin/bash
+
+   DEV=/dev/sdi
+   MNT=/mnt/sdi
+   NUM_CLONES=50
+
+   umount $DEV &> /dev/null
+
+   run_test()
+   {
+        local sync_after_add_reflinks=$1
+        local sync_after_remove_reflinks=$2
+
+        mkfs.btrfs -f $DEV > /dev/null
+        #mkfs.xfs -f $DEV > /dev/null
+        mount $DEV $MNT
+
+        touch $MNT/foo
+        chmod 0600 $MNT/foo
+       # On btrfs the file must be NOCOW.
+        chattr +C $MNT/foo &> /dev/null
+        xfs_io -s -c "pwrite -b 1M 0 1M" $MNT/foo
+        mkswap $MNT/foo
+
+        for ((i = 1; i <= $NUM_CLONES; i++)); do
+            touch $MNT/foo_clone_$i
+            chmod 0600 $MNT/foo_clone_$i
+            # On btrfs the file must be NOCOW.
+            chattr +C $MNT/foo_clone_$i &> /dev/null
+            cp --reflink=always $MNT/foo $MNT/foo_clone_$i
+        done
+
+        if [ $sync_after_add_reflinks -ne 0 ]; then
+            # Flush delayed refs and commit current transaction.
+            sync -f $MNT
+        fi
+
+        # Remove the original file and all clones except the last.
+        rm -f $MNT/foo
+        for ((i = 1; i < $NUM_CLONES; i++)); do
+            rm -f $MNT/foo_clone_$i
+        done
+
+        if [ $sync_after_remove_reflinks -ne 0 ]; then
+            # Flush delayed refs and commit current transaction.
+            sync -f $MNT
+        fi
+
+        # Now use the last clone as a swap file. It should work since
+        # its extents are not shared anymore.
+        swapon $MNT/foo_clone_${NUM_CLONES}
+        swapoff $MNT/foo_clone_${NUM_CLONES}
+
+        umount $MNT
+   }
+
+   echo -e "\nTest without sync after creating and removing clones"
+   run_test 0 0
+
+   echo -e "\nTest with sync after creating clones"
+   run_test 1 0
+
+   echo -e "\nTest with sync after removing clones"
+   run_test 0 1
+
+   echo -e "\nTest with sync after creating and removing clones"
+   run_test 1 1
+
+Running the test:
+
+   $ ./test.sh
+   Test without sync after creating and removing clones
+   wrote 1048576/1048576 bytes at offset 0
+   1 MiB, 1 ops; 0.0017 sec (556.793 MiB/sec and 556.7929 ops/sec)
+   Setting up swapspace version 1, size = 1020 KiB (1044480 bytes)
+   no label, UUID=a6b9c29e-5ef4-4689-a8ac-bc199c750f02
+   swapon: /mnt/sdi/foo_clone_50: swapon failed: Invalid argument
+   swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument
+
+   Test with sync after creating clones
+   wrote 1048576/1048576 bytes at offset 0
+   1 MiB, 1 ops; 0.0036 sec (271.739 MiB/sec and 271.7391 ops/sec)
+   Setting up swapspace version 1, size = 1020 KiB (1044480 bytes)
+   no label, UUID=5e9008d6-1f7a-4948-a1b4-3f30aba20a33
+   swapon: /mnt/sdi/foo_clone_50: swapon failed: Invalid argument
+   swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument
+
+   Test with sync after removing clones
+   wrote 1048576/1048576 bytes at offset 0
+   1 MiB, 1 ops; 0.0103 sec (96.665 MiB/sec and 96.6651 ops/sec)
+   Setting up swapspace version 1, size = 1020 KiB (1044480 bytes)
+   no label, UUID=916c2740-fa9f-4385-9f06-29c3f89e4764
+
+   Test with sync after creating and removing clones
+   wrote 1048576/1048576 bytes at offset 0
+   1 MiB, 1 ops; 0.0031 sec (314.268 MiB/sec and 314.2678 ops/sec)
+   Setting up swapspace version 1, size = 1020 KiB (1044480 bytes)
+   no label, UUID=06aab1dd-4d90-49c0-bd9f-3a8db4e2f912
+   swapon: /mnt/sdi/foo_clone_50: swapon failed: Invalid argument
+   swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument
+
+Fix this by reworking btrfs_swap_activate() to instead of using extent
+maps and checking for shared extents with can_nocow_extent(), iterate
+over the inode's file extent items and use the accurate
+btrfs_is_data_extent_shared().
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |  102 ++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 72 insertions(+), 30 deletions(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -9782,15 +9782,16 @@ static int btrfs_swap_activate(struct sw
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_state *cached_state = NULL;
+-      struct extent_map *em = NULL;
+       struct btrfs_chunk_map *map = NULL;
+       struct btrfs_device *device = NULL;
+       struct btrfs_swap_info bsi = {
+               .lowest_ppage = (sector_t)-1ULL,
+       };
++      struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
++      struct btrfs_path *path = NULL;
+       int ret = 0;
+       u64 isize;
+-      u64 start;
++      u64 prev_extent_end = 0;
+ 
+       /*
+        * Acquire the inode's mmap lock to prevent races with memory mapped
+@@ -9829,6 +9830,13 @@ static int btrfs_swap_activate(struct sw
+               goto out_unlock_mmap;
+       }
+ 
++      path = btrfs_alloc_path();
++      backref_ctx = btrfs_alloc_backref_share_check_ctx();
++      if (!path || !backref_ctx) {
++              ret = -ENOMEM;
++              goto out_unlock_mmap;
++      }
++
+       /*
+        * Balance or device remove/replace/resize can move stuff around from
+        * under us. The exclop protection makes sure they aren't running/won't
+@@ -9887,24 +9895,39 @@ static int btrfs_swap_activate(struct sw
+       isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
+ 
+       lock_extent(io_tree, 0, isize - 1, &cached_state);
+-      start = 0;
+-      while (start < isize) {
+-              u64 logical_block_start, physical_block_start;
++      while (prev_extent_end < isize) {
++              struct btrfs_key key;
++              struct extent_buffer *leaf;
++              struct btrfs_file_extent_item *ei;
+               struct btrfs_block_group *bg;
+-              u64 len = isize - start;
+-
+-              em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
+-              if (IS_ERR(em)) {
+-                      ret = PTR_ERR(em);
+-                      goto out;
+-              }
+-
+-              if (em->disk_bytenr == EXTENT_MAP_HOLE) {
++              u64 logical_block_start;
++              u64 physical_block_start;
++              u64 extent_gen;
++              u64 disk_bytenr;
++              u64 len;
++
++              key.objectid = btrfs_ino(BTRFS_I(inode));
++              key.type = BTRFS_EXTENT_DATA_KEY;
++              key.offset = prev_extent_end;
++
++              ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
++              if (ret < 0)
++                      goto out;
++
++              /*
++               * If key not found it means we have an implicit hole (NO_HOLES
++               * is enabled).
++               */
++              if (ret > 0) {
+                       btrfs_warn(fs_info, "swapfile must not have holes");
+                       ret = -EINVAL;
+                       goto out;
+               }
+-              if (em->disk_bytenr == EXTENT_MAP_INLINE) {
++
++              leaf = path->nodes[0];
++              ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
++
++              if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
+                       /*
+                        * It's unlikely we'll ever actually find ourselves
+                        * here, as a file small enough to fit inline won't be
+@@ -9916,23 +9939,45 @@ static int btrfs_swap_activate(struct sw
+                       ret = -EINVAL;
+                       goto out;
+               }
+-              if (extent_map_is_compressed(em)) {
++
++              if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
+                       btrfs_warn(fs_info, "swapfile must not be compressed");
+                       ret = -EINVAL;
+                       goto out;
+               }
+ 
+-              logical_block_start = extent_map_block_start(em) + (start - em->start);
+-              len = min(len, em->len - (start - em->start));
+-              free_extent_map(em);
+-              em = NULL;
++              disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
++              if (disk_bytenr == 0) {
++                      btrfs_warn(fs_info, "swapfile must not have holes");
++                      ret = -EINVAL;
++                      goto out;
++              }
++
++              logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
++              extent_gen = btrfs_file_extent_generation(leaf, ei);
++              prev_extent_end = btrfs_file_extent_end(path);
++
++              if (prev_extent_end > isize)
++                      len = isize - key.offset;
++              else
++                      len = btrfs_file_extent_num_bytes(leaf, ei);
++
++              backref_ctx->curr_leaf_bytenr = leaf->start;
++
++              /*
++               * Don't need the path anymore, release to avoid deadlocks when
++               * calling btrfs_is_data_extent_shared() because when joining a
++               * transaction it can block waiting for the current one's commit
++               * which in turn may be trying to lock the same leaf to flush
++               * delayed items for example.
++               */
++              btrfs_release_path(path);
+ 
+-              ret = can_nocow_extent(inode, start, &len, NULL, false, true);
++              ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
++                                                extent_gen, backref_ctx);
+               if (ret < 0) {
+                       goto out;
+-              } else if (ret) {
+-                      ret = 0;
+-              } else {
++              } else if (ret > 0) {
+                       btrfs_warn(fs_info,
+                                  "swapfile must not be copy-on-write");
+                       ret = -EINVAL;
+@@ -9967,7 +10012,6 @@ static int btrfs_swap_activate(struct sw
+ 
+               physical_block_start = (map->stripes[0].physical +
+                                       (logical_block_start - map->start));
+-              len = min(len, map->chunk_len - (logical_block_start - map->start));
+               btrfs_free_chunk_map(map);
+               map = NULL;
+ 
+@@ -10008,20 +10052,16 @@ static int btrfs_swap_activate(struct sw
+                               if (ret)
+                                       goto out;
+                       }
+-                      bsi.start = start;
++                      bsi.start = key.offset;
+                       bsi.block_start = physical_block_start;
+                       bsi.block_len = len;
+               }
+-
+-              start += len;
+       }
+ 
+       if (bsi.block_len)
+               ret = btrfs_add_swap_extent(sis, &bsi);
+ 
+ out:
+-      if (!IS_ERR_OR_NULL(em))
+-              free_extent_map(em);
+       if (!IS_ERR_OR_NULL(map))
+               btrfs_free_chunk_map(map);
+ 
+@@ -10036,6 +10076,8 @@ out:
+ 
+ out_unlock_mmap:
+       up_write(&BTRFS_I(inode)->i_mmap_lock);
++      btrfs_free_backref_share_ctx(backref_ctx);
++      btrfs_free_path(path);
+       if (ret)
+               return ret;
+ 
diff --git a/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch b/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch

new file mode 100644 (file)

index 0000000..404981d
--- /dev/null
+++ b/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch
@@ -0,0 +1,100 @@
+From f2363e6fcc7938c5f0f6ac066fad0dd247598b51 Mon Sep 17 00:00:00 2001
+From: Julian Sun <sunjunchao2870@gmail.com>
+Date: Wed, 11 Dec 2024 19:13:15 +0800
+Subject: btrfs: fix transaction atomicity bug when enabling simple quotas
+
+From: Julian Sun <sunjunchao2870@gmail.com>
+
+commit f2363e6fcc7938c5f0f6ac066fad0dd247598b51 upstream.
+
+Set squota incompat bit before committing the transaction that enables
+the feature.
+
+With the config CONFIG_BTRFS_ASSERT enabled, an assertion
+failure occurs regarding the simple quota feature.
+
+  [5.596534] assertion failed: btrfs_fs_incompat(fs_info, SIMPLE_QUOTA), in fs/btrfs/qgroup.c:365
+  [5.597098] ------------[ cut here ]------------
+  [5.597371] kernel BUG at fs/btrfs/qgroup.c:365!
+  [5.597946] CPU: 1 UID: 0 PID: 268 Comm: mount Not tainted 6.13.0-rc2-00031-gf92f4749861b #146
+  [5.598450] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
+  [5.599008] RIP: 0010:btrfs_read_qgroup_config+0x74d/0x7a0
+  [5.604303]  <TASK>
+  [5.605230]  ? btrfs_read_qgroup_config+0x74d/0x7a0
+  [5.605538]  ? exc_invalid_op+0x56/0x70
+  [5.605775]  ? btrfs_read_qgroup_config+0x74d/0x7a0
+  [5.606066]  ? asm_exc_invalid_op+0x1f/0x30
+  [5.606441]  ? btrfs_read_qgroup_config+0x74d/0x7a0
+  [5.606741]  ? btrfs_read_qgroup_config+0x74d/0x7a0
+  [5.607038]  ? try_to_wake_up+0x317/0x760
+  [5.607286]  open_ctree+0xd9c/0x1710
+  [5.607509]  btrfs_get_tree+0x58a/0x7e0
+  [5.608002]  vfs_get_tree+0x2e/0x100
+  [5.608224]  fc_mount+0x16/0x60
+  [5.608420]  btrfs_get_tree+0x2f8/0x7e0
+  [5.608897]  vfs_get_tree+0x2e/0x100
+  [5.609121]  path_mount+0x4c8/0xbc0
+  [5.609538]  __x64_sys_mount+0x10d/0x150
+
+The issue can be easily reproduced using the following reproducer:
+
+  root@q:linux# cat repro.sh
+  set -e
+
+  mkfs.btrfs -q -f /dev/sdb
+  mount /dev/sdb /mnt/btrfs
+  btrfs quota enable -s /mnt/btrfs
+  umount /mnt/btrfs
+  mount /dev/sdb /mnt/btrfs
+
+The issue is that when enabling quotas, at btrfs_quota_enable(), we set
+BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE at fs_info->qgroup_flags and persist
+it in the quota root in the item with the key BTRFS_QGROUP_STATUS_KEY, but
+we only set the incompat bit BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA after we
+commit the transaction used to enable simple quotas.
+
+This means that if after that transaction commit we unmount the filesystem
+without starting and committing any other transaction, or we have a power
+failure, the next time we mount the filesystem we will find the flag
+BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE set in the item with the key
+BTRFS_QGROUP_STATUS_KEY but we will not find the incompat bit
+BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA set in the superblock, triggering an
+assertion failure at:
+
+  btrfs_read_qgroup_config() -> qgroup_read_enable_gen()
+
+To fix this issue, set the BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA flag
+immediately after setting the BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE.
+This ensures that both flags are flushed to disk within the same
+transaction.
+
+Fixes: 182940f4f4db ("btrfs: qgroup: add new quota mode for simple quotas")
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Julian Sun <sunjunchao2870@gmail.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/qgroup.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -1122,6 +1122,7 @@ int btrfs_quota_enable(struct btrfs_fs_i
+       fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON;
+       if (simple) {
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
++              btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
+               btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid);
+       } else {
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+@@ -1255,8 +1256,6 @@ out_add_root:
+       spin_lock(&fs_info->qgroup_lock);
+       fs_info->quota_root = quota_root;
+       set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+-      if (simple)
+-              btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
+       spin_unlock(&fs_info->qgroup_lock);
+ 
+       /* Skip rescan for simple qgroups. */
diff --git a/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch b/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch

new file mode 100644 (file)

index 0000000..aa6b4b1
--- /dev/null
+++ b/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch
@@ -0,0 +1,73 @@
+From 44f52bbe96dfdbe4aca3818a2534520082a07040 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 11 Dec 2024 16:08:07 +0000
+Subject: btrfs: fix use-after-free when COWing tree block and tracing is enabled
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 44f52bbe96dfdbe4aca3818a2534520082a07040 upstream.
+
+When COWing a tree block, at btrfs_cow_block(), and we have the
+tracepoint trace_btrfs_cow_block() enabled and preemption is also enabled
+(CONFIG_PREEMPT=y), we can trigger a use-after-free in the COWed extent
+buffer while inside the tracepoint code. This is because in some paths
+that call btrfs_cow_block(), such as btrfs_search_slot(), we are holding
+the last reference on the extent buffer @buf so btrfs_force_cow_block()
+drops the last reference on the @buf extent buffer when it calls
+free_extent_buffer_stale(buf), which schedules the release of the extent
+buffer with RCU. This means that if we are on a kernel with preemption,
+the current task may be preempted before calling trace_btrfs_cow_block()
+and the extent buffer already released by the time trace_btrfs_cow_block()
+is called, resulting in a use-after-free.
+
+Fix this by moving the trace_btrfs_cow_block() from btrfs_cow_block() to
+btrfs_force_cow_block() before the COWed extent buffer is freed.
+This also has a side effect of invoking the tracepoint in the tree defrag
+code, at defrag.c:btrfs_realloc_node(), since btrfs_force_cow_block() is
+called there, but this is fine and it was actually missing there.
+
+Reported-by: syzbot+8517da8635307182c8a5@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/linux-btrfs/6759a9b9.050a0220.1ac542.000d.GAE@google.com/
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.c |   11 ++++-------
+ 1 file changed, 4 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -654,6 +654,8 @@ int btrfs_force_cow_block(struct btrfs_t
+                       goto error_unlock_cow;
+               }
+       }
++
++      trace_btrfs_cow_block(root, buf, cow);
+       if (unlock_orig)
+               btrfs_tree_unlock(buf);
+       free_extent_buffer_stale(buf);
+@@ -710,7 +712,6 @@ int btrfs_cow_block(struct btrfs_trans_h
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 search_start;
+-      int ret;
+ 
+       if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
+               btrfs_abort_transaction(trans, -EUCLEAN);
+@@ -751,12 +752,8 @@ int btrfs_cow_block(struct btrfs_trans_h
+        * Also We don't care about the error, as it's handled internally.
+        */
+       btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
+-      ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
+-                                  cow_ret, search_start, 0, nest);
+-
+-      trace_btrfs_cow_block(root, buf, *cow_ret);
+-
+-      return ret;
++      return btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
++                                   cow_ret, search_start, 0, nest);
+ }
+ ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
+ 
diff --git a/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch b/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch

new file mode 100644 (file)

index 0000000..8aee80b
--- /dev/null
+++ b/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch
@@ -0,0 +1,63 @@
+From fca432e73db2bec0fdbfbf6d98d3ebcd5388a977 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Wed, 18 Dec 2024 17:00:56 +1030
+Subject: btrfs: sysfs: fix direct super block member reads
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit fca432e73db2bec0fdbfbf6d98d3ebcd5388a977 upstream.
+
+The following sysfs entries read super block members directly,
+which can have a different endianness and cause wrong values:
+
+- sys/fs/btrfs/<uuid>/nodesize
+- sys/fs/btrfs/<uuid>/sectorsize
+- sys/fs/btrfs/<uuid>/clone_alignment
+
+Thankfully those values (nodesize and sectorsize) are always aligned
+inside the btrfs_super_block, so it won't trigger unaligned read errors,
+just endian problems.
+
+Fix them by using the native cached members instead.
+
+Fixes: df93589a1737 ("btrfs: export more from FS_INFO to sysfs")
+CC: stable@vger.kernel.org
+Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/sysfs.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/btrfs/sysfs.c
++++ b/fs/btrfs/sysfs.c
+@@ -1118,7 +1118,7 @@ static ssize_t btrfs_nodesize_show(struc
+ {
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ 
+-      return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
++      return sysfs_emit(buf, "%u\n", fs_info->nodesize);
+ }
+ 
+ BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
+@@ -1128,7 +1128,7 @@ static ssize_t btrfs_sectorsize_show(str
+ {
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ 
+-      return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
++      return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
+ }
+ 
+ BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
+@@ -1180,7 +1180,7 @@ static ssize_t btrfs_clone_alignment_sho
+ {
+       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ 
+-      return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
++      return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
+ }
+ 
+ BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
diff --git a/queue-6.12/series b/queue-6.12/series

index 31ab97e1fb3b93c2d1399161254c34eb3cb037fe..a1122f7b3586893902217e74a62db6b97fbf5dd8 100644 (file)
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -100,3 +100,10 @@ power-supply-cros_charge-control-allow-start_threshold-end_threshold.patch
  power-supply-cros_charge-control-hide-start-threshold-on-v2-cmd.patch
  power-supply-gpio-charger-fix-set-charge-current-limits.patch
  btrfs-fix-race-with-memory-mapped-writes-when-activating-swap-file.patch
+btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch
+btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch
+btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch
+btrfs-sysfs-fix-direct-super-block-member-reads.patch
+btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch
+btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch
+btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 30 Dec 2024 08:50:08 +0000 (09:50 +0100)
queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch	[new file with mode: 0644]	patch \| blob
queue-6.12/series		patch \| blob \| blame \| history