From: Greg Kroah-Hartman Date: Mon, 30 Dec 2024 08:50:08 +0000 (+0100) Subject: 6.12-stable patches X-Git-Tag: v6.1.123~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5812a07bc1c77fccd07dc519f91d3dbb91c92985;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch btrfs-sysfs-fix-direct-super-block-member-reads.patch --- diff --git a/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch b/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch new file mode 100644 index 00000000000..7aae615c035 --- /dev/null +++ b/queue-6.12/btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch @@ -0,0 +1,34 @@ +From 2c8507c63f5498d4ee4af404a8e44ceae4345056 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 9 Dec 2024 16:43:44 +0000 +Subject: btrfs: avoid monopolizing a core when activating a swap file + +From: Filipe Manana + +commit 2c8507c63f5498d4ee4af404a8e44ceae4345056 upstream. + +During swap activation we iterate over the extents of a file and we can +have many thousands of them, so we can end up in a busy loop monopolizing +a core. Avoid this by doing a voluntary reschedule after processing each +extent. 
+ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -7117,6 +7117,8 @@ noinline int can_nocow_extent(struct ino + ret = -EAGAIN; + goto out; + } ++ ++ cond_resched(); + } + + if (file_extent) diff --git a/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch new file mode 100644 index 00000000000..8dbd669eb44 --- /dev/null +++ b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch @@ -0,0 +1,44 @@ +From 0fba7be1ca6df2881e68386e5575fe096f33c4ca Mon Sep 17 00:00:00 2001 +From: Boris Burkov +Date: Fri, 13 Dec 2024 12:33:22 -0800 +Subject: btrfs: check folio mapping after unlock in put_file_data() + +From: Boris Burkov + +commit 0fba7be1ca6df2881e68386e5575fe096f33c4ca upstream. + +When we call btrfs_read_folio() we get an unlocked folio, so it is possible +for a different thread to concurrently modify folio->mapping. We must +check that this hasn't happened once we do have the lock. 
+ +CC: stable@vger.kernel.org # 6.12+ +Reviewed-by: Qu Wenruo +Signed-off-by: Boris Burkov +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/send.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -5291,6 +5291,7 @@ static int put_file_data(struct send_ctx + unsigned cur_len = min_t(unsigned, len, + PAGE_SIZE - pg_offset); + ++again: + folio = filemap_lock_folio(mapping, index); + if (IS_ERR(folio)) { + page_cache_sync_readahead(mapping, +@@ -5323,6 +5324,11 @@ static int put_file_data(struct send_ctx + ret = -EIO; + break; + } ++ if (folio->mapping != mapping) { ++ folio_unlock(folio); ++ folio_put(folio); ++ goto again; ++ } + } + + memcpy_from_folio(sctx->send_buf + sctx->send_size, folio, diff --git a/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch new file mode 100644 index 00000000000..9c9c15807dd --- /dev/null +++ b/queue-6.12/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch @@ -0,0 +1,102 @@ +From 3e74859ee35edc33a022c3f3971df066ea0ca6b9 Mon Sep 17 00:00:00 2001 +From: Boris Burkov +Date: Fri, 13 Dec 2024 12:22:32 -0800 +Subject: btrfs: check folio mapping after unlock in relocate_one_folio() + +From: Boris Burkov + +commit 3e74859ee35edc33a022c3f3971df066ea0ca6b9 upstream. + +When we call btrfs_read_folio() to bring a folio uptodate, we unlock the +folio. The result of that is that a different thread can modify the +mapping (like remove it with invalidate) before we call folio_lock(). +This results in an invalid page and we need to try again. 
+ +In particular, if we are relocating concurrently with aborting a +transaction, this can result in a crash like the following: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + PGD 0 P4D 0 + Oops: 0000 [#1] SMP + CPU: 76 PID: 1411631 Comm: kworker/u322:5 + Workqueue: events_unbound btrfs_reclaim_bgs_work + RIP: 0010:set_page_extent_mapped+0x20/0xb0 + RSP: 0018:ffffc900516a7be8 EFLAGS: 00010246 + RAX: ffffea009e851d08 RBX: ffffea009e0b1880 RCX: 0000000000000000 + RDX: 0000000000000000 RSI: ffffc900516a7b90 RDI: ffffea009e0b1880 + RBP: 0000000003573000 R08: 0000000000000001 R09: ffff88c07fd2f3f0 + R10: 0000000000000000 R11: 0000194754b575be R12: 0000000003572000 + R13: 0000000003572fff R14: 0000000000100cca R15: 0000000005582fff + FS: 0000000000000000(0000) GS:ffff88c07fd00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 000000407d00f002 CR4: 00000000007706f0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + + ? __die+0x78/0xc0 + ? page_fault_oops+0x2a8/0x3a0 + ? __switch_to+0x133/0x530 + ? wq_worker_running+0xa/0x40 + ? exc_page_fault+0x63/0x130 + ? asm_exc_page_fault+0x22/0x30 + ? set_page_extent_mapped+0x20/0xb0 + relocate_file_extent_cluster+0x1a7/0x940 + relocate_data_extent+0xaf/0x120 + relocate_block_group+0x20f/0x480 + btrfs_relocate_block_group+0x152/0x320 + btrfs_relocate_chunk+0x3d/0x120 + btrfs_reclaim_bgs_work+0x2ae/0x4e0 + process_scheduled_works+0x184/0x370 + worker_thread+0xc6/0x3e0 + ? blk_add_timer+0xb0/0xb0 + kthread+0xae/0xe0 + ? flush_tlb_kernel_range+0x90/0x90 + ret_from_fork+0x2f/0x40 + ? flush_tlb_kernel_range+0x90/0x90 + ret_from_fork_asm+0x11/0x20 + + +This occurs because cleanup_one_transaction() calls +destroy_delalloc_inodes() which calls invalidate_inode_pages2() which +takes the folio_lock before setting mapping to NULL. 
 We fail to check +this, and subsequently call set_page_extent_mapped(), which assumes that +mapping != NULL (in fact it asserts that in debug mode). + +Note that the "fixes" patch here is not the one that introduced the +race (the very first iteration of this code from 2009) but a more recent +change that made this particular crash happen in practice. + +Fixes: e7f1326cc24e ("btrfs: set page extent mapped after read_folio in relocate_one_page") +CC: stable@vger.kernel.org # 6.1+ +Reviewed-by: Qu Wenruo +Signed-off-by: Boris Burkov +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/relocation.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/fs/btrfs/relocation.c ++++ b/fs/btrfs/relocation.c +@@ -2902,6 +2902,7 @@ static int relocate_one_folio(struct rel + const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags); + + ASSERT(index <= last_index); ++again: + folio = filemap_lock_folio(inode->i_mapping, index); + if (IS_ERR(folio)) { + +@@ -2937,6 +2938,11 @@ static int relocate_one_folio(struct rel + ret = -EIO; + goto release_folio; + } ++ if (folio->mapping != inode->i_mapping) { ++ folio_unlock(folio); ++ folio_put(folio); ++ goto again; ++ } + } + + /* diff --git a/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch b/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch new file mode 100644 index 00000000000..f2907aac115 --- /dev/null +++ b/queue-6.12/btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch @@ -0,0 +1,333 @@ +From 03018e5d8508254534511d40fb57bc150e6a87f2 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 9 Dec 2024 12:54:14 +0000 +Subject: btrfs: fix swap file activation failure due to extents that used to be shared + +From: Filipe Manana + +commit 03018e5d8508254534511d40fb57bc150e6a87f2 upstream. 
+ +When activating a swap file, to determine if an extent is shared we use +can_nocow_extent(), which ends up at btrfs_cross_ref_exist(). That helper +is meant to be quick because it's used in the NOCOW write path, when +flushing delalloc and when doing a direct IO write, however it does return +some false positives, meaning it may indicate that an extent is shared +even if it's no longer the case. For the write path this is fine, we just +do an unnecessary COW operation instead of doing a more rigorous check +which would be too heavy (calling btrfs_is_data_extent_shared()). + +However when activating a swap file, the false positives simply result +in a failure, which is confusing for users/applications. One particular +case where this happens is when a data extent only has 1 reference but +that reference is not inlined in the extent item located in the extent +tree - this happens when we create more than 33 references for an extent +and then delete those 33 references plus every other non-inline reference +except one. The function check_committed_ref() assumes that if the size +of an extent item doesn't match the size of struct btrfs_extent_item +plus the size of an inline reference (plus an owner reference in case +simple quotas are enabled), then the extent is shared - that is not the +case however, we can have a single reference but it's not inlined - the +reason we do this is to be fast and avoid inspecting non-inline references +which may be located in another leaf of the extent tree, slowing down +write paths. + +The following test script reproduces the bug: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdi + MNT=/mnt/sdi + NUM_CLONES=50 + + umount $DEV &> /dev/null + + run_test() + { + local sync_after_add_reflinks=$1 + local sync_after_remove_reflinks=$2 + + mkfs.btrfs -f $DEV > /dev/null + #mkfs.xfs -f $DEV > /dev/null + mount $DEV $MNT + + touch $MNT/foo + chmod 0600 $MNT/foo + # On btrfs the file must be NOCOW. 
+ chattr +C $MNT/foo &> /dev/null + xfs_io -s -c "pwrite -b 1M 0 1M" $MNT/foo + mkswap $MNT/foo + + for ((i = 1; i <= $NUM_CLONES; i++)); do + touch $MNT/foo_clone_$i + chmod 0600 $MNT/foo_clone_$i + # On btrfs the file must be NOCOW. + chattr +C $MNT/foo_clone_$i &> /dev/null + cp --reflink=always $MNT/foo $MNT/foo_clone_$i + done + + if [ $sync_after_add_reflinks -ne 0 ]; then + # Flush delayed refs and commit current transaction. + sync -f $MNT + fi + + # Remove the original file and all clones except the last. + rm -f $MNT/foo + for ((i = 1; i < $NUM_CLONES; i++)); do + rm -f $MNT/foo_clone_$i + done + + if [ $sync_after_remove_reflinks -ne 0 ]; then + # Flush delayed refs and commit current transaction. + sync -f $MNT + fi + + # Now use the last clone as a swap file. It should work since + # its extent are not shared anymore. + swapon $MNT/foo_clone_${NUM_CLONES} + swapoff $MNT/foo_clone_${NUM_CLONES} + + umount $MNT + } + + echo -e "\nTest without sync after creating and removing clones" + run_test 0 0 + + echo -e "\nTest with sync after creating clones" + run_test 1 0 + + echo -e "\nTest with sync after removing clones" + run_test 0 1 + + echo -e "\nTest with sync after creating and removing clones" + run_test 1 1 + +Running the test: + + $ ./test.sh + Test without sync after creating and removing clones + wrote 1048576/1048576 bytes at offset 0 + 1 MiB, 1 ops; 0.0017 sec (556.793 MiB/sec and 556.7929 ops/sec) + Setting up swapspace version 1, size = 1020 KiB (1044480 bytes) + no label, UUID=a6b9c29e-5ef4-4689-a8ac-bc199c750f02 + swapon: /mnt/sdi/foo_clone_50: swapon failed: Invalid argument + swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument + + Test with sync after creating clones + wrote 1048576/1048576 bytes at offset 0 + 1 MiB, 1 ops; 0.0036 sec (271.739 MiB/sec and 271.7391 ops/sec) + Setting up swapspace version 1, size = 1020 KiB (1044480 bytes) + no label, UUID=5e9008d6-1f7a-4948-a1b4-3f30aba20a33 + swapon: /mnt/sdi/foo_clone_50: 
swapon failed: Invalid argument + swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument + + Test with sync after removing clones + wrote 1048576/1048576 bytes at offset 0 + 1 MiB, 1 ops; 0.0103 sec (96.665 MiB/sec and 96.6651 ops/sec) + Setting up swapspace version 1, size = 1020 KiB (1044480 bytes) + no label, UUID=916c2740-fa9f-4385-9f06-29c3f89e4764 + + Test with sync after creating and removing clones + wrote 1048576/1048576 bytes at offset 0 + 1 MiB, 1 ops; 0.0031 sec (314.268 MiB/sec and 314.2678 ops/sec) + Setting up swapspace version 1, size = 1020 KiB (1044480 bytes) + no label, UUID=06aab1dd-4d90-49c0-bd9f-3a8db4e2f912 + swapon: /mnt/sdi/foo_clone_50: swapon failed: Invalid argument + swapoff: /mnt/sdi/foo_clone_50: swapoff failed: Invalid argument + +Fix this by reworking btrfs_swap_activate() to instead of using extent +maps and checking for shared extents with can_nocow_extent(), iterate +over the inode's file extent items and use the accurate +btrfs_is_data_extent_shared(). 
+ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 102 ++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 72 insertions(+), 30 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9782,15 +9782,16 @@ static int btrfs_swap_activate(struct sw + struct btrfs_fs_info *fs_info = root->fs_info; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_state *cached_state = NULL; +- struct extent_map *em = NULL; + struct btrfs_chunk_map *map = NULL; + struct btrfs_device *device = NULL; + struct btrfs_swap_info bsi = { + .lowest_ppage = (sector_t)-1ULL, + }; ++ struct btrfs_backref_share_check_ctx *backref_ctx = NULL; ++ struct btrfs_path *path = NULL; + int ret = 0; + u64 isize; +- u64 start; ++ u64 prev_extent_end = 0; + + /* + * Acquire the inode's mmap lock to prevent races with memory mapped +@@ -9829,6 +9830,13 @@ static int btrfs_swap_activate(struct sw + goto out_unlock_mmap; + } + ++ path = btrfs_alloc_path(); ++ backref_ctx = btrfs_alloc_backref_share_check_ctx(); ++ if (!path || !backref_ctx) { ++ ret = -ENOMEM; ++ goto out_unlock_mmap; ++ } ++ + /* + * Balance or device remove/replace/resize can move stuff around from + * under us. 
The exclop protection makes sure they aren't running/won't +@@ -9887,24 +9895,39 @@ static int btrfs_swap_activate(struct sw + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); + + lock_extent(io_tree, 0, isize - 1, &cached_state); +- start = 0; +- while (start < isize) { +- u64 logical_block_start, physical_block_start; ++ while (prev_extent_end < isize) { ++ struct btrfs_key key; ++ struct extent_buffer *leaf; ++ struct btrfs_file_extent_item *ei; + struct btrfs_block_group *bg; +- u64 len = isize - start; +- +- em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len); +- if (IS_ERR(em)) { +- ret = PTR_ERR(em); +- goto out; +- } +- +- if (em->disk_bytenr == EXTENT_MAP_HOLE) { ++ u64 logical_block_start; ++ u64 physical_block_start; ++ u64 extent_gen; ++ u64 disk_bytenr; ++ u64 len; ++ ++ key.objectid = btrfs_ino(BTRFS_I(inode)); ++ key.type = BTRFS_EXTENT_DATA_KEY; ++ key.offset = prev_extent_end; ++ ++ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); ++ if (ret < 0) ++ goto out; ++ ++ /* ++ * If key not found it means we have an implicit hole (NO_HOLES ++ * is enabled). 
++ */ ++ if (ret > 0) { + btrfs_warn(fs_info, "swapfile must not have holes"); + ret = -EINVAL; + goto out; + } +- if (em->disk_bytenr == EXTENT_MAP_INLINE) { ++ ++ leaf = path->nodes[0]; ++ ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); ++ ++ if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) { + /* + * It's unlikely we'll ever actually find ourselves + * here, as a file small enough to fit inline won't be +@@ -9916,23 +9939,45 @@ static int btrfs_swap_activate(struct sw + ret = -EINVAL; + goto out; + } +- if (extent_map_is_compressed(em)) { ++ ++ if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) { + btrfs_warn(fs_info, "swapfile must not be compressed"); + ret = -EINVAL; + goto out; + } + +- logical_block_start = extent_map_block_start(em) + (start - em->start); +- len = min(len, em->len - (start - em->start)); +- free_extent_map(em); +- em = NULL; ++ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei); ++ if (disk_bytenr == 0) { ++ btrfs_warn(fs_info, "swapfile must not have holes"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei); ++ extent_gen = btrfs_file_extent_generation(leaf, ei); ++ prev_extent_end = btrfs_file_extent_end(path); ++ ++ if (prev_extent_end > isize) ++ len = isize - key.offset; ++ else ++ len = btrfs_file_extent_num_bytes(leaf, ei); ++ ++ backref_ctx->curr_leaf_bytenr = leaf->start; ++ ++ /* ++ * Don't need the path anymore, release to avoid deadlocks when ++ * calling btrfs_is_data_extent_shared() because when joining a ++ * transaction it can block waiting for the current one's commit ++ * which in turn may be trying to lock the same leaf to flush ++ * delayed items for example. 
++ */ ++ btrfs_release_path(path); + +- ret = can_nocow_extent(inode, start, &len, NULL, false, true); ++ ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr, ++ extent_gen, backref_ctx); + if (ret < 0) { + goto out; +- } else if (ret) { +- ret = 0; +- } else { ++ } else if (ret > 0) { + btrfs_warn(fs_info, + "swapfile must not be copy-on-write"); + ret = -EINVAL; +@@ -9967,7 +10012,6 @@ static int btrfs_swap_activate(struct sw + + physical_block_start = (map->stripes[0].physical + + (logical_block_start - map->start)); +- len = min(len, map->chunk_len - (logical_block_start - map->start)); + btrfs_free_chunk_map(map); + map = NULL; + +@@ -10008,20 +10052,16 @@ static int btrfs_swap_activate(struct sw + if (ret) + goto out; + } +- bsi.start = start; ++ bsi.start = key.offset; + bsi.block_start = physical_block_start; + bsi.block_len = len; + } +- +- start += len; + } + + if (bsi.block_len) + ret = btrfs_add_swap_extent(sis, &bsi); + + out: +- if (!IS_ERR_OR_NULL(em)) +- free_extent_map(em); + if (!IS_ERR_OR_NULL(map)) + btrfs_free_chunk_map(map); + +@@ -10036,6 +10076,8 @@ out: + + out_unlock_mmap: + up_write(&BTRFS_I(inode)->i_mmap_lock); ++ btrfs_free_backref_share_ctx(backref_ctx); ++ btrfs_free_path(path); + if (ret) + return ret; + diff --git a/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch b/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch new file mode 100644 index 00000000000..404981d34da --- /dev/null +++ b/queue-6.12/btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch @@ -0,0 +1,100 @@ +From f2363e6fcc7938c5f0f6ac066fad0dd247598b51 Mon Sep 17 00:00:00 2001 +From: Julian Sun +Date: Wed, 11 Dec 2024 19:13:15 +0800 +Subject: btrfs: fix transaction atomicity bug when enabling simple quotas + +From: Julian Sun + +commit f2363e6fcc7938c5f0f6ac066fad0dd247598b51 upstream. + +Set squota incompat bit before committing the transaction that enables +the feature. 
+ +With the config CONFIG_BTRFS_ASSERT enabled, an assertion +failure occurs regarding the simple quota feature. + + [5.596534] assertion failed: btrfs_fs_incompat(fs_info, SIMPLE_QUOTA), in fs/btrfs/qgroup.c:365 + [5.597098] ------------[ cut here ]------------ + [5.597371] kernel BUG at fs/btrfs/qgroup.c:365! + [5.597946] CPU: 1 UID: 0 PID: 268 Comm: mount Not tainted 6.13.0-rc2-00031-gf92f4749861b #146 + [5.598450] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 + [5.599008] RIP: 0010:btrfs_read_qgroup_config+0x74d/0x7a0 + [5.604303] + [5.605230] ? btrfs_read_qgroup_config+0x74d/0x7a0 + [5.605538] ? exc_invalid_op+0x56/0x70 + [5.605775] ? btrfs_read_qgroup_config+0x74d/0x7a0 + [5.606066] ? asm_exc_invalid_op+0x1f/0x30 + [5.606441] ? btrfs_read_qgroup_config+0x74d/0x7a0 + [5.606741] ? btrfs_read_qgroup_config+0x74d/0x7a0 + [5.607038] ? try_to_wake_up+0x317/0x760 + [5.607286] open_ctree+0xd9c/0x1710 + [5.607509] btrfs_get_tree+0x58a/0x7e0 + [5.608002] vfs_get_tree+0x2e/0x100 + [5.608224] fc_mount+0x16/0x60 + [5.608420] btrfs_get_tree+0x2f8/0x7e0 + [5.608897] vfs_get_tree+0x2e/0x100 + [5.609121] path_mount+0x4c8/0xbc0 + [5.609538] __x64_sys_mount+0x10d/0x150 + +The issue can be easily reproduced using the following reproducer: + + root@q:linux# cat repro.sh + set -e + + mkfs.btrfs -q -f /dev/sdb + mount /dev/sdb /mnt/btrfs + btrfs quota enable -s /mnt/btrfs + umount /mnt/btrfs + mount /dev/sdb /mnt/btrfs + +The issue is that when enabling quotas, at btrfs_quota_enable(), we set +BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE at fs_info->qgroup_flags and persist +it in the quota root in the item with the key BTRFS_QGROUP_STATUS_KEY, but +we only set the incompat bit BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA after we +commit the transaction used to enable simple quotas. 
+ +This means that if after that transaction commit we unmount the filesystem +without starting and committing any other transaction, or we have a power +failure, the next time we mount the filesystem we will find the flag +BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE set in the item with the key +BTRFS_QGROUP_STATUS_KEY but we will not find the incompat bit +BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA set in the superblock, triggering an +assertion failure at: + + btrfs_read_qgroup_config() -> qgroup_read_enable_gen() + +To fix this issue, set the BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA flag +immediately after setting the BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE. +This ensures that both flags are flushed to disk within the same +transaction. + +Fixes: 182940f4f4db ("btrfs: qgroup: add new quota mode for simple quotas") +CC: stable@vger.kernel.org # 6.6+ +Reviewed-by: Filipe Manana +Signed-off-by: Julian Sun +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/qgroup.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -1122,6 +1122,7 @@ int btrfs_quota_enable(struct btrfs_fs_i + fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON; + if (simple) { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE; ++ btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA); + btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid); + } else { + fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; +@@ -1255,8 +1256,6 @@ out_add_root: + spin_lock(&fs_info->qgroup_lock); + fs_info->quota_root = quota_root; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); +- if (simple) +- btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA); + spin_unlock(&fs_info->qgroup_lock); + + /* Skip rescan for simple qgroups. 
 */ diff --git a/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch b/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch new file mode 100644 index 00000000000..aa6b4b154b8 --- /dev/null +++ b/queue-6.12/btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch @@ -0,0 +1,73 @@ +From 44f52bbe96dfdbe4aca3818a2534520082a07040 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Wed, 11 Dec 2024 16:08:07 +0000 +Subject: btrfs: fix use-after-free when COWing tree bock and tracing is enabled + +From: Filipe Manana + +commit 44f52bbe96dfdbe4aca3818a2534520082a07040 upstream. + +When COWing a tree block, at btrfs_cow_block(), and we have the +tracepoint trace_btrfs_cow_block() enabled and preemption is also enabled +(CONFIG_PREEMPT=y), we can trigger a use-after-free in the COWed extent +buffer while inside the tracepoint code. This is because in some paths +that call btrfs_cow_block(), such as btrfs_search_slot(), we are holding +the last reference on the extent buffer @buf so btrfs_force_cow_block() +drops the last reference on the @buf extent buffer when it calls +free_extent_buffer_stale(buf), which schedules the release of the extent +buffer with RCU. This means that if we are on a kernel with preemption, +the current task may be preempted before calling trace_btrfs_cow_block() +and the extent buffer already released by the time trace_btrfs_cow_block() +is called, resulting in a use-after-free. + +Fix this by moving the trace_btrfs_cow_block() from btrfs_cow_block() to +btrfs_force_cow_block() before the COWed extent buffer is freed. +This also has a side effect of invoking the tracepoint in the tree defrag +code, at defrag.c:btrfs_realloc_node(), since btrfs_force_cow_block() is +called there, but this is fine and it was actually missing there. 
+ +Reported-by: syzbot+8517da8635307182c8a5@syzkaller.appspotmail.com +Link: https://lore.kernel.org/linux-btrfs/6759a9b9.050a0220.1ac542.000d.GAE@google.com/ +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.c | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -654,6 +654,8 @@ int btrfs_force_cow_block(struct btrfs_t + goto error_unlock_cow; + } + } ++ ++ trace_btrfs_cow_block(root, buf, cow); + if (unlock_orig) + btrfs_tree_unlock(buf); + free_extent_buffer_stale(buf); +@@ -710,7 +712,6 @@ int btrfs_cow_block(struct btrfs_trans_h + { + struct btrfs_fs_info *fs_info = root->fs_info; + u64 search_start; +- int ret; + + if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { + btrfs_abort_transaction(trans, -EUCLEAN); +@@ -751,12 +752,8 @@ int btrfs_cow_block(struct btrfs_trans_h + * Also We don't care about the error, as it's handled internally. 
+ */ + btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); +- ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot, +- cow_ret, search_start, 0, nest); +- +- trace_btrfs_cow_block(root, buf, *cow_ret); +- +- return ret; ++ return btrfs_force_cow_block(trans, root, buf, parent, parent_slot, ++ cow_ret, search_start, 0, nest); + } + ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO); + diff --git a/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch b/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch new file mode 100644 index 00000000000..8aee80b8d8a --- /dev/null +++ b/queue-6.12/btrfs-sysfs-fix-direct-super-block-member-reads.patch @@ -0,0 +1,63 @@ +From fca432e73db2bec0fdbfbf6d98d3ebcd5388a977 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Wed, 18 Dec 2024 17:00:56 +1030 +Subject: btrfs: sysfs: fix direct super block member reads + +From: Qu Wenruo + +commit fca432e73db2bec0fdbfbf6d98d3ebcd5388a977 upstream. + +The following sysfs entries read super block members directly, +which can have a different endianness and produce wrong values: + +- sys/fs/btrfs/<uuid>/nodesize +- sys/fs/btrfs/<uuid>/sectorsize +- sys/fs/btrfs/<uuid>/clone_alignment + +Thankfully those values (nodesize and sectorsize) are always aligned +inside the btrfs_super_block, so it won't trigger unaligned read errors, +just endianness problems. + +Fix them by using the native cached members instead. 
+ +Fixes: df93589a1737 ("btrfs: export more from FS_INFO to sysfs") +CC: stable@vger.kernel.org +Reviewed-by: Naohiro Aota +Reviewed-by: Johannes Thumshirn +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/sysfs.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -1118,7 +1118,7 @@ static ssize_t btrfs_nodesize_show(struc + { + struct btrfs_fs_info *fs_info = to_fs_info(kobj); + +- return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize); ++ return sysfs_emit(buf, "%u\n", fs_info->nodesize); + } + + BTRFS_ATTR(, nodesize, btrfs_nodesize_show); +@@ -1128,7 +1128,7 @@ static ssize_t btrfs_sectorsize_show(str + { + struct btrfs_fs_info *fs_info = to_fs_info(kobj); + +- return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize); ++ return sysfs_emit(buf, "%u\n", fs_info->sectorsize); + } + + BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show); +@@ -1180,7 +1180,7 @@ static ssize_t btrfs_clone_alignment_sho + { + struct btrfs_fs_info *fs_info = to_fs_info(kobj); + +- return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize); ++ return sysfs_emit(buf, "%u\n", fs_info->sectorsize); + } + + BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show); diff --git a/queue-6.12/series b/queue-6.12/series index 31ab97e1fb3..a1122f7b358 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -100,3 +100,10 @@ power-supply-cros_charge-control-allow-start_threshold-end_threshold.patch power-supply-cros_charge-control-hide-start-threshold-on-v2-cmd.patch power-supply-gpio-charger-fix-set-charge-current-limits.patch btrfs-fix-race-with-memory-mapped-writes-when-activating-swap-file.patch +btrfs-avoid-monopolizing-a-core-when-activating-a-swap-file.patch +btrfs-fix-swap-file-activation-failure-due-to-extents-that-used-to-be-shared.patch +btrfs-fix-transaction-atomicity-bug-when-enabling-simple-quotas.patch 
+btrfs-sysfs-fix-direct-super-block-member-reads.patch +btrfs-fix-use-after-free-when-cowing-tree-bock-and-tracing-is-enabled.patch +btrfs-check-folio-mapping-after-unlock-in-put_file_data.patch +btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch