From: Greg Kroah-Hartman Date: Mon, 11 Apr 2022 07:56:42 +0000 (+0200) Subject: 5.16-stable patches X-Git-Tag: v4.9.310~76 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=448ff4058e615a1f31dc6708c7e4f0cf0f266ff5;p=thirdparty%2Fkernel%2Fstable-queue.git 5.16-stable patches added patches: btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch io_uring-fix-race-between-timeout-flush-and-removal.patch io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch qed-fix-ethtool-register-dump.patch spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch x86-pm-save-the-msr-validity-status-at-context-setup.patch x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch --- diff --git a/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch b/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch new file mode 100644 index 00000000000..b27eaf06d8e --- /dev/null +++ b/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch @@ -0,0 +1,107 @@ +From 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 15 Mar 2022 19:28:05 +0800 +Subject: btrfs: avoid defragging extents whose next extents are not targets + +From: Qu Wenruo + +commit 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a upstream. 
+ +[BUG] +There is a report that autodefrag is defragging a single sector, which +is a complete waste of IO, and no help for defragging: + + btrfs-cleaner-808 defrag_one_locked_range: root=256 ino=651122 start=0 len=4096 + +[CAUSE] +In defrag_collect_targets(), we check if the current range (A) can be merged +with the next one (B). + +If mergeable, we will add range A into the target for defrag. + +However there is a catch for autodefrag: when checking mergeability +against range B, we intentionally pass 0 as @newer_than, hoping to get a +higher chance to merge with the next extent. + +But in the next iteration, range B will be looked up by defrag_lookup_extent(), +with non-zero @newer_than. + +And if range B is not really newer, it will be rejected directly, causing +only range A to be defragged, while we expect to defrag both range A and +B. + +[FIX] +Since the root cause is the difference in check condition of +defrag_check_next_extent() and defrag_collect_targets(), we fix it by: + +1. Pass @newer_than to defrag_check_next_extent() +2. Pass @extent_thresh to defrag_check_next_extent() + +This makes the check between defrag_collect_targets() and +defrag_check_next_extent() more consistent. + +While there is still some minor difference, the remaining checks are +focused on runtime flags like writeback/delalloc, which are mostly +transient and safe to be checked only in defrag_collect_targets(). 
+ +Link: https://github.com/btrfs/linux/issues/423#issuecomment-1066981856 +CC: stable@vger.kernel.org # 5.16+ +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ioctl.c | 20 ++++++++++++++------ + 1 file changed, 14 insertions(+), 6 deletions(-) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1189,7 +1189,7 @@ static u32 get_extent_max_capacity(const + } + + static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, +- bool locked) ++ u32 extent_thresh, u64 newer_than, bool locked) + { + struct extent_map *next; + bool ret = false; +@@ -1199,11 +1199,12 @@ static bool defrag_check_next_extent(str + return false; + + /* +- * We want to check if the next extent can be merged with the current +- * one, which can be an extent created in a past generation, so we pass +- * a minimum generation of 0 to defrag_lookup_extent(). ++ * Here we need to pass @newer_then when checking the next extent, or ++ * we will hit a case we mark current extent for defrag, but the next ++ * one will not be a target. ++ * This will just cause extra IO without really reducing the fragments. 
+ */ +- next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); ++ next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked); + /* No more em or hole */ + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + goto out; +@@ -1215,6 +1216,13 @@ static bool defrag_check_next_extent(str + */ + if (next->len >= get_extent_max_capacity(em)) + goto out; ++ /* Skip older extent */ ++ if (next->generation < newer_than) ++ goto out; ++ /* Also check extent size */ ++ if (next->len >= extent_thresh) ++ goto out; ++ + ret = true; + out: + free_extent_map(next); +@@ -1420,7 +1428,7 @@ static int defrag_collect_targets(struct + goto next; + + next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, +- locked); ++ extent_thresh, newer_than, locked); + if (!next_mergeable) { + struct defrag_target_range *last; + diff --git a/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch b/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch new file mode 100644 index 00000000000..d6394647760 --- /dev/null +++ b/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch @@ -0,0 +1,93 @@ +From b642b52d0b50f4d398cb4293f64992d0eed2e2ce Mon Sep 17 00:00:00 2001 +From: Ethan Lien +Date: Mon, 7 Mar 2022 18:00:04 +0800 +Subject: btrfs: fix qgroup reserve overflow the qgroup limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ethan Lien + +commit b642b52d0b50f4d398cb4293f64992d0eed2e2ce upstream. + +We use extent_changeset->bytes_changed in qgroup_reserve_data() to record +how many bytes we set for EXTENT_QGROUP_RESERVED state. Currently the +bytes_changed is set as "unsigned int", and it will overflow if we try to +fallocate a range larger than 4GiB. The result is we reserve less bytes +and eventually break the qgroup limit. + +Unlike regular buffered/direct write, which we use one changeset for +each ordered extent, which can never be larger than 256M. 
For +fallocate, we use one changeset for the whole range, thus it no longer +respects the 256M per extent limit, and caused the problem. + +The following example test script reproduces the problem: + + $ cat qgroup-overflow.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV + mount $DEV $MNT + + # Set qgroup limit to 2GiB. + btrfs quota enable $MNT + btrfs qgroup limit 2G $MNT + + # Try to fallocate a 3GiB file. This should fail. + echo + echo "Try to fallocate a 3GiB file..." + fallocate -l 3G $MNT/3G.file + + # Try to fallocate a 5GiB file. + echo + echo "Try to fallocate a 5GiB file..." + fallocate -l 5G $MNT/5G.file + + # See we break the qgroup limit. + echo + sync + btrfs qgroup show -r $MNT + + umount $MNT + +When running the test: + + $ ./qgroup-overflow.sh + (...) + + Try to fallocate a 3GiB file... + fallocate: fallocate failed: Disk quota exceeded + + Try to fallocate a 5GiB file... + + qgroupid         rfer         excl     max_rfer + --------         ----         ----     -------- + 0/5           5.00GiB      5.00GiB      2.00GiB + +Since we have no control of how bytes_changed is used, it's better to +set it to u64. 
+ +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Qu Wenruo +Signed-off-by: Ethan Lien +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/btrfs/extent_io.h ++++ b/fs/btrfs/extent_io.h +@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl { + */ + struct extent_changeset { + /* How many bytes are set/cleared in this operation */ +- unsigned int bytes_changed; ++ u64 bytes_changed; + + /* Changed ranges */ + struct ulist range_changed; diff --git a/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch b/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch new file mode 100644 index 00000000000..55df19f0f38 --- /dev/null +++ b/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch @@ -0,0 +1,91 @@ +From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001 +From: Kaiwen Hu +Date: Wed, 23 Mar 2022 15:10:32 +0800 +Subject: btrfs: prevent subvol with swapfile from being deleted + +From: Kaiwen Hu + +commit 60021bd754c6ca0addc6817994f20290a321d8d6 upstream. + +A subvolume with an active swapfile must not be deleted otherwise it +would not be possible to deactivate it. + +After the subvolume is deleted, we cannot swapoff the swapfile in this +deleted subvolume because the path is unreachable. The swapfile is +still active and holding references, the filesystem cannot be unmounted. + +The test looks like this: + + mkfs.btrfs -f $dev > /dev/null + mount $dev $mnt + + btrfs sub create $mnt/subvol + touch $mnt/subvol/swapfile + chmod 600 $mnt/subvol/swapfile + chattr +C $mnt/subvol/swapfile + dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096 + mkswap $mnt/subvol/swapfile + swapon $mnt/subvol/swapfile + + btrfs sub delete $mnt/subvol + swapoff $mnt/subvol/swapfile # failed: No such file or directory + swapoff --all + + unmount $mnt # target is busy. 
+ +To prevent above issue, we simply check that whether the subvolume +contains any active swapfile, and stop the deleting process. This +behavior is like snapshot ioctl dealing with a swapfile. + +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Robbie Ko +Reviewed-by: Qu Wenruo +Reviewed-by: Filipe Manana +Signed-off-by: Kaiwen Hu +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/inode.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4462,6 +4462,13 @@ int btrfs_delete_subvolume(struct inode + dest->root_key.objectid); + return -EPERM; + } ++ if (atomic_read(&dest->nr_swapfiles)) { ++ spin_unlock(&dest->root_item_lock); ++ btrfs_warn(fs_info, ++ "attempt to delete subvolume %llu with active swapfile", ++ root->root_key.objectid); ++ return -EPERM; ++ } + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, + root_flags | BTRFS_ROOT_SUBVOL_DEAD); +@@ -10764,8 +10771,23 @@ static int btrfs_swap_activate(struct sw + * set. We use this counter to prevent snapshots. We must increment it + * before walking the extents because we don't want a concurrent + * snapshot to run after we've already checked the extents. +- */ ++ * ++ * It is possible that subvolume is marked for deletion but still not ++ * removed yet. To prevent this race, we check the root status before ++ * activating the swapfile. 
++ */ ++ spin_lock(&root->root_item_lock); ++ if (btrfs_root_dead(root)) { ++ spin_unlock(&root->root_item_lock); ++ ++ btrfs_exclop_finish(fs_info); ++ btrfs_warn(fs_info, ++ "cannot activate swapfile because subvolume %llu is being deleted", ++ root->root_key.objectid); ++ return -EPERM; ++ } + atomic_inc(&root->nr_swapfiles); ++ spin_unlock(&root->root_item_lock); + + isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); + diff --git a/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch b/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch new file mode 100644 index 00000000000..553899409b6 --- /dev/null +++ b/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch @@ -0,0 +1,216 @@ +From bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 Mon Sep 17 00:00:00 2001 +From: Qu Wenruo +Date: Tue, 8 Mar 2022 13:36:38 +0800 +Subject: btrfs: remove device item and update super block in the same transaction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Qu Wenruo + +commit bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 upstream. + +[BUG] +There is a report that a btrfs has a bad super block num devices. + +This makes btrfs to reject the fs completely. + + BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here + BTRFS error (device sdd3): failed to read chunk tree: -22 + BTRFS error (device sdd3): open_ctree failed + +[CAUSE] +During btrfs device removal, chunk tree and super block num devs are +updated in two different transactions: + + btrfs_rm_device() + |- btrfs_rm_dev_item(device) + | |- trans = btrfs_start_transaction() + | | Now we got transaction X + | | + | |- btrfs_del_item() + | | Now device item is removed from chunk tree + | | + | |- btrfs_commit_transaction() + | Transaction X got committed, super num devs untouched, + | but device item removed from chunk tree. 
+ | (AKA, super num devs is already incorrect) + | + |- cur_devices->num_devices--; + |- cur_devices->total_devices--; + |- btrfs_set_super_num_devices() + All those operations are not in transaction X, thus it will + only be written back to disk in the next transaction. + +So after transaction X in btrfs_rm_dev_item() is committed, but before +transaction X+1 (which can be minutes away), if a power loss happens, then +we get the super num mismatch. + +[FIX] +Instead of starting and committing a transaction inside +btrfs_rm_dev_item(), start a transaction inside btrfs_rm_device() and +pass it to btrfs_rm_dev_item(). + +And only commit the transaction after everything is done. + +Reported-by: Luca Béla Palkovics +Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/ +CC: stable@vger.kernel.org # 4.14+ +Reviewed-by: Anand Jain +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/volumes.c | 65 ++++++++++++++++++++++------------------------------- + 1 file changed, 28 insertions(+), 37 deletions(-) + +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -1942,23 +1942,18 @@ static void update_dev_time(const char * + path_put(&path); + } + +-static int btrfs_rm_dev_item(struct btrfs_device *device) ++static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans, ++ struct btrfs_device *device) + { + struct btrfs_root *root = device->fs_info->chunk_root; + int ret; + struct btrfs_path *path; + struct btrfs_key key; +- struct btrfs_trans_handle *trans; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + +- trans = btrfs_start_transaction(root, 0); +- if (IS_ERR(trans)) { +- btrfs_free_path(path); +- return PTR_ERR(trans); +- } + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; +@@ -1969,21 +1964,12 @@ static int btrfs_rm_dev_item(struct btrf + if (ret) { + if (ret > 0) + ret = -ENOENT; +- 
btrfs_abort_transaction(trans, ret); +- btrfs_end_transaction(trans); + goto out; + } + + ret = btrfs_del_item(trans, root, path); +- if (ret) { +- btrfs_abort_transaction(trans, ret); +- btrfs_end_transaction(trans); +- } +- + out: + btrfs_free_path(path); +- if (!ret) +- ret = btrfs_commit_transaction(trans); + return ret; + } + +@@ -2124,6 +2110,7 @@ int btrfs_rm_device(struct btrfs_fs_info + struct btrfs_dev_lookup_args *args, + struct block_device **bdev, fmode_t *mode) + { ++ struct btrfs_trans_handle *trans; + struct btrfs_device *device; + struct btrfs_fs_devices *cur_devices; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; +@@ -2139,7 +2126,7 @@ int btrfs_rm_device(struct btrfs_fs_info + + ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); + if (ret) +- goto out; ++ return ret; + + device = btrfs_find_device(fs_info->fs_devices, args); + if (!device) { +@@ -2147,27 +2134,22 @@ int btrfs_rm_device(struct btrfs_fs_info + ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; + else + ret = -ENOENT; +- goto out; ++ return ret; + } + + if (btrfs_pinned_by_swapfile(fs_info, device)) { + btrfs_warn_in_rcu(fs_info, + "cannot remove device %s (devid %llu) due to active swapfile", + rcu_str_deref(device->name), device->devid); +- ret = -ETXTBSY; +- goto out; ++ return -ETXTBSY; + } + +- if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { +- ret = BTRFS_ERROR_DEV_TGT_REPLACE; +- goto out; +- } ++ if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) ++ return BTRFS_ERROR_DEV_TGT_REPLACE; + + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && +- fs_info->fs_devices->rw_devices == 1) { +- ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; +- goto out; +- } ++ fs_info->fs_devices->rw_devices == 1) ++ return BTRFS_ERROR_DEV_ONLY_WRITABLE; + + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { + mutex_lock(&fs_info->chunk_mutex); +@@ -2182,14 +2164,22 @@ int btrfs_rm_device(struct btrfs_fs_info + if (ret) + goto error_undo; + +- 
/* +- * TODO: the superblock still includes this device in its num_devices +- * counter although write_all_supers() is not locked out. This +- * could give a filesystem state which requires a degraded mount. +- */ +- ret = btrfs_rm_dev_item(device); +- if (ret) ++ trans = btrfs_start_transaction(fs_info->chunk_root, 0); ++ if (IS_ERR(trans)) { ++ ret = PTR_ERR(trans); + goto error_undo; ++ } ++ ++ ret = btrfs_rm_dev_item(trans, device); ++ if (ret) { ++ /* Any error in dev item removal is critical */ ++ btrfs_crit(fs_info, ++ "failed to remove device item for devid %llu: %d", ++ device->devid, ret); ++ btrfs_abort_transaction(trans, ret); ++ btrfs_end_transaction(trans); ++ return ret; ++ } + + clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); + btrfs_scrub_cancel_dev(device); +@@ -2272,7 +2262,8 @@ int btrfs_rm_device(struct btrfs_fs_info + free_fs_devices(cur_devices); + } + +-out: ++ ret = btrfs_commit_transaction(trans); ++ + return ret; + + error_undo: +@@ -2284,7 +2275,7 @@ error_undo: + device->fs_devices->rw_devices++; + mutex_unlock(&fs_info->chunk_mutex); + } +- goto out; ++ return ret; + } + + void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) diff --git a/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch b/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch new file mode 100644 index 00000000000..dee0c10f2a4 --- /dev/null +++ b/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch @@ -0,0 +1,151 @@ +From 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 Mon Sep 17 00:00:00 2001 +From: Johannes Thumshirn +Date: Mon, 7 Mar 2022 02:47:17 -0800 +Subject: btrfs: zoned: traverse devices under chunk_mutex in btrfs_can_activate_zone + +From: Johannes Thumshirn + +commit 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 upstream. 
+ +btrfs_can_activate_zone() can be called with the device_list_mutex already +held, which will lead to a deadlock: + +insert_dev_extents() // Takes device_list_mutex +`-> insert_dev_extent() + `-> btrfs_insert_empty_item() + `-> btrfs_insert_empty_items() + `-> btrfs_search_slot() + `-> btrfs_cow_block() + `-> __btrfs_cow_block() + `-> btrfs_alloc_tree_block() + `-> btrfs_reserve_extent() + `-> find_free_extent() + `-> find_free_extent_update_loop() + `-> can_allocate_chunk() + `-> btrfs_can_activate_zone() // Takes device_list_mutex again + +Instead of using the RCU on fs_devices->device_list we +can use fs_devices->alloc_list, protected by the chunk_mutex to traverse +the list of active devices. + +We are in the chunk allocation thread. The newer chunk allocation +happens from the devices in the fs_device->alloc_list protected by the +chunk_mutex. + + btrfs_create_chunk() + lockdep_assert_held(&info->chunk_mutex); + gather_device_info + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) + +Also, a device that reappears after the mount won't join the alloc_list +yet and, it will be in the dev_list, which we don't want to consider in +the context of the chunk alloc. 
+ + [15.166572] WARNING: possible recursive locking detected + [15.167117] 5.17.0-rc6-dennis #79 Not tainted + [15.167487] -------------------------------------------- + [15.167733] kworker/u8:3/146 is trying to acquire lock: + [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: find_free_extent+0x15a/0x14f0 [btrfs] + [15.167733] + [15.167733] but task is already holding lock: + [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs] + [15.167733] + [15.167733] other info that might help us debug this: + [15.167733] Possible unsafe locking scenario: + [15.167733] + [15.171834] CPU0 + [15.171834] ---- + [15.171834] lock(&fs_devs->device_list_mutex); + [15.171834] lock(&fs_devs->device_list_mutex); + [15.171834] + [15.171834] *** DEADLOCK *** + [15.171834] + [15.171834] May be due to missing lock nesting notation + [15.171834] + [15.171834] 5 locks held by kworker/u8:3/146: + [15.171834] #0: ffff888100050938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0 + [15.171834] #1: ffffc9000067be80 ((work_completion)(&fs_info->async_data_reclaim_work)){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0 + [15.176244] #2: ffff88810521e620 (sb_internal){.+.+}-{0:0}, at: flush_space+0x335/0x600 [btrfs] + [15.176244] #3: ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs] + [15.176244] #4: ffff8881152e4b78 (btrfs-dev-00){++++}-{3:3}, at: __btrfs_tree_lock+0x27/0x130 [btrfs] + [15.179641] + [15.179641] stack backtrace: + [15.179641] CPU: 1 PID: 146 Comm: kworker/u8:3 Not tainted 5.17.0-rc6-dennis #79 + [15.179641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014 + [15.179641] Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs] + [15.179641] Call Trace: + [15.179641] + [15.179641] dump_stack_lvl+0x45/0x59 + [15.179641] 
__lock_acquire.cold+0x217/0x2b2 + [15.179641] lock_acquire+0xbf/0x2b0 + [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] + [15.183838] __mutex_lock+0x8e/0x970 + [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] + [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] + [15.183838] ? lock_is_held_type+0xd7/0x130 + [15.183838] ? find_free_extent+0x15a/0x14f0 [btrfs] + [15.183838] find_free_extent+0x15a/0x14f0 [btrfs] + [15.183838] ? _raw_spin_unlock+0x24/0x40 + [15.183838] ? btrfs_get_alloc_profile+0x106/0x230 [btrfs] + [15.187601] btrfs_reserve_extent+0x131/0x260 [btrfs] + [15.187601] btrfs_alloc_tree_block+0xb5/0x3b0 [btrfs] + [15.187601] __btrfs_cow_block+0x138/0x600 [btrfs] + [15.187601] btrfs_cow_block+0x10f/0x230 [btrfs] + [15.187601] btrfs_search_slot+0x55f/0xbc0 [btrfs] + [15.187601] ? lock_is_held_type+0xd7/0x130 + [15.187601] btrfs_insert_empty_items+0x2d/0x60 [btrfs] + [15.187601] btrfs_create_pending_block_groups+0x2b3/0x560 [btrfs] + [15.187601] __btrfs_end_transaction+0x36/0x2a0 [btrfs] + [15.192037] flush_space+0x374/0x600 [btrfs] + [15.192037] ? find_held_lock+0x2b/0x80 + [15.192037] ? btrfs_async_reclaim_data_space+0x49/0x180 [btrfs] + [15.192037] ? 
lock_release+0x131/0x2b0 + [15.192037] btrfs_async_reclaim_data_space+0x70/0x180 [btrfs] + [15.192037] process_one_work+0x24c/0x5a0 + [15.192037] worker_thread+0x4a/0x3d0 + +Fixes: a85f05e59bc1 ("btrfs: zoned: avoid chunk allocation if active block group has enough space") +CC: stable@vger.kernel.org # 5.16+ +Reviewed-by: Anand Jain +Signed-off-by: Johannes Thumshirn +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/zoned.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1936,18 +1936,19 @@ int btrfs_zone_finish(struct btrfs_block + + bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) + { ++ struct btrfs_fs_info *fs_info = fs_devices->fs_info; + struct btrfs_device *device; + bool ret = false; + +- if (!btrfs_is_zoned(fs_devices->fs_info)) ++ if (!btrfs_is_zoned(fs_info)) + return true; + + /* Non-single profiles are not supported yet */ + ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0); + + /* Check if there is a device with active zones left */ +- mutex_lock(&fs_devices->device_list_mutex); +- list_for_each_entry(device, &fs_devices->devices, dev_list) { ++ mutex_lock(&fs_info->chunk_mutex); ++ list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { + struct btrfs_zoned_device_info *zinfo = device->zone_info; + + if (!device->bdev) +@@ -1959,7 +1960,7 @@ bool btrfs_can_activate_zone(struct btrf + break; + } + } +- mutex_unlock(&fs_devices->device_list_mutex); ++ mutex_unlock(&fs_info->chunk_mutex); + + return ret; + } diff --git a/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch b/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch new file mode 100644 index 00000000000..755ef39991a --- /dev/null +++ b/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch @@ -0,0 +1,56 @@ +From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 
2001 +From: Jens Axboe +Date: Fri, 8 Apr 2022 11:08:58 -0600 +Subject: io_uring: fix race between timeout flush and removal + +From: Jens Axboe + +commit e677edbcabee849bfdd43f1602bccbecf736a646 upstream. + +io_flush_timeouts() assumes the timeout isn't in progress of triggering +or being removed/canceled, so it unconditionally removes it from the +timeout list and attempts to cancel it. + +Leave it on the list and let the normal timeout cancelation take care +of it. + +Cc: stable@vger.kernel.org # 5.5+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -1614,12 +1614,11 @@ static __cold void io_flush_timeouts(str + __must_hold(&ctx->completion_lock) + { + u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); ++ struct io_kiocb *req, *tmp; + + spin_lock_irq(&ctx->timeout_lock); +- while (!list_empty(&ctx->timeout_list)) { ++ list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { + u32 events_needed, events_got; +- struct io_kiocb *req = list_first_entry(&ctx->timeout_list, +- struct io_kiocb, timeout.list); + + if (io_is_timeout_noseq(req)) + break; +@@ -1636,7 +1635,6 @@ static __cold void io_flush_timeouts(str + if (events_got < events_needed) + break; + +- list_del_init(&req->timeout.list); + io_kill_timeout(req, 0); + } + ctx->cq_last_tm_flush = seq; +@@ -6223,6 +6221,7 @@ static int io_timeout_prep(struct io_kio + if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) + return -EINVAL; + ++ INIT_LIST_HEAD(&req->timeout.list); + data->mode = io_translate_timeout_mode(flags); + hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); + diff --git a/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch b/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch new file mode 100644 index 00000000000..7e051e56027 --- /dev/null +++ 
b/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch @@ -0,0 +1,39 @@ +From 0f5e4b83b37a96e3643951588ed7176b9b187c0a Mon Sep 17 00:00:00 2001 +From: Eugene Syromiatnikov +Date: Wed, 6 Apr 2022 13:55:33 +0200 +Subject: io_uring: implement compat handling for IORING_REGISTER_IOWQ_AFF + +From: Eugene Syromiatnikov + +commit 0f5e4b83b37a96e3643951588ed7176b9b187c0a upstream. + +Similarly to the way it is done im mbind syscall. + +Cc: stable@vger.kernel.org # 5.14 +Fixes: fe76421d1da1dcdb ("io_uring: allow user configurable IO thread CPU affinity") +Signed-off-by: Eugene Syromiatnikov +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -10799,7 +10799,15 @@ static __cold int io_register_iowq_aff(s + if (len > cpumask_size()) + len = cpumask_size(); + +- if (copy_from_user(new_mask, arg, len)) { ++ if (in_compat_syscall()) { ++ ret = compat_get_bitmap(cpumask_bits(new_mask), ++ (const compat_ulong_t __user *)arg, ++ len * 8 /* CHAR_BIT */); ++ } else { ++ ret = copy_from_user(new_mask, arg, len); ++ } ++ ++ if (ret) { + free_cpumask_var(new_mask); + return -EFAULT; + } diff --git a/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch b/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch new file mode 100644 index 00000000000..584600c7b9e --- /dev/null +++ b/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch @@ -0,0 +1,36 @@ +From e590928de7547454469693da9bc7ffd562e54b7e Mon Sep 17 00:00:00 2001 +From: Kan Liang +Date: Mon, 28 Mar 2022 08:49:03 -0700 +Subject: perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids + +From: Kan Liang + +commit e590928de7547454469693da9bc7ffd562e54b7e upstream. + +On Sapphire Rapids, the FRONTEND_RETIRED.MS_FLOWS event requires the +FRONTEND MSR value 0x8. 
However, the current FRONTEND MSR mask doesn't +support it. + +Update intel_spr_extra_regs[] to support it. + +Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids") +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/1648482543-14923-2-git-send-email-kan.liang@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/events/intel/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -281,7 +281,7 @@ static struct extra_reg intel_spr_extra_ + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), +- INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), ++ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END diff --git a/queue-5.16/qed-fix-ethtool-register-dump.patch b/queue-5.16/qed-fix-ethtool-register-dump.patch new file mode 100644 index 00000000000..ae21eafe9a2 --- /dev/null +++ b/queue-5.16/qed-fix-ethtool-register-dump.patch @@ -0,0 +1,45 @@ +From 20921c0c86092b4082c91bd7c88305da74e5520b Mon Sep 17 00:00:00 2001 +From: Manish Chopra +Date: Fri, 1 Apr 2022 11:53:04 -0700 +Subject: qed: fix ethtool register dump + +From: Manish Chopra + +commit 20921c0c86092b4082c91bd7c88305da74e5520b upstream. 
+ +To fix a Coverity complaint, commit d5ac07dfbd2b +("qed: Initialize debug string array") removed "sw-platform" +(one of the common global parameters) from the dump as this +was used in the dump with an uninitialized string, however +it did not reduce the number of common global parameters, +which caused an incorrect (unable to parse) register dump. + +This patch fixes it by reducing NUM_COMMON_GLOBAL_PARAMS +by one. + +Cc: stable@vger.kernel.org +Cc: Tim Gardner +Cc: "David S. Miller" +Fixes: d5ac07dfbd2b ("qed: Initialize debug string array") +Signed-off-by: Prabhakar Kushwaha +Signed-off-by: Alok Prasad +Signed-off-by: Ariel Elior +Signed-off-by: Manish Chopra +Reviewed-by: Tim Gardner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/qlogic/qed/qed_debug.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c ++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c +@@ -489,7 +489,7 @@ struct split_type_defs { + + #define STATIC_DEBUG_LINE_DWORDS 9 + +-#define NUM_COMMON_GLOBAL_PARAMS 11 ++#define NUM_COMMON_GLOBAL_PARAMS 10 + + #define MAX_RECURSION_DEPTH 10 + diff --git a/queue-5.16/series b/queue-5.16/series index bb2d7b91f97..df8426b4cf6 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -223,3 +223,15 @@ mmmremap.c-avoid-pointless-invalidate_range_start-end-on-mremap-old_size-0.patch mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch io_uring-don-t-check-req-file-in-io_fsync_prep.patch io_uring-defer-splice-tee-file-validity-check-until-command-issue.patch +io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch +io_uring-fix-race-between-timeout-flush-and-removal.patch +x86-pm-save-the-msr-validity-status-at-context-setup.patch +x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch +perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch +btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch 
+btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch +btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch +btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch +btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch +spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch +qed-fix-ethtool-register-dump.patch diff --git a/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch b/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch new file mode 100644 index 00000000000..51f2be3ebfb --- /dev/null +++ b/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch @@ -0,0 +1,41 @@ +From 409543cec01a84610029d6440c480c3fdd7214fb Mon Sep 17 00:00:00 2001 +From: Vinod Koul +Date: Wed, 6 Apr 2022 18:52:38 +0530 +Subject: spi: core: add dma_map_dev for __spi_unmap_msg() + +From: Vinod Koul + +commit 409543cec01a84610029d6440c480c3fdd7214fb upstream. + +Commit b470e10eb43f ("spi: core: add dma_map_dev for dma device") added +dma_map_dev for _spi_map_msg() but missed to add for unmap routine, +__spi_unmap_msg(), so add it now. 
+ +Fixes: b470e10eb43f ("spi: core: add dma_map_dev for dma device") +Cc: stable@vger.kernel.org # v5.14+ +Signed-off-by: Vinod Koul +Link: https://lore.kernel.org/r/20220406132238.1029249-1-vkoul@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + drivers/spi/spi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -1151,11 +1151,15 @@ static int __spi_unmap_msg(struct spi_co + + if (ctlr->dma_tx) + tx_dev = ctlr->dma_tx->device->dev; ++ else if (ctlr->dma_map_dev) ++ tx_dev = ctlr->dma_map_dev; + else + tx_dev = ctlr->dev.parent; + + if (ctlr->dma_rx) + rx_dev = ctlr->dma_rx->device->dev; ++ else if (ctlr->dma_map_dev) ++ rx_dev = ctlr->dma_map_dev; + else + rx_dev = ctlr->dev.parent; + diff --git a/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch b/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch new file mode 100644 index 00000000000..47adc223fcc --- /dev/null +++ b/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch @@ -0,0 +1,55 @@ +From 73924ec4d560257004d5b5116b22a3647661e364 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Mon, 4 Apr 2022 17:34:19 -0700 +Subject: x86/pm: Save the MSR validity status at context setup + +From: Pawan Gupta + +commit 73924ec4d560257004d5b5116b22a3647661e364 upstream. + +The mechanism to save/restore MSRs during S3 suspend/resume checks for +the MSR validity during suspend, and only restores the MSR if it's a +valid MSR. This is not optimal, as an invalid MSR will unnecessarily +throw an exception for every suspend cycle. The more invalid MSRs, +the higher the impact will be. + +Check and save the MSR validity at setup. This ensures that only valid +MSRs that are guaranteed to not throw an exception will be attempted +during suspend. 
+ +Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume") +Suggested-by: Dave Hansen +Signed-off-by: Pawan Gupta +Reviewed-by: Dave Hansen +Acked-by: Borislav Petkov +Cc: stable@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/power/cpu.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -40,7 +40,8 @@ static void msr_save_context(struct save + struct saved_msr *end = msr + ctxt->saved_msrs.num; + + while (msr < end) { +- msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); ++ if (msr->valid) ++ rdmsrl(msr->info.msr_no, msr->info.reg.q); + msr++; + } + } +@@ -424,8 +425,10 @@ static int msr_build_context(const u32 * + } + + for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { ++ u64 dummy; ++ + msr_array[i].info.msr_no = msr_id[j]; +- msr_array[i].valid = false; ++ msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); + msr_array[i].info.reg.q = 0; + } + saved_msrs->num = total_num; diff --git a/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch b/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch new file mode 100644 index 00000000000..807ce86533b --- /dev/null +++ b/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch @@ -0,0 +1,60 @@ +From e2a1256b17b16f9b9adf1b6fea56819e7b68e463 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Mon, 4 Apr 2022 17:35:45 -0700 +Subject: x86/speculation: Restore speculation related MSRs during S3 resume + +From: Pawan Gupta + +commit e2a1256b17b16f9b9adf1b6fea56819e7b68e463 upstream. + +After resuming from suspend-to-RAM, the MSRs that control CPU's +speculative execution behavior are not being restored on the boot CPU. + +These MSRs are used to mitigate speculative execution vulnerabilities. 
+Not restoring them correctly may leave the CPU vulnerable. Secondary +CPU's MSRs are correctly being restored at S3 resume by +identify_secondary_cpu(). + +During S3 resume, restore these MSRs for boot CPU when restoring its +processor state. + +Fixes: 772439717dbf ("x86/bugs/intel: Set proper CPU features and setup RDS") +Reported-by: Neelima Krishnan +Signed-off-by: Pawan Gupta +Tested-by: Neelima Krishnan +Acked-by: Borislav Petkov +Reviewed-by: Dave Hansen +Cc: stable@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/power/cpu.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/arch/x86/power/cpu.c ++++ b/arch/x86/power/cpu.c +@@ -503,10 +503,24 @@ static int pm_cpu_check(const struct x86 + return ret; + } + ++static void pm_save_spec_msr(void) ++{ ++ u32 spec_msr_id[] = { ++ MSR_IA32_SPEC_CTRL, ++ MSR_IA32_TSX_CTRL, ++ MSR_TSX_FORCE_ABORT, ++ MSR_IA32_MCU_OPT_CTRL, ++ MSR_AMD64_LS_CFG, ++ }; ++ ++ msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); ++} ++ + static int pm_check_save_msr(void) + { + dmi_check_system(msr_save_dmi_table); + pm_cpu_check(msr_save_cpu_table); ++ pm_save_spec_msr(); + + return 0; + }