From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 11 Apr 2022 07:56:42 +0000 (+0200)
Subject: 5.16-stable patches
X-Git-Tag: v4.9.310~76
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=448ff4058e615a1f31dc6708c7e4f0cf0f266ff5;p=thirdparty%2Fkernel%2Fstable-queue.git

5.16-stable patches

added patches:
	btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
	btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
	btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
	btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
	btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
	io_uring-fix-race-between-timeout-flush-and-removal.patch
	io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
	perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
	qed-fix-ethtool-register-dump.patch
	spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
	x86-pm-save-the-msr-validity-status-at-context-setup.patch
	x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
---

diff --git a/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch b/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
new file mode 100644
index 00000000000..b27eaf06d8e
--- /dev/null
+++ b/queue-5.16/btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
@@ -0,0 +1,107 @@
+From 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 15 Mar 2022 19:28:05 +0800
+Subject: btrfs: avoid defragging extents whose next extents are not targets
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit 75a36a7d3ea904cef2e5b56af0c58cc60dcf947a upstream.
+
+[BUG]
+There is a report that autodefrag is defragging a single sector, which
+is a complete waste of IO, and no help for defragging:
+
+   btrfs-cleaner-808 defrag_one_locked_range: root=256 ino=651122 start=0 len=4096
+
+[CAUSE]
+In defrag_collect_targets(), we check if the current range (A) can be merged
+with next one (B).
+
+If mergeable, we will add range A into target for defrag.
+
+However there is a catch for autodefrag, when checking mergeability
+against range B, we intentionally pass 0 as @newer_than, hoping to get a
+higher chance to merge with the next extent.
+
+But in the next iteration, range B will be looked up by defrag_lookup_extent(),
+with non-zero @newer_than.
+
+And if range B is not really newer, it will be rejected directly, causing
+only range A to be defragged, while we expect to defrag both range A and
+B.
+
+[FIX]
+Since the root cause is the difference in check condition of
+defrag_check_next_extent() and defrag_collect_targets(), we fix it by:
+
+1. Pass @newer_than to defrag_check_next_extent()
+2. Pass @extent_thresh to defrag_check_next_extent()
+
+This makes the check between defrag_collect_targets() and
+defrag_check_next_extent() more consistent.
+
+While there are still some minor differences, the remaining checks
+focus on runtime flags like writeback/delalloc, which are mostly
+transient and safe to be checked only in defrag_collect_targets().
+
+Link: https://github.com/btrfs/linux/issues/423#issuecomment-1066981856
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ioctl.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1189,7 +1189,7 @@ static u32 get_extent_max_capacity(const
+ }
+ 
+ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
+-				     bool locked)
++				     u32 extent_thresh, u64 newer_than, bool locked)
+ {
+ 	struct extent_map *next;
+ 	bool ret = false;
+@@ -1199,11 +1199,12 @@ static bool defrag_check_next_extent(str
+ 		return false;
+ 
+ 	/*
+-	 * We want to check if the next extent can be merged with the current
+-	 * one, which can be an extent created in a past generation, so we pass
+-	 * a minimum generation of 0 to defrag_lookup_extent().
++	 * Here we need to pass @newer_then when checking the next extent, or
++	 * we will hit a case we mark current extent for defrag, but the next
++	 * one will not be a target.
++	 * This will just cause extra IO without really reducing the fragments.
+ 	 */
+-	next = defrag_lookup_extent(inode, em->start + em->len, 0, locked);
++	next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
+ 	/* No more em or hole */
+ 	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+ 		goto out;
+@@ -1215,6 +1216,13 @@ static bool defrag_check_next_extent(str
+ 	 */
+ 	if (next->len >= get_extent_max_capacity(em))
+ 		goto out;
++	/* Skip older extent */
++	if (next->generation < newer_than)
++		goto out;
++	/* Also check extent size */
++	if (next->len >= extent_thresh)
++		goto out;
++
+ 	ret = true;
+ out:
+ 	free_extent_map(next);
+@@ -1420,7 +1428,7 @@ static int defrag_collect_targets(struct
+ 			goto next;
+ 
+ 		next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
+-							  locked);
++						extent_thresh, newer_than, locked);
+ 		if (!next_mergeable) {
+ 			struct defrag_target_range *last;
+ 
diff --git a/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch b/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
new file mode 100644
index 00000000000..d6394647760
--- /dev/null
+++ b/queue-5.16/btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
@@ -0,0 +1,93 @@
+From b642b52d0b50f4d398cb4293f64992d0eed2e2ce Mon Sep 17 00:00:00 2001
+From: Ethan Lien <ethanlien@synology.com>
+Date: Mon, 7 Mar 2022 18:00:04 +0800
+Subject: btrfs: fix qgroup reserve overflow the qgroup limit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ethan Lien <ethanlien@synology.com>
+
+commit b642b52d0b50f4d398cb4293f64992d0eed2e2ce upstream.
+
+We use extent_changeset->bytes_changed in qgroup_reserve_data() to record
+how many bytes we set for EXTENT_QGROUP_RESERVED state. Currently the
+bytes_changed is set as "unsigned int", and it will overflow if we try to
+fallocate a range larger than 4GiB. The result is we reserve less bytes
+and eventually break the qgroup limit.
+
+Regular buffered/direct writes use one changeset for each ordered
+extent, which can never be larger than 256M.  For fallocate, however,
+we use one changeset for the whole range, so it no longer respects the
+256M per extent limit, which causes the problem.
+
+The following example test script reproduces the problem:
+
+  $ cat qgroup-overflow.sh
+  #!/bin/bash
+
+  DEV=/dev/sdj
+  MNT=/mnt/sdj
+
+  mkfs.btrfs -f $DEV
+  mount $DEV $MNT
+
+  # Set qgroup limit to 2GiB.
+  btrfs quota enable $MNT
+  btrfs qgroup limit 2G $MNT
+
+  # Try to fallocate a 3GiB file. This should fail.
+  echo
+  echo "Try to fallocate a 3GiB file..."
+  fallocate -l 3G $MNT/3G.file
+
+  # Try to fallocate a 5GiB file.
+  echo
+  echo "Try to fallocate a 5GiB file..."
+  fallocate -l 5G $MNT/5G.file
+
+  # See we break the qgroup limit.
+  echo
+  sync
+  btrfs qgroup show -r $MNT
+
+  umount $MNT
+
+When running the test:
+
+  $ ./qgroup-overflow.sh
+  (...)
+
+  Try to fallocate a 3GiB file...
+  fallocate: fallocate failed: Disk quota exceeded
+
+  Try to fallocate a 5GiB file...
+
+  qgroupid         rfer         excl     max_rfer
+  --------         ----         ----     --------
+  0/5           5.00GiB      5.00GiB      2.00GiB
+
+Since we have no control of how bytes_changed is used, it's better to
+set it to u64.
+
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Ethan Lien <ethanlien@synology.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent_io.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/extent_io.h
++++ b/fs/btrfs/extent_io.h
+@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl {
+  */
+ struct extent_changeset {
+ 	/* How many bytes are set/cleared in this operation */
+-	unsigned int bytes_changed;
++	u64 bytes_changed;
+ 
+ 	/* Changed ranges */
+ 	struct ulist range_changed;
diff --git a/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch b/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
new file mode 100644
index 00000000000..55df19f0f38
--- /dev/null
+++ b/queue-5.16/btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
@@ -0,0 +1,91 @@
+From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001
+From: Kaiwen Hu <kevinhu@synology.com>
+Date: Wed, 23 Mar 2022 15:10:32 +0800
+Subject: btrfs: prevent subvol with swapfile from being deleted
+
+From: Kaiwen Hu <kevinhu@synology.com>
+
+commit 60021bd754c6ca0addc6817994f20290a321d8d6 upstream.
+
+A subvolume with an active swapfile must not be deleted otherwise it
+would not be possible to deactivate it.
+
+After the subvolume is deleted, we cannot swapoff the swapfile in this
+deleted subvolume because the path is unreachable.  The swapfile is
+still active and holding references, the filesystem cannot be unmounted.
+
+The test looks like this:
+
+  mkfs.btrfs -f $dev > /dev/null
+  mount $dev $mnt
+
+  btrfs sub create $mnt/subvol
+  touch $mnt/subvol/swapfile
+  chmod 600 $mnt/subvol/swapfile
+  chattr +C $mnt/subvol/swapfile
+  dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096
+  mkswap $mnt/subvol/swapfile
+  swapon $mnt/subvol/swapfile
+
+  btrfs sub delete $mnt/subvol
+  swapoff $mnt/subvol/swapfile  # failed: No such file or directory
+  swapoff --all
+
+  umount $mnt                   # target is busy.
+
+To prevent the above issue, we simply check whether the subvolume
+contains any active swapfile, and if so stop the deletion.  This
+behavior is like the snapshot ioctl dealing with a swapfile.
+
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Robbie Ko <robbieko@synology.com>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Kaiwen Hu <kevinhu@synology.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/inode.c |   24 +++++++++++++++++++++++-
+ 1 file changed, 23 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -4462,6 +4462,13 @@ int btrfs_delete_subvolume(struct inode
+ 			   dest->root_key.objectid);
+ 		return -EPERM;
+ 	}
++	if (atomic_read(&dest->nr_swapfiles)) {
++		spin_unlock(&dest->root_item_lock);
++		btrfs_warn(fs_info,
++			   "attempt to delete subvolume %llu with active swapfile",
++			   root->root_key.objectid);
++		return -EPERM;
++	}
+ 	root_flags = btrfs_root_flags(&dest->root_item);
+ 	btrfs_set_root_flags(&dest->root_item,
+ 			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+@@ -10764,8 +10771,23 @@ static int btrfs_swap_activate(struct sw
+ 	 * set. We use this counter to prevent snapshots. We must increment it
+ 	 * before walking the extents because we don't want a concurrent
+ 	 * snapshot to run after we've already checked the extents.
+-	 */
++	 *
++	 * It is possible that subvolume is marked for deletion but still not
++	 * removed yet. To prevent this race, we check the root status before
++	 * activating the swapfile.
++	 */
++	spin_lock(&root->root_item_lock);
++	if (btrfs_root_dead(root)) {
++		spin_unlock(&root->root_item_lock);
++
++		btrfs_exclop_finish(fs_info);
++		btrfs_warn(fs_info,
++		"cannot activate swapfile because subvolume %llu is being deleted",
++			root->root_key.objectid);
++		return -EPERM;
++	}
+ 	atomic_inc(&root->nr_swapfiles);
++	spin_unlock(&root->root_item_lock);
+ 
+ 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
+ 
diff --git a/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch b/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
new file mode 100644
index 00000000000..553899409b6
--- /dev/null
+++ b/queue-5.16/btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
@@ -0,0 +1,216 @@
+From bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 Mon Sep 17 00:00:00 2001
+From: Qu Wenruo <wqu@suse.com>
+Date: Tue, 8 Mar 2022 13:36:38 +0800
+Subject: btrfs: remove device item and update super block in the same transaction
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Qu Wenruo <wqu@suse.com>
+
+commit bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 upstream.
+
+[BUG]
+There is a report that a btrfs has a bad super block num devices.
+
+This makes btrfs reject the fs completely.
+
+  BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
+  BTRFS error (device sdd3): failed to read chunk tree: -22
+  BTRFS error (device sdd3): open_ctree failed
+
+[CAUSE]
+During btrfs device removal, chunk tree and super block num devs are
+updated in two different transactions:
+
+  btrfs_rm_device()
+  |- btrfs_rm_dev_item(device)
+  |  |- trans = btrfs_start_transaction()
+  |  |  Now we got transaction X
+  |  |
+  |  |- btrfs_del_item()
+  |  |  Now device item is removed from chunk tree
+  |  |
+  |  |- btrfs_commit_transaction()
+  |     Transaction X got committed, super num devs untouched,
+  |     but device item removed from chunk tree.
+  |     (AKA, super num devs is already incorrect)
+  |
+  |- cur_devices->num_devices--;
+  |- cur_devices->total_devices--;
+  |- btrfs_set_super_num_devices()
+     All those operations are not in transaction X, thus it will
+     only be written back to disk in next transaction.
+
+So if a power loss happens after transaction X in btrfs_rm_dev_item() is
+committed, but before transaction X+1 (which can be minutes away), then
+we get the super num devices mismatch.
+
+[FIX]
+Instead of starting and committing a transaction inside
+btrfs_rm_dev_item(), start a transaction inside btrfs_rm_device() and
+pass it to btrfs_rm_dev_item().
+
+And only commit the transaction after everything is done.
+
+Reported-by: Luca Béla Palkovics <luca.bela.palkovics@gmail.com>
+Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46CKmp3igr44okQ@mail.gmail.com/
+CC: stable@vger.kernel.org # 4.14+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/volumes.c |   65 ++++++++++++++++++++++-------------------------------
+ 1 file changed, 28 insertions(+), 37 deletions(-)
+
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1942,23 +1942,18 @@ static void update_dev_time(const char *
+ 	path_put(&path);
+ }
+ 
+-static int btrfs_rm_dev_item(struct btrfs_device *device)
++static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
++			     struct btrfs_device *device)
+ {
+ 	struct btrfs_root *root = device->fs_info->chunk_root;
+ 	int ret;
+ 	struct btrfs_path *path;
+ 	struct btrfs_key key;
+-	struct btrfs_trans_handle *trans;
+ 
+ 	path = btrfs_alloc_path();
+ 	if (!path)
+ 		return -ENOMEM;
+ 
+-	trans = btrfs_start_transaction(root, 0);
+-	if (IS_ERR(trans)) {
+-		btrfs_free_path(path);
+-		return PTR_ERR(trans);
+-	}
+ 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ 	key.type = BTRFS_DEV_ITEM_KEY;
+ 	key.offset = device->devid;
+@@ -1969,21 +1964,12 @@ static int btrfs_rm_dev_item(struct btrf
+ 	if (ret) {
+ 		if (ret > 0)
+ 			ret = -ENOENT;
+-		btrfs_abort_transaction(trans, ret);
+-		btrfs_end_transaction(trans);
+ 		goto out;
+ 	}
+ 
+ 	ret = btrfs_del_item(trans, root, path);
+-	if (ret) {
+-		btrfs_abort_transaction(trans, ret);
+-		btrfs_end_transaction(trans);
+-	}
+-
+ out:
+ 	btrfs_free_path(path);
+-	if (!ret)
+-		ret = btrfs_commit_transaction(trans);
+ 	return ret;
+ }
+ 
+@@ -2124,6 +2110,7 @@ int btrfs_rm_device(struct btrfs_fs_info
+ 		    struct btrfs_dev_lookup_args *args,
+ 		    struct block_device **bdev, fmode_t *mode)
+ {
++	struct btrfs_trans_handle *trans;
+ 	struct btrfs_device *device;
+ 	struct btrfs_fs_devices *cur_devices;
+ 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+@@ -2139,7 +2126,7 @@ int btrfs_rm_device(struct btrfs_fs_info
+ 
+ 	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
+ 	if (ret)
+-		goto out;
++		return ret;
+ 
+ 	device = btrfs_find_device(fs_info->fs_devices, args);
+ 	if (!device) {
+@@ -2147,27 +2134,22 @@ int btrfs_rm_device(struct btrfs_fs_info
+ 			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
+ 		else
+ 			ret = -ENOENT;
+-		goto out;
++		return ret;
+ 	}
+ 
+ 	if (btrfs_pinned_by_swapfile(fs_info, device)) {
+ 		btrfs_warn_in_rcu(fs_info,
+ 		  "cannot remove device %s (devid %llu) due to active swapfile",
+ 				  rcu_str_deref(device->name), device->devid);
+-		ret = -ETXTBSY;
+-		goto out;
++		return -ETXTBSY;
+ 	}
+ 
+-	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
+-		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
+-		goto out;
+-	}
++	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
++		return BTRFS_ERROR_DEV_TGT_REPLACE;
+ 
+ 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
+-	    fs_info->fs_devices->rw_devices == 1) {
+-		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
+-		goto out;
+-	}
++	    fs_info->fs_devices->rw_devices == 1)
++		return BTRFS_ERROR_DEV_ONLY_WRITABLE;
+ 
+ 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
+ 		mutex_lock(&fs_info->chunk_mutex);
+@@ -2182,14 +2164,22 @@ int btrfs_rm_device(struct btrfs_fs_info
+ 	if (ret)
+ 		goto error_undo;
+ 
+-	/*
+-	 * TODO: the superblock still includes this device in its num_devices
+-	 * counter although write_all_supers() is not locked out. This
+-	 * could give a filesystem state which requires a degraded mount.
+-	 */
+-	ret = btrfs_rm_dev_item(device);
+-	if (ret)
++	trans = btrfs_start_transaction(fs_info->chunk_root, 0);
++	if (IS_ERR(trans)) {
++		ret = PTR_ERR(trans);
+ 		goto error_undo;
++	}
++
++	ret = btrfs_rm_dev_item(trans, device);
++	if (ret) {
++		/* Any error in dev item removal is critical */
++		btrfs_crit(fs_info,
++			   "failed to remove device item for devid %llu: %d",
++			   device->devid, ret);
++		btrfs_abort_transaction(trans, ret);
++		btrfs_end_transaction(trans);
++		return ret;
++	}
+ 
+ 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+ 	btrfs_scrub_cancel_dev(device);
+@@ -2272,7 +2262,8 @@ int btrfs_rm_device(struct btrfs_fs_info
+ 		free_fs_devices(cur_devices);
+ 	}
+ 
+-out:
++	ret = btrfs_commit_transaction(trans);
++
+ 	return ret;
+ 
+ error_undo:
+@@ -2284,7 +2275,7 @@ error_undo:
+ 		device->fs_devices->rw_devices++;
+ 		mutex_unlock(&fs_info->chunk_mutex);
+ 	}
+-	goto out;
++	return ret;
+ }
+ 
+ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
diff --git a/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch b/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
new file mode 100644
index 00000000000..dee0c10f2a4
--- /dev/null
+++ b/queue-5.16/btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
@@ -0,0 +1,151 @@
+From 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 Mon Sep 17 00:00:00 2001
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Date: Mon, 7 Mar 2022 02:47:17 -0800
+Subject: btrfs: zoned: traverse devices under chunk_mutex in btrfs_can_activate_zone
+
+From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+
+commit 0b9e66762aa0cda2a9c2d5542d64e04dac528fa6 upstream.
+
+btrfs_can_activate_zone() can be called with the device_list_mutex already
+held, which will lead to a deadlock:
+
+insert_dev_extents() // Takes device_list_mutex
+`-> insert_dev_extent()
+ `-> btrfs_insert_empty_item()
+  `-> btrfs_insert_empty_items()
+   `-> btrfs_search_slot()
+    `-> btrfs_cow_block()
+     `-> __btrfs_cow_block()
+      `-> btrfs_alloc_tree_block()
+       `-> btrfs_reserve_extent()
+        `-> find_free_extent()
+         `-> find_free_extent_update_loop()
+          `-> can_allocate_chunk()
+           `-> btrfs_can_activate_zone() // Takes device_list_mutex again
+
+Instead of using the RCU on fs_devices->device_list we
+can use fs_devices->alloc_list, protected by the chunk_mutex to traverse
+the list of active devices.
+
+We are in the chunk allocation thread. The newer chunk allocation
+happens from the devices in the fs_device->alloc_list protected by the
+chunk_mutex.
+
+  btrfs_create_chunk()
+    lockdep_assert_held(&info->chunk_mutex);
+    gather_device_info
+      list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list)
+
+Also, a device that reappears after the mount won't join the alloc_list
+yet and, it will be in the dev_list, which we don't want to consider in
+the context of the chunk alloc.
+
+  [15.166572] WARNING: possible recursive locking detected
+  [15.167117] 5.17.0-rc6-dennis #79 Not tainted
+  [15.167487] --------------------------------------------
+  [15.167733] kworker/u8:3/146 is trying to acquire lock:
+  [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.167733]
+  [15.167733] but task is already holding lock:
+  [15.167733] ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs]
+  [15.167733]
+  [15.167733] other info that might help us debug this:
+  [15.167733]  Possible unsafe locking scenario:
+  [15.167733]
+  [15.171834]        CPU0
+  [15.171834]        ----
+  [15.171834]   lock(&fs_devs->device_list_mutex);
+  [15.171834]   lock(&fs_devs->device_list_mutex);
+  [15.171834]
+  [15.171834]  *** DEADLOCK ***
+  [15.171834]
+  [15.171834]  May be due to missing lock nesting notation
+  [15.171834]
+  [15.171834] 5 locks held by kworker/u8:3/146:
+  [15.171834]  #0: ffff888100050938 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0
+  [15.171834]  #1: ffffc9000067be80 ((work_completion)(&fs_info->async_data_reclaim_work)){+.+.}-{0:0}, at: process_one_work+0x1c3/0x5a0
+  [15.176244]  #2: ffff88810521e620 (sb_internal){.+.+}-{0:0}, at: flush_space+0x335/0x600 [btrfs]
+  [15.176244]  #3: ffff888102962ee0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: btrfs_create_pending_block_groups+0x20a/0x560 [btrfs]
+  [15.176244]  #4: ffff8881152e4b78 (btrfs-dev-00){++++}-{3:3}, at: __btrfs_tree_lock+0x27/0x130 [btrfs]
+  [15.179641]
+  [15.179641] stack backtrace:
+  [15.179641] CPU: 1 PID: 146 Comm: kworker/u8:3 Not tainted 5.17.0-rc6-dennis #79
+  [15.179641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1.fc35 04/01/2014
+  [15.179641] Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs]
+  [15.179641] Call Trace:
+  [15.179641]  <TASK>
+  [15.179641]  dump_stack_lvl+0x45/0x59
+  [15.179641]  __lock_acquire.cold+0x217/0x2b2
+  [15.179641]  lock_acquire+0xbf/0x2b0
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  __mutex_lock+0x8e/0x970
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? lock_is_held_type+0xd7/0x130
+  [15.183838]  ? find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  find_free_extent+0x15a/0x14f0 [btrfs]
+  [15.183838]  ? _raw_spin_unlock+0x24/0x40
+  [15.183838]  ? btrfs_get_alloc_profile+0x106/0x230 [btrfs]
+  [15.187601]  btrfs_reserve_extent+0x131/0x260 [btrfs]
+  [15.187601]  btrfs_alloc_tree_block+0xb5/0x3b0 [btrfs]
+  [15.187601]  __btrfs_cow_block+0x138/0x600 [btrfs]
+  [15.187601]  btrfs_cow_block+0x10f/0x230 [btrfs]
+  [15.187601]  btrfs_search_slot+0x55f/0xbc0 [btrfs]
+  [15.187601]  ? lock_is_held_type+0xd7/0x130
+  [15.187601]  btrfs_insert_empty_items+0x2d/0x60 [btrfs]
+  [15.187601]  btrfs_create_pending_block_groups+0x2b3/0x560 [btrfs]
+  [15.187601]  __btrfs_end_transaction+0x36/0x2a0 [btrfs]
+  [15.192037]  flush_space+0x374/0x600 [btrfs]
+  [15.192037]  ? find_held_lock+0x2b/0x80
+  [15.192037]  ? btrfs_async_reclaim_data_space+0x49/0x180 [btrfs]
+  [15.192037]  ? lock_release+0x131/0x2b0
+  [15.192037]  btrfs_async_reclaim_data_space+0x70/0x180 [btrfs]
+  [15.192037]  process_one_work+0x24c/0x5a0
+  [15.192037]  worker_thread+0x4a/0x3d0
+
+Fixes: a85f05e59bc1 ("btrfs: zoned: avoid chunk allocation if active block group has enough space")
+CC: stable@vger.kernel.org # 5.16+
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |    9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1936,18 +1936,19 @@ int btrfs_zone_finish(struct btrfs_block
+ 
+ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+ {
++	struct btrfs_fs_info *fs_info = fs_devices->fs_info;
+ 	struct btrfs_device *device;
+ 	bool ret = false;
+ 
+-	if (!btrfs_is_zoned(fs_devices->fs_info))
++	if (!btrfs_is_zoned(fs_info))
+ 		return true;
+ 
+ 	/* Non-single profiles are not supported yet */
+ 	ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
+ 
+ 	/* Check if there is a device with active zones left */
+-	mutex_lock(&fs_devices->device_list_mutex);
+-	list_for_each_entry(device, &fs_devices->devices, dev_list) {
++	mutex_lock(&fs_info->chunk_mutex);
++	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+ 		struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ 
+ 		if (!device->bdev)
+@@ -1959,7 +1960,7 @@ bool btrfs_can_activate_zone(struct btrf
+ 			break;
+ 		}
+ 	}
+-	mutex_unlock(&fs_devices->device_list_mutex);
++	mutex_unlock(&fs_info->chunk_mutex);
+ 
+ 	return ret;
+ }
diff --git a/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch b/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch
new file mode 100644
index 00000000000..755ef39991a
--- /dev/null
+++ b/queue-5.16/io_uring-fix-race-between-timeout-flush-and-removal.patch
@@ -0,0 +1,56 @@
+From e677edbcabee849bfdd43f1602bccbecf736a646 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 8 Apr 2022 11:08:58 -0600
+Subject: io_uring: fix race between timeout flush and removal
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e677edbcabee849bfdd43f1602bccbecf736a646 upstream.
+
+io_flush_timeouts() assumes the timeout isn't in progress of triggering
+or being removed/canceled, so it unconditionally removes it from the
+timeout list and attempts to cancel it.
+
+Leave it on the list and let the normal timeout cancelation take care
+of it.
+
+Cc: stable@vger.kernel.org # 5.5+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1614,12 +1614,11 @@ static __cold void io_flush_timeouts(str
+ 	__must_hold(&ctx->completion_lock)
+ {
+ 	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
++	struct io_kiocb *req, *tmp;
+ 
+ 	spin_lock_irq(&ctx->timeout_lock);
+-	while (!list_empty(&ctx->timeout_list)) {
++	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+ 		u32 events_needed, events_got;
+-		struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
+-						struct io_kiocb, timeout.list);
+ 
+ 		if (io_is_timeout_noseq(req))
+ 			break;
+@@ -1636,7 +1635,6 @@ static __cold void io_flush_timeouts(str
+ 		if (events_got < events_needed)
+ 			break;
+ 
+-		list_del_init(&req->timeout.list);
+ 		io_kill_timeout(req, 0);
+ 	}
+ 	ctx->cq_last_tm_flush = seq;
+@@ -6223,6 +6221,7 @@ static int io_timeout_prep(struct io_kio
+ 	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
+ 		return -EINVAL;
+ 
++	INIT_LIST_HEAD(&req->timeout.list);
+ 	data->mode = io_translate_timeout_mode(flags);
+ 	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
+ 
diff --git a/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch b/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
new file mode 100644
index 00000000000..7e051e56027
--- /dev/null
+++ b/queue-5.16/io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
@@ -0,0 +1,39 @@
+From 0f5e4b83b37a96e3643951588ed7176b9b187c0a Mon Sep 17 00:00:00 2001
+From: Eugene Syromiatnikov <esyr@redhat.com>
+Date: Wed, 6 Apr 2022 13:55:33 +0200
+Subject: io_uring: implement compat handling for IORING_REGISTER_IOWQ_AFF
+
+From: Eugene Syromiatnikov <esyr@redhat.com>
+
+commit 0f5e4b83b37a96e3643951588ed7176b9b187c0a upstream.
+
+Similarly to the way it is done in the mbind syscall.
+
+Cc: stable@vger.kernel.org # 5.14
+Fixes: fe76421d1da1dcdb ("io_uring: allow user configurable IO thread CPU affinity")
+Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -10799,7 +10799,15 @@ static __cold int io_register_iowq_aff(s
+ 	if (len > cpumask_size())
+ 		len = cpumask_size();
+ 
+-	if (copy_from_user(new_mask, arg, len)) {
++	if (in_compat_syscall()) {
++		ret = compat_get_bitmap(cpumask_bits(new_mask),
++					(const compat_ulong_t __user *)arg,
++					len * 8 /* CHAR_BIT */);
++	} else {
++		ret = copy_from_user(new_mask, arg, len);
++	}
++
++	if (ret) {
+ 		free_cpumask_var(new_mask);
+ 		return -EFAULT;
+ 	}
diff --git a/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch b/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
new file mode 100644
index 00000000000..584600c7b9e
--- /dev/null
+++ b/queue-5.16/perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
@@ -0,0 +1,36 @@
+From e590928de7547454469693da9bc7ffd562e54b7e Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 28 Mar 2022 08:49:03 -0700
+Subject: perf/x86/intel: Update the FRONTEND MSR mask on Sapphire Rapids
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit e590928de7547454469693da9bc7ffd562e54b7e upstream.
+
+On Sapphire Rapids, the FRONTEND_RETIRED.MS_FLOWS event requires the
+FRONTEND MSR value 0x8. However, the current FRONTEND MSR mask doesn't
+support it.
+
+Update intel_spr_extra_regs[] to support it.
+
+Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids")
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/1648482543-14923-2-git-send-email-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -281,7 +281,7 @@ static struct extra_reg intel_spr_extra_
+ 	INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
+ 	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+-	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
++	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE),
+ 	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ 	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
+ 	EVENT_EXTRA_END
diff --git a/queue-5.16/qed-fix-ethtool-register-dump.patch b/queue-5.16/qed-fix-ethtool-register-dump.patch
new file mode 100644
index 00000000000..ae21eafe9a2
--- /dev/null
+++ b/queue-5.16/qed-fix-ethtool-register-dump.patch
@@ -0,0 +1,45 @@
+From 20921c0c86092b4082c91bd7c88305da74e5520b Mon Sep 17 00:00:00 2001
+From: Manish Chopra <manishc@marvell.com>
+Date: Fri, 1 Apr 2022 11:53:04 -0700
+Subject: qed: fix ethtool register dump
+
+From: Manish Chopra <manishc@marvell.com>
+
+commit 20921c0c86092b4082c91bd7c88305da74e5520b upstream.
+
+To fix a Coverity complaint, commit d5ac07dfbd2b
+("qed: Initialize debug string array") removed "sw-platform"
+(one of the common global parameters) from the dump as this
+was used in the dump with an uninitialized string. However,
+it did not reduce the number of common global parameters,
+which caused an incorrect (unable to parse) register dump.
+
+This patch fixes it by reducing NUM_COMMON_GLOBAL_PARAMS
+by one.
+
+Cc: stable@vger.kernel.org
+Cc: Tim Gardner <tim.gardner@canonical.com>
+Cc: "David S. Miller" <davem@davemloft.net>
+Fixes: d5ac07dfbd2b ("qed: Initialize debug string array")
+Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
+Signed-off-by: Alok Prasad <palok@marvell.com>
+Signed-off-by: Ariel Elior <aelior@marvell.com>
+Signed-off-by: Manish Chopra <manishc@marvell.com>
+Reviewed-by: Tim Gardner <tim.gardner@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_debug.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
+@@ -489,7 +489,7 @@ struct split_type_defs {
+ 
+ #define STATIC_DEBUG_LINE_DWORDS	9
+ 
+-#define NUM_COMMON_GLOBAL_PARAMS	11
++#define NUM_COMMON_GLOBAL_PARAMS	10
+ 
+ #define MAX_RECURSION_DEPTH		10
+ 
diff --git a/queue-5.16/series b/queue-5.16/series
index bb2d7b91f97..df8426b4cf6 100644
--- a/queue-5.16/series
+++ b/queue-5.16/series
@@ -223,3 +223,15 @@ mmmremap.c-avoid-pointless-invalidate_range_start-end-on-mremap-old_size-0.patch
 mm-mempolicy-fix-mpol_new-leak-in-shared_policy_replace.patch
 io_uring-don-t-check-req-file-in-io_fsync_prep.patch
 io_uring-defer-splice-tee-file-validity-check-until-command-issue.patch
+io_uring-implement-compat-handling-for-ioring_register_iowq_aff.patch
+io_uring-fix-race-between-timeout-flush-and-removal.patch
+x86-pm-save-the-msr-validity-status-at-context-setup.patch
+x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
+perf-x86-intel-update-the-frontend-msr-mask-on-sapphire-rapids.patch
+btrfs-fix-qgroup-reserve-overflow-the-qgroup-limit.patch
+btrfs-zoned-traverse-devices-under-chunk_mutex-in-btrfs_can_activate_zone.patch
+btrfs-remove-device-item-and-update-super-block-in-the-same-transaction.patch
+btrfs-avoid-defragging-extents-whose-next-extents-are-not-targets.patch
+btrfs-prevent-subvol-with-swapfile-from-being-deleted.patch
+spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
+qed-fix-ethtool-register-dump.patch
diff --git a/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch b/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
new file mode 100644
index 00000000000..51f2be3ebfb
--- /dev/null
+++ b/queue-5.16/spi-core-add-dma_map_dev-for-__spi_unmap_msg.patch
@@ -0,0 +1,41 @@
+From 409543cec01a84610029d6440c480c3fdd7214fb Mon Sep 17 00:00:00 2001
+From: Vinod Koul <vkoul@kernel.org>
+Date: Wed, 6 Apr 2022 18:52:38 +0530
+Subject: spi: core: add dma_map_dev for __spi_unmap_msg()
+
+From: Vinod Koul <vkoul@kernel.org>
+
+commit 409543cec01a84610029d6440c480c3fdd7214fb upstream.
+
+Commit b470e10eb43f ("spi: core: add dma_map_dev for dma device") added
+dma_map_dev for __spi_map_msg() but did not add it for the unmap routine,
+__spi_unmap_msg(), so add it now.
+
+Fixes: b470e10eb43f ("spi: core: add dma_map_dev for dma device")
+Cc: stable@vger.kernel.org # v5.14+
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Link: https://lore.kernel.org/r/20220406132238.1029249-1-vkoul@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/spi/spi.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/spi/spi.c
++++ b/drivers/spi/spi.c
+@@ -1151,11 +1151,15 @@ static int __spi_unmap_msg(struct spi_co
+ 
+ 	if (ctlr->dma_tx)
+ 		tx_dev = ctlr->dma_tx->device->dev;
++	else if (ctlr->dma_map_dev)
++		tx_dev = ctlr->dma_map_dev;
+ 	else
+ 		tx_dev = ctlr->dev.parent;
+ 
+ 	if (ctlr->dma_rx)
+ 		rx_dev = ctlr->dma_rx->device->dev;
++	else if (ctlr->dma_map_dev)
++		rx_dev = ctlr->dma_map_dev;
+ 	else
+ 		rx_dev = ctlr->dev.parent;
+ 
diff --git a/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch b/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch
new file mode 100644
index 00000000000..47adc223fcc
--- /dev/null
+++ b/queue-5.16/x86-pm-save-the-msr-validity-status-at-context-setup.patch
@@ -0,0 +1,55 @@
+From 73924ec4d560257004d5b5116b22a3647661e364 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:34:19 -0700
+Subject: x86/pm: Save the MSR validity status at context setup
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit 73924ec4d560257004d5b5116b22a3647661e364 upstream.
+
+The mechanism to save/restore MSRs during S3 suspend/resume checks for
+the MSR validity during suspend, and only restores the MSR if it's a
+valid MSR.  This is not optimal, as an invalid MSR will unnecessarily
+throw an exception for every suspend cycle.  The more invalid MSRs,
+the higher the impact will be.
+
+Check and save the MSR validity at setup.  This ensures that only valid
+MSRs that are guaranteed to not throw an exception will be attempted
+during suspend.
+
+Fixes: 7a9c2dd08ead ("x86/pm: Introduce quirk framework to save/restore extra MSR registers around suspend/resume")
+Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -40,7 +40,8 @@ static void msr_save_context(struct save
+ 	struct saved_msr *end = msr + ctxt->saved_msrs.num;
+ 
+ 	while (msr < end) {
+-		msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q);
++		if (msr->valid)
++			rdmsrl(msr->info.msr_no, msr->info.reg.q);
+ 		msr++;
+ 	}
+ }
+@@ -424,8 +425,10 @@ static int msr_build_context(const u32 *
+ 	}
+ 
+ 	for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
++		u64 dummy;
++
+ 		msr_array[i].info.msr_no	= msr_id[j];
+-		msr_array[i].valid		= false;
++		msr_array[i].valid		= !rdmsrl_safe(msr_id[j], &dummy);
+ 		msr_array[i].info.reg.q		= 0;
+ 	}
+ 	saved_msrs->num   = total_num;
diff --git a/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch b/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
new file mode 100644
index 00000000000..807ce86533b
--- /dev/null
+++ b/queue-5.16/x86-speculation-restore-speculation-related-msrs-during-s3-resume.patch
@@ -0,0 +1,60 @@
+From e2a1256b17b16f9b9adf1b6fea56819e7b68e463 Mon Sep 17 00:00:00 2001
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Date: Mon, 4 Apr 2022 17:35:45 -0700
+Subject: x86/speculation: Restore speculation related MSRs during S3 resume
+
+From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+
+commit e2a1256b17b16f9b9adf1b6fea56819e7b68e463 upstream.
+
+After resuming from suspend-to-RAM, the MSRs that control CPU's
+speculative execution behavior are not being restored on the boot CPU.
+
+These MSRs are used to mitigate speculative execution vulnerabilities.
+Not restoring them correctly may leave the CPU vulnerable.  Secondary
+CPU's MSRs are correctly being restored at S3 resume by
+identify_secondary_cpu().
+
+During S3 resume, restore these MSRs for boot CPU when restoring its
+processor state.
+
+Fixes: 772439717dbf ("x86/bugs/intel: Set proper CPU features and setup RDS")
+Reported-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
+Acked-by: Borislav Petkov <bp@suse.de>
+Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/power/cpu.c |   14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -503,10 +503,24 @@ static int pm_cpu_check(const struct x86
+ 	return ret;
+ }
+ 
++static void pm_save_spec_msr(void)
++{
++	u32 spec_msr_id[] = {
++		MSR_IA32_SPEC_CTRL,
++		MSR_IA32_TSX_CTRL,
++		MSR_TSX_FORCE_ABORT,
++		MSR_IA32_MCU_OPT_CTRL,
++		MSR_AMD64_LS_CFG,
++	};
++
++	msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
++}
++
+ static int pm_check_save_msr(void)
+ {
+ 	dmi_check_system(msr_save_dmi_table);
+ 	pm_cpu_check(msr_save_cpu_table);
++	pm_save_spec_msr();
+ 
+ 	return 0;
+ }