From 3efe9d7ddf8f4c19e85bc008de8fa1eacd1cc9f3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Jun 2022 17:50:52 +0200 Subject: [PATCH] 5.17-stable patches added patches: drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch exfat-check-if-cluster-num-is-valid.patch exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch net-ipa-compute-proper-aggregation-limit.patch netfilter-conntrack-re-fetch-conntrack-after-insertion.patch netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch netfilter-nft_limit-clone-packet-limits-cost-value.patch x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch --- ...ing-in-call-to-intel_read_wm_latency.patch | 57 ++++++ .../exfat-check-if-cluster-num-is-valid.patch | 104 ++++++++++ ...directory-information-after-renaming.patch | 98 +++++++++ ...ncorrect-null-check-on-list-iterator.patch | 51 +++++ ...terfaces-to-prevent-kernel-data-leak.patch | 88 ++++++++ ...ulator-without-a-decoded-instruction.patch | 107 ++++++++++ ...-a-vcpu-after-.vm_destroy-was-called.patch | 63 ++++++ ...a-triple-fault-never-escapes-from-l2.patch | 83 ++++++++ ...ndling-wrongly-considered-from-guest.patch | 40 ++++ ..._cmpxchg_user-causing-non-atomicness.patch | 35 ++++ ...nced-and-no-tlb-flushing-is-required.patch | 89 ++++++++ ...xchg_user-to-emulate-atomic-accesses.patch | 103 ++++++++++ ...hg_user-to-update-guest-pte-a-d-bits.patch | 84 ++++++++ ...ipa-compute-proper-aggregation-limit.patch | 49 +++++ ...k-re-fetch-conntrack-after-insertion.patch | 43 ++++ ...le-hook-unregistration-in-netns-path.patch | 137 +++++++++++++ ...es-hold-mutex-on-netns-pre_exit-path.patch | 32 +++ ...s-sanitize-nft_set_desc_concat_parse.patch | 74 +++++++ ...limit-clone-packet-limits-cost-value.patch | 31 +++ queue-5.17/series | 23 +++ ...uabi-size-to-sizeof-struct-kvm_xsave.patch | 113 +++++++++++ ...ync-pf-token-outside-of-raw-spinlock.patch | 91 +++++++++ ...ct-gfp-flags-for-preemption-disabled.patch | 81 ++++++++ ...macros-for-cmpxchg-on-user-addresses.patch | 191 ++++++++++++++++++ 24 files changed, 1867 insertions(+) create mode 100644 queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch create mode 100644 queue-5.17/exfat-check-if-cluster-num-is-valid.patch create mode 100644 queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch create mode 100644 
queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch create mode 100644 queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch create mode 100644 queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch create mode 100644 queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch create mode 100644 queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch create mode 100644 queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch create mode 100644 queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch create mode 100644 queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch create mode 100644 queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch create mode 100644 queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch create mode 100644 queue-5.17/net-ipa-compute-proper-aggregation-limit.patch create mode 100644 queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch create mode 100644 queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch create mode 100644 queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch create mode 100644 queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch create mode 100644 queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch create mode 100644 queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch create mode 100644 queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch create mode 100644 queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch create mode 100644 queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch diff --git a/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch b/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch new file mode 100644 index 00000000000..80b0d00a8d7 --- /dev/null +++ b/queue-5.17/drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch @@ -0,0 +1,57 @@ +From 336feb502a715909a8136eb6a62a83d7268a353b Mon Sep 17 00:00:00 2001 +From: "Gustavo A. R. Silva" +Date: Wed, 27 Apr 2022 17:47:14 -0500 +Subject: drm/i915: Fix -Wstringop-overflow warning in call to intel_read_wm_latency() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Gustavo A. R. Silva + +commit 336feb502a715909a8136eb6a62a83d7268a353b upstream. + +Fix the following -Wstringop-overflow warnings when building with GCC-11: + +drivers/gpu/drm/i915/intel_pm.c:3106:9: warning: ‘intel_read_wm_latency’ accessing 16 bytes in a region of size 10 [-Wstringop-overflow=] + 3106 | intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +drivers/gpu/drm/i915/intel_pm.c:3106:9: note: referencing argument 2 of type ‘u16 *’ {aka ‘short unsigned int *’} +drivers/gpu/drm/i915/intel_pm.c:2861:13: note: in a call to function ‘intel_read_wm_latency’ + 2861 | static void intel_read_wm_latency(struct drm_i915_private *dev_priv, + | ^~~~~~~~~~~~~~~~~~~~~ + +by removing the over-specified array size from the argument declarations. 
+ +It seems that this code is actually safe because the size of the +array depends on the hardware generation, and the function checks +for that. + +Notice that wm can be an array of 5 elements: +drivers/gpu/drm/i915/intel_pm.c:3109: intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); + +or an array of 8 elements: +drivers/gpu/drm/i915/intel_pm.c:3131: intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); + +and the compiler legitimately complains about that. + +This helps with the ongoing efforts to globally enable +-Wstringop-overflow. + +Link: https://github.com/KSPP/linux/issues/181 +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/i915/intel_pm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -2876,7 +2876,7 @@ static void ilk_compute_wm_level(const s + } + + static void intel_read_wm_latency(struct drm_i915_private *dev_priv, +- u16 wm[8]) ++ u16 wm[]) + { + struct intel_uncore *uncore = &dev_priv->uncore; + diff --git a/queue-5.17/exfat-check-if-cluster-num-is-valid.patch b/queue-5.17/exfat-check-if-cluster-num-is-valid.patch new file mode 100644 index 00000000000..0ddcb541677 --- /dev/null +++ b/queue-5.17/exfat-check-if-cluster-num-is-valid.patch @@ -0,0 +1,104 @@ +From 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d Mon Sep 17 00:00:00 2001 +From: Tadeusz Struk +Date: Tue, 17 May 2022 08:13:08 +0900 +Subject: exfat: check if cluster num is valid + +From: Tadeusz Struk + +commit 64ba4b15e5c045f8b746c6da5fc9be9a6b00b61d upstream. + +Syzbot reported slab-out-of-bounds read in exfat_clear_bitmap. +This was triggered by reproducer calling truncute with size 0, +which causes the following trace: + +BUG: KASAN: slab-out-of-bounds in exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 +Read of size 8 at addr ffff888115aa9508 by task syz-executor251/365 + +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack_lvl+0x1e2/0x24b lib/dump_stack.c:118 + print_address_description+0x81/0x3c0 mm/kasan/report.c:233 + __kasan_report mm/kasan/report.c:419 [inline] + kasan_report+0x1a4/0x1f0 mm/kasan/report.c:436 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report_generic.c:309 + exfat_clear_bitmap+0x147/0x490 fs/exfat/balloc.c:174 + exfat_free_cluster+0x25a/0x4a0 fs/exfat/fatent.c:181 + __exfat_truncate+0x99e/0xe00 fs/exfat/file.c:217 + exfat_truncate+0x11b/0x4f0 fs/exfat/file.c:243 + exfat_setattr+0xa03/0xd40 fs/exfat/file.c:339 + notify_change+0xb76/0xe10 fs/attr.c:336 + do_truncate+0x1ea/0x2d0 fs/open.c:65 + +Move the is_valid_cluster() helper from fatent.c to a common +header to make it reusable in other *.c files. And add is_valid_cluster() +to validate if cluster number is within valid range in exfat_clear_bitmap() +and exfat_set_bitmap(). 
+ +Link: https://syzkaller.appspot.com/bug?id=50381fc73821ecae743b8cf24b4c9a04776f767c +Reported-by: syzbot+a4087e40b9c13aad7892@syzkaller.appspotmail.com +Fixes: 1e49a94cf707 ("exfat: add bitmap operations") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Tadeusz Struk +Reviewed-by: Sungjong Seo +Signed-off-by: Namjae Jeon +Signed-off-by: Greg Kroah-Hartman +--- + fs/exfat/balloc.c | 8 ++++++-- + fs/exfat/exfat_fs.h | 6 ++++++ + fs/exfat/fatent.c | 6 ------ + 3 files changed, 12 insertions(+), 8 deletions(-) + +--- a/fs/exfat/balloc.c ++++ b/fs/exfat/balloc.c +@@ -148,7 +148,9 @@ int exfat_set_bitmap(struct inode *inode + struct super_block *sb = inode->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + +- WARN_ON(clu < EXFAT_FIRST_CLUSTER); ++ if (!is_valid_cluster(sbi, clu)) ++ return -EINVAL; ++ + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); +@@ -166,7 +168,9 @@ void exfat_clear_bitmap(struct inode *in + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_mount_options *opts = &sbi->options; + +- WARN_ON(clu < EXFAT_FIRST_CLUSTER); ++ if (!is_valid_cluster(sbi, clu)) ++ return; ++ + ent_idx = CLUSTER_TO_BITMAP_ENT(clu); + i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); + b = BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent_idx); +--- a/fs/exfat/exfat_fs.h ++++ b/fs/exfat/exfat_fs.h +@@ -380,6 +380,12 @@ static inline int exfat_sector_to_cluste + EXFAT_RESERVED_CLUSTERS; + } + ++static inline bool is_valid_cluster(struct exfat_sb_info *sbi, ++ unsigned int clus) ++{ ++ return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; ++} ++ + /* super.c */ + int exfat_set_volume_dirty(struct super_block *sb); + int exfat_clear_volume_dirty(struct super_block *sb); +--- a/fs/exfat/fatent.c ++++ b/fs/exfat/fatent.c +@@ -81,12 +81,6 @@ int exfat_ent_set(struct super_block *sb + return 0; + } + +-static inline bool is_valid_cluster(struct exfat_sb_info *sbi, +- unsigned int clus) +-{ +- return clus >= EXFAT_FIRST_CLUSTER && clus < sbi->num_clusters; +-} +- + int exfat_ent_get(struct super_block *sb, unsigned int loc, + unsigned int *content) + { diff --git a/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch b/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch new file mode 100644 index 00000000000..a8275598121 --- /dev/null +++ b/queue-5.17/exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch @@ -0,0 +1,98 @@ +From d8dad2588addd1d861ce19e7df3b702330f0c7e3 Mon Sep 17 00:00:00 2001 +From: Yuezhang Mo +Date: Mon, 4 Apr 2022 11:58:06 +0900 +Subject: exfat: fix referencing wrong parent directory information after renaming + +From: Yuezhang Mo + +commit d8dad2588addd1d861ce19e7df3b702330f0c7e3 upstream. + +During renaming, the parent directory information maybe +updated. But the file/directory still references to the +old parent directory information. + +This bug will cause 2 problems. + +(1) The renamed file can not be written. + + [10768.175172] exFAT-fs (sda1): error, failed to bmap (inode : 7afd50e4 iblock : 0, err : -5) + [10768.184285] exFAT-fs (sda1): Filesystem has been set read-only + ash: write error: Input/output error + +(2) Some dentries of the renamed file/directory are not set + to deleted after removing the file/directory. + +exfat_update_parent_info() is a workaround for the wrong parent +directory information being used after renaming. Now that bug is +fixed, this is no longer needed, so remove it. 
+ +Fixes: 5f2aa075070c ("exfat: add inode operations") +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Yuezhang Mo +Reviewed-by: Andy Wu +Reviewed-by: Aoyama Wataru +Reviewed-by: Daniel Palmer +Reviewed-by: Sungjong Seo +Signed-off-by: Namjae Jeon +Signed-off-by: Greg Kroah-Hartman +--- + fs/exfat/namei.c | 27 +-------------------------- + 1 file changed, 1 insertion(+), 26 deletions(-) + +--- a/fs/exfat/namei.c ++++ b/fs/exfat/namei.c +@@ -1062,6 +1062,7 @@ static int exfat_rename_file(struct inod + + exfat_remove_entries(inode, p_dir, oldentry, 0, + num_old_entries); ++ ei->dir = *p_dir; + ei->entry = newentry; + } else { + if (exfat_get_entry_type(epold) == TYPE_FILE) { +@@ -1149,28 +1150,6 @@ static int exfat_move_file(struct inode + return 0; + } + +-static void exfat_update_parent_info(struct exfat_inode_info *ei, +- struct inode *parent_inode) +-{ +- struct exfat_sb_info *sbi = EXFAT_SB(parent_inode->i_sb); +- struct exfat_inode_info *parent_ei = EXFAT_I(parent_inode); +- loff_t parent_isize = i_size_read(parent_inode); +- +- /* +- * the problem that struct exfat_inode_info caches wrong parent info. +- * +- * because of flag-mismatch of ei->dir, +- * there is abnormal traversing cluster chain. +- */ +- if (unlikely(parent_ei->flags != ei->dir.flags || +- parent_isize != EXFAT_CLU_TO_B(ei->dir.size, sbi) || +- parent_ei->start_clu != ei->dir.dir)) { +- exfat_chain_set(&ei->dir, parent_ei->start_clu, +- EXFAT_B_TO_CLU_ROUND_UP(parent_isize, sbi), +- parent_ei->flags); +- } +-} +- + /* rename or move a old file into a new file */ + static int __exfat_rename(struct inode *old_parent_inode, + struct exfat_inode_info *ei, struct inode *new_parent_inode, +@@ -1201,8 +1180,6 @@ static int __exfat_rename(struct inode * + return -ENOENT; + } + +- exfat_update_parent_info(ei, old_parent_inode); +- + exfat_chain_dup(&olddir, &ei->dir); + dentry = ei->entry; + +@@ -1223,8 +1200,6 @@ static int __exfat_rename(struct inode * + goto out; + } + +- exfat_update_parent_info(new_ei, new_parent_inode); +- + p_dir = &(new_ei->dir); + new_entry = new_ei->entry; + ep = exfat_get_dentry(sb, p_dir, new_entry, &new_bh); diff --git a/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch b/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch new file mode 100644 index 00000000000..d9e102dea1b --- /dev/null +++ b/queue-5.17/kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch @@ -0,0 +1,51 @@ +From 300981abddcb13f8f06ad58f52358b53a8096775 Mon Sep 17 00:00:00 2001 +From: Xiaomeng Tong +Date: Thu, 14 Apr 2022 14:21:03 +0800 +Subject: KVM: PPC: Book3S HV: fix incorrect NULL check on list iterator + +From: Xiaomeng Tong + +commit 300981abddcb13f8f06ad58f52358b53a8096775 upstream. + +The bug is here: + if (!p) + return ret; + +The list iterator value 'p' will *always* be set and non-NULL by +list_for_each_entry(), so it is incorrect to assume that the iterator +value will be NULL if the list is empty or no element is found. + +To fix the bug, Use a new value 'iter' as the list iterator, while use +the old value 'p' as a dedicated variable to point to the found element. 
+ +Fixes: dfaa973ae960 ("KVM: PPC: Book3S HV: In H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs") +Cc: stable@vger.kernel.org # v5.9+ +Signed-off-by: Xiaomeng Tong +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220414062103.8153-1-xiam0nd.tong@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/kvm/book3s_hv_uvmem.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/kvm/book3s_hv_uvmem.c ++++ b/arch/powerpc/kvm/book3s_hv_uvmem.c +@@ -360,13 +360,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsi + static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, + struct kvm *kvm, unsigned long *gfn) + { +- struct kvmppc_uvmem_slot *p; ++ struct kvmppc_uvmem_slot *p = NULL, *iter; + bool ret = false; + unsigned long i; + +- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) +- if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) ++ list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) ++ if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { ++ p = iter; + break; ++ } + if (!p) + return ret; + /* diff --git a/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch b/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch new file mode 100644 index 00000000000..85276ddaebe --- /dev/null +++ b/queue-5.17/kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch @@ -0,0 +1,88 @@ +From d22d2474e3953996f03528b84b7f52cc26a39403 Mon Sep 17 00:00:00 2001 +From: Ashish Kalra +Date: Mon, 16 May 2022 15:43:10 +0000 +Subject: KVM: SVM: Use kzalloc for sev ioctl interfaces to prevent kernel data leak + +From: Ashish Kalra + +commit d22d2474e3953996f03528b84b7f52cc26a39403 upstream. + +For some sev ioctl interfaces, the length parameter that is passed maybe +less than or equal to SEV_FW_BLOB_MAX_SIZE, but larger than the data +that PSP firmware returns. In this case, kmalloc will allocate memory +that is the size of the input rather than the size of the data. +Since PSP firmware doesn't fully overwrite the allocated buffer, these +sev ioctl interface may return uninitialized kernel slab memory. 
+ +Reported-by: Andy Nguyen +Suggested-by: David Rientjes +Suggested-by: Peter Gonda +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Fixes: eaf78265a4ab3 ("KVM: SVM: Move SEV code to separate file") +Fixes: 2c07ded06427d ("KVM: SVM: add support for SEV attestation command") +Fixes: 4cfdd47d6d95a ("KVM: SVM: Add KVM_SEV SEND_START command") +Fixes: d3d1af85e2c75 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") +Fixes: eba04b20e4861 ("KVM: x86: Account a variety of miscellaneous allocations") +Signed-off-by: Ashish Kalra +Reviewed-by: Peter Gonda +Message-Id: <20220516154310.3685678-1-Ashish.Kalra@amd.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/sev.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/svm/sev.c ++++ b/arch/x86/kvm/svm/sev.c +@@ -684,7 +684,7 @@ static int sev_launch_measure(struct kvm + if (params.len > SEV_FW_BLOB_MAX_SIZE) + return -EINVAL; + +- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); ++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); + if (!blob) + return -ENOMEM; + +@@ -804,7 +804,7 @@ static int __sev_dbg_decrypt_user(struct + if (!IS_ALIGNED(dst_paddr, 16) || + !IS_ALIGNED(paddr, 16) || + !IS_ALIGNED(size, 16)) { +- tpage = (void *)alloc_page(GFP_KERNEL); ++ tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!tpage) + return -ENOMEM; + +@@ -1090,7 +1090,7 @@ static int sev_get_attestation_report(st + if (params.len > SEV_FW_BLOB_MAX_SIZE) + return -EINVAL; + +- blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); ++ blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); + if (!blob) + return -ENOMEM; + +@@ -1172,7 +1172,7 @@ static int sev_send_start(struct kvm *kv + return -EINVAL; + + /* allocate the memory to hold the session data blob */ +- session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT); ++ session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); + if (!session_data) + return -ENOMEM; + +@@ -1296,11 +1296,11 @@ static int sev_send_update_data(struct k + + /* allocate memory for header and transport buffer */ + ret = -ENOMEM; +- hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); ++ hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); + if (!hdr) + goto e_unpin; + +- trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT); ++ trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); + if (!trans_data) + goto e_free_hdr; + diff --git a/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch b/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch new file mode 100644 index 00000000000..094b7de1004 --- /dev/null +++ b/queue-5.17/kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch @@ -0,0 +1,107 @@ +From fee060cd52d69c114b62d1a2948ea9648b5131f9 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 11 Mar 2022 03:27:41 +0000 +Subject: KVM: x86: avoid calling x86 emulator without a decoded instruction + +From: Sean Christopherson + +commit fee060cd52d69c114b62d1a2948ea9648b5131f9 upstream. + +Whenever x86_decode_emulated_instruction() detects a breakpoint, it +returns the value that kvm_vcpu_check_breakpoint() writes into its +pass-by-reference second argument. Unfortunately this is completely +bogus because the expected outcome of x86_decode_emulated_instruction +is an EMULATION_* value. 
+ +Then, if kvm_vcpu_check_breakpoint() does "*r = 0" (corresponding to +a KVM_EXIT_DEBUG userspace exit), it is misunderstood as EMULATION_OK +and x86_emulate_instruction() is called without having decoded the +instruction. This causes various havoc from running with a stale +emulation context. + +The fix is to move the call to kvm_vcpu_check_breakpoint() where it was +before commit 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction +emulation with decoding") introduced x86_decode_emulated_instruction(). +The other caller of the function does not need breakpoint checks, +because it is invoked as part of a vmexit and the processor has already +checked those before executing the instruction that #GP'd. + +This fixes CVE-2022-1852. + +Reported-by: Qiuhao Li +Reported-by: Gaoning Pan +Reported-by: Yongkang Jia +Fixes: 4aa2691dcbd3 ("KVM: x86: Factor out x86 instruction emulation with decoding") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220311032801.3467418-2-seanjc@google.com> +[Rewrote commit message according to Qiuhao's report, since a patch + already existed to fix the bug. - Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8169,7 +8169,7 @@ int kvm_skip_emulated_instruction(struct + } + EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); + +-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) ++static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) + { + if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && + (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { +@@ -8238,25 +8238,23 @@ static bool is_vmware_backdoor_opcode(st + } + + /* +- * Decode to be emulated instruction. Return EMULATION_OK if success. ++ * Decode an instruction for emulation. The caller is responsible for handling ++ * code breakpoints. Note, manually detecting code breakpoints is unnecessary ++ * (and wrong) when emulating on an intercepted fault-like exception[*], as ++ * code breakpoints have higher priority and thus have already been done by ++ * hardware. ++ * ++ * [*] Except #MC, which is higher priority, but KVM should never emulate in ++ * response to a machine check. + */ + int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, + void *insn, int insn_len) + { +- int r = EMULATION_OK; + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; ++ int r; + + init_emulate_ctxt(vcpu); + +- /* +- * We will reenter on the same instruction since we do not set +- * complete_userspace_io. This does not handle watchpoints yet, +- * those would be handled in the emulate_ops. +- */ +- if (!(emulation_type & EMULTYPE_SKIP) && +- kvm_vcpu_check_breakpoint(vcpu, &r)) +- return r; +- + r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); + + trace_kvm_emulate_insn_start(vcpu); +@@ -8289,6 +8287,15 @@ int x86_emulate_instruction(struct kvm_v + if (!(emulation_type & EMULTYPE_NO_DECODE)) { + kvm_clear_exception_queue(vcpu); + ++ /* ++ * Return immediately if RIP hits a code breakpoint, such #DBs ++ * are fault-like and are higher priority than any faults on ++ * the code fetch itself. 
++ */ ++ if (!(emulation_type & EMULTYPE_SKIP) && ++ kvm_vcpu_check_code_breakpoint(vcpu, &r)) ++ return r; ++ + r = x86_decode_emulated_instruction(vcpu, emulation_type, + insn, insn_len); + if (r != EMULATION_OK) { diff --git a/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch b/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch new file mode 100644 index 00000000000..64a18b470b3 --- /dev/null +++ b/queue-5.17/kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch @@ -0,0 +1,63 @@ +From 6fcee03df6a1a3101a77344be37bb85c6142d56c Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Tue, 22 Mar 2022 19:24:42 +0200 +Subject: KVM: x86: avoid loading a vCPU after .vm_destroy was called + +From: Maxim Levitsky + +commit 6fcee03df6a1a3101a77344be37bb85c6142d56c upstream. + +This can cause various unexpected issues, since VM is partially +destroyed at that point. + +For example when AVIC is enabled, this causes avic_vcpu_load to +access physical id page entry which is already freed by .vm_destroy. + +Fixes: 8221c1370056 ("svm: Manage vcpu load/unload when enable AVIC") +Cc: stable@vger.kernel.org +Signed-off-by: Maxim Levitsky +Message-Id: <20220322172449.235575-2-mlevitsk@redhat.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -11655,20 +11655,15 @@ static void kvm_unload_vcpu_mmu(struct k + vcpu_put(vcpu); + } + +-static void kvm_free_vcpus(struct kvm *kvm) ++static void kvm_unload_vcpu_mmus(struct kvm *kvm) + { + unsigned long i; + struct kvm_vcpu *vcpu; + +- /* +- * Unpin any mmu pages first. +- */ + kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_unload_vcpu_mmu(vcpu); + } +- +- kvm_destroy_vcpus(kvm); + } + + void kvm_arch_sync_events(struct kvm *kvm) +@@ -11774,11 +11769,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm + __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); + mutex_unlock(&kvm->slots_lock); + } ++ kvm_unload_vcpu_mmus(kvm); + static_call_cond(kvm_x86_vm_destroy)(kvm); + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); +- kvm_free_vcpus(kvm); ++ kvm_destroy_vcpus(kvm); + kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); + kvm_mmu_uninit_vm(kvm); diff --git a/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch b/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch new file mode 100644 index 00000000000..e4555dc6a64 --- /dev/null +++ b/queue-5.17/kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch @@ -0,0 +1,83 @@ +From 45846661d10422ce9e22da21f8277540b29eca22 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 7 Apr 2022 00:23:13 +0000 +Subject: KVM: x86: Drop WARNs that assert a triple fault never "escapes" from L2 + +From: Sean Christopherson + +commit 45846661d10422ce9e22da21f8277540b29eca22 upstream. + +Remove WARNs that sanity check that KVM never lets a triple fault for L2 +escape and incorrectly end up in L1. 
In normal operation, the sanity +check is perfectly valid, but it incorrectly assumes that it's impossible +for userspace to induce KVM_REQ_TRIPLE_FAULT without bouncing through +KVM_RUN (which guarantees kvm_check_nested_state() will see and handle +the triple fault). + +The WARN can currently be triggered if userspace injects a machine check +while L2 is active and CR4.MCE=0. And a future fix to allow save/restore +of KVM_REQ_TRIPLE_FAULT, e.g. so that a synthesized triple fault isn't +lost on migration, will make it trivially easy for userspace to trigger +the WARN. + +Clearing KVM_REQ_TRIPLE_FAULT when forcibly leaving guest mode is +tempting, but wrong, especially if/when the request is saved/restored, +e.g. if userspace restores events (including a triple fault) and then +restores nested state (which may forcibly leave guest mode). Ignoring +the fact that KVM doesn't currently provide the necessary APIs, it's +userspace's responsibility to manage pending events during save/restore. + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 1399 at arch/x86/kvm/vmx/nested.c:4522 nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] + Modules linked in: kvm_intel kvm irqbypass + CPU: 7 PID: 1399 Comm: state_test Not tainted 5.17.0-rc3+ #808 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 + RIP: 0010:nested_vmx_vmexit+0x7fe/0xd90 [kvm_intel] + Call Trace: + + vmx_leave_nested+0x30/0x40 [kvm_intel] + vmx_set_nested_state+0xca/0x3e0 [kvm_intel] + kvm_arch_vcpu_ioctl+0xf49/0x13e0 [kvm] + kvm_vcpu_ioctl+0x4b9/0x660 [kvm] + __x64_sys_ioctl+0x83/0xb0 + do_syscall_64+0x3b/0xc0 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + ---[ end trace 0000000000000000 ]--- + +Fixes: cb6a32c2b877 ("KVM: x86: Handle triple fault in L2 without killing L1") +Cc: stable@vger.kernel.org +Cc: Chenyi Qiang +Signed-off-by: Sean Christopherson +Message-Id: <20220407002315.78092-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 --- + arch/x86/kvm/vmx/nested.c | 3 --- + 2 files changed, 6 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -790,9 +790,6 @@ int nested_svm_vmexit(struct vcpu_svm *s + struct kvm_host_map map; + int rc; + +- /* Triple faults in L2 should never escape. */ +- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); +- + rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); + if (rc) { + if (rc == -EINVAL) +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4518,9 +4518,6 @@ void nested_vmx_vmexit(struct kvm_vcpu * + /* trying to cancel vmlaunch/vmresume is a bug */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + +- /* Similarly, triple faults in L2 should never escape. 
*/ +- WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); +- + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { + /* + * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map diff --git a/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch b/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch new file mode 100644 index 00000000000..55a211dedf8 --- /dev/null +++ b/queue-5.17/kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch @@ -0,0 +1,40 @@ +From ffd1925a596ce68bed7d81c61cb64bc35f788a9d Mon Sep 17 00:00:00 2001 +From: Yanfei Xu +Date: Mon, 23 May 2022 22:08:21 +0800 +Subject: KVM: x86: Fix the intel_pt PMI handling wrongly considered from guest + +From: Yanfei Xu + +commit ffd1925a596ce68bed7d81c61cb64bc35f788a9d upstream. + +When kernel handles the vm-exit caused by external interrupts and NMI, +it always sets kvm_intr_type to tell if it's dealing an IRQ or NMI. For +the PMI scenario, it could be IRQ or NMI. + +However, intel_pt PMIs are only generated for HARDWARE perf events, and +HARDWARE events are always configured to generate NMIs. Use +kvm_handling_nmi_from_guest() to precisely identify if the intel_pt PMI +came from the guest; this avoids false positives if an intel_pt PMI/NMI +arrives while the host is handling an unrelated IRQ VM-Exit. + +Fixes: db215756ae59 ("KVM: x86: More precisely identify NMI from guest when handling PMI") +Signed-off-by: Yanfei Xu +Message-Id: <20220523140821.1345605-1-yanfei.xu@intel.com> +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/vmx.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -7858,7 +7858,7 @@ static unsigned int vmx_handle_intel_pt_ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + /* '0' on failure so that the !PT case can use a RET0 static call. */ +- if (!kvm_arch_pmi_in_guest(vcpu)) ++ if (!vcpu || !kvm_handling_nmi_from_guest(vcpu)) + return 0; + + kvm_make_request(KVM_REQ_PMI, vcpu); diff --git a/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch b/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch new file mode 100644 index 00000000000..39736c539ed --- /dev/null +++ b/queue-5.17/kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch @@ -0,0 +1,35 @@ +From 33fbe6befa622c082f7d417896832856814bdde0 Mon Sep 17 00:00:00 2001 +From: Maxim Levitsky +Date: Thu, 12 May 2022 13:14:20 +0300 +Subject: KVM: x86: fix typo in __try_cmpxchg_user causing non-atomicness + +From: Maxim Levitsky + +commit 33fbe6befa622c082f7d417896832856814bdde0 upstream. + +This shows up as a TDP MMU leak when running nested. Non-working cmpxchg on L0 +relies makes L1 install two different shadow pages under same spte, and one of +them is leaked. 
+ +Fixes: 1c2361f667f36 ("KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses") +Signed-off-by: Maxim Levitsky +Message-Id: <20220512101420.306759-1-mlevitsk@redhat.com> +Reviewed-by: Sean Christopherson +Reviewed-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7207,7 +7207,7 @@ static int emulator_cmpxchg_emulated(str + goto emul_write; + + hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); +- if (kvm_is_error_hva(addr)) ++ if (kvm_is_error_hva(hva)) + goto emul_write; + + hva += offset_in_page(gpa); diff --git a/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch b/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch new file mode 100644 index 00000000000..1e3ee897a44 --- /dev/null +++ b/queue-5.17/kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch @@ -0,0 +1,89 @@ +From 8d5678a76689acbf91245a3791fe853ab773090f Mon Sep 17 00:00:00 2001 +From: Hou Wenlong +Date: Tue, 15 Mar 2022 17:35:13 +0800 +Subject: KVM: x86/mmu: Don't rebuild page when the page is synced and no tlb flushing is required + +From: Hou Wenlong + +commit 8d5678a76689acbf91245a3791fe853ab773090f upstream. + +Before Commit c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page() +to return true when remote flush is needed"), the return value +of kvm_sync_page() indicates whether the page is synced, and +kvm_mmu_get_page() would rebuild page when the sync fails. +But now, kvm_sync_page() returns false when the page is +synced and no tlb flushing is required, which leads to +rebuild page in kvm_mmu_get_page(). So return the return +value of mmu->sync_page() directly and check it in +kvm_mmu_get_page(). If the sync fails, the page will be +zapped and the invalid_list is not empty, so set flush as +true is accepted in mmu_sync_children(). + +Cc: stable@vger.kernel.org +Fixes: c3e5e415bc1e6 ("KVM: X86: Change kvm_sync_page() to return true when remote flush is needed") +Signed-off-by: Hou Wenlong +Acked-by: Lai Jiangshan +Message-Id: <0dabeeb789f57b0d793f85d073893063e692032d.1647336064.git.houwenlong.hwl@antgroup.com> +[mmu_sync_children should not flush if the page is zapped. 
- Paolo] +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -1894,17 +1894,14 @@ static void kvm_mmu_commit_zap_page(stru + &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ + if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else + +-static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ++static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, + struct list_head *invalid_list) + { + int ret = vcpu->arch.mmu->sync_page(vcpu, sp); + +- if (ret < 0) { ++ if (ret < 0) + kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); +- return false; +- } +- +- return !!ret; ++ return ret; + } + + static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, +@@ -2033,7 +2030,7 @@ static int mmu_sync_children(struct kvm_ + + for_each_sp(pages, sp, parents, i) { + kvm_unlink_unsync_page(vcpu->kvm, sp); +- flush |= kvm_sync_page(vcpu, sp, &invalid_list); ++ flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; + mmu_pages_clear_parents(&parents); + } + if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { +@@ -2074,6 +2071,7 @@ static struct kvm_mmu_page *kvm_mmu_get_ + struct hlist_head *sp_list; + unsigned quadrant; + struct kvm_mmu_page *sp; ++ int ret; + int collisions = 0; + LIST_HEAD(invalid_list); + +@@ -2126,11 +2124,13 @@ static struct kvm_mmu_page *kvm_mmu_get_ + * If the sync fails, the page is zapped. If so, break + * in order to rebuild it. + */ +- if (!kvm_sync_page(vcpu, sp, &invalid_list)) ++ ret = kvm_sync_page(vcpu, sp, &invalid_list); ++ if (ret < 0) + break; + + WARN_ON(!list_empty(&invalid_list)); +- kvm_flush_remote_tlbs(vcpu->kvm); ++ if (ret > 0) ++ kvm_flush_remote_tlbs(vcpu->kvm); + } + + __clear_sp_write_flooding_count(sp); diff --git a/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch new file mode 100644 index 00000000000..3b598209d10 --- /dev/null +++ b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch @@ -0,0 +1,103 @@ +From 1c2361f667f3648855ceae25f1332c18413fdb9f Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 2 Feb 2022 00:49:44 +0000 +Subject: KVM: x86: Use __try_cmpxchg_user() to emulate atomic accesses + +From: Sean Christopherson + +commit 1c2361f667f3648855ceae25f1332c18413fdb9f upstream. + +Use the recently introduce __try_cmpxchg_user() to emulate atomic guest +accesses via the associated userspace address instead of mapping the +backing pfn into kernel address space. Using kvm_vcpu_map() is unsafe as +it does not coordinate with KVM's mmu_notifier to ensure the hva=>pfn +translation isn't changed/unmapped in the memremap() path, i.e. when +there's no struct page and thus no elevated refcount. 
+ +Fixes: 42e35f8072c3 ("KVM/X86: Use kvm_vcpu_map in emulator_cmpxchg_emulated") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/x86.c | 35 ++++++++++++++--------------------- + 1 file changed, 14 insertions(+), 21 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -7168,15 +7168,8 @@ static int emulator_write_emulated(struc + exception, &write_emultor); + } + +-#define CMPXCHG_TYPE(t, ptr, old, new) \ +- (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) +- +-#ifdef CONFIG_X86_64 +-# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) +-#else +-# define CMPXCHG64(ptr, old, new) \ +- (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) +-#endif ++#define emulator_try_cmpxchg_user(t, ptr, old, new) \ ++ (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t)) + + static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, + unsigned long addr, +@@ -7185,12 +7178,11 @@ static int emulator_cmpxchg_emulated(str + unsigned int bytes, + struct x86_exception *exception) + { +- struct kvm_host_map map; + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + u64 page_line_mask; ++ unsigned long hva; + gpa_t gpa; +- char *kaddr; +- bool exchanged; ++ int r; + + /* guests cmpxchg8b have to be emulated atomically */ + if (bytes > 8 || (bytes & (bytes - 1))) +@@ -7214,31 +7206,32 @@ static int emulator_cmpxchg_emulated(str + if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) + goto emul_write; + +- if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) ++ hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); ++ if (kvm_is_error_hva(addr)) + goto emul_write; + +- kaddr = map.hva + offset_in_page(gpa); ++ hva += offset_in_page(gpa); + + switch (bytes) { + case 1: +- exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u8, hva, old, new); + break; + case 2: +- exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u16, hva, old, new); + break; + case 4: +- exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u32, hva, old, new); + break; + case 8: +- exchanged = CMPXCHG64(kaddr, old, new); ++ r = emulator_try_cmpxchg_user(u64, hva, old, new); + break; + default: + BUG(); + } + +- kvm_vcpu_unmap(vcpu, &map, true); +- +- if (!exchanged) ++ if (r < 0) ++ goto emul_write; ++ if (r) + return X86EMUL_CMPXCHG_FAILED; + + kvm_page_track_write(vcpu, gpa, new, bytes); diff --git a/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch new file mode 100644 index 00000000000..5eaf367fe73 --- /dev/null +++ b/queue-5.17/kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch @@ -0,0 +1,84 @@ +From f122dfe4476890d60b8c679128cd2259ec96a24c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 2 Feb 2022 00:49:43 +0000 +Subject: KVM: x86: Use __try_cmpxchg_user() to update guest PTE A/D bits + +From: Sean Christopherson + +commit f122dfe4476890d60b8c679128cd2259ec96a24c upstream. + +Use the recently introduced __try_cmpxchg_user() to update guest PTE A/D +bits instead of mapping the PTE into kernel address space. 
The VM_PFNMAP +path is broken as it assumes that vm_pgoff is the base pfn of the mapped +VMA range, which is conceptually wrong as vm_pgoff is the offset relative +to the file and has nothing to do with the pfn. The horrific hack worked +for the original use case (backing guest memory with /dev/mem), but leads +to accessing "random" pfns for pretty much any other VM_PFNMAP case. + +Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") +Debugged-by: Tadeusz Struk +Tested-by: Tadeusz Struk +Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/paging_tmpl.h | 38 +------------------------------------- + 1 file changed, 1 insertion(+), 37 deletions(-) + +--- a/arch/x86/kvm/mmu/paging_tmpl.h ++++ b/arch/x86/kvm/mmu/paging_tmpl.h +@@ -144,42 +144,6 @@ static bool FNAME(is_rsvd_bits_set)(stru + FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); + } + +-static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, +- pt_element_t __user *ptep_user, unsigned index, +- pt_element_t orig_pte, pt_element_t new_pte) +-{ +- signed char r; +- +- if (!user_access_begin(ptep_user, sizeof(pt_element_t))) +- return -EFAULT; +- +-#ifdef CMPXCHG +- asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" +- "setnz %b[r]\n" +- "2:" +- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) +- : [ptr] "+m" (*ptep_user), +- [old] "+a" (orig_pte), +- [r] "=q" (r) +- : [new] "r" (new_pte) +- : "memory"); +-#else +- asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" +- "setnz %b[r]\n" +- "2:" +- _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) +- : [ptr] "+m" (*ptep_user), +- [old] "+A" (orig_pte), +- [r] "=q" (r) +- : [new_lo] "b" ((u32)new_pte), +- [new_hi] "c" ((u32)(new_pte >> 32)) +- : "memory"); +-#endif +- +- user_access_end(); +- return r; +-} +- + static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *sp, u64 *spte, + u64 gpte) +@@ -278,7 +242,7 @@ static int FNAME(update_accessed_dirty_b + if (unlikely(!walker->pte_writable[level - 1])) + continue; + +- ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); ++ ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault); + if (ret) + return ret; + diff --git a/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch b/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch new file mode 100644 index 00000000000..e863f1f02e9 --- /dev/null +++ b/queue-5.17/net-ipa-compute-proper-aggregation-limit.patch @@ -0,0 +1,49 @@ +From c5794097b269f15961ed78f7f27b50e51766dec9 Mon Sep 17 00:00:00 2001 +From: Alex Elder +Date: Thu, 21 Apr 2022 13:53:33 -0500 +Subject: net: ipa: compute proper aggregation limit + +From: Alex Elder + +commit c5794097b269f15961ed78f7f27b50e51766dec9 upstream. + +The aggregation byte limit for an endpoint is currently computed +based on the endpoint's receive buffer size. + +However, some bytes at the front of each receive buffer are reserved +on the assumption that--as with SKBs--it might be useful to insert +data (such as headers) before what lands in the buffer. + +The aggregation byte limit currently doesn't take into account that +reserved space, and as a result, aggregation could require space +past that which is available in the buffer. 
+ +Fix this by reducing the size used to compute the aggregation byte +limit by the NET_SKB_PAD offset reserved for each receive buffer. + +Signed-off-by: Alex Elder +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipa/ipa_endpoint.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ipa/ipa_endpoint.c ++++ b/drivers/net/ipa/ipa_endpoint.c +@@ -723,13 +723,15 @@ static void ipa_endpoint_init_aggr(struc + + if (endpoint->data->aggregation) { + if (!endpoint->toward_ipa) { ++ u32 buffer_size; + bool close_eof; + u32 limit; + + val |= u32_encode_bits(IPA_ENABLE_AGGR, AGGR_EN_FMASK); + val |= u32_encode_bits(IPA_GENERIC, AGGR_TYPE_FMASK); + +- limit = ipa_aggr_size_kb(IPA_RX_BUFFER_SIZE); ++ buffer_size = IPA_RX_BUFFER_SIZE - NET_SKB_PAD; ++ limit = ipa_aggr_size_kb(buffer_size); + val |= aggr_byte_limit_encoded(version, limit); + + limit = IPA_AGGR_TIME_LIMIT; diff --git a/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch b/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch new file mode 100644 index 00000000000..b634dd75240 --- /dev/null +++ b/queue-5.17/netfilter-conntrack-re-fetch-conntrack-after-insertion.patch @@ -0,0 +1,43 @@ +From 56b14ecec97f39118bf85c9ac2438c5a949509ed Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Fri, 20 May 2022 00:02:04 +0200 +Subject: netfilter: conntrack: re-fetch conntrack after insertion + +From: Florian Westphal + +commit 56b14ecec97f39118bf85c9ac2438c5a949509ed upstream. + +In case the conntrack is clashing, insertion can free skb->_nfct and +set skb->_nfct to the already-confirmed entry. + +This wasn't found before because the conntrack entry and the extension +space used to free'd after an rcu grace period, plus the race needs +events enabled to trigger. + +Reported-by: +Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") +Fixes: 2ad9d7747c10 ("netfilter: conntrack: free extension area immediately") +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_conntrack_core.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/include/net/netfilter/nf_conntrack_core.h ++++ b/include/net/netfilter/nf_conntrack_core.h +@@ -58,8 +58,13 @@ static inline int nf_conntrack_confirm(s + int ret = NF_ACCEPT; + + if (ct) { +- if (!nf_ct_is_confirmed(ct)) ++ if (!nf_ct_is_confirmed(ct)) { + ret = __nf_conntrack_confirm(skb); ++ ++ if (ret == NF_ACCEPT) ++ ct = (struct nf_conn *)skb_nfct(skb); ++ } ++ + if (likely(ret == NF_ACCEPT)) + nf_ct_deliver_cached_events(ct); + } diff --git a/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch b/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch new file mode 100644 index 00000000000..07dc611b5f5 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch @@ -0,0 +1,137 @@ +From f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Mon, 30 May 2022 18:24:06 +0200 +Subject: netfilter: nf_tables: double hook unregistration in netns path + +From: Pablo Neira Ayuso + +commit f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 upstream. + +__nft_release_hooks() is called from pre_netns exit path which +unregisters the hooks, then the NETDEV_UNREGISTER event is triggered +which unregisters the hooks again. 
+ +[ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 +[ 565.253682] Workqueue: netns cleanup_net +[ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 +[...] +[ 565.297120] Call Trace: +[ 565.300900] +[ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] +[ 565.308518] raw_notifier_call_chain+0x63/0x80 +[ 565.312386] unregister_netdevice_many+0x54f/0xb50 + +Unregister and destroy netdev hook from netns pre_exit via kfree_rcu +so the NETDEV_UNREGISTER path see unregistered hooks. + +Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 54 +++++++++++++++++++++++++++++++----------- + 1 file changed, 41 insertions(+), 13 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -222,12 +222,18 @@ err_register: + } + + static void nft_netdev_unregister_hooks(struct net *net, +- struct list_head *hook_list) ++ struct list_head *hook_list, ++ bool release_netdev) + { +- struct nft_hook *hook; ++ struct nft_hook *hook, *next; + +- list_for_each_entry(hook, hook_list, list) ++ list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); ++ if (release_netdev) { ++ list_del(&hook->list); ++ kfree_rcu(hook, rcu); ++ } ++ } + } + + static int nf_tables_register_hook(struct net *net, +@@ -253,9 +259,10 @@ static int nf_tables_register_hook(struc + return nf_register_net_hook(net, &basechain->ops); + } + +-static void nf_tables_unregister_hook(struct net *net, +- const struct nft_table *table, +- struct nft_chain *chain) ++static void __nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain, ++ bool release_netdev) + { + struct nft_base_chain *basechain; + const struct nf_hook_ops *ops; +@@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(st + return basechain->type->ops_unregister(net, ops); + + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) +- nft_netdev_unregister_hooks(net, &basechain->hook_list); ++ nft_netdev_unregister_hooks(net, &basechain->hook_list, ++ release_netdev); + else + nf_unregister_net_hook(net, &basechain->ops); + } + ++static void nf_tables_unregister_hook(struct net *net, ++ const struct nft_table *table, ++ struct nft_chain *chain) ++{ ++ return __nf_tables_unregister_hook(net, table, chain, false); ++} ++ + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) + { + struct nftables_pernet *nft_net = nft_pernet(net); +@@ -7222,13 +7237,25 @@ static void nft_unregister_flowtable_hoo + FLOW_BLOCK_UNBIND); + } + +-static void nft_unregister_flowtable_net_hooks(struct net *net, +- struct list_head *hook_list) ++static void __nft_unregister_flowtable_net_hooks(struct net *net, ++ struct list_head *hook_list, ++ bool release_netdev) + { +- struct nft_hook *hook; ++ struct nft_hook *hook, *next; + +- list_for_each_entry(hook, hook_list, list) ++ list_for_each_entry_safe(hook, next, hook_list, list) { + nf_unregister_net_hook(net, &hook->ops); ++ if (release_netdev) { ++ list_del(&hook->list); ++ kfree_rcu(hook); ++ } ++ } ++} ++ ++static void nft_unregister_flowtable_net_hooks(struct net *net, ++ struct list_head *hook_list) ++{ ++ __nft_unregister_flowtable_net_hooks(net, hook_list, false); + } + + 
static int nft_register_flowtable_net_hooks(struct net *net, +@@ -9672,9 +9699,10 @@ static void __nft_release_hook(struct ne + struct nft_chain *chain; + + list_for_each_entry(chain, &table->chains, list) +- nf_tables_unregister_hook(net, table, chain); ++ __nf_tables_unregister_hook(net, table, chain, true); + list_for_each_entry(flowtable, &table->flowtables, list) +- nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); ++ __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, ++ true); + } + + static void __nft_release_hooks(struct net *net) diff --git a/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch b/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch new file mode 100644 index 00000000000..3a76450d359 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch @@ -0,0 +1,32 @@ +From 3923b1e4406680d57da7e873da77b1683035d83f Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Mon, 30 May 2022 18:24:05 +0200 +Subject: netfilter: nf_tables: hold mutex on netns pre_exit path + +From: Pablo Neira Ayuso + +commit 3923b1e4406680d57da7e873da77b1683035d83f upstream. + +clean_net() runs in workqueue while walking over the lists, grab mutex. + +Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9813,7 +9813,11 @@ static int __net_init nf_tables_init_net + + static void __net_exit nf_tables_pre_exit_net(struct net *net) + { ++ struct nftables_pernet *nft_net = nft_pernet(net); ++ ++ mutex_lock(&nft_net->commit_mutex); + __nft_release_hooks(net); ++ mutex_unlock(&nft_net->commit_mutex); + } + + static void __net_exit nf_tables_exit_net(struct net *net) diff --git a/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch b/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch new file mode 100644 index 00000000000..922038f8598 --- /dev/null +++ b/queue-5.17/netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch @@ -0,0 +1,74 @@ +From fecf31ee395b0295f2d7260aa29946b7605f7c85 Mon Sep 17 00:00:00 2001 +From: Pablo Neira Ayuso +Date: Fri, 27 May 2022 09:56:18 +0200 +Subject: netfilter: nf_tables: sanitize nft_set_desc_concat_parse() + +From: Pablo Neira Ayuso + +commit fecf31ee395b0295f2d7260aa29946b7605f7c85 upstream. + +Add several sanity checks for nft_set_desc_concat_parse(): + +- validate desc->field_count not larger than desc->field_len array. +- field length cannot be larger than desc->field_len (ie. U8_MAX) +- total length of the concatenation cannot be larger than register array. + +Joint work with Florian Westphal. 
+ +Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") +Reported-by: +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_tables_api.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -4167,6 +4167,9 @@ static int nft_set_desc_concat_parse(con + u32 len; + int err; + ++ if (desc->field_count >= ARRAY_SIZE(desc->field_len)) ++ return -E2BIG; ++ + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, + nft_concat_policy, NULL); + if (err < 0) +@@ -4176,9 +4179,8 @@ static int nft_set_desc_concat_parse(con + return -EINVAL; + + len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); +- +- if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) +- return -E2BIG; ++ if (!len || len > U8_MAX) ++ return -EINVAL; + + desc->field_len[desc->field_count++] = len; + +@@ -4189,7 +4191,8 @@ static int nft_set_desc_concat(struct nf + const struct nlattr *nla) + { + struct nlattr *attr; +- int rem, err; ++ u32 num_regs = 0; ++ int rem, err, i; + + nla_for_each_nested(attr, nla, rem) { + if (nla_type(attr) != NFTA_LIST_ELEM) +@@ -4200,6 +4203,12 @@ static int nft_set_desc_concat(struct nf + return err; + } + ++ for (i = 0; i < desc->field_count; i++) ++ num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); ++ ++ if (num_regs > NFT_REG32_COUNT) ++ return -E2BIG; ++ + return 0; + } + diff --git a/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch b/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch new file mode 100644 index 00000000000..9905e5c4eb4 --- /dev/null +++ b/queue-5.17/netfilter-nft_limit-clone-packet-limits-cost-value.patch @@ -0,0 +1,31 @@ +From 558254b0b602b8605d7246a10cfeb584b1fcabfc Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Tue, 24 May 2022 14:50:01 +0200 +Subject: netfilter: nft_limit: Clone packet limits' cost value + +From: Phil Sutter + +commit 558254b0b602b8605d7246a10cfeb584b1fcabfc upstream. + +When cloning a packet-based limit expression, copy the cost value as +well. Otherwise the new limit is not functional anymore. 
+ +Fixes: 3b9e2ea6c11bf ("netfilter: nft_limit: move stateful fields out of expression data") +Signed-off-by: Phil Sutter +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nft_limit.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/netfilter/nft_limit.c ++++ b/net/netfilter/nft_limit.c +@@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct n + struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); + struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); + ++ priv_dst->cost = priv_src->cost; ++ + return nft_limit_clone(&priv_dst->limit, &priv_src->limit); + } + diff --git a/queue-5.17/series b/queue-5.17/series index 075c0e0cbd5..149793ac5d7 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -14,3 +14,26 @@ i2c-ismt-prevent-memory-corruption-in-ismt_access.patch assoc_array-fix-bug_on-during-garbage-collect.patch pipe-make-poll_usage-boolean-and-annotate-its-access.patch pipe-fix-missing-lock-in-pipe_resize_ring.patch +net-ipa-compute-proper-aggregation-limit.patch +drm-i915-fix-wstringop-overflow-warning-in-call-to-intel_read_wm_latency.patch +exfat-check-if-cluster-num-is-valid.patch +exfat-fix-referencing-wrong-parent-directory-information-after-renaming.patch +netfilter-nft_limit-clone-packet-limits-cost-value.patch +netfilter-nf_tables-sanitize-nft_set_desc_concat_parse.patch +netfilter-nf_tables-hold-mutex-on-netns-pre_exit-path.patch +netfilter-nf_tables-double-hook-unregistration-in-netns-path.patch +netfilter-conntrack-re-fetch-conntrack-after-insertion.patch +kvm-ppc-book3s-hv-fix-incorrect-null-check-on-list-iterator.patch +x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch +x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch +x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch +x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch +kvm-x86-use-__try_cmpxchg_user-to-update-guest-pte-a-d-bits.patch +kvm-x86-use-__try_cmpxchg_user-to-emulate-atomic-accesses.patch +kvm-x86-fix-typo-in-__try_cmpxchg_user-causing-non-atomicness.patch +kvm-x86-avoid-calling-x86-emulator-without-a-decoded-instruction.patch +kvm-x86-avoid-loading-a-vcpu-after-.vm_destroy-was-called.patch +kvm-x86-fix-the-intel_pt-pmi-handling-wrongly-considered-from-guest.patch +kvm-x86-drop-warns-that-assert-a-triple-fault-never-escapes-from-l2.patch +kvm-x86-mmu-don-t-rebuild-page-when-the-page-is-synced-and-no-tlb-flushing-is-required.patch +kvm-svm-use-kzalloc-for-sev-ioctl-interfaces-to-prevent-kernel-data-leak.patch diff --git a/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch b/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch new file mode 100644 index 00000000000..f71d4648710 --- /dev/null +++ b/queue-5.17/x86-fpu-kvm-set-the-base-guest-fpu-uabi-size-to-sizeof-struct-kvm_xsave.patch @@ -0,0 +1,113 @@ +From d187ba5312307d51818beafaad87d28a7d939adf Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 4 May 2022 00:12:19 +0000 +Subject: x86/fpu: KVM: Set the base guest FPU uABI size to sizeof(struct kvm_xsave) + +From: Sean Christopherson + +commit d187ba5312307d51818beafaad87d28a7d939adf upstream. + +Set the starting uABI size of KVM's guest FPU to 'struct kvm_xsave', +i.e. to KVM's historical uABI size. When saving FPU state for usersapce, +KVM (well, now the FPU) sets the FP+SSE bits in the XSAVE header even if +the host doesn't support XSAVE. 
Setting the XSAVE header allows the VM +to be migrated to a host that does support XSAVE without the new host +having to handle FPU state that may or may not be compatible with XSAVE. + +Setting the uABI size to the host's default size results in out-of-bounds +writes (setting the FP+SSE bits) and data corruption (that is thankfully +caught by KASAN) when running on hosts without XSAVE, e.g. on Core2 CPUs. + +WARN if the default size is larger than KVM's historical uABI size; all +features that can push the FPU size beyond the historical size must be +opt-in. + + ================================================================== + BUG: KASAN: slab-out-of-bounds in fpu_copy_uabi_to_guest_fpstate+0x86/0x130 + Read of size 8 at addr ffff888011e33a00 by task qemu-build/681 + CPU: 1 PID: 681 Comm: qemu-build Not tainted 5.18.0-rc5-KASAN-amd64 #1 + Hardware name: /DG35EC, BIOS ECG3510M.86A.0118.2010.0113.1426 01/13/2010 + Call Trace: + + dump_stack_lvl+0x34/0x45 + print_report.cold+0x45/0x575 + kasan_report+0x9b/0xd0 + fpu_copy_uabi_to_guest_fpstate+0x86/0x130 + kvm_arch_vcpu_ioctl+0x72a/0x1c50 [kvm] + kvm_vcpu_ioctl+0x47f/0x7b0 [kvm] + __x64_sys_ioctl+0x5de/0xc90 + do_syscall_64+0x31/0x50 + entry_SYSCALL_64_after_hwframe+0x44/0xae + + Allocated by task 0: + (stack is not available) + The buggy address belongs to the object at ffff888011e33800 + which belongs to the cache kmalloc-512 of size 512 + The buggy address is located 0 bytes to the right of + 512-byte region [ffff888011e33800, ffff888011e33a00) + The buggy address belongs to the physical page: + page:0000000089cd4adb refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11e30 + head:0000000089cd4adb order:2 compound_mapcount:0 compound_pincount:0 + flags: 0x4000000000010200(slab|head|zone=1) + raw: 4000000000010200 dead000000000100 dead000000000122 ffff888001041c80 + raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 + page dumped because: kasan: bad access detected + Memory state around the buggy address: + ffff888011e33900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff888011e33980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + >ffff888011e33a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ^ + ffff888011e33a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff888011e33b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ================================================================== + Disabling lock debugging due to kernel taint + +Fixes: be50b2065dfa ("kvm: x86: Add support for getting/setting expanded xstate buffer") +Fixes: c60427dd50ba ("x86/fpu: Add uabi_size to guest_fpu") +Reported-by: Zdenek Kaspar +Cc: Maciej S. 
Szmigiero +Cc: Paolo Bonzini +Cc: kvm@vger.kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Tested-by: Zdenek Kaspar +Message-Id: <20220504001219.983513-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/fpu/core.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/fpu/core.c ++++ b/arch/x86/kernel/fpu/core.c +@@ -14,6 +14,8 @@ + #include + #include + ++#include ++ + #include + #include + #include +@@ -232,7 +234,20 @@ bool fpu_alloc_guest_fpstate(struct fpu_ + gfpu->fpstate = fpstate; + gfpu->xfeatures = fpu_user_cfg.default_features; + gfpu->perm = fpu_user_cfg.default_features; +- gfpu->uabi_size = fpu_user_cfg.default_size; ++ ++ /* ++ * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state ++ * to userspace, even when XSAVE is unsupported, so that restoring FPU ++ * state on a different CPU that does support XSAVE can cleanly load ++ * the incoming state using its natural XSAVE. In other words, KVM's ++ * uABI size may be larger than this host's default size. Conversely, ++ * the default size should never be larger than KVM's base uABI size; ++ * all features that can expand the uABI size must be opt-in. ++ */ ++ gfpu->uabi_size = sizeof(struct kvm_xsave); ++ if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) ++ gfpu->uabi_size = fpu_user_cfg.default_size; ++ + fpu_init_guest_permissions(gfpu); + + return true; diff --git a/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch b/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch new file mode 100644 index 00000000000..44d39c4ebd1 --- /dev/null +++ b/queue-5.17/x86-kvm-alloc-dummy-async-pf-token-outside-of-raw-spinlock.patch @@ -0,0 +1,91 @@ +From 0547758a6de3cc71a0cfdd031a3621a30db6a68b Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 19 May 2022 07:57:11 -0700 +Subject: x86/kvm: Alloc dummy async #PF token outside of raw spinlock + +From: Sean Christopherson + +commit 0547758a6de3cc71a0cfdd031a3621a30db6a68b upstream. + +Drop the raw spinlock in kvm_async_pf_task_wake() before allocating the +the dummy async #PF token, the allocator is preemptible on PREEMPT_RT +kernels and must not be called from truly atomic contexts. + +Opportunistically document why it's ok to loop on allocation failure, +i.e. why the function won't get stuck in an infinite loop. + +Reported-by: Yajun Deng +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kvm.c | 41 +++++++++++++++++++++++++++-------------- + 1 file changed, 27 insertions(+), 14 deletions(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -191,7 +191,7 @@ void kvm_async_pf_task_wake(u32 token) + { + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; +- struct kvm_task_sleep_node *n; ++ struct kvm_task_sleep_node *n, *dummy = NULL; + + if (token == ~0) { + apf_task_wake_all(); +@@ -203,28 +203,41 @@ again: + n = _find_apf_task(b, token); + if (!n) { + /* +- * async PF was not yet handled. +- * Add dummy entry for the token. ++ * Async #PF not yet handled, add a dummy entry for the token. ++ * Allocating the token must be down outside of the raw lock ++ * as the allocator is preemptible on PREEMPT_RT kernels. 
+ */ +- n = kzalloc(sizeof(*n), GFP_ATOMIC); +- if (!n) { ++ if (!dummy) { ++ raw_spin_unlock(&b->lock); ++ dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ++ + /* +- * Allocation failed! Busy wait while other cpu +- * handles async PF. ++ * Continue looping on allocation failure, eventually ++ * the async #PF will be handled and allocating a new ++ * node will be unnecessary. ++ */ ++ if (!dummy) ++ cpu_relax(); ++ ++ /* ++ * Recheck for async #PF completion before enqueueing ++ * the dummy token to avoid duplicate list entries. + */ +- raw_spin_unlock(&b->lock); +- cpu_relax(); + goto again; + } +- n->token = token; +- n->cpu = smp_processor_id(); +- init_swait_queue_head(&n->wq); +- hlist_add_head(&n->link, &b->list); ++ dummy->token = token; ++ dummy->cpu = smp_processor_id(); ++ init_swait_queue_head(&dummy->wq); ++ hlist_add_head(&dummy->link, &b->list); ++ dummy = NULL; + } else { + apf_task_wake_one(n); + } + raw_spin_unlock(&b->lock); +- return; ++ ++ /* A dummy token might be allocated and ultimately not used. */ ++ if (dummy) ++ kfree(dummy); + } + EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); + diff --git a/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch b/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch new file mode 100644 index 00000000000..8ace75d0694 --- /dev/null +++ b/queue-5.17/x86-kvm-use-correct-gfp-flags-for-preemption-disabled.patch @@ -0,0 +1,81 @@ +From baec4f5a018fe2d708fc1022330dba04b38b5fe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 24 May 2022 09:43:31 -0400 +Subject: x86, kvm: use correct GFP flags for preemption disabled + +From: Paolo Bonzini + +commit baec4f5a018fe2d708fc1022330dba04b38b5fe3 upstream. + +Commit ddd7ed842627 ("x86/kvm: Alloc dummy async #PF token outside of +raw spinlock") leads to the following Smatch static checker warning: + + arch/x86/kernel/kvm.c:212 kvm_async_pf_task_wake() + warn: sleeping in atomic context + +arch/x86/kernel/kvm.c + 202 raw_spin_lock(&b->lock); + 203 n = _find_apf_task(b, token); + 204 if (!n) { + 205 /* + 206 * Async #PF not yet handled, add a dummy entry for the token. + 207 * Allocating the token must be down outside of the raw lock + 208 * as the allocator is preemptible on PREEMPT_RT kernels. + 209 */ + 210 if (!dummy) { + 211 raw_spin_unlock(&b->lock); +--> 212 dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); + ^^^^^^^^^^ +Smatch thinks the caller has preempt disabled. The `smdb.py preempt +kvm_async_pf_task_wake` output call tree is: + +sysvec_kvm_asyncpf_interrupt() <- disables preempt +-> __sysvec_kvm_asyncpf_interrupt() + -> kvm_async_pf_task_wake() + +The caller is this: + +arch/x86/kernel/kvm.c + 290 DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) + 291 { + 292 struct pt_regs *old_regs = set_irq_regs(regs); + 293 u32 token; + 294 + 295 ack_APIC_irq(); + 296 + 297 inc_irq_stat(irq_hv_callback_count); + 298 + 299 if (__this_cpu_read(apf_reason.enabled)) { + 300 token = __this_cpu_read(apf_reason.token); + 301 kvm_async_pf_task_wake(token); + 302 __this_cpu_write(apf_reason.token, 0); + 303 wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1); + 304 } + 305 + 306 set_irq_regs(old_regs); + 307 } + +The DEFINE_IDTENTRY_SYSVEC() is a wrapper that calls this function +from the call_on_irqstack_cond(). It's inside the call_on_irqstack_cond() +where preempt is disabled (unless it's already disabled). The +irq_enter/exit_rcu() functions disable/enable preempt. 
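The underlying rule, in a rough sketch (not code from this patch; 'p' and 'size' are placeholders): the only question is whether the allocation may sleep in the caller's context.

          preempt_disable();
          p = kzalloc(size, GFP_KERNEL);  /* invalid here: GFP_KERNEL may sleep */
          p = kzalloc(size, GFP_ATOMIC);  /* fine: GFP_ATOMIC never sleeps */
          preempt_enable();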
+ +Reported-by: Dan Carpenter +Cc: stable@vger.kernel.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/kvm.c ++++ b/arch/x86/kernel/kvm.c +@@ -209,7 +209,7 @@ again: + */ + if (!dummy) { + raw_spin_unlock(&b->lock); +- dummy = kzalloc(sizeof(*dummy), GFP_KERNEL); ++ dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); + + /* + * Continue looping on allocation failure, eventually diff --git a/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch b/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch new file mode 100644 index 00000000000..b0bb6f1e256 --- /dev/null +++ b/queue-5.17/x86-uaccess-implement-macros-for-cmpxchg-on-user-addresses.patch @@ -0,0 +1,191 @@ +From 989b5db215a2f22f89d730b607b071d964780f10 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Wed, 2 Feb 2022 00:49:42 +0000 +Subject: x86/uaccess: Implement macros for CMPXCHG on user addresses + +From: Peter Zijlstra + +commit 989b5db215a2f22f89d730b607b071d964780f10 upstream. + +Add support for CMPXCHG loops on userspace addresses. Provide both an +"unsafe" version for tight loops that do their own uaccess begin/end, as +well as a "safe" version for use cases where the CMPXCHG is not buried in +a loop, e.g. KVM will resume the guest instead of looping when emulation +of a guest atomic accesses fails the CMPXCHG. + +Provide 8-byte versions for 32-bit kernels so that KVM can do CMPXCHG on +guest PAE PTEs, which are accessed via userspace addresses. + +Guard the asm_volatile_goto() variation with CC_HAS_ASM_GOTO_TIED_OUTPUT, +the "+m" constraint fails on some compilers that otherwise support +CC_HAS_ASM_GOTO_OUTPUT. + +Cc: stable@vger.kernel.org +Signed-off-by: Peter Zijlstra (Intel) +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Message-Id: <20220202004945.2540433-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/uaccess.h | 142 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 142 insertions(+) + +--- a/arch/x86/include/asm/uaccess.h ++++ b/arch/x86/include/asm/uaccess.h +@@ -409,6 +409,103 @@ do { \ + + #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + ++#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm_volatile_goto("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ _ASM_EXTABLE_UA(1b, %l[label]) \ ++ : CC_OUT(z) (success), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory" \ ++ : label); \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++#endif // CONFIG_X86_32 ++#else // 
!CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ ++ int __err = 0; \ ++ bool success; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ ++ CC_SET(z) \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ ++ %[errout]) \ ++ : CC_OUT(z) (success), \ ++ [errout] "+r" (__err), \ ++ [ptr] "+m" (*_ptr), \ ++ [old] "+a" (__old) \ ++ : [new] ltype (__new) \ ++ : "memory", "cc"); \ ++ if (unlikely(__err)) \ ++ goto label; \ ++ if (unlikely(!success)) \ ++ *_old = __old; \ ++ likely(success); }) ++ ++#ifdef CONFIG_X86_32 ++/* ++ * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. ++ * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are ++ * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses ++ * both ESI and EDI for the memory operand, compilation will fail if the error ++ * is an input+output as there will be no register available for input. ++ */ ++#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ ++ int __result; \ ++ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ ++ __typeof__(*(_ptr)) __old = *_old; \ ++ __typeof__(*(_ptr)) __new = (_new); \ ++ asm volatile("\n" \ ++ "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ ++ "mov $0, %%ecx\n\t" \ ++ "setz %%cl\n" \ ++ "2:\n" \ ++ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ ++ : [result]"=c" (__result), \ ++ "+A" (__old), \ ++ [ptr] "+m" (*_ptr) \ ++ : "b" ((u32)__new), \ ++ "c" ((u32)((u64)__new >> 32)) \ ++ : "memory", "cc"); \ ++ if (unlikely(__result < 0)) \ ++ goto label; \ ++ if (unlikely(!__result)) \ ++ *_old = __old; \ ++ likely(__result); }) ++#endif // CONFIG_X86_32 ++#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT ++ + /* FIXME: this hack is definitely wrong -AK */ + struct __large_struct { unsigned long buf[100]; }; + #define __m(x) (*(struct __large_struct __user *)(x)) +@@ -501,6 +598,51 @@ do { \ + } while (0) + #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT + ++extern void __try_cmpxchg_user_wrong_size(void); ++ ++#ifndef CONFIG_X86_32 ++#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ ++ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) ++#endif ++ ++/* ++ * Force the pointer to u to match the size expected by the asm helper. ++ * clang/LLVM compiles all cases and only discards the unused paths after ++ * processing errors, which breaks i386 if the pointer is an 8-byte value. ++ */ ++#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ bool __ret; \ ++ __chk_user_ptr(_ptr); \ ++ switch (sizeof(*(_ptr))) { \ ++ case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ ++ (__force u8 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ ++ (__force u16 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ ++ (__force u32 *)(_ptr), (_oldp), \ ++ (_nval), _label); \ ++ break; \ ++ case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ ++ (_nval), _label); \ ++ break; \ ++ default: __try_cmpxchg_user_wrong_size(); \ ++ } \ ++ __ret; }) ++ ++/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. 
*/ ++#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ ++ int __ret = -EFAULT; \ ++ __uaccess_begin_nospec(); \ ++ __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ ++_label: \ ++ __uaccess_end(); \ ++ __ret; \ ++ }) ++ + /* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. -- 2.47.3
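As a usage sketch for the new helpers (the function name, the OR operation and the 'fault' label are illustrative, not taken from this series; assumes a kernel context with the user pointer already range-checked): a tight loop around the "unsafe" variant, with the caller doing its own user_access_begin()/user_access_end() and supplying the fault label, as the changelog describes.

  static int user_or_u32(u32 __user *uptr, u32 bits)
  {
          u32 old;

          if (get_user(old, uptr))
                  return -EFAULT;
          if (!user_access_begin(uptr, sizeof(*uptr)))
                  return -EFAULT;
          /* On a compare failure the helper refreshes 'old', so just retry. */
          while (!unsafe_try_cmpxchg_user(uptr, &old, old | bits, fault))
                  ;
          user_access_end();
          return 0;
  fault:
          user_access_end();
          return -EFAULT;
  }

The __try_cmpxchg_user() wrapper instead handles the begin/end and the label internally and returns 0 on success, 1 on a failed compare and -EFAULT on a fault, per the comment above it.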