From: Greg Kroah-Hartman Date: Fri, 12 Aug 2022 14:41:43 +0000 (+0200) Subject: 5.18-stable patches X-Git-Tag: v5.15.61~200 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=17428860da4a3ad5d7a8f41b8390dcfac8b3e429;p=thirdparty%2Fkernel%2Fstable-queue.git 5.18-stable patches added patches: kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch --- diff --git a/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch b/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch new file mode 100644 index 00000000000..719dd26dc1d --- /dev/null +++ b/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch @@ -0,0 +1,38 @@ +From 3ba2c95ea180740b16281fa43a3ee5f47279c0ed Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:22 +0000 +Subject: KVM: Do not incorporate page offset into gfn=>pfn cache user address + +From: Sean Christopherson + +commit 3ba2c95ea180740b16281fa43a3ee5f47279c0ed upstream. + +Don't adjust the userspace address in the gfn=>pfn cache by the page +offset from the gpa. KVM should never use the user address directly, and +all KVM operations that translate a user address to something else +require the user address to be page aligned. Ignoring the offset will +allow the cache to reuse a gfn=>hva translation in the unlikely event +that the page offset of the gpa changes, but the gfn does not. And more +importantly, not having to (un)adjust the user address will simplify a +future bug fix. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -274,8 +274,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + ret = -EFAULT; + goto out; + } +- +- gpc->uhva += page_offset; + } + + /* diff --git a/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch b/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch new file mode 100644 index 00000000000..993e67bfae7 --- /dev/null +++ b/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch @@ -0,0 +1,86 @@ +From 345b0fd6fe5f66dfe841bad0b39dd11a5672df68 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:20 +0000 +Subject: KVM: Drop unused @gpa param from gfn=>pfn cache's __release_gpc() helper + +From: Sean Christopherson + +commit 345b0fd6fe5f66dfe841bad0b39dd11a5672df68 upstream. 
+ +Drop the @pga param from __release_gpc() and rename the helper to make it +more obvious that the cache itself is not being released. The helper +will be reused by a future commit to release a pfn+khva combination that +is _never_ associated with the cache, at which point the current name +would go from slightly misleading to blatantly wrong. + +No functional change intended. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -95,7 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct k + } + EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check); + +-static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa) ++static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva) + { + /* Unmap the old page if it was mapped before, and release it */ + if (!is_error_noslot_pfn(pfn)) { +@@ -146,7 +146,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + unsigned long page_offset = gpa & ~PAGE_MASK; + kvm_pfn_t old_pfn, new_pfn; + unsigned long old_uhva; +- gpa_t old_gpa; + void *old_khva; + bool old_valid; + int ret = 0; +@@ -160,7 +159,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + + write_lock_irq(&gpc->lock); + +- old_gpa = gpc->gpa; + old_pfn = gpc->pfn; + old_khva = gpc->khva - offset_in_page(gpc->khva); + old_uhva = gpc->uhva; +@@ -244,7 +242,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct + out: + write_unlock_irq(&gpc->lock); + +- __release_gpc(kvm, old_pfn, old_khva, old_gpa); ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; + } +@@ -254,14 +252,12 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + { + void *old_khva; + kvm_pfn_t old_pfn; +- gpa_t old_gpa; + + write_lock_irq(&gpc->lock); + + gpc->valid = false; + + old_khva = gpc->khva - offset_in_page(gpc->khva); +- old_gpa = gpc->gpa; + old_pfn = gpc->pfn; + + /* +@@ -273,7 +269,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + + write_unlock_irq(&gpc->lock); + +- __release_gpc(kvm, old_pfn, old_khva, old_gpa); ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + } + EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap); + diff --git a/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch b/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch new file mode 100644 index 00000000000..873529c95c7 --- /dev/null +++ b/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch @@ -0,0 +1,356 @@ +From 58cd407ca4c6278cf9f9d09a2e663bf645b0c982 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:24 +0000 +Subject: KVM: Fix multiple races in gfn=>pfn cache refresh + +From: Sean Christopherson + +commit 58cd407ca4c6278cf9f9d09a2e663bf645b0c982 upstream. + +Rework the gfn=>pfn cache (gpc) refresh logic to address multiple races +between the cache itself, and between the cache and mmu_notifier events. + +The existing refresh code attempts to guard against races with the +mmu_notifier by speculatively marking the cache valid, and then marking +it invalid if a mmu_notifier invalidation occurs. That handles the case +where an invalidation occurs between dropping and re-acquiring gpc->lock, +but it doesn't handle the scenario where the cache is refreshed after the +cache was invalidated by the notifier, but before the notifier elevates +mmu_notifier_count. 
The gpc refresh can't use the "retry" helper as its +invalidation occurs _before_ mmu_notifier_count is elevated and before +mmu_notifier_range_start is set/updated. + + CPU0 CPU1 + ---- ---- + + gfn_to_pfn_cache_invalidate_start() + | + -> gpc->valid = false; + kvm_gfn_to_pfn_cache_refresh() + | + |-> gpc->valid = true; + + hva_to_pfn_retry() + | + -> acquire kvm->mmu_lock + kvm->mmu_notifier_count == 0 + mmu_seq == kvm->mmu_notifier_seq + drop kvm->mmu_lock + return pfn 'X' + acquire kvm->mmu_lock + kvm_inc_notifier_count() + drop kvm->mmu_lock() + kernel frees pfn 'X' + kvm_gfn_to_pfn_cache_check() + | + |-> gpc->valid == true + + caller accesses freed pfn 'X' + +Key off of mn_active_invalidate_count to detect that a pfncache refresh +needs to wait for an in-progress mmu_notifier invalidation. While +mn_active_invalidate_count is not guaranteed to be stable, it is +guaranteed to be elevated prior to an invalidation acquiring gpc->lock, +so either the refresh will see an active invalidation and wait, or the +invalidation will run after the refresh completes. + +Speculatively marking the cache valid is itself flawed, as a concurrent +kvm_gfn_to_pfn_cache_check() would see a valid cache with stale pfn/khva +values. The KVM Xen use case explicitly allows/wants multiple users; +even though the caches are allocated per vCPU, __kvm_xen_has_interrupt() +can read a different vCPU (or vCPUs). Address this race by invalidating +the cache prior to dropping gpc->lock (this is made possible by fixing +the above mmu_notifier race). + +Complicating all of this is the fact that both the hva=>pfn resolution +and mapping of the kernel address can sleep, i.e. must be done outside +of gpc->lock. + +Fix the above races in one fell swoop, trying to fix each individual race +is largely pointless and essentially impossible to test, e.g. closing one +hole just shifts the focus to the other hole. + +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Cc: David Woodhouse +Cc: Mingwei Zhang +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-8-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 9 ++ + virt/kvm/pfncache.c | 193 ++++++++++++++++++++++++++++++++-------------------- + 2 files changed, 131 insertions(+), 71 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -724,6 +724,15 @@ static int kvm_mmu_notifier_invalidate_r + kvm->mn_active_invalidate_count++; + spin_unlock(&kvm->mn_invalidate_lock); + ++ /* ++ * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e. ++ * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring ++ * each cache's lock. There are relatively few caches in existence at ++ * any given time, and the caches themselves can check for hva overlap, ++ * i.e. don't need to rely on memslot overlap checks for performance. ++ * Because this runs without holding mmu_lock, the pfn caches must use ++ * mn_active_invalidate_count (see above) instead of mmu_notifier_count. 
++ */ + gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, + hva_range.may_block); + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -112,31 +112,122 @@ static void gpc_release_pfn_and_khva(str + } + } + +-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva) ++static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq) + { ++ /* ++ * mn_active_invalidate_count acts for all intents and purposes ++ * like mmu_notifier_count here; but the latter cannot be used ++ * here because the invalidation of caches in the mmu_notifier ++ * event occurs _before_ mmu_notifier_count is elevated. ++ * ++ * Note, it does not matter that mn_active_invalidate_count ++ * is not protected by gpc->lock. It is guaranteed to ++ * be elevated before the mmu_notifier acquires gpc->lock, and ++ * isn't dropped until after mmu_notifier_seq is updated. ++ */ ++ if (kvm->mn_active_invalidate_count) ++ return true; ++ ++ /* ++ * Ensure mn_active_invalidate_count is read before ++ * mmu_notifier_seq. This pairs with the smp_wmb() in ++ * mmu_notifier_invalidate_range_end() to guarantee either the ++ * old (non-zero) value of mn_active_invalidate_count or the ++ * new (incremented) value of mmu_notifier_seq is observed. ++ */ ++ smp_rmb(); ++ return kvm->mmu_notifier_seq != mmu_seq; ++} ++ ++static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) ++{ ++ /* Note, the new page offset may be different than the old! */ ++ void *old_khva = gpc->khva - offset_in_page(gpc->khva); ++ kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; ++ void *new_khva = NULL; + unsigned long mmu_seq; +- kvm_pfn_t new_pfn; +- int retry; ++ ++ lockdep_assert_held(&gpc->refresh_lock); ++ ++ lockdep_assert_held_write(&gpc->lock); ++ ++ /* ++ * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva ++ * assets have already been updated and so a concurrent check() from a ++ * different task may not fail the gpa/uhva/generation checks. ++ */ ++ gpc->valid = false; + + do { + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + ++ write_unlock_irq(&gpc->lock); ++ ++ /* ++ * If the previous iteration "failed" due to an mmu_notifier ++ * event, release the pfn and unmap the kernel virtual address ++ * from the previous attempt. Unmapping might sleep, so this ++ * needs to be done after dropping the lock. Opportunistically ++ * check for resched while the lock isn't held. ++ */ ++ if (new_pfn != KVM_PFN_ERR_FAULT) { ++ /* ++ * Keep the mapping if the previous iteration reused ++ * the existing mapping and didn't create a new one. ++ */ ++ if (new_khva == old_khva) ++ new_khva = NULL; ++ ++ gpc_release_pfn_and_khva(kvm, new_pfn, new_khva); ++ ++ cond_resched(); ++ } ++ + /* We always request a writeable mapping */ +- new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL); ++ new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL); + if (is_error_noslot_pfn(new_pfn)) +- break; ++ goto out_error; ++ ++ /* ++ * Obtain a new kernel mapping if KVM itself will access the ++ * pfn. Note, kmap() and memremap() can both sleep, so this ++ * too must be done outside of gpc->lock! 
++ */ ++ if (gpc->usage & KVM_HOST_USES_PFN) { ++ if (new_pfn == gpc->pfn) { ++ new_khva = old_khva; ++ } else if (pfn_valid(new_pfn)) { ++ new_khva = kmap(pfn_to_page(new_pfn)); ++#ifdef CONFIG_HAS_IOMEM ++ } else { ++ new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); ++#endif ++ } ++ if (!new_khva) { ++ kvm_release_pfn_clean(new_pfn); ++ goto out_error; ++ } ++ } ++ ++ write_lock_irq(&gpc->lock); + +- KVM_MMU_READ_LOCK(kvm); +- retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva); +- KVM_MMU_READ_UNLOCK(kvm); +- if (!retry) +- break; ++ /* ++ * Other tasks must wait for _this_ refresh to complete before ++ * attempting to refresh. ++ */ ++ WARN_ON_ONCE(gpc->valid); ++ } while (mmu_notifier_retry_cache(kvm, mmu_seq)); ++ ++ gpc->valid = true; ++ gpc->pfn = new_pfn; ++ gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK); ++ return 0; + +- cond_resched(); +- } while (1); ++out_error: ++ write_lock_irq(&gpc->lock); + +- return new_pfn; ++ return -EFAULT; + } + + int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, +@@ -147,7 +238,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + kvm_pfn_t old_pfn, new_pfn; + unsigned long old_uhva; + void *old_khva; +- bool old_valid; + int ret = 0; + + /* +@@ -169,7 +259,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + old_pfn = gpc->pfn; + old_khva = gpc->khva - offset_in_page(gpc->khva); + old_uhva = gpc->uhva; +- old_valid = gpc->valid; + + /* If the userspace HVA is invalid, refresh that first */ + if (gpc->gpa != gpa || gpc->generation != slots->generation || +@@ -182,7 +271,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn); + + if (kvm_is_error_hva(gpc->uhva)) { +- gpc->pfn = KVM_PFN_ERR_FAULT; + ret = -EFAULT; + goto out; + } +@@ -194,60 +282,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct + * If the userspace HVA changed or the PFN was already invalid, + * drop the lock and do the HVA to PFN lookup again. + */ +- if (!old_valid || old_uhva != gpc->uhva) { +- unsigned long uhva = gpc->uhva; +- void *new_khva = NULL; +- +- /* Placeholders for "hva is valid but not yet mapped" */ +- gpc->pfn = KVM_PFN_ERR_FAULT; +- gpc->khva = NULL; +- gpc->valid = true; +- +- write_unlock_irq(&gpc->lock); +- +- new_pfn = hva_to_pfn_retry(kvm, uhva); +- if (is_error_noslot_pfn(new_pfn)) { +- ret = -EFAULT; +- goto map_done; +- } +- +- if (gpc->usage & KVM_HOST_USES_PFN) { +- if (new_pfn == old_pfn) { +- /* +- * Reuse the existing pfn and khva, but put the +- * reference acquired hva_to_pfn_retry(); the +- * cache still holds a reference to the pfn +- * from the previous refresh. +- */ +- gpc_release_pfn_and_khva(kvm, new_pfn, NULL); +- +- new_khva = old_khva; +- old_pfn = KVM_PFN_ERR_FAULT; +- old_khva = NULL; +- } else if (pfn_valid(new_pfn)) { +- new_khva = kmap(pfn_to_page(new_pfn)); +-#ifdef CONFIG_HAS_IOMEM +- } else { +- new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); +-#endif +- } +- if (new_khva) +- new_khva += page_offset; +- else +- ret = -EFAULT; +- } +- +- map_done: +- write_lock_irq(&gpc->lock); +- if (ret) { +- gpc->valid = false; +- gpc->pfn = KVM_PFN_ERR_FAULT; +- gpc->khva = NULL; +- } else { +- /* At this point, gpc->valid may already have been cleared */ +- gpc->pfn = new_pfn; +- gpc->khva = new_khva; +- } ++ if (!gpc->valid || old_uhva != gpc->uhva) { ++ ret = hva_to_pfn_retry(kvm, gpc); + } else { + /* If the HVA→PFN mapping was already valid, don't unmap it. 
*/ + old_pfn = KVM_PFN_ERR_FAULT; +@@ -255,11 +291,26 @@ int kvm_gfn_to_pfn_cache_refresh(struct + } + + out: ++ /* ++ * Invalidate the cache and purge the pfn/khva if the refresh failed. ++ * Some/all of the uhva, gpa, and memslot generation info may still be ++ * valid, leave it as is. ++ */ ++ if (ret) { ++ gpc->valid = false; ++ gpc->pfn = KVM_PFN_ERR_FAULT; ++ gpc->khva = NULL; ++ } ++ ++ /* Snapshot the new pfn before dropping the lock! */ ++ new_pfn = gpc->pfn; ++ + write_unlock_irq(&gpc->lock); + + mutex_unlock(&gpc->refresh_lock); + +- gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); ++ if (old_pfn != new_pfn) ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; + } diff --git a/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch b/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch new file mode 100644 index 00000000000..74131495b6d --- /dev/null +++ b/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch @@ -0,0 +1,105 @@ +From 93984f19e7bce4c18084a6ef3dacafb155b806ed Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:23 +0000 +Subject: KVM: Fully serialize gfn=>pfn cache refresh via mutex + +From: Sean Christopherson + +commit 93984f19e7bce4c18084a6ef3dacafb155b806ed upstream. + +Protect gfn=>pfn cache refresh with a mutex to fully serialize refreshes. +The refresh logic doesn't protect against + +- concurrent unmaps, or refreshes with different GPAs (which may or may not + happen in practice, for example if a cache is only used under vcpu->mutex; + but it's allowed in the code) + +- a false negative on the memslot generation. If the first refresh sees + a stale memslot generation, it will refresh the hva and generation before + moving on to the hva=>pfn translation. If it then drops gpc->lock, a + different user of the cache can come along, acquire gpc->lock, see that + the memslot generation is fresh, and skip the hva=>pfn update due to the + userspace address also matching (because it too was updated). + +The refresh path can already sleep during hva=>pfn resolution, so wrap +the refresh with a mutex to ensure that any given refresh runs to +completion before other callers can start their refresh. + +Cc: stable@vger.kernel.org +Cc: Lai Jiangshan +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-7-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/kvm_types.h | 2 ++ + virt/kvm/pfncache.c | 12 ++++++++++++ + 2 files changed, 14 insertions(+) + +--- a/include/linux/kvm_types.h ++++ b/include/linux/kvm_types.h +@@ -19,6 +19,7 @@ struct kvm_memslots; + enum kvm_mr_change; + + #include ++#include + #include + #include + +@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache { + struct kvm_vcpu *vcpu; + struct list_head list; + rwlock_t lock; ++ struct mutex refresh_lock; + void *khva; + kvm_pfn_t pfn; + enum pfn_cache_usage usage; +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -157,6 +157,13 @@ int kvm_gfn_to_pfn_cache_refresh(struct + if (page_offset + len > PAGE_SIZE) + return -EINVAL; + ++ /* ++ * If another task is refreshing the cache, wait for it to complete. ++ * There is no guarantee that concurrent refreshes will see the same ++ * gpa, memslots generation, etc..., so they must be fully serialized. 
++ */ ++ mutex_lock(&gpc->refresh_lock); ++ + write_lock_irq(&gpc->lock); + + old_pfn = gpc->pfn; +@@ -250,6 +257,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct + out: + write_unlock_irq(&gpc->lock); + ++ mutex_unlock(&gpc->refresh_lock); ++ + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; +@@ -261,6 +270,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + void *old_khva; + kvm_pfn_t old_pfn; + ++ mutex_lock(&gpc->refresh_lock); + write_lock_irq(&gpc->lock); + + gpc->valid = false; +@@ -276,6 +286,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + gpc->pfn = KVM_PFN_ERR_FAULT; + + write_unlock_irq(&gpc->lock); ++ mutex_unlock(&gpc->refresh_lock); + + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + } +@@ -290,6 +301,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm + + if (!gpc->active) { + rwlock_init(&gpc->lock); ++ mutex_init(&gpc->refresh_lock); + + gpc->khva = NULL; + gpc->pfn = KVM_PFN_ERR_FAULT; diff --git a/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch b/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch new file mode 100644 index 00000000000..a551acd5d1a --- /dev/null +++ b/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch @@ -0,0 +1,45 @@ +From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:51 +0000 +Subject: KVM: nVMX: Account for KVM reserved CR4 bits in consistency checks + +From: Sean Christopherson + +commit ca58f3aa53d165afe4ab74c755bc2f6d168617ac upstream. + +Check that the guest (L2) and host (L1) CR4 values that would be loaded +by nested VM-Enter and VM-Exit respectively are valid with respect to +KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits +would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or +failed VM-Entry) by massaging guest CPUID to allow features that are not +supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as +KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for +L2 based on L1's CPUID model. + +Note, although nested_{guest,host}_cr4_valid() are _currently_ used if +and only if the vCPU is post-VMXON (nested.vmxon == true), that may not +be true in the future, e.g. emulating VMXON has a bug where it doesn't +check the allowed/required CR0/CR4 bits. + +Cc: stable@vger.kernel.org +Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation") +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/nested.h ++++ b/arch/x86/kvm/vmx/nested.h +@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(stru + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; + +- return fixed_bits_valid(val, fixed0, fixed1); ++ return fixed_bits_valid(val, fixed0, fixed1) && ++ __kvm_is_valid_cr4(vcpu, val); + } + + /* No difference in the restrictions on guest and host CR4 in VMX operation. 
*/ diff --git a/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch b/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch new file mode 100644 index 00000000000..44beeb6f3b1 --- /dev/null +++ b/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch @@ -0,0 +1,75 @@ +From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:52 +0000 +Subject: KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4 + +From: Sean Christopherson + +commit c7d855c2aff2d511fd60ee2e356134c4fb394799 upstream. + +Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed +per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely +on hardware to perform the checks, even for the few checks that have +higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in +hardware while running the guest, (b) there may incompatible CR0/CR4 bits +that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may +have further restricted the allowed CR0/CR4 values by manipulating the +guest's nested VMX MSRs. + +Note, despite a very strong desire to throw shade at Jim, commit +70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks") +is not to blame for the buggy behavior (though the comment...). That +commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks +(though it did erroneously drop the CPL check, but that has already been +remedied). KVM may force CR0.PE=1, but will do so only when also +forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033 +Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF") +Reported-by: Eric Li +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4973,20 +4973,25 @@ static int handle_vmon(struct kvm_vcpu * + | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + + /* +- * The Intel VMX Instruction Reference lists a bunch of bits that are +- * prerequisite to running VMXON, most notably cr4.VMXE must be set to +- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this). +- * Otherwise, we should fail with #UD. But most faulting conditions +- * have already been checked by hardware, prior to the VM-exit for +- * VMXON. We do test guest cr4.VMXE because processor CR4 always has +- * that bit set to 1 in non-root mode. ++ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks ++ * that have higher priority than VM-Exit (see Intel SDM's pseudocode ++ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while ++ * running the guest, i.e. KVM needs to check the _guest_ values. ++ * ++ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and ++ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real ++ * Mode, but KVM will never take the guest out of those modes. 
+ */ +- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { ++ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || ++ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + +- /* CPL=0 must be checked manually. */ ++ /* ++ * CPL=0 and all other checks that are lower priority than VM-Exit must ++ * be checked manually. ++ */ + if (vmx_get_cpl(vcpu)) { + kvm_inject_gp(vcpu, 0); + return 1; diff --git a/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch b/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch new file mode 100644 index 00000000000..f34afd0f972 --- /dev/null +++ b/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch @@ -0,0 +1,175 @@ +From f8ae08f9789ad59d318ea75b570caa454aceda81 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:54 +0000 +Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value + +From: Sean Christopherson + +commit f8ae08f9789ad59d318ea75b570caa454aceda81 upstream. + +Restrict the nVMX MSRs based on KVM's config, not based on the guest's +current config. Using the guest's config to audit the new config +prevents userspace from restoring the original config (KVM's config) if +at any point in the past the guest's config was restricted in any way. + +Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs") +Cc: stable@vger.kernel.org +Cc: David Matlack +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 70 ++++++++++++++++++++++++---------------------- + 1 file changed, 37 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1224,7 +1224,7 @@ static int vmx_restore_vmx_basic(struct + BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | + /* reserved */ + BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); +- u64 vmx_basic = vmx->nested.msrs.basic; ++ u64 vmx_basic = vmcs_config.nested.basic; + + if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + return -EINVAL; +@@ -1247,36 +1247,42 @@ static int vmx_restore_vmx_basic(struct + return 0; + } + +-static int +-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, ++ u32 **low, u32 **high) + { +- u64 supported; +- u32 *lowp, *highp; +- + switch (msr_index) { + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +- lowp = &vmx->nested.msrs.pinbased_ctls_low; +- highp = &vmx->nested.msrs.pinbased_ctls_high; ++ *low = &msrs->pinbased_ctls_low; ++ *high = &msrs->pinbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: +- lowp = &vmx->nested.msrs.procbased_ctls_low; +- highp = &vmx->nested.msrs.procbased_ctls_high; ++ *low = &msrs->procbased_ctls_low; ++ *high = &msrs->procbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_EXIT_CTLS: +- lowp = &vmx->nested.msrs.exit_ctls_low; +- highp = &vmx->nested.msrs.exit_ctls_high; ++ *low = &msrs->exit_ctls_low; ++ *high = &msrs->exit_ctls_high; + break; + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: +- lowp = &vmx->nested.msrs.entry_ctls_low; +- highp = &vmx->nested.msrs.entry_ctls_high; ++ *low = &msrs->entry_ctls_low; ++ *high = &msrs->entry_ctls_high; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: +- lowp = &vmx->nested.msrs.secondary_ctls_low; +- highp = 
&vmx->nested.msrs.secondary_ctls_high; ++ *low = &msrs->secondary_ctls_low; ++ *high = &msrs->secondary_ctls_high; + break; + default: + BUG(); + } ++} ++ ++static int ++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u32 *lowp, *highp; ++ u64 supported; ++ ++ vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); + + supported = vmx_control_msr(*lowp, *highp); + +@@ -1288,6 +1294,7 @@ vmx_restore_control_msr(struct vcpu_vmx + if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) + return -EINVAL; + ++ vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); + *lowp = data; + *highp = data >> 32; + return 0; +@@ -1301,10 +1308,8 @@ static int vmx_restore_vmx_misc(struct v + BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | + /* reserved */ + GENMASK_ULL(13, 9) | BIT_ULL(31); +- u64 vmx_misc; +- +- vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, +- vmx->nested.msrs.misc_high); ++ u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, ++ vmcs_config.nested.misc_high); + + if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + return -EINVAL; +@@ -1332,10 +1337,8 @@ static int vmx_restore_vmx_misc(struct v + + static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) + { +- u64 vmx_ept_vpid_cap; +- +- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, +- vmx->nested.msrs.vpid_caps); ++ u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, ++ vmcs_config.nested.vpid_caps); + + /* Every bit is either reserved or a feature bit. */ + if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) +@@ -1346,20 +1349,21 @@ static int vmx_restore_vmx_ept_vpid_cap( + return 0; + } + +-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) + { +- u64 *msr; +- + switch (msr_index) { + case MSR_IA32_VMX_CR0_FIXED0: +- msr = &vmx->nested.msrs.cr0_fixed0; +- break; ++ return &msrs->cr0_fixed0; + case MSR_IA32_VMX_CR4_FIXED0: +- msr = &vmx->nested.msrs.cr4_fixed0; +- break; ++ return &msrs->cr4_fixed0; + default: + BUG(); + } ++} ++ ++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); + + /* + * 1 bits (which indicates bits which "must-be-1" during VMX operation) +@@ -1368,7 +1372,7 @@ static int vmx_restore_fixed0_msr(struct + if (!is_bitwise_subset(data, *msr, -1ULL)) + return -EINVAL; + +- *msr = data; ++ *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; + return 0; + } + +@@ -1429,7 +1433,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcp + vmx->nested.msrs.vmcs_enum = data; + return 0; + case MSR_IA32_VMX_VMFUNC: +- if (data & ~vmx->nested.msrs.vmfunc_controls) ++ if (data & ~vmcs_config.nested.vmfunc_controls) + return -EINVAL; + vmx->nested.msrs.vmfunc_controls = data; + return 0; diff --git a/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch b/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch new file mode 100644 index 00000000000..b6b2fb21a14 --- /dev/null +++ b/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch @@ -0,0 +1,41 @@ +From 3dddf65b4f4c451c345d34ae85bdf1791a746e49 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:21 +0000 +Subject: KVM: Put the extra pfn reference when reusing a pfn in the gpc cache + +From: Sean Christopherson + +commit 
3dddf65b4f4c451c345d34ae85bdf1791a746e49 upstream. + +Put the struct page reference to pfn acquired by hva_to_pfn() when the +old and new pfns for a gfn=>pfn cache match. The cache already has a +reference via the old/current pfn, and will only put one reference when +the cache is done with the pfn. + +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -206,6 +206,14 @@ int kvm_gfn_to_pfn_cache_refresh(struct + + if (gpc->usage & KVM_HOST_USES_PFN) { + if (new_pfn == old_pfn) { ++ /* ++ * Reuse the existing pfn and khva, but put the ++ * reference acquired hva_to_pfn_retry(); the ++ * cache still holds a reference to the pfn ++ * from the previous refresh. ++ */ ++ gpc_release_pfn_and_khva(kvm, new_pfn, NULL); ++ + new_khva = old_khva; + old_pfn = KVM_PFN_ERR_FAULT; + old_khva = NULL; diff --git a/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch b/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch new file mode 100644 index 00000000000..adfe8d75cb3 --- /dev/null +++ b/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch @@ -0,0 +1,100 @@ +From c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Mon, 18 Jul 2022 15:04:34 +0200 +Subject: KVM: s390: pv: don't present the ecall interrupt twice + +From: Nico Boehr + +commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 upstream. + +When the SIGP interpretation facility is present and a VCPU sends an +ecall to another VCPU in enabled wait, the sending VCPU receives a 56 +intercept (partial execution), so KVM can wake up the receiving CPU. +Note that the SIGP interpretation facility will take care of the +interrupt delivery and KVM's only job is to wake the receiving VCPU. + +For PV, the sending VCPU will receive a 108 intercept (pv notify) and +should continue like in the non-PV case, i.e. wake the receiving VCPU. + +For PV and non-PV guests the interrupt delivery will occur through the +SIGP interpretation facility on SIE entry when SIE finds the X bit in +the status field set. + +However, in handle_pv_notification(), there was no special handling for +SIGP, which leads to interrupt injection being requested by KVM for the +next SIE entry. This results in the interrupt being delivered twice: +once by the SIGP interpretation facility and once by KVM through the +IICTL. + +Add the necessary special handling in handle_pv_notification(), similar +to handle_partial_execution(), which simply wakes the receiving VCPU and +leave interrupt delivery to the SIGP interpretation facility. + +In contrast to external calls, emergency calls are not interpreted but +also cause a 108 intercept, which is why we still need to call +handle_instruction() for SIGP orders other than ecall. + +Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which +cause a 108 intercept - even if they are actually handled by +handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei() +to avoid possibly confusing trace messages. 
+ +Signed-off-by: Nico Boehr +Cc: # 5.7 +Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation") +Reviewed-by: Claudio Imbrenda +Reviewed-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com +Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com> +Signed-off-by: Claudio Imbrenda +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kvm/intercept.c | 15 +++++++++++++++ + arch/s390/kvm/sigp.c | 4 ++-- + 2 files changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu + + static int handle_pv_notification(struct kvm_vcpu *vcpu) + { ++ int ret; ++ + if (vcpu->arch.sie_block->ipa == 0xb210) + return handle_pv_spx(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb220) + return handle_pv_sclp(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb9a4) + return handle_pv_uvc(vcpu); ++ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) { ++ /* ++ * Besides external call, other SIGP orders also cause a ++ * 108 (pv notify) intercept. In contrast to external call, ++ * these orders need to be emulated and hence the appropriate ++ * place to handle them is in handle_instruction(). ++ * So first try kvm_s390_handle_sigp_pei() and if that isn't ++ * successful, go on with handle_instruction(). ++ */ ++ ret = kvm_s390_handle_sigp_pei(vcpu); ++ if (!ret) ++ return ret; ++ } + + return handle_instruction(vcpu); + } +--- a/arch/s390/kvm/sigp.c ++++ b/arch/s390/kvm/sigp.c +@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_ + struct kvm_vcpu *dest_vcpu; + u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); + +- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); +- + if (order_code == SIGP_EXTERNAL_CALL) { ++ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); ++ + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); + BUG_ON(dest_vcpu == NULL); + diff --git a/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch b/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch new file mode 100644 index 00000000000..de7496e36eb --- /dev/null +++ b/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch @@ -0,0 +1,66 @@ +From ec6e4d863258d4bfb36d48d5e3ef68140234d688 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:48 +0000 +Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks + +From: Sean Christopherson + +commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 upstream. + +Wait to mark the TSS as busy during LTR emulation until after all fault +checks for the LTR have passed. Specifically, don't mark the TSS busy if +the new TSS base is non-canonical. + +Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the +only reason for the early check was to avoid marking a !PRESENT TSS as +busy, i.e. the common !PRESENT is now done before setting the busy bit. 
+ +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Cc: Tetsuo Handa +Cc: Hou Wenlong +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1687,16 +1687,6 @@ static int __load_segment_descriptor(str + case VCPU_SREG_TR: + if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) + goto exception; +- if (!seg_desc.p) { +- err_vec = NP_VECTOR; +- goto exception; +- } +- old_desc = seg_desc; +- seg_desc.type |= 2; /* busy */ +- ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, +- sizeof(seg_desc), &ctxt->exception); +- if (ret != X86EMUL_CONTINUE) +- return ret; + break; + case VCPU_SREG_LDTR: + if (seg_desc.s || seg_desc.type != 2) +@@ -1737,6 +1727,15 @@ static int __load_segment_descriptor(str + ((u64)base3 << 32), ctxt)) + return emulate_gp(ctxt, 0); + } ++ ++ if (seg == VCPU_SREG_TR) { ++ old_desc = seg_desc; ++ seg_desc.type |= 2; /* busy */ ++ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, ++ sizeof(seg_desc), &ctxt->exception); ++ if (ret != X86EMUL_CONTINUE) ++ return ret; ++ } + load: + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); + if (desc) diff --git a/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch b/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch new file mode 100644 index 00000000000..e1a8cabfb73 --- /dev/null +++ b/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch @@ -0,0 +1,41 @@ +From 2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:49 +0000 +Subject: KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP + +From: Sean Christopherson + +commit 2626206963ace9e8bf92b6eea5ff78dd674c555c upstream. + +When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set +the error code to the selector. Intel SDM's says nothing about the #GP, +but AMD's APM explicitly states that both LLDT and LTR set the error code +to the selector, not zero. + +Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0), +but the KVM code in question is specific to the base from the descriptor. 
+ +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1724,8 +1724,8 @@ static int __load_segment_descriptor(str + if (ret != X86EMUL_CONTINUE) + return ret; + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | +- ((u64)base3 << 32), ctxt)) +- return emulate_gp(ctxt, 0); ++ ((u64)base3 << 32), ctxt)) ++ return emulate_gp(ctxt, err_code); + } + + if (seg == VCPU_SREG_TR) { diff --git a/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch b/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch new file mode 100644 index 00000000000..228f1ae41af --- /dev/null +++ b/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch @@ -0,0 +1,104 @@ +From c33f6f2228fe8517e38941a508e9f905f99ecba9 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:50 +0000 +Subject: KVM: x86: Split kvm_is_valid_cr4() and export only the non-vendor bits + +From: Sean Christopherson + +commit c33f6f2228fe8517e38941a508e9f905f99ecba9 upstream. + +Split the common x86 parts of kvm_is_valid_cr4(), i.e. the reserved bits +checks, into a separate helper, __kvm_is_valid_cr4(), and export only the +inner helper to vendor code in order to prevent nested VMX from calling +back into vmx_is_valid_cr4() via kvm_is_valid_cr4(). + +On SVM, this is a nop as SVM doesn't place any additional restrictions on +CR4. + +On VMX, this is also currently a nop, but only because nested VMX is +missing checks on reserved CR4 bits for nested VM-Enter. That bug will +be fixed in a future patch, and could simply use kvm_is_valid_cr4() as-is, +but nVMX has _another_ bug where VMXON emulation doesn't enforce VMX's +restrictions on CR0/CR4. The cleanest and most intuitive way to fix the +VMXON bug is to use nested_host_cr{0,4}_valid(). If the CR4 variant +routes through kvm_is_valid_cr4(), using nested_host_cr4_valid() won't do +the right thing for the VMXON case as vmx_is_valid_cr4() enforces VMX's +restrictions if and only if the vCPU is post-VMXON. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 ++- + arch/x86/kvm/vmx/vmx.c | 4 ++-- + arch/x86/kvm/x86.c | 12 +++++++++--- + arch/x86/kvm/x86.h | 2 +- + 4 files changed, 14 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -292,7 +292,8 @@ static bool __nested_vmcb_check_save(str + return false; + } + +- if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) ++ /* Note, SVM doesn't have any additional restrictions on CR4. */ ++ if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4))) + return false; + + if (CC(!kvm_valid_efer(vcpu, save->efer))) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -3230,8 +3230,8 @@ static bool vmx_is_valid_cr4(struct kvm_ + { + /* + * We operate under the default treatment of SMM, so VMX cannot be +- * enabled under SMM. Note, whether or not VMXE is allowed at all is +- * handled by kvm_is_valid_cr4(). 
++ * enabled under SMM. Note, whether or not VMXE is allowed at all, ++ * i.e. is a reserved bit, is handled by common x86 code. + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return false; +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1066,7 +1066,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu * + } + EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); + +-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + { + if (cr4 & cr4_reserved_bits) + return false; +@@ -1074,9 +1074,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *v + if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) + return false; + +- return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); ++ return true; ++} ++EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4); ++ ++static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ return __kvm_is_valid_cr4(vcpu, cr4) && ++ static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); + } +-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); + + void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) + { +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -407,7 +407,7 @@ static inline void kvm_machine_check(voi + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); + void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); + int kvm_spec_ctrl_test_value(u64 value); +-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); ++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); + int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, + struct x86_exception *e); + int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); diff --git a/queue-5.18/series b/queue-5.18/series index e033813c557..28a50517b91 100644 --- a/queue-5.18/series +++ b/queue-5.18/series @@ -23,3 +23,15 @@ hid-wacom-don-t-register-pad_input-for-touch-switch.patch kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_run_pending-case.patch kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_run_pending-case.patch kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-with-gif-0.patch +kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch +kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch +kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch +kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch +kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch +kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch +kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch +kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch +kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch +kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch +kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch +kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
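
The gfn=>pfn cache fixes queued above all rely on one guard pattern: a refresh invalidates the cache, snapshots an invalidation sequence, performs the sleepable hva=>pfn lookup with no spinlock held, and retries if an invalidation was in flight (mn_active_invalidate_count elevated) or the sequence moved, with the whole refresh serialized by a mutex. The C sketch below is only an illustrative userspace analogue of that pattern, not kernel code; every name in it (struct cache, cache_refresh, do_lookup, active_invalidate, invalidate_seq) is hypothetical, and it deliberately omits the smp_rmb() pairing and the irq-safe rwlock handling of the real virt/kvm/pfncache.c changes.

/*
 * Illustrative analogue of the retry guard in the queued
 * "KVM: Fix multiple races in gfn=>pfn cache refresh" patch:
 * retry the lookup if an invalidation is active or the sequence
 * number changed, and serialize whole refreshes with a mutex.
 * Not kernel code; all identifiers are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
        pthread_mutex_t refresh_lock;    /* plays the role of gpc->refresh_lock */
        unsigned long active_invalidate; /* plays the role of mn_active_invalidate_count */
        unsigned long invalidate_seq;    /* plays the role of mmu_notifier_seq */
        unsigned long value;             /* the cached translation */
        bool valid;
};

/* Stand-in for the slow, sleepable hva=>pfn lookup. */
static unsigned long do_lookup(void)
{
        return 42;
}

static bool retry_needed(const struct cache *c, unsigned long seq_snapshot)
{
        /* An in-flight invalidation may not have bumped the sequence yet. */
        if (c->active_invalidate)
                return true;
        /* Otherwise retry only if the sequence moved during the lookup. */
        return c->invalidate_seq != seq_snapshot;
}

static void cache_refresh(struct cache *c)
{
        unsigned long seq, val;

        pthread_mutex_lock(&c->refresh_lock);
        c->valid = false;                 /* invalidate before the sleepable lookup */
        do {
                seq = c->invalidate_seq;  /* snapshot before looking up */
                val = do_lookup();        /* no spinlock held here in the real code */
        } while (retry_needed(c, seq));
        c->value = val;
        c->valid = true;
        pthread_mutex_unlock(&c->refresh_lock);
}

int main(void)
{
        struct cache c = { .refresh_lock = PTHREAD_MUTEX_INITIALIZER };

        cache_refresh(&c);
        printf("cached value: %lu (valid=%d)\n", c.value, (int)c.valid);
        return 0;
}

The sketch builds with an ordinary C compiler plus -pthread and exists only to make the ordering in the queued patches (serialize, invalidate, look up, re-check, then publish) easier to follow; the actual semantics are defined by the patches above.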