From: Greg Kroah-Hartman Date: Fri, 12 Aug 2022 14:41:43 +0000 (+0200) Subject: 5.18-stable patches X-Git-Tag: v5.15.61~200 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=17428860da4a3ad5d7a8f41b8390dcfac8b3e429;p=thirdparty%2Fkernel%2Fstable-queue.git 5.18-stable patches added patches: kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch --- diff --git a/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch b/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch new file mode 100644 index 00000000000..719dd26dc1d --- /dev/null +++ b/queue-5.18/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch @@ -0,0 +1,38 @@ +From 3ba2c95ea180740b16281fa43a3ee5f47279c0ed Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:22 +0000 +Subject: KVM: Do not incorporate page offset into gfn=>pfn cache user address + +From: Sean Christopherson + +commit 3ba2c95ea180740b16281fa43a3ee5f47279c0ed upstream. + +Don't adjust the userspace address in the gfn=>pfn cache by the page +offset from the gpa. KVM should never use the user address directly, and +all KVM operations that translate a user address to something else +require the user address to be page aligned. Ignoring the offset will +allow the cache to reuse a gfn=>hva translation in the unlikely event +that the page offset of the gpa changes, but the gfn does not. And more +importantly, not having to (un)adjust the user address will simplify a +future bug fix. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -274,8 +274,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + ret = -EFAULT; + goto out; + } +- +- gpc->uhva += page_offset; + } + + /* diff --git a/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch b/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch new file mode 100644 index 00000000000..993e67bfae7 --- /dev/null +++ b/queue-5.18/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch @@ -0,0 +1,86 @@ +From 345b0fd6fe5f66dfe841bad0b39dd11a5672df68 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:20 +0000 +Subject: KVM: Drop unused @gpa param from gfn=>pfn cache's __release_gpc() helper + +From: Sean Christopherson + +commit 345b0fd6fe5f66dfe841bad0b39dd11a5672df68 upstream. 
+ +Drop the @pga param from __release_gpc() and rename the helper to make it +more obvious that the cache itself is not being released. The helper +will be reused by a future commit to release a pfn+khva combination that +is _never_ associated with the cache, at which point the current name +would go from slightly misleading to blatantly wrong. + +No functional change intended. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -95,7 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct k + } + EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check); + +-static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa) ++static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva) + { + /* Unmap the old page if it was mapped before, and release it */ + if (!is_error_noslot_pfn(pfn)) { +@@ -146,7 +146,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + unsigned long page_offset = gpa & ~PAGE_MASK; + kvm_pfn_t old_pfn, new_pfn; + unsigned long old_uhva; +- gpa_t old_gpa; + void *old_khva; + bool old_valid; + int ret = 0; +@@ -160,7 +159,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + + write_lock_irq(&gpc->lock); + +- old_gpa = gpc->gpa; + old_pfn = gpc->pfn; + old_khva = gpc->khva - offset_in_page(gpc->khva); + old_uhva = gpc->uhva; +@@ -244,7 +242,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct + out: + write_unlock_irq(&gpc->lock); + +- __release_gpc(kvm, old_pfn, old_khva, old_gpa); ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; + } +@@ -254,14 +252,12 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + { + void *old_khva; + kvm_pfn_t old_pfn; +- gpa_t old_gpa; + + write_lock_irq(&gpc->lock); + + gpc->valid = false; + + old_khva = gpc->khva - offset_in_page(gpc->khva); +- old_gpa = gpc->gpa; + old_pfn = gpc->pfn; + + /* +@@ -273,7 +269,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + + write_unlock_irq(&gpc->lock); + +- __release_gpc(kvm, old_pfn, old_khva, old_gpa); ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + } + EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap); + diff --git a/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch b/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch new file mode 100644 index 00000000000..873529c95c7 --- /dev/null +++ b/queue-5.18/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch @@ -0,0 +1,356 @@ +From 58cd407ca4c6278cf9f9d09a2e663bf645b0c982 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:24 +0000 +Subject: KVM: Fix multiple races in gfn=>pfn cache refresh + +From: Sean Christopherson + +commit 58cd407ca4c6278cf9f9d09a2e663bf645b0c982 upstream. + +Rework the gfn=>pfn cache (gpc) refresh logic to address multiple races +between the cache itself, and between the cache and mmu_notifier events. + +The existing refresh code attempts to guard against races with the +mmu_notifier by speculatively marking the cache valid, and then marking +it invalid if a mmu_notifier invalidation occurs. That handles the case +where an invalidation occurs between dropping and re-acquiring gpc->lock, +but it doesn't handle the scenario where the cache is refreshed after the +cache was invalidated by the notifier, but before the notifier elevates +mmu_notifier_count. 
The gpc refresh can't use the "retry" helper as its +invalidation occurs _before_ mmu_notifier_count is elevated and before +mmu_notifier_range_start is set/updated. + + CPU0 CPU1 + ---- ---- + + gfn_to_pfn_cache_invalidate_start() + | + -> gpc->valid = false; + kvm_gfn_to_pfn_cache_refresh() + | + |-> gpc->valid = true; + + hva_to_pfn_retry() + | + -> acquire kvm->mmu_lock + kvm->mmu_notifier_count == 0 + mmu_seq == kvm->mmu_notifier_seq + drop kvm->mmu_lock + return pfn 'X' + acquire kvm->mmu_lock + kvm_inc_notifier_count() + drop kvm->mmu_lock() + kernel frees pfn 'X' + kvm_gfn_to_pfn_cache_check() + | + |-> gpc->valid == true + + caller accesses freed pfn 'X' + +Key off of mn_active_invalidate_count to detect that a pfncache refresh +needs to wait for an in-progress mmu_notifier invalidation. While +mn_active_invalidate_count is not guaranteed to be stable, it is +guaranteed to be elevated prior to an invalidation acquiring gpc->lock, +so either the refresh will see an active invalidation and wait, or the +invalidation will run after the refresh completes. + +Speculatively marking the cache valid is itself flawed, as a concurrent +kvm_gfn_to_pfn_cache_check() would see a valid cache with stale pfn/khva +values. The KVM Xen use case explicitly allows/wants multiple users; +even though the caches are allocated per vCPU, __kvm_xen_has_interrupt() +can read a different vCPU (or vCPUs). Address this race by invalidating +the cache prior to dropping gpc->lock (this is made possible by fixing +the above mmu_notifier race). + +Complicating all of this is the fact that both the hva=>pfn resolution +and mapping of the kernel address can sleep, i.e. must be done outside +of gpc->lock. + +Fix the above races in one fell swoop, trying to fix each individual race +is largely pointless and essentially impossible to test, e.g. closing one +hole just shifts the focus to the other hole. + +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Cc: David Woodhouse +Cc: Mingwei Zhang +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-8-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 9 ++ + virt/kvm/pfncache.c | 193 ++++++++++++++++++++++++++++++++-------------------- + 2 files changed, 131 insertions(+), 71 deletions(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -724,6 +724,15 @@ static int kvm_mmu_notifier_invalidate_r + kvm->mn_active_invalidate_count++; + spin_unlock(&kvm->mn_invalidate_lock); + ++ /* ++ * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e. ++ * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring ++ * each cache's lock. There are relatively few caches in existence at ++ * any given time, and the caches themselves can check for hva overlap, ++ * i.e. don't need to rely on memslot overlap checks for performance. ++ * Because this runs without holding mmu_lock, the pfn caches must use ++ * mn_active_invalidate_count (see above) instead of mmu_notifier_count. 
++ */ + gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, + hva_range.may_block); + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -112,31 +112,122 @@ static void gpc_release_pfn_and_khva(str + } + } + +-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva) ++static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq) + { ++ /* ++ * mn_active_invalidate_count acts for all intents and purposes ++ * like mmu_notifier_count here; but the latter cannot be used ++ * here because the invalidation of caches in the mmu_notifier ++ * event occurs _before_ mmu_notifier_count is elevated. ++ * ++ * Note, it does not matter that mn_active_invalidate_count ++ * is not protected by gpc->lock. It is guaranteed to ++ * be elevated before the mmu_notifier acquires gpc->lock, and ++ * isn't dropped until after mmu_notifier_seq is updated. ++ */ ++ if (kvm->mn_active_invalidate_count) ++ return true; ++ ++ /* ++ * Ensure mn_active_invalidate_count is read before ++ * mmu_notifier_seq. This pairs with the smp_wmb() in ++ * mmu_notifier_invalidate_range_end() to guarantee either the ++ * old (non-zero) value of mn_active_invalidate_count or the ++ * new (incremented) value of mmu_notifier_seq is observed. ++ */ ++ smp_rmb(); ++ return kvm->mmu_notifier_seq != mmu_seq; ++} ++ ++static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) ++{ ++ /* Note, the new page offset may be different than the old! */ ++ void *old_khva = gpc->khva - offset_in_page(gpc->khva); ++ kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; ++ void *new_khva = NULL; + unsigned long mmu_seq; +- kvm_pfn_t new_pfn; +- int retry; ++ ++ lockdep_assert_held(&gpc->refresh_lock); ++ ++ lockdep_assert_held_write(&gpc->lock); ++ ++ /* ++ * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva ++ * assets have already been updated and so a concurrent check() from a ++ * different task may not fail the gpa/uhva/generation checks. ++ */ ++ gpc->valid = false; + + do { + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + ++ write_unlock_irq(&gpc->lock); ++ ++ /* ++ * If the previous iteration "failed" due to an mmu_notifier ++ * event, release the pfn and unmap the kernel virtual address ++ * from the previous attempt. Unmapping might sleep, so this ++ * needs to be done after dropping the lock. Opportunistically ++ * check for resched while the lock isn't held. ++ */ ++ if (new_pfn != KVM_PFN_ERR_FAULT) { ++ /* ++ * Keep the mapping if the previous iteration reused ++ * the existing mapping and didn't create a new one. ++ */ ++ if (new_khva == old_khva) ++ new_khva = NULL; ++ ++ gpc_release_pfn_and_khva(kvm, new_pfn, new_khva); ++ ++ cond_resched(); ++ } ++ + /* We always request a writeable mapping */ +- new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL); ++ new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL); + if (is_error_noslot_pfn(new_pfn)) +- break; ++ goto out_error; ++ ++ /* ++ * Obtain a new kernel mapping if KVM itself will access the ++ * pfn. Note, kmap() and memremap() can both sleep, so this ++ * too must be done outside of gpc->lock! 
++ */ ++ if (gpc->usage & KVM_HOST_USES_PFN) { ++ if (new_pfn == gpc->pfn) { ++ new_khva = old_khva; ++ } else if (pfn_valid(new_pfn)) { ++ new_khva = kmap(pfn_to_page(new_pfn)); ++#ifdef CONFIG_HAS_IOMEM ++ } else { ++ new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); ++#endif ++ } ++ if (!new_khva) { ++ kvm_release_pfn_clean(new_pfn); ++ goto out_error; ++ } ++ } ++ ++ write_lock_irq(&gpc->lock); + +- KVM_MMU_READ_LOCK(kvm); +- retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva); +- KVM_MMU_READ_UNLOCK(kvm); +- if (!retry) +- break; ++ /* ++ * Other tasks must wait for _this_ refresh to complete before ++ * attempting to refresh. ++ */ ++ WARN_ON_ONCE(gpc->valid); ++ } while (mmu_notifier_retry_cache(kvm, mmu_seq)); ++ ++ gpc->valid = true; ++ gpc->pfn = new_pfn; ++ gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK); ++ return 0; + +- cond_resched(); +- } while (1); ++out_error: ++ write_lock_irq(&gpc->lock); + +- return new_pfn; ++ return -EFAULT; + } + + int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, +@@ -147,7 +238,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + kvm_pfn_t old_pfn, new_pfn; + unsigned long old_uhva; + void *old_khva; +- bool old_valid; + int ret = 0; + + /* +@@ -169,7 +259,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + old_pfn = gpc->pfn; + old_khva = gpc->khva - offset_in_page(gpc->khva); + old_uhva = gpc->uhva; +- old_valid = gpc->valid; + + /* If the userspace HVA is invalid, refresh that first */ + if (gpc->gpa != gpa || gpc->generation != slots->generation || +@@ -182,7 +271,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct + gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn); + + if (kvm_is_error_hva(gpc->uhva)) { +- gpc->pfn = KVM_PFN_ERR_FAULT; + ret = -EFAULT; + goto out; + } +@@ -194,60 +282,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct + * If the userspace HVA changed or the PFN was already invalid, + * drop the lock and do the HVA to PFN lookup again. + */ +- if (!old_valid || old_uhva != gpc->uhva) { +- unsigned long uhva = gpc->uhva; +- void *new_khva = NULL; +- +- /* Placeholders for "hva is valid but not yet mapped" */ +- gpc->pfn = KVM_PFN_ERR_FAULT; +- gpc->khva = NULL; +- gpc->valid = true; +- +- write_unlock_irq(&gpc->lock); +- +- new_pfn = hva_to_pfn_retry(kvm, uhva); +- if (is_error_noslot_pfn(new_pfn)) { +- ret = -EFAULT; +- goto map_done; +- } +- +- if (gpc->usage & KVM_HOST_USES_PFN) { +- if (new_pfn == old_pfn) { +- /* +- * Reuse the existing pfn and khva, but put the +- * reference acquired hva_to_pfn_retry(); the +- * cache still holds a reference to the pfn +- * from the previous refresh. +- */ +- gpc_release_pfn_and_khva(kvm, new_pfn, NULL); +- +- new_khva = old_khva; +- old_pfn = KVM_PFN_ERR_FAULT; +- old_khva = NULL; +- } else if (pfn_valid(new_pfn)) { +- new_khva = kmap(pfn_to_page(new_pfn)); +-#ifdef CONFIG_HAS_IOMEM +- } else { +- new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); +-#endif +- } +- if (new_khva) +- new_khva += page_offset; +- else +- ret = -EFAULT; +- } +- +- map_done: +- write_lock_irq(&gpc->lock); +- if (ret) { +- gpc->valid = false; +- gpc->pfn = KVM_PFN_ERR_FAULT; +- gpc->khva = NULL; +- } else { +- /* At this point, gpc->valid may already have been cleared */ +- gpc->pfn = new_pfn; +- gpc->khva = new_khva; +- } ++ if (!gpc->valid || old_uhva != gpc->uhva) { ++ ret = hva_to_pfn_retry(kvm, gpc); + } else { + /* If the HVA→PFN mapping was already valid, don't unmap it. 
*/ + old_pfn = KVM_PFN_ERR_FAULT; +@@ -255,11 +291,26 @@ int kvm_gfn_to_pfn_cache_refresh(struct + } + + out: ++ /* ++ * Invalidate the cache and purge the pfn/khva if the refresh failed. ++ * Some/all of the uhva, gpa, and memslot generation info may still be ++ * valid, leave it as is. ++ */ ++ if (ret) { ++ gpc->valid = false; ++ gpc->pfn = KVM_PFN_ERR_FAULT; ++ gpc->khva = NULL; ++ } ++ ++ /* Snapshot the new pfn before dropping the lock! */ ++ new_pfn = gpc->pfn; ++ + write_unlock_irq(&gpc->lock); + + mutex_unlock(&gpc->refresh_lock); + +- gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); ++ if (old_pfn != new_pfn) ++ gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; + } diff --git a/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch b/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch new file mode 100644 index 00000000000..74131495b6d --- /dev/null +++ b/queue-5.18/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch @@ -0,0 +1,105 @@ +From 93984f19e7bce4c18084a6ef3dacafb155b806ed Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:23 +0000 +Subject: KVM: Fully serialize gfn=>pfn cache refresh via mutex + +From: Sean Christopherson + +commit 93984f19e7bce4c18084a6ef3dacafb155b806ed upstream. + +Protect gfn=>pfn cache refresh with a mutex to fully serialize refreshes. +The refresh logic doesn't protect against + +- concurrent unmaps, or refreshes with different GPAs (which may or may not + happen in practice, for example if a cache is only used under vcpu->mutex; + but it's allowed in the code) + +- a false negative on the memslot generation. If the first refresh sees + a stale memslot generation, it will refresh the hva and generation before + moving on to the hva=>pfn translation. If it then drops gpc->lock, a + different user of the cache can come along, acquire gpc->lock, see that + the memslot generation is fresh, and skip the hva=>pfn update due to the + userspace address also matching (because it too was updated). + +The refresh path can already sleep during hva=>pfn resolution, so wrap +the refresh with a mutex to ensure that any given refresh runs to +completion before other callers can start their refresh. + +Cc: stable@vger.kernel.org +Cc: Lai Jiangshan +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-7-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/kvm_types.h | 2 ++ + virt/kvm/pfncache.c | 12 ++++++++++++ + 2 files changed, 14 insertions(+) + +--- a/include/linux/kvm_types.h ++++ b/include/linux/kvm_types.h +@@ -19,6 +19,7 @@ struct kvm_memslots; + enum kvm_mr_change; + + #include ++#include + #include + #include + +@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache { + struct kvm_vcpu *vcpu; + struct list_head list; + rwlock_t lock; ++ struct mutex refresh_lock; + void *khva; + kvm_pfn_t pfn; + enum pfn_cache_usage usage; +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -157,6 +157,13 @@ int kvm_gfn_to_pfn_cache_refresh(struct + if (page_offset + len > PAGE_SIZE) + return -EINVAL; + ++ /* ++ * If another task is refreshing the cache, wait for it to complete. ++ * There is no guarantee that concurrent refreshes will see the same ++ * gpa, memslots generation, etc..., so they must be fully serialized. 
++ */ ++ mutex_lock(&gpc->refresh_lock); ++ + write_lock_irq(&gpc->lock); + + old_pfn = gpc->pfn; +@@ -250,6 +257,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct + out: + write_unlock_irq(&gpc->lock); + ++ mutex_unlock(&gpc->refresh_lock); ++ + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + + return ret; +@@ -261,6 +270,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + void *old_khva; + kvm_pfn_t old_pfn; + ++ mutex_lock(&gpc->refresh_lock); + write_lock_irq(&gpc->lock); + + gpc->valid = false; +@@ -276,6 +286,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct k + gpc->pfn = KVM_PFN_ERR_FAULT; + + write_unlock_irq(&gpc->lock); ++ mutex_unlock(&gpc->refresh_lock); + + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); + } +@@ -290,6 +301,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm + + if (!gpc->active) { + rwlock_init(&gpc->lock); ++ mutex_init(&gpc->refresh_lock); + + gpc->khva = NULL; + gpc->pfn = KVM_PFN_ERR_FAULT; diff --git a/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch b/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch new file mode 100644 index 00000000000..a551acd5d1a --- /dev/null +++ b/queue-5.18/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch @@ -0,0 +1,45 @@ +From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:51 +0000 +Subject: KVM: nVMX: Account for KVM reserved CR4 bits in consistency checks + +From: Sean Christopherson + +commit ca58f3aa53d165afe4ab74c755bc2f6d168617ac upstream. + +Check that the guest (L2) and host (L1) CR4 values that would be loaded +by nested VM-Enter and VM-Exit respectively are valid with respect to +KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits +would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or +failed VM-Entry) by massaging guest CPUID to allow features that are not +supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as +KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for +L2 based on L1's CPUID model. + +Note, although nested_{guest,host}_cr4_valid() are _currently_ used if +and only if the vCPU is post-VMXON (nested.vmxon == true), that may not +be true in the future, e.g. emulating VMXON has a bug where it doesn't +check the allowed/required CR0/CR4 bits. + +Cc: stable@vger.kernel.org +Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation") +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-3-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kvm/vmx/nested.h ++++ b/arch/x86/kvm/vmx/nested.h +@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(stru + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; + +- return fixed_bits_valid(val, fixed0, fixed1); ++ return fixed_bits_valid(val, fixed0, fixed1) && ++ __kvm_is_valid_cr4(vcpu, val); + } + + /* No difference in the restrictions on guest and host CR4 in VMX operation. 
*/ diff --git a/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch b/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch new file mode 100644 index 00000000000..44beeb6f3b1 --- /dev/null +++ b/queue-5.18/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch @@ -0,0 +1,75 @@ +From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:52 +0000 +Subject: KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4 + +From: Sean Christopherson + +commit c7d855c2aff2d511fd60ee2e356134c4fb394799 upstream. + +Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed +per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely +on hardware to perform the checks, even for the few checks that have +higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in +hardware while running the guest, (b) there may incompatible CR0/CR4 bits +that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may +have further restricted the allowed CR0/CR4 values by manipulating the +guest's nested VMX MSRs. + +Note, despite a very strong desire to throw shade at Jim, commit +70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks") +is not to blame for the buggy behavior (though the comment...). That +commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks +(though it did erroneously drop the CPL check, but that has already been +remedied). KVM may force CR0.PE=1, but will do so only when also +forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033 +Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF") +Reported-by: Eric Li +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-4-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4973,20 +4973,25 @@ static int handle_vmon(struct kvm_vcpu * + | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + + /* +- * The Intel VMX Instruction Reference lists a bunch of bits that are +- * prerequisite to running VMXON, most notably cr4.VMXE must be set to +- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this). +- * Otherwise, we should fail with #UD. But most faulting conditions +- * have already been checked by hardware, prior to the VM-exit for +- * VMXON. We do test guest cr4.VMXE because processor CR4 always has +- * that bit set to 1 in non-root mode. ++ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks ++ * that have higher priority than VM-Exit (see Intel SDM's pseudocode ++ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while ++ * running the guest, i.e. KVM needs to check the _guest_ values. ++ * ++ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and ++ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real ++ * Mode, but KVM will never take the guest out of those modes. 
+ */ +- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { ++ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || ++ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + +- /* CPL=0 must be checked manually. */ ++ /* ++ * CPL=0 and all other checks that are lower priority than VM-Exit must ++ * be checked manually. ++ */ + if (vmx_get_cpl(vcpu)) { + kvm_inject_gp(vcpu, 0); + return 1; diff --git a/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch b/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch new file mode 100644 index 00000000000..f34afd0f972 --- /dev/null +++ b/queue-5.18/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch @@ -0,0 +1,175 @@ +From f8ae08f9789ad59d318ea75b570caa454aceda81 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:54 +0000 +Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value + +From: Sean Christopherson + +commit f8ae08f9789ad59d318ea75b570caa454aceda81 upstream. + +Restrict the nVMX MSRs based on KVM's config, not based on the guest's +current config. Using the guest's config to audit the new config +prevents userspace from restoring the original config (KVM's config) if +at any point in the past the guest's config was restricted in any way. + +Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs") +Cc: stable@vger.kernel.org +Cc: David Matlack +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-6-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/vmx/nested.c | 70 ++++++++++++++++++++++++---------------------- + 1 file changed, 37 insertions(+), 33 deletions(-) + +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -1224,7 +1224,7 @@ static int vmx_restore_vmx_basic(struct + BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | + /* reserved */ + BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); +- u64 vmx_basic = vmx->nested.msrs.basic; ++ u64 vmx_basic = vmcs_config.nested.basic; + + if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + return -EINVAL; +@@ -1247,36 +1247,42 @@ static int vmx_restore_vmx_basic(struct + return 0; + } + +-static int +-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, ++ u32 **low, u32 **high) + { +- u64 supported; +- u32 *lowp, *highp; +- + switch (msr_index) { + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: +- lowp = &vmx->nested.msrs.pinbased_ctls_low; +- highp = &vmx->nested.msrs.pinbased_ctls_high; ++ *low = &msrs->pinbased_ctls_low; ++ *high = &msrs->pinbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: +- lowp = &vmx->nested.msrs.procbased_ctls_low; +- highp = &vmx->nested.msrs.procbased_ctls_high; ++ *low = &msrs->procbased_ctls_low; ++ *high = &msrs->procbased_ctls_high; + break; + case MSR_IA32_VMX_TRUE_EXIT_CTLS: +- lowp = &vmx->nested.msrs.exit_ctls_low; +- highp = &vmx->nested.msrs.exit_ctls_high; ++ *low = &msrs->exit_ctls_low; ++ *high = &msrs->exit_ctls_high; + break; + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: +- lowp = &vmx->nested.msrs.entry_ctls_low; +- highp = &vmx->nested.msrs.entry_ctls_high; ++ *low = &msrs->entry_ctls_low; ++ *high = &msrs->entry_ctls_high; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: +- lowp = &vmx->nested.msrs.secondary_ctls_low; +- highp = 
&vmx->nested.msrs.secondary_ctls_high; ++ *low = &msrs->secondary_ctls_low; ++ *high = &msrs->secondary_ctls_high; + break; + default: + BUG(); + } ++} ++ ++static int ++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ u32 *lowp, *highp; ++ u64 supported; ++ ++ vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); + + supported = vmx_control_msr(*lowp, *highp); + +@@ -1288,6 +1294,7 @@ vmx_restore_control_msr(struct vcpu_vmx + if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) + return -EINVAL; + ++ vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); + *lowp = data; + *highp = data >> 32; + return 0; +@@ -1301,10 +1308,8 @@ static int vmx_restore_vmx_misc(struct v + BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | + /* reserved */ + GENMASK_ULL(13, 9) | BIT_ULL(31); +- u64 vmx_misc; +- +- vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, +- vmx->nested.msrs.misc_high); ++ u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, ++ vmcs_config.nested.misc_high); + + if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + return -EINVAL; +@@ -1332,10 +1337,8 @@ static int vmx_restore_vmx_misc(struct v + + static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) + { +- u64 vmx_ept_vpid_cap; +- +- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, +- vmx->nested.msrs.vpid_caps); ++ u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, ++ vmcs_config.nested.vpid_caps); + + /* Every bit is either reserved or a feature bit. */ + if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) +@@ -1346,20 +1349,21 @@ static int vmx_restore_vmx_ept_vpid_cap( + return 0; + } + +-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) + { +- u64 *msr; +- + switch (msr_index) { + case MSR_IA32_VMX_CR0_FIXED0: +- msr = &vmx->nested.msrs.cr0_fixed0; +- break; ++ return &msrs->cr0_fixed0; + case MSR_IA32_VMX_CR4_FIXED0: +- msr = &vmx->nested.msrs.cr4_fixed0; +- break; ++ return &msrs->cr4_fixed0; + default: + BUG(); + } ++} ++ ++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) ++{ ++ const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); + + /* + * 1 bits (which indicates bits which "must-be-1" during VMX operation) +@@ -1368,7 +1372,7 @@ static int vmx_restore_fixed0_msr(struct + if (!is_bitwise_subset(data, *msr, -1ULL)) + return -EINVAL; + +- *msr = data; ++ *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; + return 0; + } + +@@ -1429,7 +1433,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcp + vmx->nested.msrs.vmcs_enum = data; + return 0; + case MSR_IA32_VMX_VMFUNC: +- if (data & ~vmx->nested.msrs.vmfunc_controls) ++ if (data & ~vmcs_config.nested.vmfunc_controls) + return -EINVAL; + vmx->nested.msrs.vmfunc_controls = data; + return 0; diff --git a/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch b/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch new file mode 100644 index 00000000000..b6b2fb21a14 --- /dev/null +++ b/queue-5.18/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch @@ -0,0 +1,41 @@ +From 3dddf65b4f4c451c345d34ae85bdf1791a746e49 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Fri, 29 Apr 2022 21:00:21 +0000 +Subject: KVM: Put the extra pfn reference when reusing a pfn in the gpc cache + +From: Sean Christopherson + +commit 
3dddf65b4f4c451c345d34ae85bdf1791a746e49 upstream. + +Put the struct page reference to pfn acquired by hva_to_pfn() when the +old and new pfns for a gfn=>pfn cache match. The cache already has a +reference via the old/current pfn, and will only put one reference when +the cache is done with the pfn. + +Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220429210025.3293691-5-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/pfncache.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/virt/kvm/pfncache.c ++++ b/virt/kvm/pfncache.c +@@ -206,6 +206,14 @@ int kvm_gfn_to_pfn_cache_refresh(struct + + if (gpc->usage & KVM_HOST_USES_PFN) { + if (new_pfn == old_pfn) { ++ /* ++ * Reuse the existing pfn and khva, but put the ++ * reference acquired hva_to_pfn_retry(); the ++ * cache still holds a reference to the pfn ++ * from the previous refresh. ++ */ ++ gpc_release_pfn_and_khva(kvm, new_pfn, NULL); ++ + new_khva = old_khva; + old_pfn = KVM_PFN_ERR_FAULT; + old_khva = NULL; diff --git a/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch b/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch new file mode 100644 index 00000000000..adfe8d75cb3 --- /dev/null +++ b/queue-5.18/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch @@ -0,0 +1,100 @@ +From c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Mon, 18 Jul 2022 15:04:34 +0200 +Subject: KVM: s390: pv: don't present the ecall interrupt twice + +From: Nico Boehr + +commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 upstream. + +When the SIGP interpretation facility is present and a VCPU sends an +ecall to another VCPU in enabled wait, the sending VCPU receives a 56 +intercept (partial execution), so KVM can wake up the receiving CPU. +Note that the SIGP interpretation facility will take care of the +interrupt delivery and KVM's only job is to wake the receiving VCPU. + +For PV, the sending VCPU will receive a 108 intercept (pv notify) and +should continue like in the non-PV case, i.e. wake the receiving VCPU. + +For PV and non-PV guests the interrupt delivery will occur through the +SIGP interpretation facility on SIE entry when SIE finds the X bit in +the status field set. + +However, in handle_pv_notification(), there was no special handling for +SIGP, which leads to interrupt injection being requested by KVM for the +next SIE entry. This results in the interrupt being delivered twice: +once by the SIGP interpretation facility and once by KVM through the +IICTL. + +Add the necessary special handling in handle_pv_notification(), similar +to handle_partial_execution(), which simply wakes the receiving VCPU and +leave interrupt delivery to the SIGP interpretation facility. + +In contrast to external calls, emergency calls are not interpreted but +also cause a 108 intercept, which is why we still need to call +handle_instruction() for SIGP orders other than ecall. + +Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which +cause a 108 intercept - even if they are actually handled by +handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei() +to avoid possibly confusing trace messages. 
+ +Signed-off-by: Nico Boehr +Cc: # 5.7 +Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation") +Reviewed-by: Claudio Imbrenda +Reviewed-by: Janosch Frank +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com +Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com> +Signed-off-by: Claudio Imbrenda +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kvm/intercept.c | 15 +++++++++++++++ + arch/s390/kvm/sigp.c | 4 ++-- + 2 files changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu + + static int handle_pv_notification(struct kvm_vcpu *vcpu) + { ++ int ret; ++ + if (vcpu->arch.sie_block->ipa == 0xb210) + return handle_pv_spx(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb220) + return handle_pv_sclp(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb9a4) + return handle_pv_uvc(vcpu); ++ if (vcpu->arch.sie_block->ipa >> 8 == 0xae) { ++ /* ++ * Besides external call, other SIGP orders also cause a ++ * 108 (pv notify) intercept. In contrast to external call, ++ * these orders need to be emulated and hence the appropriate ++ * place to handle them is in handle_instruction(). ++ * So first try kvm_s390_handle_sigp_pei() and if that isn't ++ * successful, go on with handle_instruction(). ++ */ ++ ret = kvm_s390_handle_sigp_pei(vcpu); ++ if (!ret) ++ return ret; ++ } + + return handle_instruction(vcpu); + } +--- a/arch/s390/kvm/sigp.c ++++ b/arch/s390/kvm/sigp.c +@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_ + struct kvm_vcpu *dest_vcpu; + u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); + +- trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); +- + if (order_code == SIGP_EXTERNAL_CALL) { ++ trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); ++ + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); + BUG_ON(dest_vcpu == NULL); + diff --git a/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch b/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch new file mode 100644 index 00000000000..de7496e36eb --- /dev/null +++ b/queue-5.18/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch @@ -0,0 +1,66 @@ +From ec6e4d863258d4bfb36d48d5e3ef68140234d688 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:48 +0000 +Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks + +From: Sean Christopherson + +commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 upstream. + +Wait to mark the TSS as busy during LTR emulation until after all fault +checks for the LTR have passed. Specifically, don't mark the TSS busy if +the new TSS base is non-canonical. + +Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the +only reason for the early check was to avoid marking a !PRESENT TSS as +busy, i.e. the common !PRESENT is now done before setting the busy bit. 
+ +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org +Cc: Tetsuo Handa +Cc: Hou Wenlong +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1687,16 +1687,6 @@ static int __load_segment_descriptor(str + case VCPU_SREG_TR: + if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) + goto exception; +- if (!seg_desc.p) { +- err_vec = NP_VECTOR; +- goto exception; +- } +- old_desc = seg_desc; +- seg_desc.type |= 2; /* busy */ +- ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, +- sizeof(seg_desc), &ctxt->exception); +- if (ret != X86EMUL_CONTINUE) +- return ret; + break; + case VCPU_SREG_LDTR: + if (seg_desc.s || seg_desc.type != 2) +@@ -1737,6 +1727,15 @@ static int __load_segment_descriptor(str + ((u64)base3 << 32), ctxt)) + return emulate_gp(ctxt, 0); + } ++ ++ if (seg == VCPU_SREG_TR) { ++ old_desc = seg_desc; ++ seg_desc.type |= 2; /* busy */ ++ ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, ++ sizeof(seg_desc), &ctxt->exception); ++ if (ret != X86EMUL_CONTINUE) ++ return ret; ++ } + load: + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); + if (desc) diff --git a/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch b/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch new file mode 100644 index 00000000000..e1a8cabfb73 --- /dev/null +++ b/queue-5.18/kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch @@ -0,0 +1,41 @@ +From 2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Mon, 11 Jul 2022 23:27:49 +0000 +Subject: KVM: x86: Set error code to segment selector on LLDT/LTR non-canonical #GP + +From: Sean Christopherson + +commit 2626206963ace9e8bf92b6eea5ff78dd674c555c upstream. + +When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set +the error code to the selector. Intel SDM's says nothing about the #GP, +but AMD's APM explicitly states that both LLDT and LTR set the error code +to the selector, not zero. + +Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0), +but the KVM code in question is specific to the base from the descriptor. 
+ +Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR") +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Reviewed-by: Maxim Levitsky +Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/emulate.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -1724,8 +1724,8 @@ static int __load_segment_descriptor(str + if (ret != X86EMUL_CONTINUE) + return ret; + if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | +- ((u64)base3 << 32), ctxt)) +- return emulate_gp(ctxt, 0); ++ ((u64)base3 << 32), ctxt)) ++ return emulate_gp(ctxt, err_code); + } + + if (seg == VCPU_SREG_TR) { diff --git a/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch b/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch new file mode 100644 index 00000000000..228f1ae41af --- /dev/null +++ b/queue-5.18/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch @@ -0,0 +1,104 @@ +From c33f6f2228fe8517e38941a508e9f905f99ecba9 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Tue, 7 Jun 2022 21:35:50 +0000 +Subject: KVM: x86: Split kvm_is_valid_cr4() and export only the non-vendor bits + +From: Sean Christopherson + +commit c33f6f2228fe8517e38941a508e9f905f99ecba9 upstream. + +Split the common x86 parts of kvm_is_valid_cr4(), i.e. the reserved bits +checks, into a separate helper, __kvm_is_valid_cr4(), and export only the +inner helper to vendor code in order to prevent nested VMX from calling +back into vmx_is_valid_cr4() via kvm_is_valid_cr4(). + +On SVM, this is a nop as SVM doesn't place any additional restrictions on +CR4. + +On VMX, this is also currently a nop, but only because nested VMX is +missing checks on reserved CR4 bits for nested VM-Enter. That bug will +be fixed in a future patch, and could simply use kvm_is_valid_cr4() as-is, +but nVMX has _another_ bug where VMXON emulation doesn't enforce VMX's +restrictions on CR0/CR4. The cleanest and most intuitive way to fix the +VMXON bug is to use nested_host_cr{0,4}_valid(). If the CR4 variant +routes through kvm_is_valid_cr4(), using nested_host_cr4_valid() won't do +the right thing for the VMXON case as vmx_is_valid_cr4() enforces VMX's +restrictions if and only if the vCPU is post-VMXON. + +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220607213604.3346000-2-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/svm/nested.c | 3 ++- + arch/x86/kvm/vmx/vmx.c | 4 ++-- + arch/x86/kvm/x86.c | 12 +++++++++--- + arch/x86/kvm/x86.h | 2 +- + 4 files changed, 14 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/svm/nested.c ++++ b/arch/x86/kvm/svm/nested.c +@@ -292,7 +292,8 @@ static bool __nested_vmcb_check_save(str + return false; + } + +- if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) ++ /* Note, SVM doesn't have any additional restrictions on CR4. */ ++ if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4))) + return false; + + if (CC(!kvm_valid_efer(vcpu, save->efer))) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -3230,8 +3230,8 @@ static bool vmx_is_valid_cr4(struct kvm_ + { + /* + * We operate under the default treatment of SMM, so VMX cannot be +- * enabled under SMM. Note, whether or not VMXE is allowed at all is +- * handled by kvm_is_valid_cr4(). 
++ * enabled under SMM. Note, whether or not VMXE is allowed at all, ++ * i.e. is a reserved bit, is handled by common x86 code. + */ + if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) + return false; +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1066,7 +1066,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu * + } + EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); + +-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + { + if (cr4 & cr4_reserved_bits) + return false; +@@ -1074,9 +1074,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *v + if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) + return false; + +- return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); ++ return true; ++} ++EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4); ++ ++static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ++{ ++ return __kvm_is_valid_cr4(vcpu, cr4) && ++ static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); + } +-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); + + void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) + { +--- a/arch/x86/kvm/x86.h ++++ b/arch/x86/kvm/x86.h +@@ -407,7 +407,7 @@ static inline void kvm_machine_check(voi + void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); + void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); + int kvm_spec_ctrl_test_value(u64 value); +-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); ++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); + int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, + struct x86_exception *e); + int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); diff --git a/queue-5.18/series b/queue-5.18/series index e033813c557..28a50517b91 100644 --- a/queue-5.18/series +++ b/queue-5.18/series @@ -23,3 +23,15 @@ hid-wacom-don-t-register-pad_input-for-touch-switch.patch kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_run_pending-case.patch kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_run_pending-case.patch kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-with-gif-0.patch +kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch +kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__release_gpc-helper.patch +kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-in-the-gpc-cache.patch +kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch +kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch +kvm-do-not-incorporate-page-offset-into-gfn-pfn-cache-user-address.patch +kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-non-vendor-bits.patch +kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-supported-value.patch +kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consistency-checks.patch +kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incompatible-cr0-cr4.patch +kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-all-fault-checks.patch +kvm-x86-set-error-code-to-segment-selector-on-lldt-ltr-non-canonical-gp.patch
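
The gfn=>pfn cache fixes queued above all rely on one guard pattern: a refresh invalidates the cache, snapshots an invalidation sequence, performs the sleepable hva=>pfn lookup with no spinlock held, and retries if an invalidation was in flight (mn_active_invalidate_count elevated) or the sequence moved, with the whole refresh serialized by a mutex. The C sketch below is only an illustrative userspace analogue of that pattern, not kernel code; every name in it (struct cache, cache_refresh, do_lookup, active_invalidate, invalidate_seq) is hypothetical, and it deliberately omits the smp_rmb() pairing and the irq-safe rwlock handling of the real virt/kvm/pfncache.c changes.

/*
 * Illustrative analogue of the retry guard in the queued
 * "KVM: Fix multiple races in gfn=>pfn cache refresh" patch:
 * retry the lookup if an invalidation is active or the sequence
 * number changed, and serialize whole refreshes with a mutex.
 * Not kernel code; all identifiers are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
        pthread_mutex_t refresh_lock;    /* plays the role of gpc->refresh_lock */
        unsigned long active_invalidate; /* plays the role of mn_active_invalidate_count */
        unsigned long invalidate_seq;    /* plays the role of mmu_notifier_seq */
        unsigned long value;             /* the cached translation */
        bool valid;
};

/* Stand-in for the slow, sleepable hva=>pfn lookup. */
static unsigned long do_lookup(void)
{
        return 42;
}

static bool retry_needed(const struct cache *c, unsigned long seq_snapshot)
{
        /* An in-flight invalidation may not have bumped the sequence yet. */
        if (c->active_invalidate)
                return true;
        /* Otherwise retry only if the sequence moved during the lookup. */
        return c->invalidate_seq != seq_snapshot;
}

static void cache_refresh(struct cache *c)
{
        unsigned long seq, val;

        pthread_mutex_lock(&c->refresh_lock);
        c->valid = false;                 /* invalidate before the sleepable lookup */
        do {
                seq = c->invalidate_seq;  /* snapshot before looking up */
                val = do_lookup();        /* no spinlock held here in the real code */
        } while (retry_needed(c, seq));
        c->value = val;
        c->valid = true;
        pthread_mutex_unlock(&c->refresh_lock);
}

int main(void)
{
        struct cache c = { .refresh_lock = PTHREAD_MUTEX_INITIALIZER };

        cache_refresh(&c);
        printf("cached value: %lu (valid=%d)\n", c.value, (int)c.valid);
        return 0;
}

The sketch builds with an ordinary C compiler plus -pthread and exists only to make the ordering in the queued patches (serialize, invalidate, look up, re-check, then publish) easier to follow; the actual semantics are defined by the patches above.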