Fixes for 6.3

author Sasha Levin <sashal@kernel.org>

Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)

committer Sasha Levin <sashal@kernel.org>

Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)
author Sasha Levin <sashal@kernel.org>
Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)
committer Sasha Levin <sashal@kernel.org>
Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)
diff --git a/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch b/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch

new file mode 100644 (file)

index 0000000..c7ceb0a
--- /dev/null
+++ b/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
@@ -0,0 +1,118 @@
+From 171ce7ae557672ad0c3852111ce013be1bc0caf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 15:20:23 +0200
+Subject: KVM: VMX: Make CR0.WP a guest owned bit
+
+From: Mathias Krause <minipli@grsecurity.net>
+
+[ Upstream commit fb509f76acc8d42bed11bca308404f81c2be856a ]
+
+Guests like grsecurity that make heavy use of CR0.WP to implement kernel
+level W^X will suffer from the implied VMEXITs.
+
+With EPT there is no need to intercept a guest change of CR0.WP, so
+simply make it a guest owned bit if we can do so.
+
+This implies that a read of a guest's CR0.WP bit might need a VMREAD.
+However, the only potentially affected user seems to be kvm_init_mmu()
+which is a heavy operation to begin with. But also most callers already
+cache the full value of CR0 anyway, so no additional VMREAD is needed.
+The only exception is nested_vmx_load_cr3().
+
+This change is VMX-specific, as SVM has no such fine grained control
+register intercept control.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/kvm_cache_regs.h |  2 +-
+ arch/x86/kvm/vmx/nested.c     |  4 ++--
+ arch/x86/kvm/vmx/vmx.c        |  2 +-
+ arch/x86/kvm/vmx/vmx.h        | 18 ++++++++++++++++++
+ 4 files changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
+index 4c91f626c0580..e50d353b5c1c4 100644
+--- a/arch/x86/kvm/kvm_cache_regs.h
++++ b/arch/x86/kvm/kvm_cache_regs.h
+@@ -4,7 +4,7 @@
+ 
+ #include <linux/kvm_host.h>
+ 
+-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
++#define KVM_POSSIBLE_CR0_GUEST_BITS   (X86_CR0_TS | X86_CR0_WP)
+ #define KVM_POSSIBLE_CR4_GUEST_BITS                             \
+       (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+        | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 768487611db78..89fa35fba3d86 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4483,7 +4483,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+        * CR0_GUEST_HOST_MASK is already set in the original vmcs01
+        * (KVM doesn't change it);
+        */
+-      vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++      vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+       vmx_set_cr0(vcpu, vmcs12->host_cr0);
+ 
+       /* Same as above - no reason to call set_cr4_guest_host_mask().  */
+@@ -4634,7 +4634,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
+        */
+       vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
+ 
+-      vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++      vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+       vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
+ 
+       vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 64b35223dc3d7..8ead0916e252e 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4773,7 +4773,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
+       /* 22.2.1, 20.8.1 */
+       vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
+ 
+-      vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++      vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+       vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
+ 
+       set_cr4_guest_host_mask(vmx);
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 2acdc54bc34b1..423e9d3c9c408 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
+                               (1 << VCPU_EXREG_EXIT_INFO_1) | \
+                               (1 << VCPU_EXREG_EXIT_INFO_2))
+ 
++static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
++{
++      unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++
++      /*
++       * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
++       * in order to construct shadow PTEs with the correct protections.
++       * Note!  CR0.WP technically can be passed through to the guest if
++       * paging is disabled, but checking CR0.PG would generate a cyclical
++       * dependency of sorts due to forcing the caller to ensure CR0 holds
++       * the correct value prior to determining which CR0 bits can be owned
++       * by L1.  Keep it simple and limit the optimization to EPT.
++       */
++      if (!enable_ept)
++              bits &= ~X86_CR0_WP;
++      return bits;
++}
++
+ static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+ {
+       return container_of(kvm, struct kvm_vmx, kvm);
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch b/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch

new file mode 100644 (file)

index 0000000..c2697b9
--- /dev/null
+++ b/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
@@ -0,0 +1,69 @@
+From 588288d1cf0c17f55be23bfce919082d005b2743 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 15:20:21 +0200
+Subject: KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP
+ enabled
+
+From: Mathias Krause <minipli@grsecurity.net>
+
+[ Upstream commit 01b31714bd90be2784f7145bf93b7f78f3d081e1 ]
+
+There is no need to unload the MMU roots with TDP enabled when only
+CR0.WP has changed -- the paging structures are still valid, only the
+permission bitmap needs to be updated.
+
+One heavy user of toggling CR0.WP is grsecurity's KERNEXEC feature to
+implement kernel W^X.
+
+The optimization brings a huge performance gain for this case as the
+following micro-benchmark running 'ssdd 10 50000' from rt-tests[1] on a
+grsecurity L1 VM shows (runtime in seconds, lower is better):
+
+                       legacy     TDP    shadow
+kvm-x86/next@d8708b     8.43s    9.45s    70.3s
+             +patch     5.39s    5.63s    70.2s
+
+For legacy MMU this is ~36% faster, for TDP MMU even ~40% faster. Also
+TDP and legacy MMU now both have a similar runtime, which eliminates the
+need to disable TDP MMU for grsecurity.
+
+Shadow MMU sees no measurable difference and is still slow, as expected.
+
+[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
+
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lore.kernel.org/r/20230322013731.102955-3-minipli@grsecurity.net
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 3d852ce849206..999b2db0737be 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -906,6 +906,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
+ 
+ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+ {
++      /*
++       * CR0.WP is incorporated into the MMU role, but only for non-nested,
++       * indirect shadow MMUs.  If TDP is enabled, the MMU's metadata needs
++       * to be updated, e.g. so that emulating guest translations does the
++       * right thing, but there's no need to unload the root as CR0.WP
++       * doesn't affect SPTEs.
++       */
++      if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) {
++              kvm_init_mmu(vcpu);
++              return;
++      }
++
+       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+               kvm_clear_async_pf_completion_queue(vcpu);
+               kvm_async_pf_hash_reset(vcpu);
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch b/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch

new file mode 100644 (file)

index 0000000..b307ff4
--- /dev/null
+++ b/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
@@ -0,0 +1,66 @@
+From b44c6ca79b915a989e7921ef76d4d0adc05bffe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 15:20:22 +0200
+Subject: KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
+
+From: Mathias Krause <minipli@grsecurity.net>
+
+[ Upstream commit 74cdc836919bf34684ef66f995273f35e2189daf ]
+
+Make use of the kvm_read_cr{0,4}_bits() helper functions when we only
+want to know the state of certain bits instead of the whole register.
+
+This not only makes the intent cleaner, it also avoids a potential
+VMREAD in case the tested bits aren't guest owned.
+
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lore.kernel.org/r/20230322013731.102955-5-minipli@grsecurity.net
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/pmu.c     | 4 ++--
+ arch/x86/kvm/vmx/vmx.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index 612e6c70ce2e7..f4aa170b5b972 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -540,9 +540,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
+       if (!pmc)
+               return 1;
+ 
+-      if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
++      if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) &&
+           (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
+-          (kvm_read_cr0(vcpu) & X86_CR0_PE))
++          (kvm_read_cr0_bits(vcpu, X86_CR0_PE)))
+               return 1;
+ 
+       *data = pmc_read_counter(pmc) & mask;
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index dd92361f41b3f..64b35223dc3d7 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -5500,7 +5500,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
+               break;
+       case 3: /* lmsw */
+               val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
+-              trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
++              trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
+               kvm_lmsw(vcpu, val);
+ 
+               return kvm_skip_emulated_instruction(vcpu);
+@@ -7558,7 +7558,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+       if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
+               return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
+ 
+-      if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
++      if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
+               if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+                       cache = MTRR_TYPE_WRBACK;
+               else
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch b/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch

new file mode 100644 (file)

index 0000000..96cd1d6
--- /dev/null
+++ b/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
@@ -0,0 +1,132 @@
+From 486a413cb7ee0c99d3a559cbad97cae281706ad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 15:20:20 +0200
+Subject: KVM: x86/mmu: Avoid indirect call for get_cr3
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 2fdcc1b324189b5fb20655baebd40cd82e2bdf0c ]
+
+Most of the time, calls to get_guest_pgd result in calling
+kvm_read_cr3 (the exception is only nested TDP).  Hardcode
+the default instead of using the get_cr3 function, avoiding
+a retpoline if they are enabled.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lore.kernel.org/r/20230322013731.102955-2-minipli@grsecurity.net
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net> # backport to v6.3.x
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c         | 31 ++++++++++++++++++++-----------
+ arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
+ 2 files changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index c8ebe542c565f..18c0deeaa2ec4 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -242,6 +242,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
+       return regs;
+ }
+ 
++static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
++{
++      return kvm_read_cr3(vcpu);
++}
++
++static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
++                                                struct kvm_mmu *mmu)
++{
++      if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
++              return kvm_read_cr3(vcpu);
++
++      return mmu->get_guest_pgd(vcpu);
++}
++
+ static inline bool kvm_available_flush_tlb_with_range(void)
+ {
+       return kvm_x86_ops.tlb_remote_flush_with_range;
+@@ -3731,7 +3745,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
+       int quadrant, i, r;
+       hpa_t root;
+ 
+-      root_pgd = mmu->get_guest_pgd(vcpu);
++      root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
+       root_gfn = root_pgd >> PAGE_SHIFT;
+ 
+       if (mmu_check_root(vcpu, root_gfn))
+@@ -4181,7 +4195,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+       arch.token = alloc_apf_token(vcpu);
+       arch.gfn = gfn;
+       arch.direct_map = vcpu->arch.mmu->root_role.direct;
+-      arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
++      arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
+ 
+       return kvm_setup_async_pf(vcpu, cr2_or_gpa,
+                                 kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+@@ -4200,7 +4214,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
+               return;
+ 
+       if (!vcpu->arch.mmu->root_role.direct &&
+-            work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
++            work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
+               return;
+ 
+       kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
+@@ -4604,11 +4618,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
+ 
+-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
+-{
+-      return kvm_read_cr3(vcpu);
+-}
+-
+ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
+                          unsigned int access)
+ {
+@@ -5159,7 +5168,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
+       context->page_fault = kvm_tdp_page_fault;
+       context->sync_page = nonpaging_sync_page;
+       context->invlpg = NULL;
+-      context->get_guest_pgd = get_cr3;
++      context->get_guest_pgd = get_guest_cr3;
+       context->get_pdptr = kvm_pdptr_read;
+       context->inject_page_fault = kvm_inject_page_fault;
+ 
+@@ -5309,7 +5318,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
+ 
+       kvm_init_shadow_mmu(vcpu, cpu_role);
+ 
+-      context->get_guest_pgd     = get_cr3;
++      context->get_guest_pgd     = get_guest_cr3;
+       context->get_pdptr         = kvm_pdptr_read;
+       context->inject_page_fault = kvm_inject_page_fault;
+ }
+@@ -5323,7 +5332,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
+               return;
+ 
+       g_context->cpu_role.as_u64   = new_mode.as_u64;
+-      g_context->get_guest_pgd     = get_cr3;
++      g_context->get_guest_pgd     = get_guest_cr3;
+       g_context->get_pdptr         = kvm_pdptr_read;
+       g_context->inject_page_fault = kvm_inject_page_fault;
+ 
+diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
+index 57f0b75c80f9d..2ea2861bbb3c1 100644
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+       trace_kvm_mmu_pagetable_walk(addr, access);
+ retry_walk:
+       walker->level = mmu->cpu_role.base.level;
+-      pte           = mmu->get_guest_pgd(vcpu);
++      pte           = kvm_mmu_get_guest_pgd(vcpu, mmu);
+       have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
+ 
+ #if PTTYPE == 64
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch b/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch

new file mode 100644 (file)

index 0000000..2450cfb
--- /dev/null
+++ b/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch
@@ -0,0 +1,117 @@
+From 3c08f16493b5fcd97add715e8b752803ca1914f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 15:20:24 +0200
+Subject: KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated
+ permission faults
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit cf9f4c0eb1699d306e348b1fd0225af7b2c282d3 ]
+
+Refresh the MMU's snapshot of the vCPU's CR0.WP prior to checking for
+permission faults when emulating a guest memory access and CR0.WP may be
+guest owned.  If the guest toggles only CR0.WP and triggers emulation of
+a supervisor write, e.g. when KVM is emulating UMIP, KVM may consume a
+stale CR0.WP, i.e. use stale protection bits metadata.
+
+Note, KVM passes through CR0.WP if and only if EPT is enabled as CR0.WP
+is part of the MMU role for legacy shadow paging, and SVM (NPT) doesn't
+support per-bit interception controls for CR0.  Don't bother checking for
+EPT vs. NPT as the "old == new" check will always be true under NPT, i.e.
+the only cost is the read of vcpu->arch.cr0 (SVM unconditionally grabs CR0
+from the VMCB on VM-Exit).
+
+Reported-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lkml.kernel.org/r/677169b4-051f-fcae-756b-9a3e1bb9f8fe%40grsecurity.net
+Fixes: fb509f76acc8 ("KVM: VMX: Make CR0.WP a guest owned bit")
+Tested-by: Mathias Krause <minipli@grsecurity.net>
+Link: https://lore.kernel.org/r/20230405002608.418442-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Mathias Krause <minipli@grsecurity.net> # backport to v6.3.x
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu.h     | 26 +++++++++++++++++++++++++-
+ arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++++
+ 2 files changed, 40 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
+index 168c46fd8dd18..0f38b78ab04b7 100644
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                               u64 fault_address, char *insn, int insn_len);
++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++                                      struct kvm_mmu *mmu);
+ 
+ int kvm_mmu_load(struct kvm_vcpu *vcpu);
+ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
+@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
+                                         vcpu->arch.mmu->root_role.level);
+ }
+ 
++static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++                                                  struct kvm_mmu *mmu)
++{
++      /*
++       * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
++       * @mmu's snapshot of CR0.WP and thus all related paging metadata may
++       * be stale.  Refresh CR0.WP and the metadata on-demand when checking
++       * for permission faults.  Exempt nested MMUs, i.e. MMUs for shadowing
++       * nEPT and nNPT, as CR0.WP is ignored in both cases.  Note, KVM does
++       * need to refresh nested_mmu, a.k.a. the walker used to translate L2
++       * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
++       */
++      if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
++              return;
++
++      __kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
++}
++
+ /*
+  * Check if a given access (described through the I/D, W/R and U/S bits of a
+  * page fault error code pfec) causes a permission fault with the given PTE
+@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+       u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
+       bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
+       int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
+-      bool fault = (mmu->permissions[index] >> pte_access) & 1;
+       u32 errcode = PFERR_PRESENT_MASK;
++      bool fault;
++
++      kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
++
++      fault = (mmu->permissions[index] >> pte_access) & 1;
+ 
+       WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
+       if (unlikely(mmu->pkru_mask)) {
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 18c0deeaa2ec4..d3812de54b02c 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5121,6 +5121,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
+       return role;
+ }
+ 
++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++                                      struct kvm_mmu *mmu)
++{
++      const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP);
++
++      BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
++      BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
++
++      if (is_cr0_wp(mmu) == cr0_wp)
++              return;
++
++      mmu->cpu_role.base.cr0_wp = cr0_wp;
++      reset_guest_paging_metadata(vcpu, mmu);
++}
++
+ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
+ {
+       /* tdp_root_level is architecture forced level, use it if nonzero */
+-- 
+2.39.2
+
diff --git a/queue-6.3/series b/queue-6.3/series

index d9f052c23a9ab67b8e6ddd6432eab510b0e1f93f..307187505de9f69b0af88a2177106facfec144c0 100644 (file)
--- a/queue-6.3/series
+++ b/queue-6.3/series
@@ -118,3 +118,8 @@ crypto-engine-fix-crypto_queue-backlog-handling.patch
  perf-symbols-fix-return-incorrect-build_id-size-in-e.patch
  perf-tracepoint-fix-memory-leak-in-is_valid_tracepoi.patch
  perf-stat-separate-bperf-from-bpf_profiler.patch
+kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
+kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
+kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
+kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
+kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch
author	Sasha Levin <sashal@kernel.org>
	Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Fri, 12 May 2023 18:10:05 +0000 (14:10 -0400)
queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch	[new file with mode: 0644]	patch \| blob
queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch	[new file with mode: 0644]	patch \| blob
queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch	[new file with mode: 0644]	patch \| blob
queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch	[new file with mode: 0644]	patch \| blob
queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch	[new file with mode: 0644]	patch \| blob
queue-6.3/series		patch \| blob \| blame \| history