From 9ee52e640145dfe14375bff999931f444fc4213c Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Fri, 12 May 2023 14:10:05 -0400
Subject: [PATCH] Fixes for 6.3

Signed-off-by: Sasha Levin
---
 ...vm-vmx-make-cr0.wp-a-guest-owned-bit.patch | 118 ++++++++++++++++
 ...nload-mmu-roots-when-only-toggling-c.patch |  69 +++++++++
 ...-of-kvm_read_cr-_bits-when-testing-b.patch |  66 +++++++++
 ...-mmu-avoid-indirect-call-for-get_cr3.patch | 132 ++++++++++++++++++
 ...esh-cr0.wp-prior-to-checking-for-emu.patch | 117 ++++++++++++++++
 queue-6.3/series                              |   5 +
 6 files changed, 507 insertions(+)
 create mode 100644 queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
 create mode 100644 queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
 create mode 100644 queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
 create mode 100644 queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
 create mode 100644 queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch

diff --git a/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch b/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
new file mode 100644
index 00000000000..c7ceb0a2b18
--- /dev/null
+++ b/queue-6.3/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
@@ -0,0 +1,118 @@
+From 171ce7ae557672ad0c3852111ce013be1bc0caf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 12 May 2023 15:20:23 +0200
+Subject: KVM: VMX: Make CR0.WP a guest owned bit
+
+From: Mathias Krause
+
+[ Upstream commit fb509f76acc8d42bed11bca308404f81c2be856a ]
+
+Guests like grsecurity that make heavy use of CR0.WP to implement kernel
+level W^X will suffer from the implied VMEXITs.
+
+With EPT there is no need to intercept a guest change of CR0.WP, so
+simply make it a guest owned bit if we can do so.
+
+This implies that a read of a guest's CR0.WP bit might need a VMREAD.
+However, the only potentially affected user seems to be kvm_init_mmu()
+which is a heavy operation to begin with. But also most callers already
+cache the full value of CR0 anyway, so no additional VMREAD is needed.
+The only exception is nested_vmx_load_cr3().
+
+This change is VMX-specific, as SVM has no such fine grained control
+register intercept control.
+
+Suggested-by: Sean Christopherson
+Signed-off-by: Mathias Krause
+Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net
+Co-developed-by: Sean Christopherson
+Signed-off-by: Sean Christopherson
+Signed-off-by: Mathias Krause
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kvm/kvm_cache_regs.h |  2 +-
+ arch/x86/kvm/vmx/nested.c     |  4 ++--
+ arch/x86/kvm/vmx/vmx.c        |  2 +-
+ arch/x86/kvm/vmx/vmx.h        | 18 ++++++++++++++++++
+ 4 files changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
+index 4c91f626c0580..e50d353b5c1c4 100644
+--- a/arch/x86/kvm/kvm_cache_regs.h
++++ b/arch/x86/kvm/kvm_cache_regs.h
+@@ -4,7 +4,7 @@
+ 
+ #include <linux/kvm_host.h>
+ 
+-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
++#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
+ #define KVM_POSSIBLE_CR4_GUEST_BITS \
+ 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ 	 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 768487611db78..89fa35fba3d86 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4483,7 +4483,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+ 	 * CR0_GUEST_HOST_MASK is already set in the original vmcs01
+ 	 * (KVM doesn't change it);
+ 	 */
+-	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+ 	vmx_set_cr0(vcpu, vmcs12->host_cr0);
+ 
+ 	/* Same as above - no reason to call set_cr4_guest_host_mask(). */
+@@ -4634,7 +4634,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
+ 	 */
+ 	vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
+ 
+-	vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++	vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+ 	vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
+ 
+ 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 64b35223dc3d7..8ead0916e252e 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -4773,7 +4773,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
+ 	/* 22.2.1, 20.8.1 */
+ 	vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
+ 
+-	vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++	vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
+ 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
+ 
+ 	set_cr4_guest_host_mask(vmx);
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 2acdc54bc34b1..423e9d3c9c408 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
+ 				(1 << VCPU_EXREG_EXIT_INFO_1) | \
+ 				(1 << VCPU_EXREG_EXIT_INFO_2))
+ 
++static inline unsigned long vmx_l1_guest_owned_cr0_bits(void)
++{
++	unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS;
++
++	/*
++	 * CR0.WP needs to be intercepted when KVM is shadowing legacy paging
++	 * in order to construct shadow PTEs with the correct protections.
++	 * Note! CR0.WP technically can be passed through to the guest if
++	 * paging is disabled, but checking CR0.PG would generate a cyclical
++	 * dependency of sorts due to forcing the caller to ensure CR0 holds
++	 * the correct value prior to determining which CR0 bits can be owned
++	 * by L1. Keep it simple and limit the optimization to EPT.
++	 */
++	if (!enable_ept)
++		bits &= ~X86_CR0_WP;
++	return bits;
++}
++
+ static __always_inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+ {
+ 	return container_of(kvm, struct kvm_vmx, kvm);
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch b/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
new file mode 100644
index 00000000000..c2697b9169f
--- /dev/null
+++ b/queue-6.3/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
@@ -0,0 +1,69 @@
+From 588288d1cf0c17f55be23bfce919082d005b2743 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 12 May 2023 15:20:21 +0200
+Subject: KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP
+ enabled
+
+From: Mathias Krause
+
+[ Upstream commit 01b31714bd90be2784f7145bf93b7f78f3d081e1 ]
+
+There is no need to unload the MMU roots with TDP enabled when only
+CR0.WP has changed -- the paging structures are still valid, only the
+permission bitmap needs to be updated.
+
+One heavy user of toggling CR0.WP is grsecurity's KERNEXEC feature to
+implement kernel W^X.
+
+The optimization brings a huge performance gain for this case as the
+following micro-benchmark running 'ssdd 10 50000' from rt-tests[1] on a
+grsecurity L1 VM shows (runtime in seconds, lower is better):
+
+                        legacy     TDP    shadow
+kvm-x86/next@d8708b      8.43s    9.45s    70.3s
+
+patch                    5.39s    5.63s    70.2s
+
+For legacy MMU this is ~36% faster, for TDP MMU even ~40% faster. Also,
+TDP and legacy MMU now both have a similar runtime, which eliminates the
+need to disable the TDP MMU for grsecurity.
+
+The shadow MMU sees no measurable difference and is still slow, as
+expected.
+
+[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git
+
+Signed-off-by: Mathias Krause
+Link: https://lore.kernel.org/r/20230322013731.102955-3-minipli@grsecurity.net
+Co-developed-by: Sean Christopherson
+Signed-off-by: Sean Christopherson
+Signed-off-by: Mathias Krause
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kvm/x86.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 3d852ce849206..999b2db0737be 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -906,6 +906,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
+ 
+ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+ {
++	/*
++	 * CR0.WP is incorporated into the MMU role, but only for non-nested,
++	 * indirect shadow MMUs. If TDP is enabled, the MMU's metadata needs
++	 * to be updated, e.g. so that emulating guest translations does the
++	 * right thing, but there's no need to unload the root as CR0.WP
++	 * doesn't affect SPTEs.
++	 */
++	if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) {
++		kvm_init_mmu(vcpu);
++		return;
++	}
++
+ 	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+ 		kvm_clear_async_pf_completion_queue(vcpu);
+ 		kvm_async_pf_hash_reset(vcpu);
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch b/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
new file mode 100644
index 00000000000..b307ff4b8e2
--- /dev/null
+++ b/queue-6.3/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
@@ -0,0 +1,66 @@
+From b44c6ca79b915a989e7921ef76d4d0adc05bffe6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 12 May 2023 15:20:22 +0200
+Subject: KVM: x86: Make use of kvm_read_cr*_bits() when testing bits
+
+From: Mathias Krause
+
+[ Upstream commit 74cdc836919bf34684ef66f995273f35e2189daf ]
+
+Make use of the kvm_read_cr{0,4}_bits() helper functions when we only
+want to know the state of certain bits instead of the whole register.
+
+This not only makes the intent cleaner, it also avoids a potential
+VMREAD in case the tested bits aren't guest owned.
+
+Signed-off-by: Mathias Krause
+Link: https://lore.kernel.org/r/20230322013731.102955-5-minipli@grsecurity.net
+Signed-off-by: Sean Christopherson
+Signed-off-by: Mathias Krause
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kvm/pmu.c     | 4 ++--
+ arch/x86/kvm/vmx/vmx.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index 612e6c70ce2e7..f4aa170b5b972 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -540,9 +540,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
+ 	if (!pmc)
+ 		return 1;
+ 
+-	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
++	if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) &&
+ 	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
+-	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
++	    (kvm_read_cr0_bits(vcpu, X86_CR0_PE)))
+ 		return 1;
+ 
+ 	*data = pmc_read_counter(pmc) & mask;
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index dd92361f41b3f..64b35223dc3d7 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -5500,7 +5500,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
+ 		break;
+ 	case 3: /* lmsw */
+ 		val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
+-		trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
++		trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
+ 		kvm_lmsw(vcpu, val);
+ 
+ 		return kvm_skip_emulated_instruction(vcpu);
+@@ -7558,7 +7558,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+ 	if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
+ 		return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
+ 
+-	if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
++	if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
+ 		if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ 			cache = MTRR_TYPE_WRBACK;
+ 		else
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch b/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
new file mode 100644
index 00000000000..96cd1d6a4fd
--- /dev/null
+++ b/queue-6.3/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
@@ -0,0 +1,132 @@
+From 486a413cb7ee0c99d3a559cbad97cae281706ad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 12 May 2023 15:20:20 +0200
+Subject: KVM: x86/mmu: Avoid indirect call for get_cr3
+
+From: Paolo Bonzini
+
+[ Upstream commit 2fdcc1b324189b5fb20655baebd40cd82e2bdf0c ]
+
+Most of the time, calls to get_guest_pgd result in calling kvm_read_cr3
+(the exception is only nested TDP). Hardcode the default instead of
+using the get_cr3 function, avoiding a retpoline if retpolines are
+enabled.
+
+Signed-off-by: Paolo Bonzini
+Signed-off-by: Mathias Krause
+Link: https://lore.kernel.org/r/20230322013731.102955-2-minipli@grsecurity.net
+Signed-off-by: Sean Christopherson
+Signed-off-by: Mathias Krause # backport to v6.3.x
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kvm/mmu/mmu.c         | 31 ++++++++++++++++++++-----------
+ arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
+ 2 files changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index c8ebe542c565f..18c0deeaa2ec4 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -242,6 +242,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
+ 	return regs;
+ }
+ 
++static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu)
++{
++	return kvm_read_cr3(vcpu);
++}
++
++static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu,
++						  struct kvm_mmu *mmu)
++{
++	if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3)
++		return kvm_read_cr3(vcpu);
++
++	return mmu->get_guest_pgd(vcpu);
++}
++
+ static inline bool kvm_available_flush_tlb_with_range(void)
+ {
+ 	return kvm_x86_ops.tlb_remote_flush_with_range;
+@@ -3731,7 +3745,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
+ 	int quadrant, i, r;
+ 	hpa_t root;
+ 
+-	root_pgd = mmu->get_guest_pgd(vcpu);
++	root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
+ 	root_gfn = root_pgd >> PAGE_SHIFT;
+ 
+ 	if (mmu_check_root(vcpu, root_gfn))
+@@ -4181,7 +4195,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ 	arch.token = alloc_apf_token(vcpu);
+ 	arch.gfn = gfn;
+ 	arch.direct_map = vcpu->arch.mmu->root_role.direct;
+-	arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu);
++	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
+ 
+ 	return kvm_setup_async_pf(vcpu, cr2_or_gpa,
+ 				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+@@ -4200,7 +4214,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
+ 		return;
+ 
+ 	if (!vcpu->arch.mmu->root_role.direct &&
+-	    work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
++	    work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
+ 		return;
+ 
+ 	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
+@@ -4604,11 +4618,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
+ 
+-static unsigned long get_cr3(struct kvm_vcpu *vcpu)
+-{
+-	return kvm_read_cr3(vcpu);
+-}
+-
+ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
+ 			   unsigned int access)
+ {
+@@ -5159,7 +5168,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu,
+ 	context->page_fault = kvm_tdp_page_fault;
+ 	context->sync_page = nonpaging_sync_page;
+ 	context->invlpg = NULL;
+-	context->get_guest_pgd = get_cr3;
++	context->get_guest_pgd = get_guest_cr3;
+ 	context->get_pdptr = kvm_pdptr_read;
+ 	context->inject_page_fault = kvm_inject_page_fault;
+ 
+@@ -5309,7 +5318,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu,
+ 
+ 	kvm_init_shadow_mmu(vcpu, cpu_role);
+ 
+-	context->get_guest_pgd = get_cr3;
++	context->get_guest_pgd = get_guest_cr3;
+ 	context->get_pdptr = kvm_pdptr_read;
+ 	context->inject_page_fault = kvm_inject_page_fault;
+ }
+@@ -5323,7 +5332,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu,
+ 		return;
+ 
+ 	g_context->cpu_role.as_u64 = new_mode.as_u64;
+-	g_context->get_guest_pgd = get_cr3;
++	g_context->get_guest_pgd = get_guest_cr3;
+ 	g_context->get_pdptr = kvm_pdptr_read;
+ 	g_context->inject_page_fault = kvm_inject_page_fault;
+ 
+diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
+index 57f0b75c80f9d..2ea2861bbb3c1 100644
+--- a/arch/x86/kvm/mmu/paging_tmpl.h
++++ b/arch/x86/kvm/mmu/paging_tmpl.h
+@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
+ 	trace_kvm_mmu_pagetable_walk(addr, access);
+ retry_walk:
+ 	walker->level = mmu->cpu_role.base.level;
+-	pte = mmu->get_guest_pgd(vcpu);
++	pte = kvm_mmu_get_guest_pgd(vcpu, mmu);
+ 	have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
+ 
+ #if PTTYPE == 64
+-- 
+2.39.2
+
diff --git a/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch b/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch
new file mode 100644
index 00000000000..2450cfb4568
--- /dev/null
+++ b/queue-6.3/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch
@@ -0,0 +1,117 @@
+From 3c08f16493b5fcd97add715e8b752803ca1914f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 12 May 2023 15:20:24 +0200
+Subject: KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated
+ permission faults
+
+From: Sean Christopherson
+
+[ Upstream commit cf9f4c0eb1699d306e348b1fd0225af7b2c282d3 ]
+
+Refresh the MMU's snapshot of the vCPU's CR0.WP prior to checking for
+permission faults when emulating a guest memory access and CR0.WP may be
+guest owned. If the guest toggles only CR0.WP and triggers emulation of
+a supervisor write, e.g. when KVM is emulating UMIP, KVM may consume a
+stale CR0.WP, i.e. use stale protection bits metadata.
+
+Note, KVM passes through CR0.WP if and only if EPT is enabled as CR0.WP
+is part of the MMU role for legacy shadow paging, and SVM (NPT) doesn't
+support per-bit interception controls for CR0. Don't bother checking for
+EPT vs. NPT as the "old == new" check will always be true under NPT, i.e.
+the only cost is the read of vcpu->arch.cr0 (SVM unconditionally grabs CR0
+from the VMCB on VM-Exit).
+
+Reported-by: Mathias Krause
+Link: https://lkml.kernel.org/r/677169b4-051f-fcae-756b-9a3e1bb9f8fe%40grsecurity.net
+Fixes: fb509f76acc8 ("KVM: VMX: Make CR0.WP a guest owned bit")
+Tested-by: Mathias Krause
+Link: https://lore.kernel.org/r/20230405002608.418442-1-seanjc@google.com
+Signed-off-by: Sean Christopherson
+Signed-off-by: Mathias Krause # backport to v6.3.x
+Signed-off-by: Sasha Levin
+---
+ arch/x86/kvm/mmu.h     | 26 +++++++++++++++++++++++++-
+ arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++++
+ 2 files changed, 40 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
+index 168c46fd8dd18..0f38b78ab04b7 100644
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+ 			  u64 fault_address, char *insn, int insn_len);
++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++					struct kvm_mmu *mmu);
+ 
+ int kvm_mmu_load(struct kvm_vcpu *vcpu);
+ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
+@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
+ 					  vcpu->arch.mmu->root_role.level);
+ }
+ 
++static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++						    struct kvm_mmu *mmu)
++{
++	/*
++	 * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
++	 * @mmu's snapshot of CR0.WP and thus all related paging metadata may
++	 * be stale. Refresh CR0.WP and the metadata on-demand when checking
++	 * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
++	 * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
++	 * need to refresh nested_mmu, a.k.a. the walker used to translate L2
++	 * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
++	 */
++	if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
++		return;
++
++	__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
++}
++
+ /*
+  * Check if a given access (described through the I/D, W/R and U/S bits of a
+  * page fault error code pfec) causes a permission fault with the given PTE
+@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ 	u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
+ 	bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
+ 	int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
+-	bool fault = (mmu->permissions[index] >> pte_access) & 1;
+ 	u32 errcode = PFERR_PRESENT_MASK;
++	bool fault;
++
++	kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
++
++	fault = (mmu->permissions[index] >> pte_access) & 1;
+ 
+ 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
+ 	if (unlikely(mmu->pkru_mask)) {
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 18c0deeaa2ec4..d3812de54b02c 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -5121,6 +5121,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
+ 	return role;
+ }
+ 
++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
++					struct kvm_mmu *mmu)
++{
++	const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP);
++
++	BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
++	BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
++
++	if (is_cr0_wp(mmu) == cr0_wp)
++		return;
++
++	mmu->cpu_role.base.cr0_wp = cr0_wp;
++	reset_guest_paging_metadata(vcpu, mmu);
++}
++
+ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
+ {
+ 	/* tdp_root_level is architecture forced level, use it if nonzero */
+-- 
+2.39.2
+
diff --git a/queue-6.3/series b/queue-6.3/series
index d9f052c23a9..307187505de 100644
--- a/queue-6.3/series
+++ b/queue-6.3/series
@@ -118,3 +118,8 @@ crypto-engine-fix-crypto_queue-backlog-handling.patch
 perf-symbols-fix-return-incorrect-build_id-size-in-e.patch
 perf-tracepoint-fix-memory-leak-in-is_valid_tracepoi.patch
 perf-stat-separate-bperf-from-bpf_profiler.patch
+kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch
+kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch
+kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch
+kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch
+kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch
-- 
2.47.3
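
The core trick in the "Do not unload MMU roots" patch above is the XOR test
in kvm_post_set_cr0(): cr0 ^ old_cr0 isolates exactly the bits that changed,
so comparing the result against X86_CR0_WP proves that CR0.WP is the one and
only bit that flipped. Below is a minimal, self-contained userspace C sketch
of that check -- an illustration written for this summary, not code from the
series; the kernel additionally gates the fast path on tdp_enabled, and the
bit positions are the architectural CR0 ones:

#include <stdbool.h>
#include <stdio.h>

/* Architectural CR0 bit positions. */
#define X86_CR0_PE (1ul << 0)   /* Protection Enable */
#define X86_CR0_WP (1ul << 16)  /* Write Protect */
#define X86_CR0_PG (1ul << 31)  /* Paging */

/*
 * Mirrors the fast-path condition in kvm_post_set_cr0(): XOR keeps only
 * the bits that differ between the old and new CR0 values, so equality
 * with X86_CR0_WP means WP toggled and nothing else did.
 */
static bool only_cr0_wp_toggled(unsigned long old_cr0, unsigned long cr0)
{
	return (cr0 ^ old_cr0) == X86_CR0_WP;
}

int main(void)
{
	unsigned long old_cr0 = X86_CR0_PE | X86_CR0_PG;

	/* Only WP flips: prints 1, the cheap kvm_init_mmu() path applies. */
	printf("%d\n", only_cr0_wp_toggled(old_cr0, old_cr0 | X86_CR0_WP));

	/* WP and PG flip together: prints 0, the full unload path is needed. */
	printf("%d\n", only_cr0_wp_toggled(old_cr0,
					   (old_cr0 | X86_CR0_WP) & ~X86_CR0_PG));
	return 0;
}

Built with e.g. "cc -Wall cr0wp.c", this prints 1 then 0: the first toggle
qualifies for the metadata-only refresh, the second does not because paging
state changed as well.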