From: Sasha Levin Date: Fri, 12 May 2023 01:49:26 +0000 (-0400) Subject: Fixes for 6.2 X-Git-Tag: v4.14.315~113 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=de34a66790dc5ecff64f9bbc7f9785f6e89c1a7d;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.2 Signed-off-by: Sasha Levin --- diff --git a/queue-6.2/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch b/queue-6.2/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch new file mode 100644 index 00000000000..700e0326317 --- /dev/null +++ b/queue-6.2/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch @@ -0,0 +1,56 @@ +From f3268add4fccab9378ce7da01d29c3966232de94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 11:01:20 -0500 +Subject: drm/amd/display: Ext displays with dock can't recognized after resume + +From: Ryan Lin + +[ Upstream commit 1e5d4d8eb8c0f15d90c50e7abd686c980e54e42e ] + +[Why] +Needs to set the default value of the LTTPR timeout after resume. + +[How] +Set the default (3.2ms) timeout at resuming if the sink supports +LTTPR + +Reviewed-by: Jerry Zuo +Acked-by: Qingqing Zhuo +Signed-off-by: Ryan Lin +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 422909d1f352b..58fdd39f5bde9 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -39,6 +39,7 @@ + #include "dc/dc_edid_parser.h" + #include "dc/dc_stat.h" + #include "amdgpu_dm_trace.h" ++#include "dc/inc/dc_link_ddc.h" + + #include "vid.h" + #include "amdgpu.h" +@@ -2262,6 +2263,14 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) + if (suspend) { + drm_dp_mst_topology_mgr_suspend(mgr); + } else { ++ /* if extended timeout is supported in hardware, ++ * default to LTTPR timeout (3.2ms) first as a W/A for DP link layer ++ * CTS 4.2.1.1 regression introduced by CTS specs requirement update. ++ */ ++ dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); ++ if (!dp_is_lttpr_present(aconnector->dc_link)) ++ dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); ++ + ret = drm_dp_mst_topology_mgr_resume(mgr, true); + if (ret < 0) { + dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, +-- +2.39.2 + diff --git a/queue-6.2/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch b/queue-6.2/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch new file mode 100644 index 00000000000..e4df9984780 --- /dev/null +++ b/queue-6.2/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch @@ -0,0 +1,80 @@ +From 5b9f7040ee3d447a1593b0286df26452bc188d6f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Nov 2022 10:21:59 +0000 +Subject: fs/ntfs3: Fix null-ptr-deref on inode->i_op in ntfs_lookup() + +From: ZhangPeng + +[ Upstream commit 254e69f284d7270e0abdc023ee53b71401c3ba0c ] + +Syzbot reported a null-ptr-deref bug: + +ntfs3: loop0: Different NTFS' sector size (1024) and media sector size +(512) +ntfs3: loop0: Mark volume as dirty due to NTFS errors +general protection fault, probably for non-canonical address +0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +RIP: 0010:d_flags_for_inode fs/dcache.c:1980 [inline] +RIP: 0010:__d_add+0x5ce/0x800 fs/dcache.c:2796 +Call Trace: + + d_splice_alias+0x122/0x3b0 fs/dcache.c:3191 + lookup_open fs/namei.c:3391 [inline] + open_last_lookups fs/namei.c:3481 [inline] + path_openat+0x10e6/0x2df0 fs/namei.c:3688 + do_filp_open+0x264/0x4f0 fs/namei.c:3718 + do_sys_openat2+0x124/0x4e0 fs/open.c:1310 + do_sys_open fs/open.c:1326 [inline] + __do_sys_open fs/open.c:1334 [inline] + __se_sys_open fs/open.c:1330 [inline] + __x64_sys_open+0x221/0x270 fs/open.c:1330 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +If the MFT record of ntfs inode is not a base record, inode->i_op can be +NULL. And a null-ptr-deref may happen: + +ntfs_lookup() + dir_search_u() # inode->i_op is set to NULL + d_splice_alias() + __d_add() + d_flags_for_inode() # inode->i_op->get_link null-ptr-deref + +Fix this by adding a Check on inode->i_op before calling the +d_splice_alias() function. + +Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") +Reported-by: syzbot+a8f26a403c169b7593fe@syzkaller.appspotmail.com +Signed-off-by: ZhangPeng +Signed-off-by: Konstantin Komarov +Signed-off-by: Sasha Levin +--- + fs/ntfs3/namei.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c +index c8db35e2ae172..3db34d5c03dc7 100644 +--- a/fs/ntfs3/namei.c ++++ b/fs/ntfs3/namei.c +@@ -88,6 +88,16 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry, + __putname(uni); + } + ++ /* ++ * Check for a null pointer ++ * If the MFT record of ntfs inode is not a base record, inode->i_op can be NULL. ++ * This causes null pointer dereference in d_splice_alias(). ++ */ ++ if (!IS_ERR(inode) && inode->i_op == NULL) { ++ iput(inode); ++ inode = ERR_PTR(-EINVAL); ++ } ++ + return d_splice_alias(inode, dentry); + } + +-- +2.39.2 + diff --git a/queue-6.2/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch b/queue-6.2/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch new file mode 100644 index 00000000000..2c4c5ca7728 --- /dev/null +++ b/queue-6.2/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch @@ -0,0 +1,118 @@ +From ea23ff815e83586314e4d979608f810c20879f54 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:44:56 +0200 +Subject: KVM: VMX: Make CR0.WP a guest owned bit + +From: Mathias Krause + +[ Upstream commit fb509f76acc8d42bed11bca308404f81c2be856a ] + +Guests like grsecurity that make heavy use of CR0.WP to implement kernel +level W^X will suffer from the implied VMEXITs. + +With EPT there is no need to intercept a guest change of CR0.WP, so +simply make it a guest owned bit if we can do so. + +This implies that a read of a guest's CR0.WP bit might need a VMREAD. +However, the only potentially affected user seems to be kvm_init_mmu() +which is a heavy operation to begin with. But also most callers already +cache the full value of CR0 anyway, so no additional VMREAD is needed. +The only exception is nested_vmx_load_cr3(). + +This change is VMX-specific, as SVM has no such fine grained control +register intercept control. + +Suggested-by: Sean Christopherson +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.2.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/kvm_cache_regs.h | 2 +- + arch/x86/kvm/vmx/nested.c | 4 ++-- + arch/x86/kvm/vmx/vmx.c | 2 +- + arch/x86/kvm/vmx/vmx.h | 18 ++++++++++++++++++ + 4 files changed, 22 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h +index c09174f73a344..451697a96cf33 100644 +--- a/arch/x86/kvm/kvm_cache_regs.h ++++ b/arch/x86/kvm/kvm_cache_regs.h +@@ -4,7 +4,7 @@ + + #include + +-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS ++#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) + #define KVM_POSSIBLE_CR4_GUEST_BITS \ + (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index b7f2e59d50ee4..579ceaf75dde7 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4488,7 +4488,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + * CR0_GUEST_HOST_MASK is already set in the original vmcs01 + * (KVM doesn't change it); + */ +- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmx_set_cr0(vcpu, vmcs12->host_cr0); + + /* Same as above - no reason to call set_cr4_guest_host_mask(). */ +@@ -4639,7 +4639,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) + */ + vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); + +- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); + + vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index ca8eca4ec0e38..57a73954980ac 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -4728,7 +4728,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) + /* 22.2.1, 20.8.1 */ + vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); + +- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); + + set_cr4_guest_host_mask(vmx); +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index a3da84f4ea456..e2b04f4c0fef3 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) + (1 << VCPU_EXREG_EXIT_INFO_1) | \ + (1 << VCPU_EXREG_EXIT_INFO_2)) + ++static inline unsigned long vmx_l1_guest_owned_cr0_bits(void) ++{ ++ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ ++ /* ++ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging ++ * in order to construct shadow PTEs with the correct protections. ++ * Note! CR0.WP technically can be passed through to the guest if ++ * paging is disabled, but checking CR0.PG would generate a cyclical ++ * dependency of sorts due to forcing the caller to ensure CR0 holds ++ * the correct value prior to determining which CR0 bits can be owned ++ * by L1. Keep it simple and limit the optimization to EPT. ++ */ ++ if (!enable_ept) ++ bits &= ~X86_CR0_WP; ++ return bits; ++} ++ + static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) + { + return container_of(kvm, struct kvm_vmx, kvm); +-- +2.39.2 + diff --git a/queue-6.2/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch b/queue-6.2/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch new file mode 100644 index 00000000000..3b3b545ab23 --- /dev/null +++ b/queue-6.2/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch @@ -0,0 +1,69 @@ +From 628f4da5f3fb14501382f5e51453a5f3827eed5e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:44:54 +0200 +Subject: KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP + enabled + +From: Mathias Krause + +[ Upstream commit 01b31714bd90be2784f7145bf93b7f78f3d081e1 ] + +There is no need to unload the MMU roots with TDP enabled when only +CR0.WP has changed -- the paging structures are still valid, only the +permission bitmap needs to be updated. + +One heavy user of toggling CR0.WP is grsecurity's KERNEXEC feature to +implement kernel W^X. + +The optimization brings a huge performance gain for this case as the +following micro-benchmark running 'ssdd 10 50000' from rt-tests[1] on a +grsecurity L1 VM shows (runtime in seconds, lower is better): + + legacy TDP shadow +kvm-x86/next@d8708b 8.43s 9.45s 70.3s + +patch 5.39s 5.63s 70.2s + +For legacy MMU this is ~36% faster, for TDP MMU even ~40% faster. Also +TDP and legacy MMU now both have a similar runtime which vanishes the +need to disable TDP MMU for grsecurity. + +Shadow MMU sees no measurable difference and is still slow, as expected. + +[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git + +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-3-minipli@grsecurity.net +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 2d76c254582b0..35cd87a326ace 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -904,6 +904,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs); + + void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0) + { ++ /* ++ * CR0.WP is incorporated into the MMU role, but only for non-nested, ++ * indirect shadow MMUs. If TDP is enabled, the MMU's metadata needs ++ * to be updated, e.g. so that emulating guest translations does the ++ * right thing, but there's no need to unload the root as CR0.WP ++ * doesn't affect SPTEs. ++ */ ++ if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) { ++ kvm_init_mmu(vcpu); ++ return; ++ } ++ + if ((cr0 ^ old_cr0) & X86_CR0_PG) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_async_pf_hash_reset(vcpu); +-- +2.39.2 + diff --git a/queue-6.2/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch b/queue-6.2/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch new file mode 100644 index 00000000000..b2b3ac1985b --- /dev/null +++ b/queue-6.2/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch @@ -0,0 +1,66 @@ +From 1250e61963ca7e1dc6781db9d67d739ad344d329 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:44:55 +0200 +Subject: KVM: x86: Make use of kvm_read_cr*_bits() when testing bits + +From: Mathias Krause + +[ Upstream commit 74cdc836919bf34684ef66f995273f35e2189daf ] + +Make use of the kvm_read_cr{0,4}_bits() helper functions when we only +want to know the state of certain bits instead of the whole register. + +This not only makes the intent cleaner, it also avoids a potential +VMREAD in case the tested bits aren't guest owned. + +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-5-minipli@grsecurity.net +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/pmu.c | 4 ++-- + arch/x86/kvm/vmx/vmx.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c +index eb594620dd75a..8be583a05de70 100644 +--- a/arch/x86/kvm/pmu.c ++++ b/arch/x86/kvm/pmu.c +@@ -438,9 +438,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) + if (!pmc) + return 1; + +- if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) && ++ if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) && + (static_call(kvm_x86_get_cpl)(vcpu) != 0) && +- (kvm_read_cr0(vcpu) & X86_CR0_PE)) ++ (kvm_read_cr0_bits(vcpu, X86_CR0_PE))) + return 1; + + *data = pmc_read_counter(pmc) & mask; +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 53034045cb6e6..ca8eca4ec0e38 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -5450,7 +5450,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) + break; + case 3: /* lmsw */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; +- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); ++ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val)); + kvm_lmsw(vcpu, val); + + return kvm_skip_emulated_instruction(vcpu); +@@ -7531,7 +7531,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) + return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT; + +- if (kvm_read_cr0(vcpu) & X86_CR0_CD) { ++ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) { + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cache = MTRR_TYPE_WRBACK; + else +-- +2.39.2 + diff --git a/queue-6.2/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch b/queue-6.2/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch new file mode 100644 index 00000000000..b6d49d23aae --- /dev/null +++ b/queue-6.2/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch @@ -0,0 +1,132 @@ +From 0902e243ba35d3e3af991c60fd9e555b7987b992 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:44:53 +0200 +Subject: KVM: x86/mmu: Avoid indirect call for get_cr3 + +From: Paolo Bonzini + +[ Upstream commit 2fdcc1b324189b5fb20655baebd40cd82e2bdf0c ] + +Most of the time, calls to get_guest_pgd result in calling +kvm_read_cr3 (the exception is only nested TDP). Hardcode +the default instead of using the get_cr3 function, avoiding +a retpoline if they are enabled. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-2-minipli@grsecurity.net +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.2.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu/mmu.c | 31 ++++++++++++++++++++----------- + arch/x86/kvm/mmu/paging_tmpl.h | 2 +- + 2 files changed, 21 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 835426254e768..2faea9e873629 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -233,6 +233,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) + return regs; + } + ++static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu) ++{ ++ return kvm_read_cr3(vcpu); ++} ++ ++static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) ++ return kvm_read_cr3(vcpu); ++ ++ return mmu->get_guest_pgd(vcpu); ++} ++ + static inline bool kvm_available_flush_tlb_with_range(void) + { + return kvm_x86_ops.tlb_remote_flush_with_range; +@@ -3699,7 +3713,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) + int quadrant, i, r; + hpa_t root; + +- root_pgd = mmu->get_guest_pgd(vcpu); ++ root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu); + root_gfn = root_pgd >> PAGE_SHIFT; + + if (mmu_check_root(vcpu, root_gfn)) +@@ -4149,7 +4163,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + arch.token = alloc_apf_token(vcpu); + arch.gfn = gfn; + arch.direct_map = vcpu->arch.mmu->root_role.direct; +- arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); ++ arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); + + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); +@@ -4168,7 +4182,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) + return; + + if (!vcpu->arch.mmu->root_role.direct && +- work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu)) ++ work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) + return; + + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); +@@ -4530,11 +4544,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) + } + EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); + +-static unsigned long get_cr3(struct kvm_vcpu *vcpu) +-{ +- return kvm_read_cr3(vcpu); +-} +- + static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, + unsigned int access) + { +@@ -5085,7 +5094,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, + context->page_fault = kvm_tdp_page_fault; + context->sync_page = nonpaging_sync_page; + context->invlpg = NULL; +- context->get_guest_pgd = get_cr3; ++ context->get_guest_pgd = get_guest_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; + +@@ -5235,7 +5244,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu, + + kvm_init_shadow_mmu(vcpu, cpu_role); + +- context->get_guest_pgd = get_cr3; ++ context->get_guest_pgd = get_guest_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; + } +@@ -5249,7 +5258,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, + return; + + g_context->cpu_role.as_u64 = new_mode.as_u64; +- g_context->get_guest_pgd = get_cr3; ++ g_context->get_guest_pgd = get_guest_cr3; + g_context->get_pdptr = kvm_pdptr_read; + g_context->inject_page_fault = kvm_inject_page_fault; + +diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h +index 0f64550720557..89b19b7ef4f9f 100644 +--- a/arch/x86/kvm/mmu/paging_tmpl.h ++++ b/arch/x86/kvm/mmu/paging_tmpl.h +@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, + trace_kvm_mmu_pagetable_walk(addr, access); + retry_walk: + walker->level = mmu->cpu_role.base.level; +- pte = mmu->get_guest_pgd(vcpu); ++ pte = kvm_mmu_get_guest_pgd(vcpu, mmu); + have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); + + #if PTTYPE == 64 +-- +2.39.2 + diff --git a/queue-6.2/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch b/queue-6.2/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch new file mode 100644 index 00000000000..b2b59708f2a --- /dev/null +++ b/queue-6.2/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch @@ -0,0 +1,117 @@ +From b4b1fb90cb95345fb6ceb558e0ee5f8e4f712b4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:44:57 +0200 +Subject: KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated + permission faults + +From: Sean Christopherson + +[ Upstream commit cf9f4c0eb1699d306e348b1fd0225af7b2c282d3 ] + +Refresh the MMU's snapshot of the vCPU's CR0.WP prior to checking for +permission faults when emulating a guest memory access and CR0.WP may be +guest owned. If the guest toggles only CR0.WP and triggers emulation of +a supervisor write, e.g. when KVM is emulating UMIP, KVM may consume a +stale CR0.WP, i.e. use stale protection bits metadata. + +Note, KVM passes through CR0.WP if and only if EPT is enabled as CR0.WP +is part of the MMU role for legacy shadow paging, and SVM (NPT) doesn't +support per-bit interception controls for CR0. Don't bother checking for +EPT vs. NPT as the "old == new" check will always be true under NPT, i.e. +the only cost is the read of vcpu->arch.cr4 (SVM unconditionally grabs CR0 +from the VMCB on VM-Exit). + +Reported-by: Mathias Krause +Link: https://lkml.kernel.org/r/677169b4-051f-fcae-756b-9a3e1bb9f8fe%40grsecurity.net +Fixes: fb509f76acc8 ("KVM: VMX: Make CR0.WP a guest owned bit") +Tested-by: Mathias Krause +Link: https://lore.kernel.org/r/20230405002608.418442-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.2.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.h | 26 +++++++++++++++++++++++++- + arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++++ + 2 files changed, 40 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h +index 6bdaacb6faa07..59804be91b5b0 100644 +--- a/arch/x86/kvm/mmu.h ++++ b/arch/x86/kvm/mmu.h +@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, + bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); + int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address, char *insn, int insn_len); ++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu); + + int kvm_mmu_load(struct kvm_vcpu *vcpu); + void kvm_mmu_unload(struct kvm_vcpu *vcpu); +@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu) + vcpu->arch.mmu->root_role.level); + } + ++static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ /* ++ * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e. ++ * @mmu's snapshot of CR0.WP and thus all related paging metadata may ++ * be stale. Refresh CR0.WP and the metadata on-demand when checking ++ * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing ++ * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does ++ * need to refresh nested_mmu, a.k.a. the walker used to translate L2 ++ * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP. ++ */ ++ if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu) ++ return; ++ ++ __kvm_mmu_refresh_passthrough_bits(vcpu, mmu); ++} ++ + /* + * Check if a given access (described through the I/D, W/R and U/S bits of a + * page fault error code pfec) causes a permission fault with the given PTE +@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; + bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; + int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1; +- bool fault = (mmu->permissions[index] >> pte_access) & 1; + u32 errcode = PFERR_PRESENT_MASK; ++ bool fault; ++ ++ kvm_mmu_refresh_passthrough_bits(vcpu, mmu); ++ ++ fault = (mmu->permissions[index] >> pte_access) & 1; + + WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); + if (unlikely(mmu->pkru_mask)) { +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 2faea9e873629..ce135539145fd 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5047,6 +5047,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs) + return role; + } + ++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP); ++ ++ BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP); ++ BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS)); ++ ++ if (is_cr0_wp(mmu) == cr0_wp) ++ return; ++ ++ mmu->cpu_role.base.cr0_wp = cr0_wp; ++ reset_guest_paging_metadata(vcpu, mmu); ++} ++ + static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) + { + /* tdp_root_level is architecture forced level, use it if nonzero */ +-- +2.39.2 + diff --git a/queue-6.2/series b/queue-6.2/series index 8c753e7150a..c24912326a6 100644 --- a/queue-6.2/series +++ b/queue-6.2/series @@ -10,3 +10,10 @@ soc-qcom-llcc-do-not-create-edac-platform-device-on-.patch mailbox-zynq-switch-to-flexible-array-to-simplify-co.patch mailbox-zynqmp-fix-counts-of-child-nodes.patch mtd-spi-nor-spansion-enable-jffs2-write-buffer-for-i.patch-16651 +fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch +drm-amd-display-ext-displays-with-dock-can-t-recogni.patch +kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch +kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch +kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch +kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch +kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch