From: Sasha Levin Date: Fri, 12 May 2023 01:49:27 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v4.14.315~112 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=05aeeae3204a8092e01daee832c4be0cb4fb8ab7;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch b/queue-6.1/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch new file mode 100644 index 00000000000..311da9f98ee --- /dev/null +++ b/queue-6.1/drm-amd-display-ext-displays-with-dock-can-t-recogni.patch @@ -0,0 +1,56 @@ +From 620e3c918c976bc94722cc4ebbc53cc8c921e96e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 11:01:20 -0500 +Subject: drm/amd/display: Ext displays with dock can't recognized after resume + +From: Ryan Lin + +[ Upstream commit 1e5d4d8eb8c0f15d90c50e7abd686c980e54e42e ] + +[Why] +Needs to set the default value of the LTTPR timeout after resume. + +[How] +Set the default (3.2ms) timeout at resuming if the sink supports +LTTPR + +Reviewed-by: Jerry Zuo +Acked-by: Qingqing Zhuo +Signed-off-by: Ryan Lin +Tested-by: Daniel Wheeler +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +index 99b99f0b42c06..9f637e360755d 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +@@ -39,6 +39,7 @@ + #include "dc/dc_edid_parser.h" + #include "dc/dc_stat.h" + #include "amdgpu_dm_trace.h" ++#include "dc/inc/dc_link_ddc.h" + + #include "vid.h" + #include "amdgpu.h" +@@ -2254,6 +2255,14 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) + if (suspend) { + drm_dp_mst_topology_mgr_suspend(mgr); + } else { ++ /* if extended timeout is supported in hardware, ++ * default to LTTPR timeout (3.2ms) first as a W/A for DP link layer ++ * CTS 4.2.1.1 regression introduced by CTS specs requirement update. ++ */ ++ dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); ++ if (!dp_is_lttpr_present(aconnector->dc_link)) ++ dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD); ++ + ret = drm_dp_mst_topology_mgr_resume(mgr, true); + if (ret < 0) { + dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, +-- +2.39.2 + diff --git a/queue-6.1/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch b/queue-6.1/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch new file mode 100644 index 00000000000..8ec0391c870 --- /dev/null +++ b/queue-6.1/fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch @@ -0,0 +1,80 @@ +From 4da509a8177f6620c577c576d15d321259ccefd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Nov 2022 10:21:59 +0000 +Subject: fs/ntfs3: Fix null-ptr-deref on inode->i_op in ntfs_lookup() + +From: ZhangPeng + +[ Upstream commit 254e69f284d7270e0abdc023ee53b71401c3ba0c ] + +Syzbot reported a null-ptr-deref bug: + +ntfs3: loop0: Different NTFS' sector size (1024) and media sector size +(512) +ntfs3: loop0: Mark volume as dirty due to NTFS errors +general protection fault, probably for non-canonical address +0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +RIP: 0010:d_flags_for_inode fs/dcache.c:1980 [inline] +RIP: 0010:__d_add+0x5ce/0x800 fs/dcache.c:2796 +Call Trace: + + d_splice_alias+0x122/0x3b0 fs/dcache.c:3191 + lookup_open fs/namei.c:3391 [inline] + open_last_lookups fs/namei.c:3481 [inline] + path_openat+0x10e6/0x2df0 fs/namei.c:3688 + do_filp_open+0x264/0x4f0 fs/namei.c:3718 + do_sys_openat2+0x124/0x4e0 fs/open.c:1310 + do_sys_open fs/open.c:1326 [inline] + __do_sys_open fs/open.c:1334 [inline] + __se_sys_open fs/open.c:1330 [inline] + __x64_sys_open+0x221/0x270 fs/open.c:1330 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +If the MFT record of ntfs inode is not a base record, inode->i_op can be +NULL. And a null-ptr-deref may happen: + +ntfs_lookup() + dir_search_u() # inode->i_op is set to NULL + d_splice_alias() + __d_add() + d_flags_for_inode() # inode->i_op->get_link null-ptr-deref + +Fix this by adding a Check on inode->i_op before calling the +d_splice_alias() function. + +Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") +Reported-by: syzbot+a8f26a403c169b7593fe@syzkaller.appspotmail.com +Signed-off-by: ZhangPeng +Signed-off-by: Konstantin Komarov +Signed-off-by: Sasha Levin +--- + fs/ntfs3/namei.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c +index bc22cc321a74b..7760aedc06728 100644 +--- a/fs/ntfs3/namei.c ++++ b/fs/ntfs3/namei.c +@@ -86,6 +86,16 @@ static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry, + __putname(uni); + } + ++ /* ++ * Check for a null pointer ++ * If the MFT record of ntfs inode is not a base record, inode->i_op can be NULL. ++ * This causes null pointer dereference in d_splice_alias(). ++ */ ++ if (!IS_ERR(inode) && inode->i_op == NULL) { ++ iput(inode); ++ inode = ERR_PTR(-EINVAL); ++ } ++ + return d_splice_alias(inode, dentry); + } + +-- +2.39.2 + diff --git a/queue-6.1/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch b/queue-6.1/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch new file mode 100644 index 00000000000..3f7a97b2197 --- /dev/null +++ b/queue-6.1/kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch @@ -0,0 +1,118 @@ +From 8ccf5dc3b7f7615c04f975bb8025d13949b5a436 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:46:01 +0200 +Subject: KVM: VMX: Make CR0.WP a guest owned bit + +From: Mathias Krause + +[ Upstream commit fb509f76acc8d42bed11bca308404f81c2be856a ] + +Guests like grsecurity that make heavy use of CR0.WP to implement kernel +level W^X will suffer from the implied VMEXITs. + +With EPT there is no need to intercept a guest change of CR0.WP, so +simply make it a guest owned bit if we can do so. + +This implies that a read of a guest's CR0.WP bit might need a VMREAD. +However, the only potentially affected user seems to be kvm_init_mmu() +which is a heavy operation to begin with. But also most callers already +cache the full value of CR0 anyway, so no additional VMREAD is needed. +The only exception is nested_vmx_load_cr3(). + +This change is VMX-specific, as SVM has no such fine grained control +register intercept control. + +Suggested-by: Sean Christopherson +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-7-minipli@grsecurity.net +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.1.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/kvm_cache_regs.h | 2 +- + arch/x86/kvm/vmx/nested.c | 4 ++-- + arch/x86/kvm/vmx/vmx.c | 2 +- + arch/x86/kvm/vmx/vmx.h | 18 ++++++++++++++++++ + 4 files changed, 22 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h +index 3febc342360cc..896cc73949442 100644 +--- a/arch/x86/kvm/kvm_cache_regs.h ++++ b/arch/x86/kvm/kvm_cache_regs.h +@@ -4,7 +4,7 @@ + + #include + +-#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS ++#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) + #define KVM_POSSIBLE_CR4_GUEST_BITS \ + (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index 8e56ec6e72e9d..9d683b6067c7b 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -4460,7 +4460,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + * CR0_GUEST_HOST_MASK is already set in the original vmcs01 + * (KVM doesn't change it); + */ +- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmx_set_cr0(vcpu, vmcs12->host_cr0); + + /* Same as above - no reason to call set_cr4_guest_host_mask(). */ +@@ -4611,7 +4611,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) + */ + vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); + +- vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); + + vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 4984357c5d441..07aab85922441 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -4695,7 +4695,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) + /* 22.2.1, 20.8.1 */ + vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); + +- vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits(); + vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits); + + set_cr4_guest_host_mask(vmx); +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index a3da84f4ea456..e2b04f4c0fef3 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -640,6 +640,24 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) + (1 << VCPU_EXREG_EXIT_INFO_1) | \ + (1 << VCPU_EXREG_EXIT_INFO_2)) + ++static inline unsigned long vmx_l1_guest_owned_cr0_bits(void) ++{ ++ unsigned long bits = KVM_POSSIBLE_CR0_GUEST_BITS; ++ ++ /* ++ * CR0.WP needs to be intercepted when KVM is shadowing legacy paging ++ * in order to construct shadow PTEs with the correct protections. ++ * Note! CR0.WP technically can be passed through to the guest if ++ * paging is disabled, but checking CR0.PG would generate a cyclical ++ * dependency of sorts due to forcing the caller to ensure CR0 holds ++ * the correct value prior to determining which CR0 bits can be owned ++ * by L1. Keep it simple and limit the optimization to EPT. ++ */ ++ if (!enable_ept) ++ bits &= ~X86_CR0_WP; ++ return bits; ++} ++ + static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) + { + return container_of(kvm, struct kvm_vmx, kvm); +-- +2.39.2 + diff --git a/queue-6.1/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch b/queue-6.1/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch new file mode 100644 index 00000000000..093f01473df --- /dev/null +++ b/queue-6.1/kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch @@ -0,0 +1,69 @@ +From ce5a0367d81a5b34fd37af1ad992a67511c42198 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:45:59 +0200 +Subject: KVM: x86: Do not unload MMU roots when only toggling CR0.WP with TDP + enabled + +From: Mathias Krause + +[ Upstream commit 01b31714bd90be2784f7145bf93b7f78f3d081e1 ] + +There is no need to unload the MMU roots with TDP enabled when only +CR0.WP has changed -- the paging structures are still valid, only the +permission bitmap needs to be updated. + +One heavy user of toggling CR0.WP is grsecurity's KERNEXEC feature to +implement kernel W^X. + +The optimization brings a huge performance gain for this case as the +following micro-benchmark running 'ssdd 10 50000' from rt-tests[1] on a +grsecurity L1 VM shows (runtime in seconds, lower is better): + + legacy TDP shadow +kvm-x86/next@d8708b 8.43s 9.45s 70.3s + +patch 5.39s 5.63s 70.2s + +For legacy MMU this is ~36% faster, for TDP MMU even ~40% faster. Also +TDP and legacy MMU now both have a similar runtime which vanishes the +need to disable TDP MMU for grsecurity. + +Shadow MMU sees no measurable difference and is still slow, as expected. + +[1] https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git + +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-3-minipli@grsecurity.net +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/x86.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 3463ef7f30196..d7af225b63d89 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -910,6 +910,18 @@ EXPORT_SYMBOL_GPL(load_pdptrs); + + void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0) + { ++ /* ++ * CR0.WP is incorporated into the MMU role, but only for non-nested, ++ * indirect shadow MMUs. If TDP is enabled, the MMU's metadata needs ++ * to be updated, e.g. so that emulating guest translations does the ++ * right thing, but there's no need to unload the root as CR0.WP ++ * doesn't affect SPTEs. ++ */ ++ if (tdp_enabled && (cr0 ^ old_cr0) == X86_CR0_WP) { ++ kvm_init_mmu(vcpu); ++ return; ++ } ++ + if ((cr0 ^ old_cr0) & X86_CR0_PG) { + kvm_clear_async_pf_completion_queue(vcpu); + kvm_async_pf_hash_reset(vcpu); +-- +2.39.2 + diff --git a/queue-6.1/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch b/queue-6.1/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch new file mode 100644 index 00000000000..8954c3949cc --- /dev/null +++ b/queue-6.1/kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch @@ -0,0 +1,66 @@ +From 856a4af630ed0db8d78efa0fd1b84c34abe180b4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:46:00 +0200 +Subject: KVM: x86: Make use of kvm_read_cr*_bits() when testing bits + +From: Mathias Krause + +[ Upstream commit 74cdc836919bf34684ef66f995273f35e2189daf ] + +Make use of the kvm_read_cr{0,4}_bits() helper functions when we only +want to know the state of certain bits instead of the whole register. + +This not only makes the intent cleaner, it also avoids a potential +VMREAD in case the tested bits aren't guest owned. + +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-5-minipli@grsecurity.net +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/pmu.c | 4 ++-- + arch/x86/kvm/vmx/vmx.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c +index de1fd73697365..20cd746cf4678 100644 +--- a/arch/x86/kvm/pmu.c ++++ b/arch/x86/kvm/pmu.c +@@ -418,9 +418,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) + if (!pmc) + return 1; + +- if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) && ++ if (!(kvm_read_cr4_bits(vcpu, X86_CR4_PCE)) && + (static_call(kvm_x86_get_cpl)(vcpu) != 0) && +- (kvm_read_cr0(vcpu) & X86_CR0_PE)) ++ (kvm_read_cr0_bits(vcpu, X86_CR0_PE))) + return 1; + + *data = pmc_read_counter(pmc) & mask; +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 5db21d9ef6710..4984357c5d441 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -5417,7 +5417,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) + break; + case 3: /* lmsw */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; +- trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); ++ trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val)); + kvm_lmsw(vcpu, val); + + return kvm_skip_emulated_instruction(vcpu); +@@ -7496,7 +7496,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) + return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT; + +- if (kvm_read_cr0(vcpu) & X86_CR0_CD) { ++ if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) { + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cache = MTRR_TYPE_WRBACK; + else +-- +2.39.2 + diff --git a/queue-6.1/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch b/queue-6.1/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch new file mode 100644 index 00000000000..3099da9fd51 --- /dev/null +++ b/queue-6.1/kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch @@ -0,0 +1,132 @@ +From bc9919795748c276b096379a1c423f94a8ec703a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:45:58 +0200 +Subject: KVM: x86/mmu: Avoid indirect call for get_cr3 + +From: Paolo Bonzini + +[ Upstream commit 2fdcc1b324189b5fb20655baebd40cd82e2bdf0c ] + +Most of the time, calls to get_guest_pgd result in calling +kvm_read_cr3 (the exception is only nested TDP). Hardcode +the default instead of using the get_cr3 function, avoiding +a retpoline if they are enabled. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Mathias Krause +Link: https://lore.kernel.org/r/20230322013731.102955-2-minipli@grsecurity.net +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.1.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu/mmu.c | 31 ++++++++++++++++++++----------- + arch/x86/kvm/mmu/paging_tmpl.h | 2 +- + 2 files changed, 21 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index b6f96d47e596d..f2a10c7d13697 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -232,6 +232,20 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu) + return regs; + } + ++static unsigned long get_guest_cr3(struct kvm_vcpu *vcpu) ++{ ++ return kvm_read_cr3(vcpu); ++} ++ ++static inline unsigned long kvm_mmu_get_guest_pgd(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ if (IS_ENABLED(CONFIG_RETPOLINE) && mmu->get_guest_pgd == get_guest_cr3) ++ return kvm_read_cr3(vcpu); ++ ++ return mmu->get_guest_pgd(vcpu); ++} ++ + static inline bool kvm_available_flush_tlb_with_range(void) + { + return kvm_x86_ops.tlb_remote_flush_with_range; +@@ -3661,7 +3675,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) + int quadrant, i, r; + hpa_t root; + +- root_pgd = mmu->get_guest_pgd(vcpu); ++ root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu); + root_gfn = root_pgd >> PAGE_SHIFT; + + if (mmu_check_root(vcpu, root_gfn)) +@@ -4112,7 +4126,7 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + arch.token = alloc_apf_token(vcpu); + arch.gfn = gfn; + arch.direct_map = vcpu->arch.mmu->root_role.direct; +- arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); ++ arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu); + + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); +@@ -4131,7 +4145,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) + return; + + if (!vcpu->arch.mmu->root_role.direct && +- work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu)) ++ work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu)) + return; + + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); +@@ -4488,11 +4502,6 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd) + } + EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); + +-static unsigned long get_cr3(struct kvm_vcpu *vcpu) +-{ +- return kvm_read_cr3(vcpu); +-} +- + static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, + unsigned int access) + { +@@ -5043,7 +5052,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu, + context->page_fault = kvm_tdp_page_fault; + context->sync_page = nonpaging_sync_page; + context->invlpg = NULL; +- context->get_guest_pgd = get_cr3; ++ context->get_guest_pgd = get_guest_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; + +@@ -5193,7 +5202,7 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu, + + kvm_init_shadow_mmu(vcpu, cpu_role); + +- context->get_guest_pgd = get_cr3; ++ context->get_guest_pgd = get_guest_cr3; + context->get_pdptr = kvm_pdptr_read; + context->inject_page_fault = kvm_inject_page_fault; + } +@@ -5207,7 +5216,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu, + return; + + g_context->cpu_role.as_u64 = new_mode.as_u64; +- g_context->get_guest_pgd = get_cr3; ++ g_context->get_guest_pgd = get_guest_cr3; + g_context->get_pdptr = kvm_pdptr_read; + g_context->inject_page_fault = kvm_inject_page_fault; + +diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h +index 5ab5f94dcb6fd..1f4f5e703f136 100644 +--- a/arch/x86/kvm/mmu/paging_tmpl.h ++++ b/arch/x86/kvm/mmu/paging_tmpl.h +@@ -324,7 +324,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, + trace_kvm_mmu_pagetable_walk(addr, access); + retry_walk: + walker->level = mmu->cpu_role.base.level; +- pte = mmu->get_guest_pgd(vcpu); ++ pte = kvm_mmu_get_guest_pgd(vcpu, mmu); + have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); + + #if PTTYPE == 64 +-- +2.39.2 + diff --git a/queue-6.1/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch b/queue-6.1/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch new file mode 100644 index 00000000000..d3f73afd4be --- /dev/null +++ b/queue-6.1/kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch @@ -0,0 +1,117 @@ +From 92832c0d07b179bb04e872b90a03c57098a513f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 May 2023 17:46:02 +0200 +Subject: KVM: x86/mmu: Refresh CR0.WP prior to checking for emulated + permission faults + +From: Sean Christopherson + +[ Upstream commit cf9f4c0eb1699d306e348b1fd0225af7b2c282d3 ] + +Refresh the MMU's snapshot of the vCPU's CR0.WP prior to checking for +permission faults when emulating a guest memory access and CR0.WP may be +guest owned. If the guest toggles only CR0.WP and triggers emulation of +a supervisor write, e.g. when KVM is emulating UMIP, KVM may consume a +stale CR0.WP, i.e. use stale protection bits metadata. + +Note, KVM passes through CR0.WP if and only if EPT is enabled as CR0.WP +is part of the MMU role for legacy shadow paging, and SVM (NPT) doesn't +support per-bit interception controls for CR0. Don't bother checking for +EPT vs. NPT as the "old == new" check will always be true under NPT, i.e. +the only cost is the read of vcpu->arch.cr4 (SVM unconditionally grabs CR0 +from the VMCB on VM-Exit). + +Reported-by: Mathias Krause +Link: https://lkml.kernel.org/r/677169b4-051f-fcae-756b-9a3e1bb9f8fe%40grsecurity.net +Fixes: fb509f76acc8 ("KVM: VMX: Make CR0.WP a guest owned bit") +Tested-by: Mathias Krause +Link: https://lore.kernel.org/r/20230405002608.418442-1-seanjc@google.com +Signed-off-by: Sean Christopherson +Signed-off-by: Mathias Krause # backport to v6.1.x +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/mmu.h | 26 +++++++++++++++++++++++++- + arch/x86/kvm/mmu/mmu.c | 15 +++++++++++++++ + 2 files changed, 40 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h +index 6bdaacb6faa07..59804be91b5b0 100644 +--- a/arch/x86/kvm/mmu.h ++++ b/arch/x86/kvm/mmu.h +@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, + bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); + int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, + u64 fault_address, char *insn, int insn_len); ++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu); + + int kvm_mmu_load(struct kvm_vcpu *vcpu); + void kvm_mmu_unload(struct kvm_vcpu *vcpu); +@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu) + vcpu->arch.mmu->root_role.level); + } + ++static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ /* ++ * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e. ++ * @mmu's snapshot of CR0.WP and thus all related paging metadata may ++ * be stale. Refresh CR0.WP and the metadata on-demand when checking ++ * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing ++ * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does ++ * need to refresh nested_mmu, a.k.a. the walker used to translate L2 ++ * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP. ++ */ ++ if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu) ++ return; ++ ++ __kvm_mmu_refresh_passthrough_bits(vcpu, mmu); ++} ++ + /* + * Check if a given access (described through the I/D, W/R and U/S bits of a + * page fault error code pfec) causes a permission fault with the given PTE +@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; + bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; + int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1; +- bool fault = (mmu->permissions[index] >> pte_access) & 1; + u32 errcode = PFERR_PRESENT_MASK; ++ bool fault; ++ ++ kvm_mmu_refresh_passthrough_bits(vcpu, mmu); ++ ++ fault = (mmu->permissions[index] >> pte_access) & 1; + + WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK)); + if (unlikely(mmu->pkru_mask)) { +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index f2a10c7d13697..230108a90cf39 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5005,6 +5005,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs) + return role; + } + ++void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, ++ struct kvm_mmu *mmu) ++{ ++ const bool cr0_wp = !!kvm_read_cr0_bits(vcpu, X86_CR0_WP); ++ ++ BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP); ++ BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS)); ++ ++ if (is_cr0_wp(mmu) == cr0_wp) ++ return; ++ ++ mmu->cpu_role.base.cr0_wp = cr0_wp; ++ reset_guest_paging_metadata(vcpu, mmu); ++} ++ + static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu) + { + /* tdp_root_level is architecture forced level, use it if nonzero */ +-- +2.39.2 + diff --git a/queue-6.1/series b/queue-6.1/series index e010c42e179..ca75f09ce7e 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -15,3 +15,10 @@ soc-qcom-llcc-do-not-create-edac-platform-device-on-.patch mailbox-zynq-switch-to-flexible-array-to-simplify-co.patch mailbox-zynqmp-fix-counts-of-child-nodes.patch mtd-spi-nor-spansion-enable-jffs2-write-buffer-for-i.patch-26314 +fs-ntfs3-fix-null-ptr-deref-on-inode-i_op-in-ntfs_lo.patch +drm-amd-display-ext-displays-with-dock-can-t-recogni.patch +kvm-x86-mmu-avoid-indirect-call-for-get_cr3.patch +kvm-x86-do-not-unload-mmu-roots-when-only-toggling-c.patch +kvm-x86-make-use-of-kvm_read_cr-_bits-when-testing-b.patch +kvm-vmx-make-cr0.wp-a-guest-owned-bit.patch +kvm-x86-mmu-refresh-cr0.wp-prior-to-checking-for-emu.patch