From: Greg Kroah-Hartman
Date: Sat, 13 May 2023 09:30:18 +0000 (+0900)
Subject: 5.10-stable patches
X-Git-Tag: v4.14.315~66
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7f26109b47feb236135188b06ddb34b2d4cc744a;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
      kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
      kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch
---
diff --git a/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch b/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
new file mode 100644
index 00000000000..0e612f52c8f
--- /dev/null
+++ b/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
@@ -0,0 +1,126 @@
+From 6cd88243c7e03845a450795e134b488fc2afb736 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Tue, 7 Jun 2022 10:09:03 -0400
+Subject: KVM: x86: do not report a vCPU as preempted outside instruction boundaries
+
+From: Paolo Bonzini
+
+commit 6cd88243c7e03845a450795e134b488fc2afb736 upstream.
+
+If a vCPU is outside guest mode and is scheduled out, it might be in the
+process of making a memory access. A problem occurs if another vCPU uses
+the PV TLB flush feature during the period when the vCPU is scheduled
+out, and a virtual address has already been translated but has not yet
+been accessed, because this is equivalent to using a stale TLB entry.
+
+To avoid this, only report a vCPU as preempted if sure that the guest
+is at an instruction boundary. A rescheduling request will be delivered
+to the host physical CPU as an external interrupt, so for simplicity
+consider any vmexit *not* to be an instruction boundary except for external
+interrupts.
+
+It would in principle be okay to report the vCPU as preempted also
+if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the
+vmentry/vmexit overhead unnecessarily, and optimistic spinning is
+also unlikely to succeed. However, leave it for later because right
+now kvm_vcpu_check_block() is doing memory accesses. Even
+though the TLB flush issue only applies to virtual memory addresses,
+it's very much preferable to be conservative.
+ +Reported-by: Jann Horn +Signed-off-by: Paolo Bonzini +[OP: use VCPU_STAT() for debugfs entries] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 3 +++ + arch/x86/kvm/svm/svm.c | 2 ++ + arch/x86/kvm/vmx/vmx.c | 1 + + arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ + 4 files changed, 28 insertions(+) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -553,6 +553,7 @@ struct kvm_vcpu_arch { + u64 ia32_misc_enable_msr; + u64 smbase; + u64 smi_count; ++ bool at_instruction_boundary; + bool tpr_access_reporting; + bool xsaves_enabled; + u64 ia32_xss; +@@ -1061,6 +1062,8 @@ struct kvm_vcpu_stat { + u64 req_event; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; ++ u64 preemption_reported; ++ u64 preemption_other; + }; + + struct x86_instruction_info; +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3983,6 +3983,8 @@ out: + + static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) + { ++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) ++ vcpu->arch.at_instruction_boundary = true; + } + + static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6510,6 +6510,7 @@ static void handle_external_interrupt_ir + return; + + handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); ++ vcpu->arch.at_instruction_boundary = true; + } + + static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -231,6 +231,8 @@ struct kvm_stats_debugfs_item debugfs_en + VCPU_STAT("l1d_flush", l1d_flush), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), ++ VCPU_STAT("preemption_reported", preemption_reported), ++ VCPU_STAT("preemption_other", preemption_other), + VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), + VM_STAT("mmu_pte_write", mmu_pte_write), + VM_STAT("mmu_pde_zapped", mmu_pde_zapped), +@@ -4052,6 +4054,19 @@ static void kvm_steal_time_set_preempted + struct kvm_host_map map; + struct kvm_steal_time *st; + ++ /* ++ * The vCPU can be marked preempted if and only if the VM-Exit was on ++ * an instruction boundary and will not trigger guest emulation of any ++ * kind (see vcpu_run). Vendor specific code controls (conservatively) ++ * when this is true, for example allowing the vCPU to be marked ++ * preempted if and only if the VM-Exit was due to a host interrupt. ++ */ ++ if (!vcpu->arch.at_instruction_boundary) { ++ vcpu->stat.preemption_other++; ++ return; ++ } ++ ++ vcpu->stat.preemption_reported++; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +@@ -9357,6 +9372,13 @@ static int vcpu_run(struct kvm_vcpu *vcp + vcpu->arch.l1tf_flush_l1d = true; + + for (;;) { ++ /* ++ * If another guest vCPU requests a PV TLB flush in the middle ++ * of instruction emulation, the rest of the emulation could ++ * use a stale page translation. Assume that any code after ++ * this point can start executing an instruction. 
++ */ ++ vcpu->arch.at_instruction_boundary = false; + if (kvm_vcpu_running(vcpu)) { + r = vcpu_enter_guest(vcpu); + } else { diff --git a/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch b/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch new file mode 100644 index 00000000000..663c4548685 --- /dev/null +++ b/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch @@ -0,0 +1,58 @@ +From 6470accc7ba948b0b3aca22b273fe84ec638a116 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Fri, 3 Sep 2021 09:51:36 +0200 +Subject: KVM: x86: hyper-v: Avoid calling kvm_make_vcpus_request_mask() with vcpu_mask==NULL + +From: Vitaly Kuznetsov + +commit 6470accc7ba948b0b3aca22b273fe84ec638a116 upstream. + +In preparation to making kvm_make_vcpus_request_mask() use for_each_set_bit() +switch kvm_hv_flush_tlb() to calling kvm_make_all_cpus_request() for 'all cpus' +case. + +Note: kvm_make_all_cpus_request() (unlike kvm_make_vcpus_request_mask()) +currently dynamically allocates cpumask on each call and this is suboptimal. +Both kvm_make_all_cpus_request() and kvm_make_vcpus_request_mask() are +going to be switched to using pre-allocated per-cpu masks. + +Reviewed-by: Sean Christopherson +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Message-Id: <20210903075141.403071-4-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Acked-by: Sean Christopherson +Fixes: 6100066358ee ("KVM: Optimize kvm_make_vcpus_request_mask() a bit") +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/hyperv.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/hyperv.c ++++ b/arch/x86/kvm/hyperv.c +@@ -1562,16 +1562,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_v + + cpumask_clear(&hv_vcpu->tlb_flush); + +- vcpu_mask = all_cpus ? NULL : +- sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, +- vp_bitmap, vcpu_bitmap); +- + /* + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't + * analyze it here, flush TLB regardless of the specified address space. + */ +- kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, +- NULL, vcpu_mask, &hv_vcpu->tlb_flush); ++ if (all_cpus) { ++ kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST); ++ } else { ++ vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, ++ vp_bitmap, vcpu_bitmap); ++ ++ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, ++ NULL, vcpu_mask, &hv_vcpu->tlb_flush); ++ } + + ret_success: + /* We always do full TLB flush, set rep_done = rep_cnt. */ diff --git a/queue-5.10/series b/queue-5.10/series index b8ba1b0f07e..c38c95e7abb 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -357,3 +357,5 @@ drm-amdgpu-gfx-disable-gfx9-cp_ecc_error_irq-only-when-enabling-legacy-gfx-ras.p drm-amdgpu-disable-sdma-ecc-irq-only-when-sdma-ras-is-enabled-in-suspend.patch hid-wacom-set-a-default-resolution-for-older-tablets.patch hid-wacom-insert-timestamp-to-packed-bluetooth-bt-events.patch +kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch +kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch