--- /dev/null
+From 6cd88243c7e03845a450795e134b488fc2afb736 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 7 Jun 2022 10:09:03 -0400
+Subject: KVM: x86: do not report a vCPU as preempted outside instruction boundaries
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit 6cd88243c7e03845a450795e134b488fc2afb736 upstream.
+
+If a vCPU is outside guest mode and is scheduled out, it might be in the
+process of making a memory access. A problem occurs if another vCPU uses
+the PV TLB flush feature during the period when the vCPU is scheduled
+out, and a virtual address has already been translated but has not yet
+been accessed, because this is equivalent to using a stale TLB entry.
+
+To avoid this, only report a vCPU as preempted if it is certain that the
+guest is at an instruction boundary. A rescheduling request will be
+delivered to the host physical CPU as an external interrupt, so for
+simplicity treat any vmexit that is *not* due to an external interrupt
+as not being on an instruction boundary.
+
+In principle it would also be okay to report the vCPU as preempted if
+it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the
+vmentry/vmexit overhead unnecessarily, and optimistic spinning is
+also unlikely to succeed. However, leave it for later because right
+now kvm_vcpu_check_block() is doing memory accesses. Even
+though the TLB flush issue only applies to virtual memory addresses,
+it's very much preferable to be conservative.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[OP: use VCPU_STAT() for debugfs entries]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h | 3 +++
+ arch/x86/kvm/svm/svm.c | 2 ++
+ arch/x86/kvm/vmx/vmx.c | 1 +
+ arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++
+ 4 files changed, 28 insertions(+)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -553,6 +553,7 @@ struct kvm_vcpu_arch {
+ u64 ia32_misc_enable_msr;
+ u64 smbase;
+ u64 smi_count;
++ bool at_instruction_boundary;
+ bool tpr_access_reporting;
+ bool xsaves_enabled;
+ u64 ia32_xss;
+@@ -1061,6 +1062,8 @@ struct kvm_vcpu_stat {
+ u64 req_event;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
++ u64 preemption_reported;
++ u64 preemption_other;
+ };
+
+ struct x86_instruction_info;
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3983,6 +3983,8 @@ out:
+
+ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+ {
++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6510,6 +6510,7 @@ static void handle_external_interrupt_ir
+ return;
+
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -231,6 +231,8 @@ struct kvm_stats_debugfs_item debugfs_en
+ VCPU_STAT("l1d_flush", l1d_flush),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
++ VCPU_STAT("preemption_reported", preemption_reported),
++ VCPU_STAT("preemption_other", preemption_other),
+ VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
+ VM_STAT("mmu_pte_write", mmu_pte_write),
+ VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
+@@ -4052,6 +4054,19 @@ static void kvm_steal_time_set_preempted
+ struct kvm_host_map map;
+ struct kvm_steal_time *st;
+
++ /*
++ * The vCPU can be marked preempted if and only if the VM-Exit was on
++ * an instruction boundary and will not trigger guest emulation of any
++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
++ * when this is true, for example allowing the vCPU to be marked
++ * preempted if and only if the VM-Exit was due to a host interrupt.
++ */
++ if (!vcpu->arch.at_instruction_boundary) {
++ vcpu->stat.preemption_other++;
++ return;
++ }
++
++ vcpu->stat.preemption_reported++;
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+@@ -9357,6 +9372,13 @@ static int vcpu_run(struct kvm_vcpu *vcp
+ vcpu->arch.l1tf_flush_l1d = true;
+
+ for (;;) {
++ /*
++ * If another guest vCPU requests a PV TLB flush in the middle
++ * of instruction emulation, the rest of the emulation could
++ * use a stale page translation. Assume that any code after
++ * this point can start executing an instruction.
++ */
++ vcpu->arch.at_instruction_boundary = false;
+ if (kvm_vcpu_running(vcpu)) {
+ r = vcpu_enter_guest(vcpu);
+ } else {
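Taken together, the hunks above implement a small handshake around the new
at_instruction_boundary flag: vcpu_run() clears it before each pass through the
loop, only the external-interrupt exit_irqoff handlers set it, and
kvm_steal_time_set_preempted() refuses to mark the vCPU preempted unless it is
set. The standalone C sketch below models that flow with toy stand-in types and
counters (none of the toy_* names exist in KVM); it is an illustration of the
logic, not kernel code.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for the fields the patch adds to struct kvm_vcpu_arch
     * and struct kvm_vcpu_stat. */
    struct toy_vcpu {
            bool at_instruction_boundary;
            uint64_t preemption_reported;
            uint64_t preemption_other;
    };

    /* Mirrors the early return added to kvm_steal_time_set_preempted(). */
    static void toy_set_preempted(struct toy_vcpu *vcpu)
    {
            if (!vcpu->at_instruction_boundary) {
                    /* Not safe: the guest may be in the middle of an
                     * emulated instruction and hold a stale translation. */
                    vcpu->preemption_other++;
                    return;
            }
            vcpu->preemption_reported++;
            /* ... here the real code records KVM_VCPU_PREEMPTED ... */
    }

    /* Mirrors the exit_irqoff hooks: only an external-interrupt vmexit
     * (SVM_EXIT_INTR, or the VMX external-interrupt path) is treated as
     * an instruction boundary. */
    static void toy_handle_exit_irqoff(struct toy_vcpu *vcpu, bool external_intr)
    {
            if (external_intr)
                    vcpu->at_instruction_boundary = true;
    }

    int main(void)
    {
            struct toy_vcpu vcpu = { 0 };

            /* One pass of the vcpu_run() loop: clear the flag first, since
             * anything after this point may start emulating an instruction. */
            vcpu.at_instruction_boundary = false;
            toy_handle_exit_irqoff(&vcpu, false);   /* e.g. a page-fault exit */
            toy_set_preempted(&vcpu);               /* -> preemption_other    */

            vcpu.at_instruction_boundary = false;
            toy_handle_exit_irqoff(&vcpu, true);    /* external interrupt     */
            toy_set_preempted(&vcpu);               /* -> preemption_reported */

            printf("reported=%llu other=%llu\n",
                   (unsigned long long)vcpu.preemption_reported,
                   (unsigned long long)vcpu.preemption_other);
            return 0;
    }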
--- /dev/null
+From 6470accc7ba948b0b3aca22b273fe84ec638a116 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Fri, 3 Sep 2021 09:51:36 +0200
+Subject: KVM: x86: hyper-v: Avoid calling kvm_make_vcpus_request_mask() with vcpu_mask==NULL
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+commit 6470accc7ba948b0b3aca22b273fe84ec638a116 upstream.
+
+In preparation for making kvm_make_vcpus_request_mask() use for_each_set_bit(),
+switch kvm_hv_flush_tlb() to calling kvm_make_all_cpus_request() for the
+'all cpus' case.
+
+Note: kvm_make_all_cpus_request() (unlike kvm_make_vcpus_request_mask())
+currently allocates a cpumask dynamically on each call, which is suboptimal.
+Both kvm_make_all_cpus_request() and kvm_make_vcpus_request_mask() are
+going to be switched to using pre-allocated per-cpu masks.
+
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Message-Id: <20210903075141.403071-4-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Acked-by: Sean Christopherson <seanjc@google.com>
+Fixes: 6100066358ee ("KVM: Optimize kvm_make_vcpus_request_mask() a bit")
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/hyperv.c | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
+--- a/arch/x86/kvm/hyperv.c
++++ b/arch/x86/kvm/hyperv.c
+@@ -1562,16 +1562,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_v
+
+ cpumask_clear(&hv_vcpu->tlb_flush);
+
+- vcpu_mask = all_cpus ? NULL :
+- sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+- vp_bitmap, vcpu_bitmap);
+-
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+ * analyze it here, flush TLB regardless of the specified address space.
+ */
+- kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
+- NULL, vcpu_mask, &hv_vcpu->tlb_flush);
++ if (all_cpus) {
++ kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
++ } else {
++ vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
++ vp_bitmap, vcpu_bitmap);
++
++ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
++ NULL, vcpu_mask, &hv_vcpu->tlb_flush);
++ }
+
+ ret_success:
+ /* We always do full TLB flush, set rep_done = rep_cnt. */
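After this change the 'all cpus' case never reaches kvm_make_vcpus_request_mask()
with a NULL mask, which is what the follow-up for_each_set_bit() conversion relies
on. The standalone C sketch below models just that branch structure with toy
helpers and a fixed-size array of request words (the toy_* names are illustrative
stand-ins, not KVM APIs).

    #include <stdbool.h>
    #include <stdio.h>

    #define TOY_NR_VCPUS      8
    #define TOY_REQ_TLB_FLUSH 0x1u

    /* Toy stand-in: one request word per vCPU. */
    static unsigned int toy_requests[TOY_NR_VCPUS];

    /* Models kvm_make_all_cpus_request(): hit every vCPU. */
    static void toy_request_all(unsigned int req)
    {
            for (int i = 0; i < TOY_NR_VCPUS; i++)
                    toy_requests[i] |= req;
    }

    /* Models kvm_make_vcpus_request_mask(): the caller now guarantees
     * that mask is never NULL. */
    static void toy_request_mask(unsigned int req, const bool *mask)
    {
            for (int i = 0; i < TOY_NR_VCPUS; i++)
                    if (mask[i])
                            toy_requests[i] |= req;
    }

    /* Mirrors the new shape of kvm_hv_flush_tlb(): branch on all_cpus
     * instead of encoding "all" as a NULL vcpu_mask. */
    static void toy_hv_flush_tlb(bool all_cpus, const bool *vcpu_mask)
    {
            if (all_cpus)
                    toy_request_all(TOY_REQ_TLB_FLUSH);
            else
                    toy_request_mask(TOY_REQ_TLB_FLUSH, vcpu_mask);
    }

    int main(void)
    {
            bool vcpu_mask[TOY_NR_VCPUS] = { [1] = true, [5] = true };

            toy_hv_flush_tlb(false, vcpu_mask);
            for (int i = 0; i < TOY_NR_VCPUS; i++)
                    printf("vcpu%d: %s\n", i,
                           toy_requests[i] & TOY_REQ_TLB_FLUSH ? "flush" : "-");
            return 0;
    }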