From: Greg Kroah-Hartman
Date: Sat, 13 May 2023 09:30:18 +0000 (+0900)
Subject: 5.10-stable patches
X-Git-Tag: v4.14.315~66
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7f26109b47feb236135188b06ddb34b2d4cc744a;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
      kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
      kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch
---
diff --git a/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch b/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
new file mode 100644
index 00000000000..0e612f52c8f
--- /dev/null
+++ b/queue-5.10/kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch
@@ -0,0 +1,126 @@
+From 6cd88243c7e03845a450795e134b488fc2afb736 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini
+Date: Tue, 7 Jun 2022 10:09:03 -0400
+Subject: KVM: x86: do not report a vCPU as preempted outside instruction boundaries
+
+From: Paolo Bonzini
+
+commit 6cd88243c7e03845a450795e134b488fc2afb736 upstream.
+
+If a vCPU is outside guest mode and is scheduled out, it might be in the
+process of making a memory access. A problem occurs if another vCPU uses
+the PV TLB flush feature during the period when the vCPU is scheduled
+out, and a virtual address has already been translated but has not yet
+been accessed, because this is equivalent to using a stale TLB entry.
+
+To avoid this, only report a vCPU as preempted if sure that the guest
+is at an instruction boundary. A rescheduling request will be delivered
+to the host physical CPU as an external interrupt, so for simplicity
+consider any vmexit *not* to be an instruction boundary except for external
+interrupts.
+
+It would in principle be okay to report the vCPU as preempted also
+if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the
+vmentry/vmexit overhead unnecessarily, and optimistic spinning is
+also unlikely to succeed. However, leave it for later because right
+now kvm_vcpu_check_block() is doing memory accesses. Even
+though the TLB flush issue only applies to virtual memory addresses,
+it's very much preferable to be conservative.
+ +Reported-by: Jann Horn +Signed-off-by: Paolo Bonzini +[OP: use VCPU_STAT() for debugfs entries] +Signed-off-by: Ovidiu Panait +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/kvm_host.h | 3 +++ + arch/x86/kvm/svm/svm.c | 2 ++ + arch/x86/kvm/vmx/vmx.c | 1 + + arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ + 4 files changed, 28 insertions(+) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -553,6 +553,7 @@ struct kvm_vcpu_arch { + u64 ia32_misc_enable_msr; + u64 smbase; + u64 smi_count; ++ bool at_instruction_boundary; + bool tpr_access_reporting; + bool xsaves_enabled; + u64 ia32_xss; +@@ -1061,6 +1062,8 @@ struct kvm_vcpu_stat { + u64 req_event; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; ++ u64 preemption_reported; ++ u64 preemption_other; + }; + + struct x86_instruction_info; +--- a/arch/x86/kvm/svm/svm.c ++++ b/arch/x86/kvm/svm/svm.c +@@ -3983,6 +3983,8 @@ out: + + static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) + { ++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) ++ vcpu->arch.at_instruction_boundary = true; + } + + static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6510,6 +6510,7 @@ static void handle_external_interrupt_ir + return; + + handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); ++ vcpu->arch.at_instruction_boundary = true; + } + + static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -231,6 +231,8 @@ struct kvm_stats_debugfs_item debugfs_en + VCPU_STAT("l1d_flush", l1d_flush), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), ++ VCPU_STAT("preemption_reported", preemption_reported), ++ VCPU_STAT("preemption_other", preemption_other), + VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), + VM_STAT("mmu_pte_write", mmu_pte_write), + VM_STAT("mmu_pde_zapped", mmu_pde_zapped), +@@ -4052,6 +4054,19 @@ static void kvm_steal_time_set_preempted + struct kvm_host_map map; + struct kvm_steal_time *st; + ++ /* ++ * The vCPU can be marked preempted if and only if the VM-Exit was on ++ * an instruction boundary and will not trigger guest emulation of any ++ * kind (see vcpu_run). Vendor specific code controls (conservatively) ++ * when this is true, for example allowing the vCPU to be marked ++ * preempted if and only if the VM-Exit was due to a host interrupt. ++ */ ++ if (!vcpu->arch.at_instruction_boundary) { ++ vcpu->stat.preemption_other++; ++ return; ++ } ++ ++ vcpu->stat.preemption_reported++; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + +@@ -9357,6 +9372,13 @@ static int vcpu_run(struct kvm_vcpu *vcp + vcpu->arch.l1tf_flush_l1d = true; + + for (;;) { ++ /* ++ * If another guest vCPU requests a PV TLB flush in the middle ++ * of instruction emulation, the rest of the emulation could ++ * use a stale page translation. Assume that any code after ++ * this point can start executing an instruction. 
++ */ ++ vcpu->arch.at_instruction_boundary = false; + if (kvm_vcpu_running(vcpu)) { + r = vcpu_enter_guest(vcpu); + } else { diff --git a/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch b/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch new file mode 100644 index 00000000000..663c4548685 --- /dev/null +++ b/queue-5.10/kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch @@ -0,0 +1,58 @@ +From 6470accc7ba948b0b3aca22b273fe84ec638a116 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Fri, 3 Sep 2021 09:51:36 +0200 +Subject: KVM: x86: hyper-v: Avoid calling kvm_make_vcpus_request_mask() with vcpu_mask==NULL + +From: Vitaly Kuznetsov + +commit 6470accc7ba948b0b3aca22b273fe84ec638a116 upstream. + +In preparation to making kvm_make_vcpus_request_mask() use for_each_set_bit() +switch kvm_hv_flush_tlb() to calling kvm_make_all_cpus_request() for 'all cpus' +case. + +Note: kvm_make_all_cpus_request() (unlike kvm_make_vcpus_request_mask()) +currently dynamically allocates cpumask on each call and this is suboptimal. +Both kvm_make_all_cpus_request() and kvm_make_vcpus_request_mask() are +going to be switched to using pre-allocated per-cpu masks. + +Reviewed-by: Sean Christopherson +Signed-off-by: Vitaly Kuznetsov +Signed-off-by: Paolo Bonzini +Message-Id: <20210903075141.403071-4-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +Acked-by: Sean Christopherson +Fixes: 6100066358ee ("KVM: Optimize kvm_make_vcpus_request_mask() a bit") +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/hyperv.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/hyperv.c ++++ b/arch/x86/kvm/hyperv.c +@@ -1562,16 +1562,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_v + + cpumask_clear(&hv_vcpu->tlb_flush); + +- vcpu_mask = all_cpus ? NULL : +- sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, +- vp_bitmap, vcpu_bitmap); +- + /* + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't + * analyze it here, flush TLB regardless of the specified address space. + */ +- kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, +- NULL, vcpu_mask, &hv_vcpu->tlb_flush); ++ if (all_cpus) { ++ kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST); ++ } else { ++ vcpu_mask = sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, ++ vp_bitmap, vcpu_bitmap); ++ ++ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, ++ NULL, vcpu_mask, &hv_vcpu->tlb_flush); ++ } + + ret_success: + /* We always do full TLB flush, set rep_done = rep_cnt. */ diff --git a/queue-5.10/series b/queue-5.10/series index b8ba1b0f07e..c38c95e7abb 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -357,3 +357,5 @@ drm-amdgpu-gfx-disable-gfx9-cp_ecc_error_irq-only-when-enabling-legacy-gfx-ras.p drm-amdgpu-disable-sdma-ecc-irq-only-when-sdma-ras-is-enabled-in-suspend.patch hid-wacom-set-a-default-resolution-for-older-tablets.patch hid-wacom-insert-timestamp-to-packed-bluetooth-bt-events.patch +kvm-x86-hyper-v-avoid-calling-kvm_make_vcpus_request_mask-with-vcpu_mask-null.patch +kvm-x86-do-not-report-a-vcpu-as-preempted-outside-instruction-boundaries.patch