--- /dev/null
+From 2464e2253fce3d174d78d95e6266a72659ab8476 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 May 2022 13:08:40 -0500
+Subject: entry/kvm: Exit to user mode when TIF_NOTIFY_SIGNAL is set
+
+From: Seth Forshee <sforshee@digitalocean.com>
+
+[ Upstream commit 3e684903a8574ffc9475fdf13c4780a7adb506ad ]
+
+A livepatch transition may stall indefinitely when a kvm vCPU is heavily
+loaded. To the host, the vCPU task is a user thread which is spending a
+very long time in the ioctl(KVM_RUN) syscall. During livepatch
+transition, set_notify_signal() will be called on such tasks to
+interrupt the syscall so that the task can be transitioned. This
+interrupts guest execution, but when xfer_to_guest_mode_work() sees that
+TIF_NOTIFY_SIGNAL is set but not TIF_SIGPENDING it concludes that an
+exit to user mode is unnecessary, and guest execution is resumed without
+transitioning the task for the livepatch.
+
+This handling of TIF_NOTIFY_SIGNAL is incorrect, as set_notify_signal()
+is expected to break tasks out of interruptible kernel loops and cause
+them to return to userspace. Change xfer_to_guest_mode_work() to handle
+TIF_NOTIFY_SIGNAL the same as TIF_SIGPENDING, signaling to the vCPU run
+loop that an exit to userspace is needed. Any pending task_work will be
+run when get_signal() is called from exit_to_user_mode_loop(), so there
+is no longer any need to run task work from xfer_to_guest_mode_work().
+
+Suggested-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Cc: Petr Mladek <pmladek@suse.com>
+Signed-off-by: Seth Forshee <sforshee@digitalocean.com>
+Message-Id: <20220504180840.2907296-1-sforshee@digitalocean.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/entry/kvm.c | 6 ------
+ 1 file changed, 6 deletions(-)
+
+diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
+index 9d09f489b60e..2e0f75bcb7fd 100644
+--- a/kernel/entry/kvm.c
++++ b/kernel/entry/kvm.c
+@@ -9,12 +9,6 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
+ int ret;
+
+ if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
+- clear_notify_signal();
+- if (task_work_pending(current))
+- task_work_run();
+- }
+-
+- if (ti_work & _TIF_SIGPENDING) {
+ kvm_handle_signal_exit(vcpu);
+ return -EINTR;
+ }
+--
+2.35.1
+
--- /dev/null
+From e7c7b8f2f24abbc4915f07595c198d089552495b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Jun 2022 16:43:22 +0200
+Subject: KVM: selftests: Make hyperv_clock selftest more stable
+
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+[ Upstream commit eae260be3a0111a28fe95923e117a55dddec0384 ]
+
+hyperv_clock doesn't always give a stable test result, especially with
+AMD CPUs. The test compares Hyper-V MSR clocksource (acquired either
+with rdmsr() from within the guest or KVM_GET_MSRS from the host)
+against rdtsc(). To increase the accuracy, increase the measured delay
+(done with nop loop) by two orders of magnitude and take the mean rdtsc()
+value before and after rdmsr()/KVM_GET_MSRS.
+
+Reported-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220601144322.1968742-1-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+index e0b2bb1339b1..3330fb183c68 100644
+--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
++++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+@@ -44,7 +44,7 @@ static inline void nop_loop(void)
+ {
+ int i;
+
+- for (i = 0; i < 1000000; i++)
++ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+ }
+
+@@ -56,12 +56,14 @@ static inline void check_tsc_msr_rdtsc(void)
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+@@ -181,12 +183,14 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+- /* First, check MSR-based clocksource */
++ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
++ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+--
+2.35.1
+
--- /dev/null
+From 153db4a959fd4efda9196c92f49e655a9204d9da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 23:32:49 +0000
+Subject: KVM: selftests: Restrict test region to 48-bit physical addresses
+ when using nested
+
+From: David Matlack <dmatlack@google.com>
+
+[ Upstream commit e0f3f46e42064a51573914766897b4ab95d943e3 ]
+
+The selftests nested code only supports 4-level paging at the moment.
+This means it cannot map nested guest physical addresses with more than
+48 bits. Allow perf_test_util nested mode to work on hosts with more
+than 48 physical address bits by restricting the guest test region to
+48 bits.
+
+While here, opportunistically fix an off-by-one error when dealing with
+vm_get_max_gfn(). perf_test_util.c was treating this as the maximum
+number of GFNs, rather than the maximum allowed GFN. This didn't result
+in any correctness issues, but it did end up shifting the test region
+down slightly when using huge pages.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: David Matlack <dmatlack@google.com>
+Message-Id: <20220520233249.3776001-12-dmatlack@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../testing/selftests/kvm/lib/perf_test_util.c | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
+index 722df3a28791..ddd68ba0c99f 100644
+--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
++++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
+@@ -110,6 +110,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
++ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+@@ -144,18 +145,29 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
+
+ pta->vm = vm;
+
++ /* Put the test region at the top guest physical memory. */
++ region_end_gfn = vm_get_max_gfn(vm) + 1;
++
++#ifdef __x86_64__
++ /*
++ * When running vCPUs in L2, restrict the test region to 48 bits to
++ * avoid needing 5-level page tables to identity map L2.
++ */
++ if (pta->nested)
++ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
++#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
++ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " vcpus: %d wss: %" PRIx64 "]\n",
+- guest_num_pages, vm_get_max_gfn(vm), vcpus,
++ guest_num_pages, region_end_gfn - 1, vcpus,
+ vcpu_memory_bytes);
+
+- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
++ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = align_down(pta->gpa, backing_src_pagesz);
+ #ifdef __s390x__
+ /* Align to 1M (segment size) */
+--
+2.35.1
+
--- /dev/null
+From 51da2a430f49e1115c2e805118fe91667ced63cf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Jun 2022 21:08:28 +0300
+Subject: KVM: x86: disable preemption around the call to
+ kvm_arch_vcpu_{un|}blocking
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+[ Upstream commit 18869f26df1a11ed11031dfb7392bc7d774062e8 ]
+
+On SVM/AVIC, if preemption happens right after the call to finish_rcuwait
+but before the call to kvm_arch_vcpu_unblocking, the preemption itself
+will re-enable AVIC, and then we will try to re-enable it again
+in kvm_arch_vcpu_unblocking, which will lead to a warning
+in __avic_vcpu_load.
+
+The same problem can happen if the vCPU is preempted right after the call
+to kvm_arch_vcpu_blocking but before the call to prepare_to_rcuwait
+and in this case, we will end up with AVIC enabled during sleep -
+Ooops.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220606180829.102503-7-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/kvm_main.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 24cb37d19c63..7f1d19689701 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -3327,9 +3327,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+
+ vcpu->stat.generic.blocking = 1;
+
++ preempt_disable();
+ kvm_arch_vcpu_blocking(vcpu);
+-
+ prepare_to_rcuwait(wait);
++ preempt_enable();
++
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+@@ -3339,9 +3341,11 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
+ waited = true;
+ schedule();
+ }
+- finish_rcuwait(wait);
+
++ preempt_disable();
++ finish_rcuwait(wait);
+ kvm_arch_vcpu_unblocking(vcpu);
++ preempt_enable();
+
+ vcpu->stat.generic.blocking = 0;
+
+--
+2.35.1
+
--- /dev/null
+From 4fc1688d87063c8fba524640fb2ab8073c7836c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Jun 2022 21:08:27 +0300
+Subject: KVM: x86: disable preemption while updating apicv inhibition
+
+From: Maxim Levitsky <mlevitsk@redhat.com>
+
+[ Upstream commit 66c768d30e64e1280520f34dbef83419f55f3459 ]
+
+Currently nothing prevents preemption in kvm_vcpu_update_apicv.
+
+On SVM, if the preemption happens after we update the
+vcpu->arch.apicv_active, the preemption itself will
+'update' the inhibition since the AVIC will be first disabled
+on vCPU unload and then enabled, when the current task
+is loaded again.
+
+Then we will try to update it again, which will lead to a warning
+in __avic_vcpu_load, that the AVIC is already enabled.
+
+Fix this by disabling preemption in this code.
+
+Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
+Message-Id: <20220606180829.102503-6-mlevitsk@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 91d887fd10ab..65b0ec28bd52 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -9784,6 +9784,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ return;
+
+ down_read(&vcpu->kvm->arch.apicv_update_lock);
++ preempt_disable();
+
+ activate = kvm_apicv_activated(vcpu->kvm);
+ if (vcpu->arch.apicv_active == activate)
+@@ -9803,6 +9804,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+ out:
++ preempt_enable();
+ up_read(&vcpu->kvm->arch.apicv_update_lock);
+ }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
+--
+2.35.1
+
--- /dev/null
+From 491ff8c3605ee8f022817a87843afbb2dea86fc9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 10:09:03 -0400
+Subject: KVM: x86: do not report a vCPU as preempted outside instruction
+ boundaries
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 6cd88243c7e03845a450795e134b488fc2afb736 ]
+
+If a vCPU is outside guest mode and is scheduled out, it might be in the
+process of making a memory access. A problem occurs if another vCPU uses
+the PV TLB flush feature during the period when the vCPU is scheduled
+out, and a virtual address has already been translated but has not yet
+been accessed, because this is equivalent to using a stale TLB entry.
+
+To avoid this, only report a vCPU as preempted if sure that the guest
+is at an instruction boundary. A rescheduling request will be delivered
+to the host physical CPU as an external interrupt, so for simplicity
+consider any vmexit *not* to be at an instruction boundary, except for
+external interrupts.
+
+It would in principle be okay to report the vCPU as preempted also
+if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the
+vmentry/vmexit overhead unnecessarily, and optimistic spinning is
+also unlikely to succeed. However, leave it for later because right
+now kvm_vcpu_check_block() is doing memory accesses. Even
+though the TLB flush issue only applies to virtual memory addresses,
+it's very much preferable to be conservative.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h | 3 +++
+ arch/x86/kvm/svm/svm.c | 2 ++
+ arch/x86/kvm/vmx/vmx.c | 1 +
+ arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++
+ 4 files changed, 28 insertions(+)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 4ff36610af6a..9fdaa847d4b6 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -651,6 +651,7 @@ struct kvm_vcpu_arch {
+ u64 ia32_misc_enable_msr;
+ u64 smbase;
+ u64 smi_count;
++ bool at_instruction_boundary;
+ bool tpr_access_reporting;
+ bool xsaves_enabled;
+ bool xfd_no_write_intercept;
+@@ -1289,6 +1290,8 @@ struct kvm_vcpu_stat {
+ u64 nested_run;
+ u64 directed_yield_attempted;
+ u64 directed_yield_successful;
++ u64 preemption_reported;
++ u64 preemption_other;
+ u64 guest_mode;
+ };
+
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 6bfb0b0e66bd..c667214c630b 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4166,6 +4166,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
+
+ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+ {
++ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 4b6a0268c78e..597c3c08da50 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -6630,6 +6630,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
+ return;
+
+ handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
++ vcpu->arch.at_instruction_boundary = true;
+ }
+
+ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 53b6fdf30c99..df74ec51c7f3 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -291,6 +291,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ STATS_DESC_COUNTER(VCPU, nested_run),
+ STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
+ STATS_DESC_COUNTER(VCPU, directed_yield_successful),
++ STATS_DESC_COUNTER(VCPU, preemption_reported),
++ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ };
+
+@@ -4607,6 +4609,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+ struct kvm_memslots *slots;
+ static const u8 preempted = KVM_VCPU_PREEMPTED;
+
++ /*
++ * The vCPU can be marked preempted if and only if the VM-Exit was on
++ * an instruction boundary and will not trigger guest emulation of any
++ * kind (see vcpu_run). Vendor specific code controls (conservatively)
++ * when this is true, for example allowing the vCPU to be marked
++ * preempted if and only if the VM-Exit was due to a host interrupt.
++ */
++ if (!vcpu->arch.at_instruction_boundary) {
++ vcpu->stat.preemption_other++;
++ return;
++ }
++
++ vcpu->stat.preemption_reported++;
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+@@ -10363,6 +10378,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
+ vcpu->arch.l1tf_flush_l1d = true;
+
+ for (;;) {
++ /*
++ * If another guest vCPU requests a PV TLB flush in the middle
++ * of instruction emulation, the rest of the emulation could
++ * use a stale page translation. Assume that any code after
++ * this point can start executing an instruction.
++ */
++ vcpu->arch.at_instruction_boundary = false;
+ if (kvm_vcpu_running(vcpu)) {
+ r = vcpu_enter_guest(vcpu);
+ } else {
+--
+2.35.1
+
--- /dev/null
+From 24c49abc49511c32c73ab946cd5b29bdf71c21cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 10:07:11 -0400
+Subject: KVM: x86: do not set st->preempted when going back to user space
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+[ Upstream commit 54aa83c90198e68eee8b0850c749bc70efb548da ]
+
+Similar to the Xen path, only change the vCPU's reported state if the vCPU
+was actually preempted. The reason for KVM's behavior is that for example
+optimistic spinning might not be a good idea if the guest is doing repeated
+exits to userspace; however, it is confusing and unlikely to make a difference,
+because well-tuned guests will hardly ever exit KVM_RUN in the first place.
+
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 26 ++++++++++++++------------
+ arch/x86/kvm/xen.h | 6 ++++--
+ 2 files changed, 18 insertions(+), 14 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index df74ec51c7f3..91d887fd10ab 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -4651,19 +4651,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+ int idx;
+
+- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
+- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
++ if (vcpu->preempted) {
++ if (!vcpu->arch.guest_state_protected)
++ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+
+- /*
+- * Take the srcu lock as memslots will be accessed to check the gfn
+- * cache generation against the memslots generation.
+- */
+- idx = srcu_read_lock(&vcpu->kvm->srcu);
+- if (kvm_xen_msr_enabled(vcpu->kvm))
+- kvm_xen_runstate_set_preempted(vcpu);
+- else
+- kvm_steal_time_set_preempted(vcpu);
+- srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ /*
++ * Take the srcu lock as memslots will be accessed to check the gfn
++ * cache generation against the memslots generation.
++ */
++ idx = srcu_read_lock(&vcpu->kvm->srcu);
++ if (kvm_xen_msr_enabled(vcpu->kvm))
++ kvm_xen_runstate_set_preempted(vcpu);
++ else
++ kvm_steal_time_set_preempted(vcpu);
++ srcu_read_unlock(&vcpu->kvm->srcu, idx);
++ }
+
+ static_call(kvm_x86_vcpu_put)(vcpu);
+ vcpu->arch.last_host_tsc = rdtsc();
+diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
+index adbcc9ed59db..fda1413f8af9 100644
+--- a/arch/x86/kvm/xen.h
++++ b/arch/x86/kvm/xen.h
+@@ -103,8 +103,10 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+ * behalf of the vCPU. Only if the VMM does actually block
+ * does it need to enter RUNSTATE_blocked.
+ */
+- if (vcpu->preempted)
+- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
++ if (WARN_ON_ONCE(!vcpu->preempted))
++ return;
++
++ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+ }
+
+ /* 32-bit compatibility definitions, also used natively in 32-bit build */
+--
+2.35.1
+
--- /dev/null
+From c8b327fe59257653dfee8d80ebfce83def9c1d2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 May 2022 23:09:04 +0000
+Subject: KVM: x86/MMU: Zap non-leaf SPTEs when disabling dirty logging
+
+From: Ben Gardon <bgardon@google.com>
+
+[ Upstream commit 5ba7c4c6d1c7af47a916f728bb5940669684a087 ]
+
+Currently disabling dirty logging with the TDP MMU is extremely slow.
+On a 96 vCPU / 96G VM backed with gigabyte pages, it takes ~200 seconds
+to disable dirty logging with the TDP MMU, as opposed to ~4 seconds with
+the shadow MMU.
+
+When disabling dirty logging, zap non-leaf parent entries to allow
+replacement with huge pages instead of recursing and zapping all of the
+child, leaf entries. This reduces the number of TLB flushes required
+and reduces the disable dirty log time with the TDP MMU to ~3 seconds.
+
+Opportunistically add a WARN() to catch GFNs that are mapped at a
+higher level than their max level.
+
+Signed-off-by: Ben Gardon <bgardon@google.com>
+Message-Id: <20220525230904.1584480-1-bgardon@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/tdp_iter.c | 9 +++++++++
+ arch/x86/kvm/mmu/tdp_iter.h | 1 +
+ arch/x86/kvm/mmu/tdp_mmu.c | 38 +++++++++++++++++++++++++++++++------
+ 3 files changed, 42 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
+index 6d3b3e5a5533..ee4802d7b36c 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.c
++++ b/arch/x86/kvm/mmu/tdp_iter.c
+@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
+ return true;
+ }
+
++/*
++ * Step the iterator back up a level in the paging structure. Should only be
++ * used when the iterator is below the root level.
++ */
++void tdp_iter_step_up(struct tdp_iter *iter)
++{
++ WARN_ON(!try_step_up(iter));
++}
++
+ /*
+ * Step to the next SPTE in a pre-order traversal of the paging structure.
+ * To get to the next SPTE, the iterator either steps down towards the goal
+diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
+index f0af385c56e0..adfca0cf94d3 100644
+--- a/arch/x86/kvm/mmu/tdp_iter.h
++++ b/arch/x86/kvm/mmu/tdp_iter.h
+@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
+ int min_level, gfn_t next_last_level_gfn);
+ void tdp_iter_next(struct tdp_iter *iter);
+ void tdp_iter_restart(struct tdp_iter *iter);
++void tdp_iter_step_up(struct tdp_iter *iter);
+
+ #endif /* __KVM_X86_MMU_TDP_ITER_H */
+diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
+index 922b06bf4b94..b61a11d462cc 100644
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1748,12 +1748,12 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ gfn_t start = slot->base_gfn;
+ gfn_t end = start + slot->npages;
+ struct tdp_iter iter;
++ int max_mapping_level;
+ kvm_pfn_t pfn;
+
+ rcu_read_lock();
+
+ tdp_root_for_each_pte(iter, root, start, end) {
+-retry:
+ if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+ continue;
+
+@@ -1761,15 +1761,41 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
+ !is_last_spte(iter.old_spte, iter.level))
+ continue;
+
++ /*
++ * This is a leaf SPTE. Check if the PFN it maps can
++ * be mapped at a higher level.
++ */
+ pfn = spte_to_pfn(iter.old_spte);
+- if (kvm_is_reserved_pfn(pfn) ||
+- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
+- pfn, PG_LEVEL_NUM))
++
++ if (kvm_is_reserved_pfn(pfn))
+ continue;
+
++ max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
++ iter.gfn, pfn, PG_LEVEL_NUM);
++
++ WARN_ON(max_mapping_level < iter.level);
++
++ /*
++ * If this page is already mapped at the highest
++ * viable level, there's nothing more to do.
++ */
++ if (max_mapping_level == iter.level)
++ continue;
++
++ /*
++ * The page can be remapped at a higher level, so step
++ * up to zap the parent SPTE.
++ */
++ while (max_mapping_level > iter.level)
++ tdp_iter_step_up(&iter);
++
+ /* Note, a successful atomic zap also does a remote TLB flush. */
+- if (tdp_mmu_zap_spte_atomic(kvm, &iter))
+- goto retry;
++ tdp_mmu_zap_spte_atomic(kvm, &iter);
++
++ /*
++ * If the atomic zap fails, the iter will recurse back into
++ * the same subtree to retry.
++ */
+ }
+
+ rcu_read_unlock();
+--
+2.35.1
+
--- /dev/null
+From dc63942c964430e4cd9989784898f4bc54b59b96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 17:18:58 +0000
+Subject: KVM: x86/svm: add __GFP_ACCOUNT to __sev_dbg_{en,de}crypt_user()
+
+From: Mingwei Zhang <mizhang@google.com>
+
+[ Upstream commit ebdec859faa8cfbfef9f6c1f83d79dd6c8f4ab8c ]
+
+Add the accounting flag when allocating pages within the SEV functions,
+since these memory pages should belong to the individual VM.
+
+No functional change intended.
+
+Signed-off-by: Mingwei Zhang <mizhang@google.com>
+Message-Id: <20220623171858.2083637-1-mizhang@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/sev.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
+index 76e9e6eb71d6..7aa1ce34a520 100644
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -844,7 +844,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+
+ /* If source buffer is not aligned then use an intermediate buffer */
+ if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
+- src_tpage = alloc_page(GFP_KERNEL);
++ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!src_tpage)
+ return -ENOMEM;
+
+@@ -865,7 +865,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+ if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+ int dst_offset;
+
+- dst_tpage = alloc_page(GFP_KERNEL);
++ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!dst_tpage) {
+ ret = -ENOMEM;
+ goto e_free;
+--
+2.35.1
+
--- /dev/null
+From 803343021ef3a73530cf46c9ad37759888b6f078 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 18:57:06 +0000
+Subject: selftests: KVM: Handle compiler optimizations in ucall
+
+From: Raghavendra Rao Ananta <rananta@google.com>
+
+[ Upstream commit 9e2f6498efbbc880d7caa7935839e682b64fe5a6 ]
+
+The selftests, when built with newer versions of clang, are found
+to have an over-optimized guest ucall() function, with the stores
+to uc.cmd eliminated (perhaps due to no immediate readers). This
+resulted in the userspace side always reading a value of '0',
+causing multiple test failures.
+
+As a result, prevent the compiler from optimizing the stores in
+ucall() with WRITE_ONCE().
+
+Suggested-by: Ricardo Koller <ricarkol@google.com>
+Suggested-by: Reiji Watanabe <reijiw@google.com>
+Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
+Message-Id: <20220615185706.1099208-1-rananta@google.com>
+Reviewed-by: Andrew Jones <drjones@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/kvm/lib/aarch64/ucall.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+index e0b0164e9af8..be1d9728c4ce 100644
+--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
++++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+@@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm)
+
+ void ucall(uint64_t cmd, int nargs, ...)
+ {
+- struct ucall uc = {
+- .cmd = cmd,
+- };
++ struct ucall uc = {};
+ va_list va;
+ int i;
+
++ WRITE_ONCE(uc.cmd, cmd);
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+- uc.args[i] = va_arg(va, uint64_t);
++ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
++ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+ }
+
+ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+--
+2.35.1
+
acpi-video-shortening-quirk-list-by-identifying-clevo-by-board_name-only.patch
acpi-apei-better-fix-to-avoid-spamming-the-console-with-old-error-logs.patch
crypto-arm64-poly1305-fix-a-read-out-of-bound.patch
+kvm-x86-do-not-report-a-vcpu-as-preempted-outside-in.patch
+kvm-x86-do-not-set-st-preempted-when-going-back-to-u.patch
+kvm-selftests-make-hyperv_clock-selftest-more-stable.patch
+kvm-x86-mmu-zap-non-leaf-sptes-when-disabling-dirty-.patch
+entry-kvm-exit-to-user-mode-when-tif_notify_signal-i.patch
+kvm-x86-disable-preemption-while-updating-apicv-inhi.patch
+kvm-x86-disable-preemption-around-the-call-to-kvm_ar.patch
+kvm-selftests-restrict-test-region-to-48-bit-physica.patch
+tools-kvm_stat-fix-display-of-error-when-multiple-pr.patch
+selftests-kvm-handle-compiler-optimizations-in-ucall.patch
+kvm-x86-svm-add-__gfp_account-to-__sev_dbg_-en-de-cr.patch
--- /dev/null
+From 10ae01db75615479483fc1f09f3970477727bee3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 15:11:41 +0300
+Subject: tools/kvm_stat: fix display of error when multiple processes are
+ found
+
+From: Dmitry Klochkov <kdmitry556@gmail.com>
+
+[ Upstream commit 933b5f9f98da29af646b51b36a0753692908ef64 ]
+
+Instead of printing an error message, kvm_stat script fails when we
+restrict statistics to a guest by its name and there are multiple guests
+with such name:
+
+ # kvm_stat -g my_vm
+ Traceback (most recent call last):
+ File "/usr/bin/kvm_stat", line 1819, in <module>
+ main()
+ File "/usr/bin/kvm_stat", line 1779, in main
+ options = get_options()
+ File "/usr/bin/kvm_stat", line 1718, in get_options
+ options = argparser.parse_args()
+ File "/usr/lib64/python3.10/argparse.py", line 1825, in parse_args
+ args, argv = self.parse_known_args(args, namespace)
+ File "/usr/lib64/python3.10/argparse.py", line 1858, in parse_known_args
+ namespace, args = self._parse_known_args(args, namespace)
+ File "/usr/lib64/python3.10/argparse.py", line 2067, in _parse_known_args
+ start_index = consume_optional(start_index)
+ File "/usr/lib64/python3.10/argparse.py", line 2007, in consume_optional
+ take_action(action, args, option_string)
+ File "/usr/lib64/python3.10/argparse.py", line 1935, in take_action
+ action(self, namespace, argument_values, option_string)
+ File "/usr/bin/kvm_stat", line 1649, in __call__
+ ' to specify the desired pid'.format(" ".join(pids)))
+ TypeError: sequence item 0: expected str instance, int found
+
+To avoid this, convert the int pid values to strings before
+passing them to join().
+
+Signed-off-by: Dmitry Klochkov <kdmitry556@gmail.com>
+Message-Id: <20220614121141.160689-1-kdmitry556@gmail.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/kvm/kvm_stat/kvm_stat | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
+index 5a5bd74f55bd..9c366b3a676d 100755
+--- a/tools/kvm/kvm_stat/kvm_stat
++++ b/tools/kvm/kvm_stat/kvm_stat
+@@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately.
+ .format(values))
+ if len(pids) > 1:
+ sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+- ' to specify the desired pid'.format(" ".join(pids)))
++ ' to specify the desired pid'
++ .format(" ".join(map(str, pids))))
+ namespace.pid = pids[0]
+
+ argparser = argparse.ArgumentParser(description=description_text,
+--
+2.35.1
+