--- /dev/null
+From 94ac0835391efc1a30feda6fc908913ec012951e Mon Sep 17 00:00:00 2001
+From: Eric Auger <eric.auger@redhat.com>
+Date: Mon, 12 Apr 2021 17:00:34 +0200
+Subject: KVM: arm/arm64: Fix KVM_VGIC_V3_ADDR_TYPE_REDIST read
+
+From: Eric Auger <eric.auger@redhat.com>
+
+commit 94ac0835391efc1a30feda6fc908913ec012951e upstream.
+
+When reading the base address of a REDIST region
+through KVM_VGIC_V3_ADDR_TYPE_REDIST we expect the
+redistributor region list to be populated with a single
+element.
+
+However, list_first_entry() expects the list to be non-empty.
+Instead we should use list_first_entry_or_null(), which effectively
+returns NULL if the list is empty.
+
+Fixes: dbd9733ab674 ("KVM: arm/arm64: Replace the single rdist region by a list")
+Cc: <Stable@vger.kernel.org> # v4.18+
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+Reported-by: Gavin Shan <gshan@redhat.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210412150034.29185-1-eric.auger@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-kvm-device.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
++++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
+@@ -87,8 +87,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsig
+ r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
+ goto out;
+ }
+- rdreg = list_first_entry(&vgic->rd_regions,
+- struct vgic_redist_region, list);
++ rdreg = list_first_entry_or_null(&vgic->rd_regions,
++ struct vgic_redist_region, list);
+ if (!rdreg)
+ addr_ptr = &undef_value;
+ else
--- /dev/null
+From 53b16dd6ba5cf64ed147ac3523ec34651d553cb0 Mon Sep 17 00:00:00 2001
+From: Eric Auger <eric.auger@redhat.com>
+Date: Mon, 5 Apr 2021 18:39:34 +0200
+Subject: KVM: arm64: Fix KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION read
+
+From: Eric Auger <eric.auger@redhat.com>
+
+commit 53b16dd6ba5cf64ed147ac3523ec34651d553cb0 upstream.
+
+The doc says:
+"The characteristics of a specific redistributor region can
+ be read by presetting the index field in the attr data.
+ Only valid for KVM_DEV_TYPE_ARM_VGIC_V3"
+
+Unfortunately the existing code fails to read the input attr data.
+
+Fixes: 04c110932225 ("KVM: arm/arm64: Implement KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION")
+Cc: stable@vger.kernel.org # v4.17+
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210405163941.510258-3-eric.auger@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/vgic/vgic-kvm-device.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
++++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
+@@ -226,6 +226,9 @@ static int vgic_get_common_attr(struct k
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
++ if (copy_from_user(&addr, uaddr, sizeof(addr)))
++ return -EFAULT;
++
+ r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+ if (r)
+ return (r == -ENODEV) ? -ENXIO : r;
--- /dev/null
+From 85d703746154cdc6794b6654b587b0b0354c97e9 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Wed, 7 Apr 2021 18:54:16 +0100
+Subject: KVM: arm64: Fully zero the vcpu state on reset
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 85d703746154cdc6794b6654b587b0b0354c97e9 upstream.
+
+On vcpu reset, we expect all the registers to be brought back
+to their initial state, which happens to be a bunch of zeroes.
+
+However, some recent commit broke this, and is now leaving a bunch
+of registers (such as the FP state) with whatever was left by the
+guest. My bad.
+
+Zero the rest of the state (32-bit SPSRs and FPSIMD state).
+
+Cc: stable@vger.kernel.org
+Fixes: e47c2055c68e ("KVM: arm64: Make struct kvm_regs userspace-only")
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kvm/reset.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/arch/arm64/kvm/reset.c
++++ b/arch/arm64/kvm/reset.c
+@@ -242,6 +242,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu
+
+ /* Reset core registers */
+ memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
++ memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
++ vcpu->arch.ctxt.spsr_abt = 0;
++ vcpu->arch.ctxt.spsr_und = 0;
++ vcpu->arch.ctxt.spsr_irq = 0;
++ vcpu->arch.ctxt.spsr_fiq = 0;
+ vcpu_gp_regs(vcpu)->pstate = pstate;
+
+ /* Reset system registers */
--- /dev/null
+From 2ee3757424be7c1cd1d0bbfa6db29a7edd82a250 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 12 Apr 2021 15:20:48 -0700
+Subject: KVM: Destroy I/O bus devices on unregister failure _after_ sync'ing SRCU
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 2ee3757424be7c1cd1d0bbfa6db29a7edd82a250 upstream.
+
+If allocating a new instance of an I/O bus fails when unregistering a
+device, wait to destroy the device until after all readers are guaranteed
+to see the new null bus. Destroying devices before the bus is nullified
+could lead to use-after-free since readers expect the devices on their
+reference of the bus to remain valid.
+
+Fixes: f65886606c2d ("KVM: fix memory leak in kvm_io_bus_unregister_dev()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210412222050.876100-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -4487,7 +4487,13 @@ void kvm_io_bus_unregister_dev(struct kv
+ new_bus->dev_count--;
+ memcpy(new_bus->range + i, bus->range + i + 1,
+ flex_array_size(new_bus, range, new_bus->dev_count - i));
+- } else {
++ }
++
++ rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
++ synchronize_srcu_expedited(&kvm->srcu);
++
++ /* Destroy the old bus _after_ installing the (null) bus. */
++ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ for (j = 0; j < bus->dev_count; j++) {
+ if (j == i)
+@@ -4496,8 +4502,6 @@ void kvm_io_bus_unregister_dev(struct kv
+ }
+ }
+
+- rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+- synchronize_srcu_expedited(&kvm->srcu);
+ kfree(bus);
+ return;
+ }
--- /dev/null
+From a3322d5cd87fef5ec0037fd1b14068a533f9a60f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Mar 2021 17:10:45 -0800
+Subject: KVM: nSVM: Set the shadow root level to the TDP level for nested NPT
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit a3322d5cd87fef5ec0037fd1b14068a533f9a60f upstream.
+
+Override the shadow root level in the MMU context when configuring
+NPT for shadowing nested NPT. The level is always tied to the TDP level
+of the host, not whatever level the guest happens to be using.
+
+Fixes: 096586fda522 ("KVM: nSVM: Correctly set the shadow NPT root level in its MMU role")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210305011101.3597423-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4630,12 +4630,17 @@ void kvm_init_shadow_npt_mmu(struct kvm_
+ struct kvm_mmu *context = &vcpu->arch.guest_mmu;
+ union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
+
+- context->shadow_root_level = new_role.base.level;
+-
+ __kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, false, false);
+
+- if (new_role.as_u64 != context->mmu_role.as_u64)
++ if (new_role.as_u64 != context->mmu_role.as_u64) {
+ shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
++
++ /*
++ * Override the level set by the common init helper, nested TDP
++ * always uses the host's TDP configuration.
++ */
++ context->shadow_root_level = new_role.base.level;
++ }
+ }
+ EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu);
+
--- /dev/null
+From c805f5d5585ab5e0cdac6b1ccf7086eb120fb7db Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Mar 2021 17:10:57 -0800
+Subject: KVM: nVMX: Defer the MMU reload to the normal path on an EPTP switch
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c805f5d5585ab5e0cdac6b1ccf7086eb120fb7db upstream.
+
+Defer reloading the MMU after a successful EPTP switch. The VMFUNC
+instruction itself is executed in the previous EPTP context; any side
+effects, e.g. updating RIP, should occur in the old context. Practically
+speaking, this bug is benign as VMX doesn't touch the MMU when skipping
+an emulated instruction, nor does queuing a single-step #DB. No other
+post-switch side effects exist.
+
+Fixes: 41ab93727467 ("KVM: nVMX: Emulate EPTP switching for the L1 hypervisor")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210305011101.3597423-14-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -5517,16 +5517,11 @@ static int nested_vmx_eptp_switching(str
+ if (!nested_vmx_check_eptp(vcpu, new_eptp))
+ return 1;
+
+- kvm_mmu_unload(vcpu);
+ mmu->ept_ad = accessed_dirty;
+ mmu->mmu_role.base.ad_disabled = !accessed_dirty;
+ vmcs12->ept_pointer = new_eptp;
+- /*
+- * TODO: Check what's the correct approach in case
+- * mmu reload fails. Currently, we just let the next
+- * reload potentially fail
+- */
+- kvm_mmu_reload(vcpu);
++
++ kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+ }
+
+ return 0;
--- /dev/null
+From 82277eeed65eed6c6ee5b8f97bd978763eab148f Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 21 Apr 2021 19:21:25 -0700
+Subject: KVM: nVMX: Truncate base/index GPR value on address calc in !64-bit
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 82277eeed65eed6c6ee5b8f97bd978763eab148f upstream.
+
+Drop bits 63:32 of the base and/or index GPRs when calculating the
+effective address of a VMX instruction memory operand. Outside of 64-bit
+mode, memory encodings are strictly limited to E*X and below.
+
+Fixes: 064aea774768 ("KVM: nVMX: Decoding memory operands of VMX instructions")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210422022128.3464144-7-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4639,9 +4639,9 @@ int get_vmx_mem_address(struct kvm_vcpu
+ else if (addr_size == 0)
+ off = (gva_t)sign_extend64(off, 15);
+ if (base_is_valid)
+- off += kvm_register_read(vcpu, base_reg);
++ off += kvm_register_readl(vcpu, base_reg);
+ if (index_is_valid)
+- off += kvm_register_read(vcpu, index_reg) << scaling;
++ off += kvm_register_readl(vcpu, index_reg) << scaling;
+ vmx_get_segment(vcpu, &s, seg_reg);
+
+ /*
--- /dev/null
+From ee050a577523dfd5fac95e6cc182ebe0293ead59 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 21 Apr 2021 19:21:24 -0700
+Subject: KVM: nVMX: Truncate bits 63:32 of VMCS field on nested check in !64-bit
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit ee050a577523dfd5fac95e6cc182ebe0293ead59 upstream.
+
+Drop bits 63:32 of the VMCS field encoding when checking for a nested
+VM-Exit on VMREAD/VMWRITE in !64-bit mode. VMREAD and VMWRITE always
+use 32-bit operands outside of 64-bit mode.
+
+The actual emulation of VMREAD/VMWRITE does the right thing; this bug is
+purely limited to incorrectly causing a nested VM-Exit if a GPR happens
+to have bits 63:32 set outside of 64-bit mode.
+
+Fixes: a7cde481b6e8 ("KVM: nVMX: Do not forward VMREAD/VMWRITE VMExits to L1 if required so by vmcs12 vmread/vmwrite bitmaps")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210422022128.3464144-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/vmx/nested.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -5750,7 +5750,7 @@ static bool nested_vmx_exit_handled_vmcs
+
+ /* Decode instruction info and find the field to access */
+ vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+- field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
++ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
+
+ /* Out-of-range fields always cause a VM exit from L2 to L1 */
+ if (field >> 15)
--- /dev/null
+From bf1e15a82e3b74ee86bb119d6038b41e1ed2b319 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 20 Apr 2021 04:13:03 -0400
+Subject: KVM: selftests: Always run vCPU thread with blocked SIG_IPI
+
+From: Paolo Bonzini <pbonzini@redhat.com>
+
+commit bf1e15a82e3b74ee86bb119d6038b41e1ed2b319 upstream.
+
+The main thread could start to send SIG_IPI at any time, even before the
+signal is blocked on the vcpu thread. Therefore, start the vcpu thread with
+the signal blocked.
+
+Without this patch, on very busy cores the dirty_log_test could fail directly
+on receiving a SIGUSR1 without a handler (when the vcpu thread runs far slower
+than the main thread).
+
+Reported-by: Peter Xu <peterx@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/kvm/dirty_log_test.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/tools/testing/selftests/kvm/dirty_log_test.c
++++ b/tools/testing/selftests/kvm/dirty_log_test.c
+@@ -527,9 +527,8 @@ static void *vcpu_worker(void *data)
+ */
+ sigmask->len = 8;
+ pthread_sigmask(0, NULL, sigset);
++ sigdelset(sigset, SIG_IPI);
+ vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+- sigaddset(sigset, SIG_IPI);
+- pthread_sigmask(SIG_BLOCK, sigset, NULL);
+
+ sigemptyset(sigset);
+ sigaddset(sigset, SIG_IPI);
+@@ -858,6 +857,7 @@ int main(int argc, char *argv[])
+ .interval = TEST_HOST_LOOP_INTERVAL,
+ };
+ int opt, i;
++ sigset_t sigset;
+
+ sem_init(&sem_vcpu_stop, 0, 0);
+ sem_init(&sem_vcpu_cont, 0, 0);
+@@ -916,6 +916,11 @@ int main(int argc, char *argv[])
+
+ srandom(time(0));
+
++ /* Ensure that vCPU threads start with SIG_IPI blocked. */
++ sigemptyset(&sigset);
++ sigaddset(&sigset, SIG_IPI);
++ pthread_sigmask(SIG_BLOCK, &sigset, NULL);
++
+ if (host_log_mode_option == LOG_MODE_ALL) {
+ /* Run each log mode */
+ for (i = 0; i < LOG_MODE_NUM; i++) {
--- /dev/null
+From 016ff1a442d9a8f36dcb3beca0bcdfc35e281e18 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Sat, 17 Apr 2021 10:36:01 -0400
+Subject: KVM: selftests: Sync data verify of dirty logging with guest sync
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 016ff1a442d9a8f36dcb3beca0bcdfc35e281e18 upstream.
+
+This fixes a bug that can trigger with e.g. "taskset -c 0 ./dirty_log_test" or
+when the testing host is very busy.
+
+A similar attempt was previously made in [1] but it is not enough; the reason
+is stated in the reply [2].
+
+As a summary (partly quoting from [2]):
+
+The problem is, I think, that one guest memory write operation (in this
+specific test) contains a few micro-steps when the page is under KVM dirty
+tracking (here I'm only considering write-protect rather than PML, but PML
+should be similar, at least when the log buffer is full):
+
+ (1) Guest read 'iteration' number into register, prepare to write, page fault
+ (2) Set dirty bit in either dirty bitmap or dirty ring
+ (3) Return to guest, data written
+
+When we verify the data, we assume that all these steps are "atomic": if (1)
+happened for this page, then (2) & (3) must have happened as well. We had a
+trick to work around the "un-atomicity" of the above three steps, as the
+previous version of this patch wanted to fix the atomicity of steps (2)+(3) by
+explicitly letting the main thread wait for at least one vmenter of the vcpu
+thread, which should work. However, what I overlooked is that we still have a
+race because the sequence can be interrupted between (1) and (2).
+
+One example call trace where this can happen: we read an old iteration, then
+got interrupted before even setting the dirty bit and flushing the data:
+
+ __schedule+1742
+ __cond_resched+52
+ __get_user_pages+530
+ get_user_pages_unlocked+197
+ hva_to_pfn+206
+ try_async_pf+132
+ direct_page_fault+320
+ kvm_mmu_page_fault+103
+ vmx_handle_exit+288
+ vcpu_enter_guest+2460
+ kvm_arch_vcpu_ioctl_run+325
+ kvm_vcpu_ioctl+526
+ __x64_sys_ioctl+131
+ do_syscall_64+51
+ entry_SYSCALL_64_after_hwframe+68
+
+It means the iteration number cached in a vcpu register can be very old by the
+time the dirty bit is set and the data is flushed.
+
+So far I don't see an easy way to guarantee the atomicity of all steps 1-3
+other than syncing at the GUEST_SYNC() point of the guest code when we verify
+the dirty bits, which is what this patch does.
+
+[1] https://lore.kernel.org/lkml/20210413213641.23742-1-peterx@redhat.com/
+[2] https://lore.kernel.org/lkml/20210417140956.GV4440@xz-x1/
+
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Andrew Jones <drjones@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20210417143602.215059-2-peterx@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/kvm/dirty_log_test.c | 60 ++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 10 deletions(-)
+
+--- a/tools/testing/selftests/kvm/dirty_log_test.c
++++ b/tools/testing/selftests/kvm/dirty_log_test.c
+@@ -17,6 +17,7 @@
+ #include <linux/bitmap.h>
+ #include <linux/bitops.h>
+ #include <asm/barrier.h>
++#include <linux/atomic.h>
+
+ #include "kvm_util.h"
+ #include "test_util.h"
+@@ -137,12 +138,20 @@ static uint64_t host_clear_count;
+ static uint64_t host_track_next_count;
+
+ /* Whether dirty ring reset is requested, or finished */
+-static sem_t dirty_ring_vcpu_stop;
+-static sem_t dirty_ring_vcpu_cont;
++static sem_t sem_vcpu_stop;
++static sem_t sem_vcpu_cont;
++/*
++ * This is only set by main thread, and only cleared by vcpu thread. It is
++ * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
++ * is the only place that we'll guarantee both "dirty bit" and "dirty data"
++ * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
++ * after setting dirty bit but before the data is written.
++ */
++static atomic_t vcpu_sync_stop_requested;
+ /*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event. It should only be read until a sem_wait() of
+- * dirty_ring_vcpu_stop and before vcpu continues to run.
++ * sem_vcpu_stop and before vcpu continues to run.
+ */
+ static bool dirty_ring_vcpu_ring_full;
+ /*
+@@ -234,6 +243,17 @@ static void clear_log_collect_dirty_page
+ kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+ }
+
++/* Should only be called after a GUEST_SYNC */
++static void vcpu_handle_sync_stop(void)
++{
++ if (atomic_read(&vcpu_sync_stop_requested)) {
++ /* It means main thread is sleeping waiting */
++ atomic_set(&vcpu_sync_stop_requested, false);
++ sem_post(&sem_vcpu_stop);
++ sem_wait_until(&sem_vcpu_cont);
++ }
++}
++
+ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+ {
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+@@ -244,6 +264,8 @@ static void default_after_vcpu_run(struc
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
++
++ vcpu_handle_sync_stop();
+ }
+
+ static bool dirty_ring_supported(void)
+@@ -301,13 +323,13 @@ static void dirty_ring_wait_vcpu(void)
+ {
+ /* This makes sure that hardware PML cache flushed */
+ vcpu_kick();
+- sem_wait_until(&dirty_ring_vcpu_stop);
++ sem_wait_until(&sem_vcpu_stop);
+ }
+
+ static void dirty_ring_continue_vcpu(void)
+ {
+ pr_info("Notifying vcpu to continue\n");
+- sem_post(&dirty_ring_vcpu_cont);
++ sem_post(&sem_vcpu_cont);
+ }
+
+ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+@@ -361,11 +383,11 @@ static void dirty_ring_after_vcpu_run(st
+ /* Update the flag first before pause */
+ WRITE_ONCE(dirty_ring_vcpu_ring_full,
+ run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+- sem_post(&dirty_ring_vcpu_stop);
++ sem_post(&sem_vcpu_stop);
+ pr_info("vcpu stops because %s...\n",
+ dirty_ring_vcpu_ring_full ?
+ "dirty ring is full" : "vcpu is kicked out");
+- sem_wait_until(&dirty_ring_vcpu_cont);
++ sem_wait_until(&sem_vcpu_cont);
+ pr_info("vcpu continues now.\n");
+ } else {
+ TEST_ASSERT(false, "Invalid guest sync status: "
+@@ -377,7 +399,7 @@ static void dirty_ring_after_vcpu_run(st
+ static void dirty_ring_before_vcpu_join(void)
+ {
+ /* Kick another round of vcpu just to make sure it will quit */
+- sem_post(&dirty_ring_vcpu_cont);
++ sem_post(&sem_vcpu_cont);
+ }
+
+ struct log_mode {
+@@ -768,7 +790,25 @@ static void run_test(enum vm_guest_mode
+ usleep(p->interval * 1000);
+ log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+ bmap, host_num_pages);
++
++ /*
++ * See vcpu_sync_stop_requested definition for details on why
++ * we need to stop vcpu when verify data.
++ */
++ atomic_set(&vcpu_sync_stop_requested, true);
++ sem_wait_until(&sem_vcpu_stop);
++ /*
++ * NOTE: for dirty ring, it's possible that we didn't stop at
++ * GUEST_SYNC but instead we stopped because ring is full;
++ * that's okay too because ring full means we're only missing
++ * the flush of the last page, and since we handle the last
++ * page specially verification will succeed anyway.
++ */
++ assert(host_log_mode == LOG_MODE_DIRTY_RING ||
++ atomic_read(&vcpu_sync_stop_requested) == false);
+ vm_dirty_log_verify(mode, bmap);
++ sem_post(&sem_vcpu_cont);
++
+ iteration++;
+ sync_global_to_guest(vm, iteration);
+ }
+@@ -819,8 +859,8 @@ int main(int argc, char *argv[])
+ };
+ int opt, i;
+
+- sem_init(&dirty_ring_vcpu_stop, 0, 0);
+- sem_init(&dirty_ring_vcpu_cont, 0, 0);
++ sem_init(&sem_vcpu_stop, 0, 0);
++ sem_init(&sem_vcpu_cont, 0, 0);
+
+ guest_modes_append_default();
+
--- /dev/null
+From 5d3c4c79384af06e3c8e25b7770b6247496b4417 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Mon, 12 Apr 2021 15:20:49 -0700
+Subject: KVM: Stop looking for coalesced MMIO zones if the bus is destroyed
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 5d3c4c79384af06e3c8e25b7770b6247496b4417 upstream.
+
+Abort the walk of coalesced MMIO zones if kvm_io_bus_unregister_dev()
+fails to allocate memory for the new instance of the bus. If it can't
+instantiate a new bus, unregister_dev() destroys all devices _except_ the
+target device. But, it doesn't tell the caller that it obliterated the
+bus and invoked the destructor for all devices that were on the bus. In
+the coalesced MMIO case, this can result in a deleted list entry
+dereference due to attempting to continue iterating on coalesced_zones
+after future entries (in the walk) have been deleted.
+
+Opportunistically add curly braces to the for-loop, which encompasses
+many lines but sneaks by without braces due to the guts being a single
+if statement.
+
+Fixes: f65886606c2d ("KVM: fix memory leak in kvm_io_bus_unregister_dev()")
+Cc: stable@vger.kernel.org
+Reported-by: Hao Sun <sunhao.th@gmail.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210412222050.876100-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/kvm_host.h | 4 ++--
+ virt/kvm/coalesced_mmio.c | 19 +++++++++++++++++--
+ virt/kvm/kvm_main.c | 10 +++++-----
+ 3 files changed, 24 insertions(+), 9 deletions(-)
+
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -191,8 +191,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcp
+ int len, void *val);
+ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev);
+-void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+- struct kvm_io_device *dev);
++int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
++ struct kvm_io_device *dev);
+ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ gpa_t addr);
+
+--- a/virt/kvm/coalesced_mmio.c
++++ b/virt/kvm/coalesced_mmio.c
+@@ -174,21 +174,36 @@ int kvm_vm_ioctl_unregister_coalesced_mm
+ struct kvm_coalesced_mmio_zone *zone)
+ {
+ struct kvm_coalesced_mmio_dev *dev, *tmp;
++ int r;
+
+ if (zone->pio != 1 && zone->pio != 0)
+ return -EINVAL;
+
+ mutex_lock(&kvm->slots_lock);
+
+- list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
++ list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) {
+ if (zone->pio == dev->zone.pio &&
+ coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
+- kvm_io_bus_unregister_dev(kvm,
++ r = kvm_io_bus_unregister_dev(kvm,
+ zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
+ kvm_iodevice_destructor(&dev->dev);
++
++ /*
++ * On failure, unregister destroys all devices on the
++ * bus _except_ the target device, i.e. coalesced_zones
++ * has been modified. No need to restart the walk as
++ * there aren't any zones left.
++ */
++ if (r)
++ break;
+ }
++ }
+
+ mutex_unlock(&kvm->slots_lock);
+
++ /*
++ * Ignore the result of kvm_io_bus_unregister_dev(), from userspace's
++ * perspective, the coalesced MMIO is most definitely unregistered.
++ */
+ return 0;
+ }
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -4462,15 +4462,15 @@ int kvm_io_bus_register_dev(struct kvm *
+ }
+
+ /* Caller must hold slots_lock. */
+-void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+- struct kvm_io_device *dev)
++int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
++ struct kvm_io_device *dev)
+ {
+ int i, j;
+ struct kvm_io_bus *new_bus, *bus;
+
+ bus = kvm_get_bus(kvm, bus_idx);
+ if (!bus)
+- return;
++ return 0;
+
+ for (i = 0; i < bus->dev_count; i++)
+ if (bus->range[i].dev == dev) {
+@@ -4478,7 +4478,7 @@ void kvm_io_bus_unregister_dev(struct kv
+ }
+
+ if (i == bus->dev_count)
+- return;
++ return 0;
+
+ new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
+ GFP_KERNEL_ACCOUNT);
+@@ -4503,7 +4503,7 @@ void kvm_io_bus_unregister_dev(struct kv
+ }
+
+ kfree(bus);
+- return;
++ return new_bus ? 0 : -ENOMEM;
+ }
+
+ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
--- /dev/null
+From 8727906fde6ea665b52e68ddc58833772537f40a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 30 Mar 2021 20:19:36 -0700
+Subject: KVM: SVM: Do not allow SEV/SEV-ES initialization after vCPUs are created
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 8727906fde6ea665b52e68ddc58833772537f40a upstream.
+
+Reject KVM_SEV_INIT and KVM_SEV_ES_INIT if they are attempted after one
+or more vCPUs have been created. KVM assumes a VM is tagged SEV/SEV-ES
+prior to vCPU creation, e.g. init_vmcb() needs to mark the VMCB as SEV
+enabled, and svm_create_vcpu() needs to allocate the VMSA. At best,
+creating vCPUs before SEV/SEV-ES init will lead to unexpected errors
+and/or behavior, and at worst it will crash the host, e.g.
+sev_launch_update_vmsa() will dereference a null svm->vmsa pointer.
+
+Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command")
+Fixes: ad73109ae7ec ("KVM: SVM: Provide support to launch and run an SEV-ES guest")
+Cc: stable@vger.kernel.org
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210331031936.2495277-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -181,6 +181,9 @@ static int sev_guest_init(struct kvm *kv
+ bool es_active = argp->id == KVM_SEV_ES_INIT;
+ int asid, ret;
+
++ if (kvm->created_vcpus)
++ return -EINVAL;
++
+ ret = -EBUSY;
+ if (unlikely(sev->active))
+ return ret;
--- /dev/null
+From 9fa1521daafb58d878d03d75f6863a11312fae22 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 30 Mar 2021 20:19:35 -0700
+Subject: KVM: SVM: Do not set sev->es_active until KVM_SEV_ES_INIT completes
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 9fa1521daafb58d878d03d75f6863a11312fae22 upstream.
+
+Set sev->es_active only after the guts of KVM_SEV_ES_INIT succeeds. If
+the command fails, e.g. because SEV is already active or there are no
+available ASIDs, then es_active will be left set even though the VM is
+not fully SEV-ES capable.
+
+Refactor the code so that "es_active" is passed on the stack instead of
+being prematurely shoved into sev_info, both to avoid having to unwind
+sev_info and so that it's more obvious what actually consumes es_active
+in sev_guest_init() and its helpers.
+
+Fixes: ad73109ae7ec ("KVM: SVM: Provide support to launch and run an SEV-ES guest")
+Cc: stable@vger.kernel.org
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210331031936.2495277-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 29 ++++++++++++-----------------
+ 1 file changed, 12 insertions(+), 17 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -86,7 +86,7 @@ static bool __sev_recycle_asids(int min_
+ return true;
+ }
+
+-static int sev_asid_new(struct kvm_sev_info *sev)
++static int sev_asid_new(bool es_active)
+ {
+ int pos, min_asid, max_asid;
+ bool retry = true;
+@@ -97,8 +97,8 @@ static int sev_asid_new(struct kvm_sev_i
+ * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+ * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+ */
+- min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+- max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
++ min_asid = es_active ? 0 : min_sev_asid - 1;
++ max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
+ again:
+ pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
+ if (pos >= max_asid) {
+@@ -178,13 +178,14 @@ static void sev_unbind_asid(struct kvm *
+ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
++ bool es_active = argp->id == KVM_SEV_ES_INIT;
+ int asid, ret;
+
+ ret = -EBUSY;
+ if (unlikely(sev->active))
+ return ret;
+
+- asid = sev_asid_new(sev);
++ asid = sev_asid_new(es_active);
+ if (asid < 0)
+ return ret;
+
+@@ -193,6 +194,7 @@ static int sev_guest_init(struct kvm *kv
+ goto e_free;
+
+ sev->active = true;
++ sev->es_active = es_active;
+ sev->asid = asid;
+ INIT_LIST_HEAD(&sev->regions_list);
+
+@@ -203,16 +205,6 @@ e_free:
+ return ret;
+ }
+
+-static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+-{
+- if (!sev_es)
+- return -ENOTTY;
+-
+- to_kvm_svm(kvm)->sev_info.es_active = true;
+-
+- return sev_guest_init(kvm, argp);
+-}
+-
+ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+ {
+ struct sev_data_activate *data;
+@@ -1059,12 +1051,15 @@ int svm_mem_enc_op(struct kvm *kvm, void
+ mutex_lock(&kvm->lock);
+
+ switch (sev_cmd.id) {
++ case KVM_SEV_ES_INIT:
++ if (!sev_es) {
++ r = -ENOTTY;
++ goto out;
++ }
++ fallthrough;
+ case KVM_SEV_INIT:
+ r = sev_guest_init(kvm, &sev_cmd);
+ break;
+- case KVM_SEV_ES_INIT:
+- r = sev_es_guest_init(kvm, &sev_cmd);
+- break;
+ case KVM_SEV_LAUNCH_START:
+ r = sev_launch_start(kvm, &sev_cmd);
+ break;
--- /dev/null
+From 6d1b867d045699d6ce0dfa0ef35d1b87dd36db56 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Mar 2021 17:10:56 -0800
+Subject: KVM: SVM: Don't strip the C-bit from CR2 on #PF interception
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 6d1b867d045699d6ce0dfa0ef35d1b87dd36db56 upstream.
+
+Don't strip the C-bit from the faulting address on an intercepted #PF,
+the address is a virtual address, not a physical address.
+
+Fixes: 0ede79e13224 ("KVM: SVM: Clear C-bit from the page fault address")
+Cc: stable@vger.kernel.org
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210305011101.3597423-13-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1888,7 +1888,7 @@ static void svm_set_dr7(struct kvm_vcpu
+
+ static int pf_interception(struct vcpu_svm *svm)
+ {
+- u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
++ u64 fault_address = svm->vmcb->control.exit_info_2;
+ u64 error_code = svm->vmcb->control.exit_info_1;
+
+ return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
--- /dev/null
+From 6f2b296aa6432d8274e258cc3220047ca04f5de0 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 23 Apr 2021 15:34:01 -0700
+Subject: KVM: SVM: Inject #GP on guest MSR_TSC_AUX accesses if RDTSCP unsupported
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 6f2b296aa6432d8274e258cc3220047ca04f5de0 upstream.
+
+Inject #GP on guest accesses to MSR_TSC_AUX if RDTSCP is unsupported in
+the guest's CPUID model.
+
+Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210423223404.3860547-2-seanjc@google.com>
+Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/svm.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -2651,6 +2651,9 @@ static int svm_get_msr(struct kvm_vcpu *
+ case MSR_TSC_AUX:
+ if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+ return 1;
++ if (!msr_info->host_initiated &&
++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
++ return 1;
+ msr_info->data = svm->tsc_aux;
+ break;
+ /*
+@@ -2859,6 +2862,10 @@ static int svm_set_msr(struct kvm_vcpu *
+ if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+ return 1;
+
++ if (!msr->host_initiated &&
++ !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
++ return 1;
++
+ /*
+ * This is rare, so we update the MSR here instead of using
+ * direct_access_msrs. Doing that would require a rdmsr in
--- /dev/null
+From c36b16d29f3af5f32fc1b2a3401bf48f71cabee1 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Tue, 30 Mar 2021 20:19:34 -0700
+Subject: KVM: SVM: Use online_vcpus, not created_vcpus, to iterate over vCPUs
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c36b16d29f3af5f32fc1b2a3401bf48f71cabee1 upstream.
+
+Use the kvm_for_each_vcpu() helper to iterate over vCPUs when encrypting
+VMSAs for SEV, which effectively switches to using online_vcpus instead of
+created_vcpus. This fixes a possible null-pointer dereference as
+created_vcpus does not guarantee a vCPU exists, since it is updated at
+the very beginning of KVM_CREATE_VCPU. created_vcpus exists to allow the
+bulk of vCPU creation to run in parallel, while still correctly
+restricting the maximum number of vCPUs.
+
+Fixes: ad73109ae7ec ("KVM: SVM: Provide support to launch and run an SEV-ES guest")
+Cc: stable@vger.kernel.org
+Cc: Brijesh Singh <brijesh.singh@amd.com>
+Cc: Tom Lendacky <thomas.lendacky@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210331031936.2495277-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/svm/sev.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -563,6 +563,7 @@ static int sev_launch_update_vmsa(struct
+ {
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ struct sev_data_launch_update_vmsa *vmsa;
++ struct kvm_vcpu *vcpu;
+ int i, ret;
+
+ if (!sev_es_guest(kvm))
+@@ -572,8 +573,8 @@ static int sev_launch_update_vmsa(struct
+ if (!vmsa)
+ return -ENOMEM;
+
+- for (i = 0; i < kvm->created_vcpus; i++) {
+- struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
++ kvm_for_each_vcpu(i, vcpu, kvm) {
++ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /* Perform some pre-encryption checks against the VMSA */
+ ret = sev_es_sync_vmsa(svm);
--- /dev/null
+From b86bb11e3a79ac0db9a6786b1fe80f74321cb076 Mon Sep 17 00:00:00 2001
+From: Wanpeng Li <wanpengli@tencent.com>
+Date: Thu, 22 Apr 2021 16:34:19 +0800
+Subject: KVM: X86: Fix failure to boost kernel lock holder candidate in SEV-ES guests
+
+From: Wanpeng Li <wanpengli@tencent.com>
+
+commit b86bb11e3a79ac0db9a6786b1fe80f74321cb076 upstream.
+
+Commit f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under
+SEV-ES") prevents the hypervisor from accessing guest register state when the
+guest is running under SEV-ES. The initial value of vcpu->arch.guest_state_protected
+is false, and after this commit it is no longer updated in the preemption
+notifiers, which means that a vCPU holding a kernel spinlock will always be
+skipped when choosing a candidate to boost. Let's fix it by always treating a
+preempted vCPU as being in guest kernel mode; a false positive is better than
+skipping the boost completely.
+
+Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES")
+Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
+Message-Id: <1619080459-30032-1-git-send-email-wanpengli@tencent.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/x86.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10888,6 +10888,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcp
+
+ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+ {
++ if (vcpu->arch.guest_state_protected)
++ return true;
++
+ return vcpu->arch.preempted_in_kernel;
+ }
+
--- /dev/null
+From 04d45551a1eefbea42655da52f56e846c0af721a Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Thu, 4 Mar 2021 17:10:46 -0800
+Subject: KVM: x86/mmu: Alloc page for PDPTEs when shadowing 32-bit NPT with 64-bit
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit 04d45551a1eefbea42655da52f56e846c0af721a upstream.
+
+Allocate the so-called pae_root page on-demand, along with the lm_root
+page, when shadowing 32-bit NPT with 64-bit NPT, i.e. when running a
+32-bit L1. KVM currently only allocates the page when NPT is disabled,
+or when L0 is 32-bit (using PAE paging).
+
+Note, there is an existing memory leak involving the MMU roots, as KVM
+fails to free the PAE roots on failure. This will be addressed in a
+future commit.
+
+Fixes: ee6268ba3a68 ("KVM: x86: Skip pae_root shadow allocation if tdp enabled")
+Fixes: b6b80c78af83 ("KVM: x86/mmu: Allocate PAE root array when using SVM's 32-bit NPT")
+Cc: stable@vger.kernel.org
+Reviewed-by: Ben Gardon <bgardon@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210305011101.3597423-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 44 +++++++++++++++++++++++++++++---------------
+ 1 file changed, 29 insertions(+), 15 deletions(-)
+
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -3203,14 +3203,14 @@ void kvm_mmu_free_roots(struct kvm_vcpu
+ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+ (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+ mmu_free_root_page(kvm, &mmu->root_hpa, &invalid_list);
+- } else {
++ } else if (mmu->pae_root) {
+ for (i = 0; i < 4; ++i)
+ if (mmu->pae_root[i] != 0)
+ mmu_free_root_page(kvm,
+ &mmu->pae_root[i],
+ &invalid_list);
+- mmu->root_hpa = INVALID_PAGE;
+ }
++ mmu->root_hpa = INVALID_PAGE;
+ mmu->root_pgd = 0;
+ }
+
+@@ -3322,9 +3322,23 @@ static int mmu_alloc_shadow_roots(struct
+ * the shadow page table may be a PAE or a long mode page table.
+ */
+ pm_mask = PT_PRESENT_MASK;
+- if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
++ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+ pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+
++ /*
++ * Allocate the page for the PDPTEs when shadowing 32-bit NPT
++ * with 64-bit only when needed. Unlike 32-bit NPT, it doesn't
++ * need to be in low mem. See also lm_root below.
++ */
++ if (!vcpu->arch.mmu->pae_root) {
++ WARN_ON_ONCE(!tdp_enabled);
++
++ vcpu->arch.mmu->pae_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
++ if (!vcpu->arch.mmu->pae_root)
++ return -ENOMEM;
++ }
++ }
++
+ for (i = 0; i < 4; ++i) {
+ MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i]));
+ if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
+@@ -3347,21 +3361,19 @@ static int mmu_alloc_shadow_roots(struct
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
+
+ /*
+- * If we shadow a 32 bit page table with a long mode page
+- * table we enter this path.
++ * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
++ * tables are allocated and initialized at MMU creation as there is no
++ * equivalent level in the guest's NPT to shadow. Allocate the tables
++ * on demand, as running a 32-bit L1 VMM is very rare. The PDP is
++ * handled above (to share logic with PAE), deal with the PML4 here.
+ */
+ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+ if (vcpu->arch.mmu->lm_root == NULL) {
+- /*
+- * The additional page necessary for this is only
+- * allocated on demand.
+- */
+-
+ u64 *lm_root;
+
+ lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+- if (lm_root == NULL)
+- return 1;
++ if (!lm_root)
++ return -ENOMEM;
+
+ lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
+
+@@ -5310,9 +5322,11 @@ static int __kvm_mmu_create(struct kvm_v
+ * while the PDP table is a per-vCPU construct that's allocated at MMU
+ * creation. When emulating 32-bit mode, cr3 is only 32 bits even on
+ * x86_64. Therefore we need to allocate the PDP table in the first
+- * 4GB of memory, which happens to fit the DMA32 zone. Except for
+- * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
+- * skip allocating the PDP table.
++ * 4GB of memory, which happens to fit the DMA32 zone. TDP paging
++ * generally doesn't use PAE paging and can skip allocating the PDP
++ * table. The main exception, handled here, is SVM's 32-bit NPT. The
++ * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
++ * KVM; that horror is handled on-demand by mmu_alloc_shadow_roots().
+ */
+ if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+ return 0;
--- /dev/null
+From d0fe7b6404408835ed60232cb3bf28324b2f95db Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Wed, 21 Apr 2021 19:21:20 -0700
+Subject: KVM: x86: Remove emulator's broken checks on CR0/CR3/CR4 loads
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit d0fe7b6404408835ed60232cb3bf28324b2f95db upstream.
+
+Remove the emulator's checks for illegal CR0, CR3, and CR4 values, as
+the checks are redundant, outdated, and in the case of SEV's C-bit,
+broken. The emulator manually calculates MAXPHYADDR from CPUID and
+neglects to mask off the C-bit. For all other checks, kvm_set_cr*() are
+a superset of the emulator checks, e.g. see CR4.LA57.
+
+Fixes: a780a3ea6282 ("KVM: X86: Fix reserved bits check for MOV to CR3")
+Cc: Babu Moger <babu.moger@amd.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20210422022128.3464144-2-seanjc@google.com>
+Cc: stable@vger.kernel.org
+[Unify check_cr_read and check_cr_write. - Paolo]
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/emulate.c | 80 +------------------------------------------------
+ 1 file changed, 3 insertions(+), 77 deletions(-)
+
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -4220,7 +4220,7 @@ static bool valid_cr(int nr)
+ }
+ }
+
+-static int check_cr_read(struct x86_emulate_ctxt *ctxt)
++static int check_cr_access(struct x86_emulate_ctxt *ctxt)
+ {
+ if (!valid_cr(ctxt->modrm_reg))
+ return emulate_ud(ctxt);
+@@ -4228,80 +4228,6 @@ static int check_cr_read(struct x86_emul
+ return X86EMUL_CONTINUE;
+ }
+
+-static int check_cr_write(struct x86_emulate_ctxt *ctxt)
+-{
+- u64 new_val = ctxt->src.val64;
+- int cr = ctxt->modrm_reg;
+- u64 efer = 0;
+-
+- static u64 cr_reserved_bits[] = {
+- 0xffffffff00000000ULL,
+- 0, 0, 0, /* CR3 checked later */
+- CR4_RESERVED_BITS,
+- 0, 0, 0,
+- CR8_RESERVED_BITS,
+- };
+-
+- if (!valid_cr(cr))
+- return emulate_ud(ctxt);
+-
+- if (new_val & cr_reserved_bits[cr])
+- return emulate_gp(ctxt, 0);
+-
+- switch (cr) {
+- case 0: {
+- u64 cr4;
+- if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
+- ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
+- return emulate_gp(ctxt, 0);
+-
+- cr4 = ctxt->ops->get_cr(ctxt, 4);
+- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+-
+- if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
+- !(cr4 & X86_CR4_PAE))
+- return emulate_gp(ctxt, 0);
+-
+- break;
+- }
+- case 3: {
+- u64 rsvd = 0;
+-
+- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+- if (efer & EFER_LMA) {
+- u64 maxphyaddr;
+- u32 eax, ebx, ecx, edx;
+-
+- eax = 0x80000008;
+- ecx = 0;
+- if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+- &edx, true))
+- maxphyaddr = eax & 0xff;
+- else
+- maxphyaddr = 36;
+- rsvd = rsvd_bits(maxphyaddr, 63);
+- if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
+- rsvd &= ~X86_CR3_PCID_NOFLUSH;
+- }
+-
+- if (new_val & rsvd)
+- return emulate_gp(ctxt, 0);
+-
+- break;
+- }
+- case 4: {
+- ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+-
+- if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
+- return emulate_gp(ctxt, 0);
+-
+- break;
+- }
+- }
+-
+- return X86EMUL_CONTINUE;
+-}
+-
+ static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
+ {
+ unsigned long dr7;
+@@ -4841,10 +4767,10 @@ static const struct opcode twobyte_table
+ D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
+ D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
+ /* 0x20 - 0x2F */
+- DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
++ DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
+ DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
+ IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
+- check_cr_write),
++ check_cr_access),
+ IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
+ check_dr_write),
+ N, N, N, N,
kvm-s390-vsie-fix-mvpg-handling-for-prefixing-and-mso.patch
kvm-s390-split-kvm_s390_real_to_abs.patch
kvm-s390-extend-kvm_s390_shadow_fault-to-return-entry-pointer.patch
+kvm-x86-mmu-alloc-page-for-pdptes-when-shadowing-32-bit-npt-with-64-bit.patch
+kvm-x86-fix-failure-to-boost-kernel-lock-holder-candidate-in-sev-es-guests.patch
+kvm-x86-remove-emulator-s-broken-checks-on-cr0-cr3-cr4-loads.patch
+kvm-nsvm-set-the-shadow-root-level-to-the-tdp-level-for-nested-npt.patch
+kvm-svm-don-t-strip-the-c-bit-from-cr2-on-pf-interception.patch
+kvm-svm-use-online_vcpus-not-created_vcpus-to-iterate-over-vcpus.patch
+kvm-svm-do-not-set-sev-es_active-until-kvm_sev_es_init-completes.patch
+kvm-svm-do-not-allow-sev-sev-es-initialization-after-vcpus-are-created.patch
+kvm-svm-inject-gp-on-guest-msr_tsc_aux-accesses-if-rdtscp-unsupported.patch
+kvm-nvmx-defer-the-mmu-reload-to-the-normal-path-on-an-eptp-switch.patch
+kvm-nvmx-truncate-bits-63-32-of-vmcs-field-on-nested-check-in-64-bit.patch
+kvm-nvmx-truncate-base-index-gpr-value-on-address-calc-in-64-bit.patch
+kvm-arm-arm64-fix-kvm_vgic_v3_addr_type_redist-read.patch
+kvm-destroy-i-o-bus-devices-on-unregister-failure-_after_-sync-ing-srcu.patch
+kvm-stop-looking-for-coalesced-mmio-zones-if-the-bus-is-destroyed.patch
+kvm-arm64-fully-zero-the-vcpu-state-on-reset.patch
+kvm-arm64-fix-kvm_vgic_v3_addr_type_redist_region-read.patch
+kvm-selftests-sync-data-verify-of-dirty-logging-with-guest-sync.patch
+kvm-selftests-always-run-vcpu-thread-with-blocked-sig_ipi.patch