--- /dev/null
+From b321c31c9b7b309dcde5e8854b741c8e6a9a05f0 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Thu, 13 Jul 2023 08:06:57 +0100
+Subject: KVM: arm64: vgic-v4: Make the doorbell request robust w.r.t preemption
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit b321c31c9b7b309dcde5e8854b741c8e6a9a05f0 upstream.
+
+Xiang reports that VMs occasionally fail to boot on GICv4.1 systems when
+running a preemptible kernel, as it is possible that a vCPU is blocked
+without requesting a doorbell interrupt.
+
+The issue is that any preemption that occurs between vgic_v4_put() and
+schedule() on the block path will mark the vPE as nonresident and *not*
+request a doorbell irq. This occurs because when the vcpu thread is
+resumed on its way to block, vcpu_load() will make the vPE resident
+again. Once the vcpu actually blocks, we don't request a doorbell
+anymore, and the vcpu won't be woken up on interrupt delivery.
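+
+To illustrate, the failing sequence looks roughly like this (call chain
+simplified; the vgic_v4_load()/vgic_v4_put() calls are reached through
+the vcpu load/put and blocking hooks):
+
+    kvm_arch_vcpu_blocking()
+        vgic_v4_put(vcpu, true)       // vPE nonresident, doorbell requested
+    <preempted and rescheduled>
+        kvm_arch_vcpu_load()
+            vgic_v4_load(vcpu)        // vPE resident again, doorbell gone
+    schedule()                        // the vcpu actually blocks
+        kvm_arch_vcpu_put()
+            vgic_v4_put(vcpu, false)  // vPE nonresident, no doorbell this time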
+
+Fix it by tracking that we're entering WFI, and keying the doorbell
+request on that flag. This allows us not to make the vPE resident
+when going through a preempt/schedule cycle, meaning we don't lose
+any state.
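+
+With the change in place, the same preemption window is harmless (sketch
+based on the hunks below):
+
+    kvm_arch_vcpu_blocking()
+        vcpu->arch.flags |= KVM_ARM64_VCPU_IN_WFI;
+        vgic_v4_put(vcpu)             // doorbell requested, as IN_WFI is set
+    <preempted and rescheduled>
+        kvm_arch_vcpu_load()
+            vgic_v4_load(vcpu)        // early return: IN_WFI is still set
+    schedule()                        // the vcpu actually blocks
+        kvm_arch_vcpu_put()
+            vgic_v4_put(vcpu)         // vPE already nonresident, no-op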
+
+Cc: stable@vger.kernel.org
+Fixes: 8e01d9a396e6 ("KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put")
+Reported-by: Xiang Chen <chenxiang66@hisilicon.com>
+Suggested-by: Zenghui Yu <yuzenghui@huawei.com>
+Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
+Co-developed-by: Oliver Upton <oliver.upton@linux.dev>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Acked-by: Zenghui Yu <yuzenghui@huawei.com>
+Link: https://lore.kernel.org/r/20230713070657.3873244-1-maz@kernel.org
+Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
+[ modified to wrangle the vCPU flags directly instead of going through
+ the flag helper macros as they have not yet been introduced. Also doing
+ the flag wrangling in the kvm_arch_vcpu_{un}blocking() hooks as
+ kvm_vcpu_wfi() has not yet been introduced. See:
+ 6109c5a6ab7f ("KVM: arm64: Move vGIC v4 handling for WFI out arch callback hook") ]
+Signed-off-by: James Gowans <jgowans@amazon.com>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/kvm_host.h | 1 +
+ arch/arm64/kvm/arm.c | 6 ++++--
+ arch/arm64/kvm/vgic/vgic-v3.c | 2 +-
+ arch/arm64/kvm/vgic/vgic-v4.c | 8 ++++++--
+ include/kvm/arm_vgic.h | 2 +-
+ 5 files changed, 13 insertions(+), 6 deletions(-)
+
+--- a/arch/arm64/include/asm/kvm_host.h
++++ b/arch/arm64/include/asm/kvm_host.h
+@@ -419,6 +419,7 @@ struct kvm_vcpu_arch {
+ #define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
+ #define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
+ #define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
++#define KVM_ARM64_VCPU_IN_WFI (1 << 14) /* WFI instruction trapped */
+
+ #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
+ KVM_GUESTDBG_USE_SW_BP | \
+--- a/arch/arm64/kvm/arm.c
++++ b/arch/arm64/kvm/arm.c
+@@ -379,13 +379,15 @@ void kvm_arch_vcpu_blocking(struct kvm_v
+ */
+ preempt_disable();
+ kvm_vgic_vmcr_sync(vcpu);
+- vgic_v4_put(vcpu, true);
++ vcpu->arch.flags |= KVM_ARM64_VCPU_IN_WFI;
++ vgic_v4_put(vcpu);
+ preempt_enable();
+ }
+
+ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+ {
+ preempt_disable();
++ vcpu->arch.flags &= ~KVM_ARM64_VCPU_IN_WFI;
+ vgic_v4_load(vcpu);
+ preempt_enable();
+ }
+@@ -696,7 +698,7 @@ static void check_vcpu_requests(struct k
+ if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
+ /* The distributor enable bits were changed */
+ preempt_disable();
+- vgic_v4_put(vcpu, false);
++ vgic_v4_put(vcpu);
+ vgic_v4_load(vcpu);
+ preempt_enable();
+ }
+--- a/arch/arm64/kvm/vgic/vgic-v3.c
++++ b/arch/arm64/kvm/vgic/vgic-v3.c
+@@ -715,7 +715,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
+ {
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+- WARN_ON(vgic_v4_put(vcpu, false));
++ WARN_ON(vgic_v4_put(vcpu));
+
+ vgic_v3_vmcr_sync(vcpu);
+
+--- a/arch/arm64/kvm/vgic/vgic-v4.c
++++ b/arch/arm64/kvm/vgic/vgic-v4.c
+@@ -333,14 +333,15 @@ void vgic_v4_teardown(struct kvm *kvm)
+ its_vm->vpes = NULL;
+ }
+
+-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
++int vgic_v4_put(struct kvm_vcpu *vcpu)
+ {
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+ if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
+ return 0;
+
+- return its_make_vpe_non_resident(vpe, need_db);
++ return its_make_vpe_non_resident(vpe,
++ vcpu->arch.flags & KVM_ARM64_VCPU_IN_WFI);
+ }
+
+ int vgic_v4_load(struct kvm_vcpu *vcpu)
+@@ -351,6 +352,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
+ if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
+ return 0;
+
++ if (vcpu->arch.flags & KVM_ARM64_VCPU_IN_WFI)
++ return 0;
++
+ /*
+ * Before making the VPE resident, make sure the redistributor
+ * corresponding to our current CPU expects us here. See the
+--- a/include/kvm/arm_vgic.h
++++ b/include/kvm/arm_vgic.h
+@@ -423,6 +423,6 @@ int kvm_vgic_v4_unset_forwarding(struct
+
+ int vgic_v4_load(struct kvm_vcpu *vcpu);
+ void vgic_v4_commit(struct kvm_vcpu *vcpu);
+-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
++int vgic_v4_put(struct kvm_vcpu *vcpu);
+
+ #endif /* __KVM_ARM_VGIC_H */