git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
KVM: PPC: Book3S HV: Stop using vc->dpdes for nested KVM guests
author Gautam Menghani <gautam@linux.ibm.com>
Sat, 9 Nov 2024 06:32:56 +0000 (12:02 +0530)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 5 Dec 2024 12:53:41 +0000 (13:53 +0100)
[ Upstream commit 0d3c6b28896f9889c8864dab469e0343a0ad1c0c ]

Commit 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes")
introduced an optimization to use only vcpu->doorbell_request for SMT
emulation on Power9 and later guests. However, the code for nested guests
still relies on the old way of handling doorbells (vc->dpdes), so an L2
guest (see [1]) cannot be booted with XICS when SMT>1. The command to
reproduce this issue is:

// To be run in L1

qemu-system-ppc64 \
-drive file=rhel.qcow2,format=qcow2 \
-m 20G \
-smp 8,cores=1,threads=8 \
-cpu  host \
-nographic \
-machine pseries,ic-mode=xics -accel kvm

Fix the plumbing to utilize vcpu->doorbell_request instead of vcore->dpdes
for nested KVM guests on P9 and above.

[1] Terminology
1. L0 : PowerNV Linux running with HV privileges
2. L1 : Pseries KVM guest running on top of L0
3. L2 : Nested KVM guest running on top of L1

Fixes: 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes")
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://patch.msgid.link/20241109063301.105289-3-gautam@linux.ibm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c

index 0ed5c5c7a350d86424b3a8db77a4b5a2b55ff4b4..ccc9a8431b944e9b274d4dd0c9f912dede887ab2 100644 (file)
@@ -4303,6 +4303,15 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
        }
        hvregs.hdec_expiry = time_limit;
 
+       /*
+        * hvregs has the doorbell status, so zero it here which
+        * enables us to receive doorbells when H_ENTER_NESTED is
+        * in progress for this vCPU
+        */
+
+       if (vcpu->arch.doorbell_request)
+               vcpu->arch.doorbell_request = 0;
+
        /*
         * When setting DEC, we must always deal with irq_work_raise
         * via NMI vs setting DEC. The problem occurs right as we
index 05f5220960c63bfccf930ccb868648fc9361a204..125440a606ee3bc196c5f879151b6a9a75d19c5c 100644 (file)
@@ -32,7 +32,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
        hr->pcr = vc->pcr | PCR_MASK;
-       hr->dpdes = vc->dpdes;
+       hr->dpdes = vcpu->arch.doorbell_request;
        hr->hfscr = vcpu->arch.hfscr;
        hr->tb_offset = vc->tb_offset;
        hr->dawr0 = vcpu->arch.dawr0;
@@ -105,7 +105,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu,
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-       hr->dpdes = vc->dpdes;
+       hr->dpdes = vcpu->arch.doorbell_request;
        hr->purr = vcpu->arch.purr;
        hr->spurr = vcpu->arch.spurr;
        hr->ic = vcpu->arch.ic;
@@ -143,7 +143,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
        vc->pcr = hr->pcr | PCR_MASK;
-       vc->dpdes = hr->dpdes;
+       vcpu->arch.doorbell_request = hr->dpdes;
        vcpu->arch.hfscr = hr->hfscr;
        vcpu->arch.dawr0 = hr->dawr0;
        vcpu->arch.dawrx0 = hr->dawrx0;
@@ -170,7 +170,13 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-       vc->dpdes = hr->dpdes;
+       /*
+        * This L2 vCPU might have received a doorbell while H_ENTER_NESTED was being handled.
+        * Make sure we preserve the doorbell if it was either:
+        *   a) Sent after H_ENTER_NESTED was called on this vCPU (arch.doorbell_request would be 1)
+        *   b) Doorbell was not handled and L2 exited for some other reason (hr->dpdes would be 1)
+        */
+       vcpu->arch.doorbell_request = vcpu->arch.doorbell_request | hr->dpdes;
        vcpu->arch.hfscr = hr->hfscr;
        vcpu->arch.purr = hr->purr;
        vcpu->arch.spurr = hr->spurr;