--- /dev/null
+From c2fee09fc167c74a64adb08656cb993ea475197e Mon Sep 17 00:00:00 2001
+From: Sean Christopherson <seanjc@google.com>
+Date: Fri, 24 Jan 2025 17:18:33 -0800
+Subject: KVM: x86: Load DR6 with guest value only before entering .vcpu_run() loop
+
+From: Sean Christopherson <seanjc@google.com>
+
+commit c2fee09fc167c74a64adb08656cb993ea475197e upstream.
+
+Move the conditional loading of hardware DR6 with the guest's DR6 value
+out of the core .vcpu_run() loop to fix a bug where KVM can load hardware
+with a stale vcpu->arch.dr6.
+
+When the guest accesses a DR and host userspace isn't debugging the guest,
+KVM disables DR interception and loads the guest's values into hardware on
+VM-Enter and saves them on VM-Exit. This allows the guest to access DRs
+at will, e.g. so that a sequence of DR accesses to configure a breakpoint
+only generates one VM-Exit.
+
+For DR0-DR3, the logic/behavior is identical between VMX and SVM, and also
+identical between KVM_DEBUGREG_BP_ENABLED (userspace debugging the guest)
+and KVM_DEBUGREG_WONT_EXIT (guest using DRs), and so KVM handles loading
+DR0-DR3 in common code, _outside_ of the core kvm_x86_ops.vcpu_run() loop.
+
+But for DR6, the guest's value doesn't need to be loaded into hardware for
+KVM_DEBUGREG_BP_ENABLED, and SVM provides a dedicated VMCB field whereas
+VMX requires software to manually load the guest value, and so loading the
+guest's value into DR6 is handled by {svm,vmx}_vcpu_run(), i.e. is done
+_inside_ the core run loop.
+
+Unfortunately, saving the guest values on VM-Exit is initiated by common
+x86, again outside of the core run loop. If the guest modifies DR6 (in
+hardware, when DR interception is disabled), and then the next VM-Exit is
+a fastpath VM-Exit, KVM will reload hardware DR6 with vcpu->arch.dr6 and
+clobber the guest's actual value.
+
+The bug shows up primarily with nested VMX because KVM handles the VMX
+preemption timer in the fastpath, and the window between hardware DR6
+being modified (in guest context) and DR6 being read by guest software is
+orders of magnitude larger in a nested setup. E.g. in non-nested, the
+VMX preemption timer would need to fire precisely between #DB injection
+and the #DB handler's read of DR6, whereas with a KVM-on-KVM setup, the
+window where hardware DR6 is "dirty" extends all the way from L1 writing
+DR6 to VMRESUME (in L1).
+
+ L1's view:
+ ==========
+ <L1 disables DR interception>
+ CPU 0/KVM-7289 [023] d.... 2925.640961: kvm_entry: vcpu 0
+ A: L1 Writes DR6
+ CPU 0/KVM-7289 [023] d.... 2925.640963: <hack>: Set DRs, DR6 = 0xffff0ff1
+
+ B: CPU 0/KVM-7289 [023] d.... 2925.640967: kvm_exit: vcpu 0 reason EXTERNAL_INTERRUPT intr_info 0x800000ec
+
+ D: L1 reads DR6, arch.dr6 = 0
+ CPU 0/KVM-7289 [023] d.... 2925.640969: <hack>: Sync DRs, DR6 = 0xffff0ff0
+
+ CPU 0/KVM-7289 [023] d.... 2925.640976: kvm_entry: vcpu 0
+ L2 reads DR6, L1 disables DR interception
+ CPU 0/KVM-7289 [023] d.... 2925.640980: kvm_exit: vcpu 0 reason DR_ACCESS info1 0x0000000000000216
+ CPU 0/KVM-7289 [023] d.... 2925.640983: kvm_entry: vcpu 0
+
+ CPU 0/KVM-7289 [023] d.... 2925.640983: <hack>: Set DRs, DR6 = 0xffff0ff0
+
+ L2 detects failure
+ CPU 0/KVM-7289 [023] d.... 2925.640987: kvm_exit: vcpu 0 reason HLT
+ L1 reads DR6 (confirms failure)
+ CPU 0/KVM-7289 [023] d.... 2925.640990: <hack>: Sync DRs, DR6 = 0xffff0ff0
+
+ L0's view:
+ ==========
+ L2 reads DR6, arch.dr6 = 0
+ CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216
+ CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit: vcpu 23 reason DR_ACCESS info1 0x0000000000000216
+
+ L2 => L1 nested VM-Exit
+ CPU 23/KVM-5046 [001] ..... 3410.005610: kvm_nested_vmexit_inject: reason: DR_ACCESS ext_inf1: 0x0000000000000216
+
+ CPU 23/KVM-5046 [001] d.... 3410.005610: kvm_entry: vcpu 23
+ CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_exit: vcpu 23 reason VMREAD
+ CPU 23/KVM-5046 [001] d.... 3410.005611: kvm_entry: vcpu 23
+ CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason VMREAD
+ CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_entry: vcpu 23
+
+ L1 writes DR7, L0 disables DR interception
+ CPU 23/KVM-5046 [001] d.... 3410.005612: kvm_exit: vcpu 23 reason DR_ACCESS info1 0x0000000000000007
+ CPU 23/KVM-5046 [001] d.... 3410.005613: kvm_entry: vcpu 23
+
+ L0 writes DR6 = 0 (arch.dr6)
+ CPU 23/KVM-5046 [001] d.... 3410.005613: <hack>: Set DRs, DR6 = 0xffff0ff0
+
+ A: <L1 writes DR6 = 1, no interception, arch.dr6 is still '0'>
+
+ B: CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_exit: vcpu 23 reason PREEMPTION_TIMER
+ CPU 23/KVM-5046 [001] d.... 3410.005614: kvm_entry: vcpu 23
+
+ C: L0 writes DR6 = 0 (arch.dr6)
+ CPU 23/KVM-5046 [001] d.... 3410.005614: <hack>: Set DRs, DR6 = 0xffff0ff0
+
+ L1 => L2 nested VM-Enter
+ CPU 23/KVM-5046 [001] d.... 3410.005616: kvm_exit: vcpu 23 reason VMRESUME
+
+ L0 reads DR6, arch.dr6 = 0
+
+Reported-by: John Stultz <jstultz@google.com>
+Closes: https://lkml.kernel.org/r/CANDhNCq5_F3HfFYABqFGCA1bPd_%2BxgNj-iDQhH4tDk%2Bwi8iZZg%40mail.gmail.com
+Fixes: 375e28ffc0cf ("KVM: X86: Set host DR6 only on VMX and for KVM_DEBUGREG_WONT_EXIT")
+Fixes: d67668e9dd76 ("KVM: x86, SVM: isolate vcpu->arch.dr6 from vmcb->save.dr6")
+Cc: stable@vger.kernel.org
+Cc: Jim Mattson <jmattson@google.com>
+Tested-by: John Stultz <jstultz@google.com>
+Link: https://lore.kernel.org/r/20250125011833.3644371-1-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+[jth: Handled conflicts with kvm_x86_ops reshuffle]
+Signed-off-by: James Houghton <jthoughton@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm-x86-ops.h | 1 +
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/svm/svm.c | 13 ++++++-------
+ arch/x86/kvm/vmx/vmx.c | 11 +++++++----
+ arch/x86/kvm/x86.c | 3 +++
+ 5 files changed, 18 insertions(+), 11 deletions(-)
+
+--- a/arch/x86/include/asm/kvm-x86-ops.h
++++ b/arch/x86/include/asm/kvm-x86-ops.h
+@@ -47,6 +47,7 @@ KVM_X86_OP(set_idt)
+ KVM_X86_OP(get_gdt)
+ KVM_X86_OP(set_gdt)
+ KVM_X86_OP(sync_dirty_debug_regs)
++KVM_X86_OP(set_dr6)
+ KVM_X86_OP(set_dr7)
+ KVM_X86_OP(cache_reg)
+ KVM_X86_OP(get_rflags)
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1499,6 +1499,7 @@ struct kvm_x86_ops {
+ void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+ void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+ void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
++ void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
+ void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
+ void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
+ unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1920,11 +1920,11 @@ static void new_asid(struct vcpu_svm *sv
+ svm->asid = sd->next_asid++;
+ }
+
+-static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
++static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
+ {
+- struct vmcb *vmcb = svm->vmcb;
++ struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+
+- if (svm->vcpu.arch.guest_state_protected)
++ if (vcpu->arch.guest_state_protected)
+ return;
+
+ if (unlikely(value != vmcb->save.dr6)) {
+@@ -4035,10 +4035,8 @@ static __no_kcsan fastpath_t svm_vcpu_ru
+ * Run with all-zero DR6 unless needed, so that we can get the exact cause
+ * of a #DB.
+ */
+- if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+- svm_set_dr6(svm, vcpu->arch.dr6);
+- else
+- svm_set_dr6(svm, DR6_ACTIVE_LOW);
++ if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
++ svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
+
+ clgi();
+ kvm_load_guest_xsave_state(vcpu);
+@@ -4807,6 +4805,7 @@ static struct kvm_x86_ops svm_x86_ops __
+ .set_idt = svm_set_idt,
+ .get_gdt = svm_get_gdt,
+ .set_gdt = svm_set_gdt,
++ .set_dr6 = svm_set_dr6,
+ .set_dr7 = svm_set_dr7,
+ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
+ .cache_reg = svm_cache_reg,
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -5536,6 +5536,12 @@ static void vmx_sync_dirty_debug_regs(st
+ set_debugreg(DR6_RESERVED, 6);
+ }
+
++static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
++{
++ lockdep_assert_irqs_disabled();
++ set_debugreg(vcpu->arch.dr6, 6);
++}
++
+ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
+ {
+ vmcs_writel(GUEST_DR7, val);
+@@ -7220,10 +7226,6 @@ static fastpath_t vmx_vcpu_run(struct kv
+ vmx->loaded_vmcs->host_state.cr4 = cr4;
+ }
+
+- /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
+- if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+- set_debugreg(vcpu->arch.dr6, 6);
+-
+ /* When single-stepping over STI and MOV SS, we must clear the
+ * corresponding interruptibility bits in the guest state. Otherwise
+ * vmentry fails as it then expects bit 14 (BS) in pending debug
+@@ -8168,6 +8170,7 @@ static struct kvm_x86_ops vmx_x86_ops __
+ .set_idt = vmx_set_idt,
+ .get_gdt = vmx_get_gdt,
+ .set_gdt = vmx_set_gdt,
++ .set_dr6 = vmx_set_dr6,
+ .set_dr7 = vmx_set_dr7,
+ .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
+ .cache_reg = vmx_cache_reg,
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -10841,6 +10841,9 @@ static int vcpu_enter_guest(struct kvm_v
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
++ /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
++ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
++ static_call(kvm_x86_set_dr6)(vcpu, vcpu->arch.dr6);
+ } else if (unlikely(hw_breakpoint_active())) {
+ set_debugreg(0, 7);
+ }