From: Sean Christopherson Date: Thu, 9 Apr 2026 22:42:30 +0000 (-0700) Subject: KVM: x86: Add dedicated storage for guest RIP X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=3b4ec7dcdf89d7a88d0d0ca2ca723955b1586eb3;p=thirdparty%2Fkernel%2Flinux.git KVM: x86: Add dedicated storage for guest RIP Add kvm_vcpu_arch.rip to track guest RIP instead of including it in the generic regs[] array. Decoupling RIP from regs[] will allow using a *completely* arbitrary index for RIP, as opposed to the mostly-arbitrary index that is currently used. That in turn will allow using indices 16-31 to track R16-R31 that are coming with APX. Note, although RIP can used for addressing, it does NOT have an architecturally defined index, and so can't be reached via flows like get_vmx_mem_address() where KVM "blindly" reads a general purpose register given the SIB information reported by hardware. For RIP-relative addressing, hardware reports the full "offset" in vmcs.EXIT_QUALIFICATION. Note #2, keep the available/dirty tracking as RSP is context switched through the VMCS, i.e. needs to be cached for VMX. Opportunistically rename NR_VCPU_REGS to NR_VCPU_GENERAL_PURPOSE_REGS to better capture what it tracks, and so that KVM can slot in R16-R13 without running into weirdness where KVM's definition of "EXREG" doesn't line up with APX's definition of "extended reg". No functional change intended. Cc: Chang S. Bae Signed-off-by: Sean Christopherson Reviewed-by: Chang S. Bae Reviewed-by: Kai Huang Tested-by: Kai Huang Message-ID: <20260409224236.2021562-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c470e40a00aa4..68a11325e8bc8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -191,10 +191,11 @@ enum kvm_reg { VCPU_REGS_R14 = __VCPU_REGS_R14, VCPU_REGS_R15 = __VCPU_REGS_R15, #endif - VCPU_REGS_RIP, - NR_VCPU_REGS, + NR_VCPU_GENERAL_PURPOSE_REGS, - VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_REG_RIP = NR_VCPU_GENERAL_PURPOSE_REGS, + + VCPU_EXREG_PDPTR, VCPU_EXREG_CR0, /* * Alias AMD's ERAPS (not a real register) to CR3 so that common code @@ -799,7 +800,8 @@ struct kvm_vcpu_arch { * rip and regs accesses must go through * kvm_{register,rip}_{read,write} functions. */ - unsigned long regs[NR_VCPU_REGS]; + unsigned long regs[NR_VCPU_GENERAL_PURPOSE_REGS]; + unsigned long rip; u32 regs_avail; u32 regs_dirty; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 8ddb01191d6f6..9b7df9de0e879 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -112,7 +112,7 @@ static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu */ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg) { - if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS)) return 0; if (!kvm_register_is_available(vcpu, reg)) @@ -124,7 +124,7 @@ static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, unsigned long val) { - if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) + if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS)) return; vcpu->arch.regs[reg] = val; @@ -133,12 +133,16 @@ static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) { - return kvm_register_read_raw(vcpu, VCPU_REGS_RIP); + if (!kvm_register_is_available(vcpu, VCPU_REG_RIP)) + kvm_x86_call(cache_reg)(vcpu, VCPU_REG_RIP); + + return vcpu->arch.rip; } static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) { - kvm_register_write_raw(vcpu, VCPU_REGS_RIP, val); + vcpu->arch.rip = val; + kvm_register_mark_dirty(vcpu, VCPU_REG_RIP); } static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c2126b3c30724..940b97d4a8523 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -967,7 +967,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14]; save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15]; #endif - save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP]; + save->rip = svm->vcpu.arch.rip; /* Sync some non-GPR registers before encrypting */ save->xcr0 = svm->vcpu.arch.xcr0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e7fdd7a9c280d..85edaee27b03e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4420,7 +4420,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; - svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; + svm->vmcb->save.rip = vcpu->arch.rip; /* * Disable singlestep if we're injecting an interrupt/exception. @@ -4506,7 +4506,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) vcpu->arch.cr2 = svm->vmcb->save.cr2; vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; - vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + vcpu->arch.rip = svm->vmcb->save.rip; } vcpu->arch.regs_dirty = 0; @@ -4946,7 +4946,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; - svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; + svm->vmcb->save.rip = vcpu->arch.rip; nested_svm_simple_vmexit(svm, SVM_EXIT_SW); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index a29896a9ef145..577b0c6286ad4 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2604,8 +2604,8 @@ void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) case VCPU_REGS_RSP: vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); break; - case VCPU_REGS_RIP: - vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); + case VCPU_REG_RIP: + vcpu->arch.rip = vmcs_readl(GUEST_RIP); break; case VCPU_EXREG_PDPTR: if (enable_ept) @@ -7536,8 +7536,8 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags) if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP)) - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); + if (kvm_register_is_dirty(vcpu, VCPU_REG_RIP)) + vmcs_writel(GUEST_RIP, vcpu->arch.rip); vcpu->arch.regs_dirty = 0; if (run_flags & KVM_RUN_LOAD_GUEST_DR6) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db84e8001da58..d0cc5f6c6879d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -620,7 +620,7 @@ BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64) * cache on demand. Other registers not listed here are synced to * the cache immediately after VM-Exit. */ -#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) | \ +#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REG_RIP) | \ (1 << VCPU_REGS_RSP) | \ (1 << VCPU_EXREG_RFLAGS) | \ (1 << VCPU_EXREG_PDPTR) | \