// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Yaniv Kamay  <yaniv@qumranet.com>
 * Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;

	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		vmcb->control.exit_code = SVM_EXIT_NPF;
		vmcb->control.exit_code_hi = 0;
		vmcb->control.exit_info_1 = (1ULL << 32);
		vmcb->control.exit_info_2 = fault->address;
	}

	vmcb->control.exit_info_1 &= ~0xffffffffULL;
	vmcb->control.exit_info_1 |= fault->error_code;

	nested_svm_vmexit(svm);
}
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.ctl.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}
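/*
 * Illustration (comment only, not part of the upstream file): with PAE
 * paging, L1's nested_cr3 points at a table of 8-byte PDPTEs, so the helper
 * above reads PDPTE @index from the page containing nested_cr3, at byte
 * offset offset_in_page(nested_cr3) + index * 8; e.g. index 2 sits 16 bytes
 * past the table base.
 */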
static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.ctl.nested_cr3;
}
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;

	/*
	 * The NPT format depends on L1's CR4 and EFER, which is in vmcb01.  Note,
	 * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
	 * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
	 */
	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
				svm->vmcb01.ptr->save.efer,
				svm->nested.ctl.nested_cr3);
	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.walk_mmu               = &vcpu->arch.nested_mmu;
}
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
	if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
		return true;

	if (!nested_npt_enabled(svm))
		return true;

	if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
		return true;

	return false;
}
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct vmcb_ctrl_area_cached *g;
	unsigned int i;

	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->vmcb01.ptr->control;
	g = &svm->nested.ctl;

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] = h->intercepts[i];

	if (g->int_ctl & V_INTR_MASKING_MASK) {
		/*
		 * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
		 * disable intercept of CR8 writes as L2's CR8 does not affect
		 * any interrupt KVM may want to inject.
		 *
		 * Similarly, disable intercept of virtual interrupts (used to
		 * detect interrupt windows) if the saved RFLAGS.IF is '0', as
		 * the effective RFLAGS.IF for L1 interrupts will never be set
		 * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
		 */
		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
		if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
			vmcb_clr_intercept(c, INTERCEPT_VINTR);
	}

	/*
	 * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
	 * flush feature is enabled.
	 */
	if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
		vmcb_clr_intercept(c, INTERCEPT_VMMCALL);

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] |= g->intercepts[i];

	/* If SMI is not intercepted, ignore guest SMI intercept as well  */
	if (!intercept_smi)
		vmcb_clr_intercept(c, INTERCEPT_SMI);

	if (nested_vmcb_needs_vls_intercept(svm)) {
		/*
		 * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
		 * we must intercept these instructions to correctly
		 * emulate them in case L1 doesn't intercept them.
		 */
		vmcb_set_intercept(c, INTERCEPT_VMLOAD);
		vmcb_set_intercept(c, INTERCEPT_VMSAVE);
	} else {
		WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
	}
}
/*
 * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
 * is optimized in that it only merges the parts where KVM MSR permission bitmap
 * may contain zero bits.
 */
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	int i;

	/*
	 * MSR bitmap update can be skipped when:
	 * - MSR bitmap for L1 hasn't changed.
	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
	 *   before.
	 * - Nested hypervisor (L1) is using Hyper-V emulation interface and
	 *   tells KVM (L0) there were no changes in MSR bitmap for L2.
	 */
#ifdef CONFIG_KVM_HYPERV
	if (!svm->nested.force_msr_bitmap_recalc) {
		struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;

		if (kvm_hv_hypercall_enabled(&svm->vcpu) &&
		    hve->hv_enlightenments_control.msr_bitmap &&
		    (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
			goto set_msrpm_base_pa;
	}
#endif

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p      = msrpm_offsets[i];

		/* x2apic msrs are intercepted always for the nested guest */
		if (is_x2apic_msrpm_offset(p))
			continue;

		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->nested.force_msr_bitmap_recalc = false;

#ifdef CONFIG_KVM_HYPERV
set_msrpm_base_pa:
#endif
	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}
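/*
 * Worked example (illustrative): if msrpm_offsets[i] yields p = 0x10, the
 * corresponding 32-bit chunk of L1's bitmap is read from guest physical
 * address svm->nested.ctl.msrpm_base_pa + 0x40 (p * 4 bytes), and the merged
 * chunk is svm->msrpm[p] | value, i.e. an MSR access is intercepted whenever
 * either KVM (L0) or L1 wants it intercepted.
 */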
/*
 * Bits 11:0 of bitmap address are ignored by hardware
 */
static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
{
	u64 addr = PAGE_ALIGN(pa);

	return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
	    kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}
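/*
 * Note (illustrative): because bits 11:0 are ignored, the check above
 * page-aligns the supplied address and then only requires that the first and
 * last byte of the bitmap (addr and addr + size - 1) both be legal guest
 * physical addresses.
 */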
static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
{
	/* Nested FLUSHBYASID is not supported yet.  */
	switch (tlb_ctl) {
	case TLB_CONTROL_DO_NOTHING:
	case TLB_CONTROL_FLUSH_ALL_ASID:
		return true;
	default:
		return false;
	}
}
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
					 struct vmcb_ctrl_area_cached *control)
{
	if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
		return false;

	if (CC(control->asid == 0))
		return false;

	if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
		return false;

	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
					   MSRPM_SIZE)))
		return false;
	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
					   IOPM_SIZE)))
		return false;

	if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
		return false;

	if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
	       !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
		return false;
	}

	return true;
}
/* Common checks that apply to both L1 and L2 state.  */
static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
				     struct vmcb_save_area_cached *save)
{
	if (CC(!(save->efer & EFER_SVME)))
		return false;

	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
	    CC(save->cr0 & ~0xffffffffULL))
		return false;

	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
		return false;

	/*
	 * These checks are also performed by KVM_SET_SREGS,
	 * except that EFER.LMA is not checked by SVM against
	 * CR0.PG && EFER.LME.
	 */
	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
		    CC(!(save->cr0 & X86_CR0_PE)) ||
		    CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
			return false;
	}

	/* Note, SVM doesn't have any additional restrictions on CR4. */
	if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
		return false;

	if (CC(!kvm_valid_efer(vcpu, save->efer)))
		return false;

	return true;
}
static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_save_area_cached *save = &svm->nested.save;

	return __nested_vmcb_check_save(vcpu, save);
}
static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;

	return __nested_vmcb_check_controls(vcpu, ctl);
}
void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
					 struct vmcb_ctrl_area_cached *to,
					 struct vmcb_control_area *from)
{
	unsigned int i;

	for (i = 0; i < MAX_INTERCEPT; i++)
		to->intercepts[i] = from->intercepts[i];

	to->iopm_base_pa        = from->iopm_base_pa;
	to->msrpm_base_pa       = from->msrpm_base_pa;
	to->tsc_offset          = from->tsc_offset;
	to->tlb_ctl             = from->tlb_ctl;
	to->int_ctl             = from->int_ctl;
	to->int_vector          = from->int_vector;
	to->int_state           = from->int_state;
	to->exit_code           = from->exit_code;
	to->exit_code_hi        = from->exit_code_hi;
	to->exit_info_1         = from->exit_info_1;
	to->exit_info_2         = from->exit_info_2;
	to->exit_int_info       = from->exit_int_info;
	to->exit_int_info_err   = from->exit_int_info_err;
	to->nested_ctl          = from->nested_ctl;
	to->event_inj           = from->event_inj;
	to->event_inj_err       = from->event_inj_err;
	to->next_rip            = from->next_rip;
	to->nested_cr3          = from->nested_cr3;
	to->virt_ext            = from->virt_ext;
	to->pause_filter_count  = from->pause_filter_count;
	to->pause_filter_thresh = from->pause_filter_thresh;

	/* Copy asid here because nested_vmcb_check_controls will check it.  */
	to->asid           = from->asid;
	to->msrpm_base_pa &= ~0x0fffULL;
	to->iopm_base_pa  &= ~0x0fffULL;

#ifdef CONFIG_KVM_HYPERV
	/* Hyper-V extensions (Enlightened VMCB) */
	if (kvm_hv_hypercall_enabled(vcpu)) {
		to->clean = from->clean;
		memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
		       sizeof(to->hv_enlightenments));
	}
#endif
}
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
				       struct vmcb_control_area *control)
{
	__nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
}
static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
					     struct vmcb_save_area *from)
{
	/*
	 * Copy only fields that are validated, as we need them
	 * to avoid TOC/TOU races.
	 */
	to->efer = from->efer;
	to->cr0 = from->cr0;
	to->cr3 = from->cr3;
	to->cr4 = from->cr4;

	to->dr6 = from->dr6;
	to->dr7 = from->dr7;
}
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
				    struct vmcb_save_area *save)
{
	__nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
}
/*
 * Synchronize fields that are written by the processor, so that
 * they can be copied back into the vmcb12.
 */
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
{
	u32 mask;

	svm->nested.ctl.event_inj     = svm->vmcb->control.event_inj;
	svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;

	/* Only a few fields of int_ctl are written by the processor.  */
	mask = V_IRQ_MASK | V_TPR_MASK;

	/*
	 * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
	 * virtual interrupts in order to request an interrupt window, as KVM
	 * has usurped vmcb02's int_ctl.  If an interrupt window opens before
	 * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
	 * If no window opens, V_IRQ will be correctly preserved in vmcb12's
	 * int_ctl (because it was never recognized while L2 was running).
	 */
	if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
	    !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
		mask &= ~V_IRQ_MASK;

	if (nested_vgif_enabled(svm))
		mask |= V_GIF_MASK;

	if (nested_vnmi_enabled(svm))
		mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;

	svm->nested.ctl.int_ctl &= ~mask;
	svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}
/*
 * Transfer any event that L0 or L1 wanted to inject into L2 to
 * EXIT_INT_INFO.
 */
static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
						struct vmcb *vmcb12)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 exit_int_info = 0;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.vector;
		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;

		if (vcpu->arch.exception.has_error_code) {
			exit_int_info |= SVM_EVTINJ_VALID_ERR;
			vmcb12->control.exit_int_info_err =
				vcpu->arch.exception.error_code;
		}

	} else if (vcpu->arch.nmi_injected) {
		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;

	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID;

		if (vcpu->arch.interrupt.soft)
			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
		else
			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
	}

	vmcb12->control.exit_int_info = exit_int_info;
}
static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
{
	/* Handle pending Hyper-V TLB flush requests */
	kvm_hv_nested_transtion_tlb_flush(vcpu, npt_enabled);

	/*
	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
	 * things to fix before this can be conditional:
	 *
	 *  - Flush TLBs for both L1 and L2 remote TLB flush
	 *  - Honor L1's request to flush an ASID on nested VMRUN
	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
	 *
	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
	 *     NPT guest-physical mappings on VMRUN.
	 */
	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
/*
 * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
 * if we are emulating VM-Entry into a guest with NPT enabled.
 */
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_npt, bool reload_pdptrs)
{
	if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
		return -EINVAL;

	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, cr3)))
		return -EINVAL;

	vcpu->arch.cr3 = cr3;

	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
	kvm_init_mmu(vcpu);

	if (!nested_npt)
		kvm_mmu_new_pgd(vcpu, cr3);

	return 0;
}
void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
{
	if (!svm->nested.vmcb02.ptr)
		return;

	/* FIXME: merge g_pat from vmcb01 and vmcb12. */
	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
}
static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
	bool new_vmcb12 = false;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	nested_vmcb02_compute_g_pat(svm);

	/* Load the nested guest state */
	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
		new_vmcb12 = true;
		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
		svm->nested.force_msr_bitmap_recalc = true;
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
		vmcb02->save.es = vmcb12->save.es;
		vmcb02->save.cs = vmcb12->save.cs;
		vmcb02->save.ss = vmcb12->save.ss;
		vmcb02->save.ds = vmcb12->save.ds;
		vmcb02->save.cpl = vmcb12->save.cpl;
		vmcb_mark_dirty(vmcb02, VMCB_SEG);
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
		vmcb02->save.gdtr = vmcb12->save.gdtr;
		vmcb02->save.idtr = vmcb12->save.idtr;
		vmcb_mark_dirty(vmcb02, VMCB_DT);
	}

	kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);

	svm_set_efer(vcpu, svm->nested.save.efer);

	svm_set_cr0(vcpu, svm->nested.save.cr0);
	svm_set_cr4(vcpu, svm->nested.save.cr4);

	svm->vcpu.arch.cr2 = vmcb12->save.cr2;

	kvm_rax_write(vcpu, vmcb12->save.rax);
	kvm_rsp_write(vcpu, vmcb12->save.rsp);
	kvm_rip_write(vcpu, vmcb12->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	vmcb02->save.rax = vmcb12->save.rax;
	vmcb02->save.rsp = vmcb12->save.rsp;
	vmcb02->save.rip = vmcb12->save.rip;

	/* These bits will be set properly on the first execution when new_vmc12 is true */
	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
		vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
		svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
		vmcb_mark_dirty(vmcb02, VMCB_DR);
	}

	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
		/*
		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
		 * svm_set_msr's definition of reserved bits.
		 */
		svm_copy_lbrs(vmcb02, vmcb12);
		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
		svm_update_lbrv(&svm->vcpu);

	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
		svm_copy_lbrs(vmcb02, vmcb01);
	}
}
static inline bool is_evtinj_soft(u32 evtinj)
{
	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
	u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;

	if (!(evtinj & SVM_EVTINJ_VALID))
		return false;

	if (type == SVM_EVTINJ_TYPE_SOFT)
		return true;

	return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
}

static bool is_evtinj_nmi(u32 evtinj)
{
	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;

	if (!(evtinj & SVM_EVTINJ_VALID))
		return false;

	return type == SVM_EVTINJ_TYPE_NMI;
}
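/*
 * For reference (EVENTINJ encoding assumed by the helpers above): bits 7:0
 * hold the vector, bits 10:8 the type (SVM_EVTINJ_TYPE_*), bit 11 the
 * error-code-valid flag and bit 31 the valid bit, which is why the helpers
 * mask with SVM_EVTINJ_VEC_MASK / SVM_EVTINJ_TYPE_MASK before classifying an
 * injected event.
 */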
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
					  unsigned long vmcb12_rip,
					  unsigned long vmcb12_csbase)
{
	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;

	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	u32 pause_count12;
	u32 pause_thresh12;

	/*
	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
	 */

	if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
	    (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
	else
		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);

	if (vnmi) {
		if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
			svm->vcpu.arch.nmi_pending++;
			kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
		}
		if (nested_vnmi_enabled(svm))
			int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
						V_NMI_ENABLE_MASK |
						V_NMI_BLOCKING_MASK);
	}

	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;

	/* Done at vmrun: asid.  */

	/* Also overwritten later if necessary.  */
	vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	if (nested_npt_enabled(svm))
		nested_svm_init_mmu_context(vcpu);

	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
			vcpu->arch.l1_tsc_offset,
			svm->nested.ctl.tsc_offset,
			svm->tsc_ratio_msr);

	vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;

	if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
	    svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
		nested_svm_update_tsc_ratio_msr(vcpu);

	vmcb02->control.int_ctl =
		(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
		(vmcb01->control.int_ctl & int_ctl_vmcb01_bits);

	vmcb02->control.int_vector = svm->nested.ctl.int_vector;
	vmcb02->control.int_state = svm->nested.ctl.int_state;
	vmcb02->control.event_inj = svm->nested.ctl.event_inj;
	vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;

	/*
	 * next_rip is consumed on VMRUN as the return address pushed on the
	 * stack for injected soft exceptions/interrupts.  If nrips is exposed
	 * to L1, take it verbatim from vmcb12.  If nrips is supported in
	 * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
	 * prior to injecting the event).
	 */
	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
		vmcb02->control.next_rip = svm->nested.ctl.next_rip;
	else if (boot_cpu_has(X86_FEATURE_NRIPS))
		vmcb02->control.next_rip = vmcb12_rip;

	svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
	if (is_evtinj_soft(vmcb02->control.event_inj)) {
		svm->soft_int_injected = true;
		svm->soft_int_csbase = vmcb12_csbase;
		svm->soft_int_old_rip = vmcb12_rip;
		if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
			svm->soft_int_next_rip = svm->nested.ctl.next_rip;
		else
			svm->soft_int_next_rip = vmcb12_rip;
	}

	vmcb02->control.virt_ext = vmcb01->control.virt_ext &
				   LBR_CTL_ENABLE_MASK;
	if (guest_can_use(vcpu, X86_FEATURE_LBRV))
		vmcb02->control.virt_ext |=
			(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);

	if (!nested_vmcb_needs_vls_intercept(svm))
		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;

	if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
		pause_count12 = svm->nested.ctl.pause_filter_count;
	else
		pause_count12 = 0;
	if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
		pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
	else
		pause_thresh12 = 0;
	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
		/* use guest values since host doesn't intercept PAUSE */
		vmcb02->control.pause_filter_count = pause_count12;
		vmcb02->control.pause_filter_thresh = pause_thresh12;

	} else {
		/* start from host values otherwise */
		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;

		/* ... but ensure filtering is disabled if so requested.  */
		if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
			if (!pause_count12)
				vmcb02->control.pause_filter_count = 0;
			if (!pause_thresh12)
				vmcb02->control.pause_filter_thresh = 0;
		}
	}

	nested_svm_transition_tlb_flush(vcpu);

	/* Enter Guest-Mode */
	enter_guest_mode(vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect.
	 */
	recalc_intercepts(svm);
}
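/*
 * Summary of the int_ctl merge above (illustrative): vmcb02 takes V_TPR and
 * the V_IRQ injection bits from vmcb12, V_INTR_MASKING from vmcb01, and the
 * V_GIF bits from whichever VMCB owns vGIF (vmcb12 when vGIF is exposed to
 * L1 and enabled there, vmcb01 otherwise).
 */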
static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	/*
	 * Some VMCB state is shared between L1 and L2 and thus has to be
	 * moved at the time of nested vmrun and vmexit.
	 *
	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
	 * always performs VMLOAD and VMSAVE from the VMCB01.
	 */
	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
			 struct vmcb *vmcb12, bool from_vmrun)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	trace_kvm_nested_vmenter(svm->vmcb->save.rip,
				 vmcb12_gpa,
				 vmcb12->save.rip,
				 vmcb12->control.int_ctl,
				 vmcb12->control.event_inj,
				 vmcb12->control.nested_ctl,
				 vmcb12->control.nested_cr3,
				 vmcb12->save.cr3,
				 KVM_ISA_SVM);

	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
				    vmcb12->control.intercepts[INTERCEPT_WORD3],
				    vmcb12->control.intercepts[INTERCEPT_WORD4],
				    vmcb12->control.intercepts[INTERCEPT_WORD5]);

	svm->nested.vmcb12_gpa = vmcb12_gpa;

	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);

	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);
	nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
	nested_vmcb02_prepare_save(svm, vmcb12);

	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
				  nested_npt_enabled(svm), from_vmrun);
	if (ret)
		return ret;

	if (!from_vmrun)
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	svm_set_gif(svm, true);

	if (kvm_vcpu_apicv_active(vcpu))
		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);

	nested_svm_hv_update_vm_vp_ids(vcpu);

	return 0;
}
int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	u64 vmcb12_gpa;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;

	if (!svm->nested.hsave_msr) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	if (is_smm(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/* This fails when VP assist page is enabled but the supplied GPA is bogus */
	ret = kvm_hv_verify_vp_assist(vcpu);
	if (ret) {
		kvm_inject_gp(vcpu, 0);
		return ret;
	}

	vmcb12_gpa = svm->vmcb->save.rax;
	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(vcpu);
	}

	ret = kvm_skip_emulated_instruction(vcpu);

	vmcb12 = map.hva;

	if (WARN_ON_ONCE(!svm->nested.initialized))
		return -EINVAL;

	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

	if (!nested_vmcb_check_save(vcpu) ||
	    !nested_vmcb_check_controls(vcpu)) {
		vmcb12->control.exit_code    = SVM_EXIT_ERR;
		vmcb12->control.exit_code_hi = 0;
		vmcb12->control.exit_info_1  = 0;
		vmcb12->control.exit_info_2  = 0;
		goto out;
	}

	/*
	 * Since vmcb01 is not in use, we can use it to store some of the L1
	 * state.
	 */
	vmcb01->save.efer   = vcpu->arch.efer;
	vmcb01->save.cr0    = kvm_read_cr0(vcpu);
	vmcb01->save.cr4    = vcpu->arch.cr4;
	vmcb01->save.rflags = kvm_get_rflags(vcpu);
	vmcb01->save.rip    = kvm_rip_read(vcpu);

	if (!npt_enabled)
		vmcb01->save.cr3 = kvm_read_cr3(vcpu);

	svm->nested.nested_run_pending = 1;

	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
		goto out_exit_err;

	if (nested_svm_vmrun_msrpm(svm))
		goto out;

out_exit_err:
	svm->nested.nested_run_pending = 0;
	svm->nmi_l1_to_l2 = false;
	svm->soft_int_injected = false;

	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1  = 0;
	svm->vmcb->control.exit_info_2  = 0;

	nested_svm_vmexit(svm);

out:
	kvm_vcpu_unmap(vcpu, &map, true);

	return ret;
}
/* Copy state save area fields which are handled by VMRUN */
void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
			  struct vmcb_save_area *from_save)
{
	to_save->es = from_save->es;
	to_save->cs = from_save->cs;
	to_save->ss = from_save->ss;
	to_save->ds = from_save->ds;
	to_save->gdtr = from_save->gdtr;
	to_save->idtr = from_save->idtr;
	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
	to_save->efer = from_save->efer;
	to_save->cr0 = from_save->cr0;
	to_save->cr3 = from_save->cr3;
	to_save->cr4 = from_save->cr4;
	to_save->rax = from_save->rax;
	to_save->rsp = from_save->rsp;
	to_save->rip = from_save->rip;
}
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	int rc;

	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(vcpu, 0);
		return 1;
	}

	vmcb12 = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(vcpu);
	svm->nested.vmcb12_gpa = 0;
	WARN_ON_ONCE(svm->nested.nested_run_pending);

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	/* in case we halted in L2 */
	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;

	/* Give the current vmcb to the guest */

	vmcb12->save.es     = vmcb02->save.es;
	vmcb12->save.cs     = vmcb02->save.cs;
	vmcb12->save.ss     = vmcb02->save.ss;
	vmcb12->save.ds     = vmcb02->save.ds;
	vmcb12->save.gdtr   = vmcb02->save.gdtr;
	vmcb12->save.idtr   = vmcb02->save.idtr;
	vmcb12->save.efer   = svm->vcpu.arch.efer;
	vmcb12->save.cr0    = kvm_read_cr0(vcpu);
	vmcb12->save.cr3    = kvm_read_cr3(vcpu);
	vmcb12->save.cr2    = vmcb02->save.cr2;
	vmcb12->save.cr4    = svm->vcpu.arch.cr4;
	vmcb12->save.rflags = kvm_get_rflags(vcpu);
	vmcb12->save.rip    = kvm_rip_read(vcpu);
	vmcb12->save.rsp    = kvm_rsp_read(vcpu);
	vmcb12->save.rax    = kvm_rax_read(vcpu);
	vmcb12->save.dr7    = vmcb02->save.dr7;
	vmcb12->save.dr6    = svm->vcpu.arch.dr6;
	vmcb12->save.cpl    = vmcb02->save.cpl;

	vmcb12->control.int_state         = vmcb02->control.int_state;
	vmcb12->control.exit_code         = vmcb02->control.exit_code;
	vmcb12->control.exit_code_hi      = vmcb02->control.exit_code_hi;
	vmcb12->control.exit_info_1       = vmcb02->control.exit_info_1;
	vmcb12->control.exit_info_2       = vmcb02->control.exit_info_2;

	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
		nested_save_pending_event_to_vmcb12(svm, vmcb12);

	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
		vmcb12->control.next_rip  = vmcb02->control.next_rip;

	vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;

	if (!kvm_pause_in_guest(vcpu->kvm)) {
		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
	}

	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

	svm_switch_vmcb(svm, &svm->vmcb01);

	/*
	 * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
	 *
	 * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR:  If L1 doesn't
	 * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
	 * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
	 * virtual interrupt masking).  Raise KVM_REQ_EVENT to ensure that
	 * KVM re-requests an interrupt window if necessary, which implicitly
	 * copies this bits from vmcb02 to vmcb01.
	 *
	 * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
	 * is stored in vmcb02, but its value doesn't need to be copied from/to
	 * vmcb01 because it is copied from/to the virtual APIC's TPR register
	 * on each VM entry/exit.
	 *
	 * V_GIF:  If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
	 * V_GIF.  However, GIF is architecturally clear on each VM exit, thus
	 * there is no need to copy V_GIF from vmcb02 to vmcb01.
	 */
	if (!nested_exit_on_intr(svm))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
		svm_copy_lbrs(vmcb12, vmcb02);
		svm_update_lbrv(vcpu);
	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
		svm_copy_lbrs(vmcb01, vmcb02);
		svm_update_lbrv(vcpu);
	}

	if (vnmi) {
		if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
			vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
		else
			vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;

		if (vcpu->arch.nmi_pending) {
			vcpu->arch.nmi_pending--;
			vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
		} else {
			vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
		}
	}

	/*
	 * On vmexit the  GIF is set to false and
	 * no event can be injected in L1.
	 */
	svm_set_gif(svm, false);
	vmcb01->control.exit_int_info = 0;

	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
	if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
		vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
	}

	if (kvm_caps.has_tsc_control &&
	    vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
		svm_write_tsc_multiplier(vcpu);
	}

	svm->nested.ctl.nested_cr3 = 0;

	/*
	 * Restore processor state that had been saved in vmcb01
	 */
	kvm_set_rflags(vcpu, vmcb01->save.rflags);
	svm_set_efer(vcpu, vmcb01->save.efer);
	svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
	svm_set_cr4(vcpu, vmcb01->save.cr4);
	kvm_rax_write(vcpu, vmcb01->save.rax);
	kvm_rsp_write(vcpu, vmcb01->save.rsp);
	kvm_rip_write(vcpu, vmcb01->save.rip);

	svm->vcpu.arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(&svm->vcpu);

	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
				       vmcb12->control.exit_info_1,
				       vmcb12->control.exit_info_2,
				       vmcb12->control.exit_int_info,
				       vmcb12->control.exit_int_info_err,
				       KVM_ISA_SVM);

	kvm_vcpu_unmap(vcpu, &map, true);

	nested_svm_transition_tlb_flush(vcpu);

	nested_svm_uninit_mmu_context(vcpu);

	rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
	if (rc)
		return 1;

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * If we are here following the completion of a VMRUN that
	 * is being single-stepped, queue the pending #DB intercept
	 * right now so that it an be accounted for before we execute
	 * L1's next instruction.
	 */
	if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);

	/*
	 * Un-inhibit the AVIC right away, so that other vCPUs can start
	 * to benefit from it right away.
	 */
	if (kvm_apicv_activated(vcpu->kvm))
		__kvm_vcpu_update_apicv(vcpu);

	return 0;
}
static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
		return;

	kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}
int svm_allocate_nested(struct vcpu_svm *svm)
{
	struct page *vmcb02_page;

	if (svm->nested.initialized)
		return 0;

	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!vmcb02_page)
		return -ENOMEM;
	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);

	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
	if (!svm->nested.msrpm)
		goto err_free_vmcb02;
	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);

	svm->nested.initialized = true;
	return 0;

err_free_vmcb02:
	__free_page(vmcb02_page);
	return -ENOMEM;
}
void svm_free_nested(struct vcpu_svm *svm)
{
	if (!svm->nested.initialized)
		return;

	if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
		svm_switch_vmcb(svm, &svm->vmcb01);

	svm_vcpu_free_msrpm(svm->nested.msrpm);
	svm->nested.msrpm = NULL;

	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
	svm->nested.vmcb02.ptr = NULL;

	/*
	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
	 * some vmcb12 fields are not loaded if they are marked clean
	 * in the vmcb12, since in this case they are up to date already.
	 *
	 * When the vmcb02 is freed, this optimization becomes invalid.
	 */
	svm->nested.last_vmcb12_gpa = INVALID_GPA;

	svm->nested.initialized = false;
}
void svm_leave_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (is_guest_mode(vcpu)) {
		svm->nested.nested_run_pending = 0;
		svm->nested.vmcb12_gpa = INVALID_GPA;

		leave_guest_mode(vcpu);

		svm_switch_vmcb(svm, &svm->vmcb01);

		nested_svm_uninit_mmu_context(vcpu);
		vmcb_mark_all_dirty(svm->vmcb);

		if (kvm_apicv_activated(vcpu->kvm))
			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
	}

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write  = svm->vmcb->control.exit_info_1 & 1;
	mask   = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but need in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
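/*
 * Bitmap math used above (illustrative): each MSR consumes two bits of the
 * permission map (read, then write), and each 32-bit chunk covers 16 MSRs.
 * E.g. a read (write = 0) of an MSR whose low nibble is 5 tests bit
 * 2 * 5 + 0 = 10 of the chunk selected by svm_msrpm_offset().
 */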
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
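/*
 * IOPM math used above (illustrative): port 0x3f8 selects byte 0x7f of the
 * IOPM (port / 8) with start_bit 0 (port % 8); a one-byte access then tests
 * mask 0x1, a two-byte access 0x3 and a four-byte access 0xf, and a second
 * IOPM byte is read only when start_bit + size crosses a byte boundary.
 */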
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		/*
		 * Host-intercepted exceptions have been checked already in
		 * nested_svm_exit_special.  There is nothing to do here,
		 * the vmexit is injected by svm_check_nested_events.
		 */
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}
int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}
int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	if (to_svm(vcpu)->vmcb->save.cpl) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	return 0;
}
static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
					   u32 error_code)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
}
static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
{
	struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;

	vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
	vmcb->control.exit_code_hi = 0;

	if (ex->has_error_code)
		vmcb->control.exit_info_1 = ex->error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (ex->vector == PF_VECTOR) {
		if (ex->has_payload)
			vmcb->control.exit_info_2 = ex->payload;
		else
			vmcb->control.exit_info_2 = vcpu->arch.cr2;
	} else if (ex->vector == DB_VECTOR) {
		/* See kvm_check_and_inject_events().  */
		kvm_deliver_exception_payload(vcpu, ex);

		if (vcpu->arch.dr7 & DR7_GD) {
			vcpu->arch.dr7 &= ~DR7_GD;
			kvm_update_dr7(vcpu);
		}
	} else {
		WARN_ON(ex->has_payload);
	}

	nested_svm_vmexit(svm);
}
static inline bool nested_exit_on_init(struct vcpu_svm *svm)
{
	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
}
static int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct vcpu_svm *svm = to_svm(vcpu);
	/*
	 * Only a pending nested run blocks a pending exception.  If there is a
	 * previously injected event, the pending exception occurred while said
	 * event was being delivered and thus needs to be handled.
	 */
	bool block_nested_exceptions = svm->nested.nested_run_pending;
	/*
	 * New events (not exceptions) are only recognized at instruction
	 * boundaries.  If an event needs reinjection, then KVM is handling a
	 * VM-Exit that occurred _during_ instruction execution; new events are
	 * blocked until the instruction completes.
	 */
	bool block_nested_events = block_nested_exceptions ||
				   kvm_event_needs_reinjection(vcpu);

	if (lapic_in_kernel(vcpu) &&
	    test_bit(KVM_APIC_INIT, &apic->pending_events)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_init(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
		return 0;
	}

	if (vcpu->arch.exception_vmexit.pending) {
		if (block_nested_exceptions)
			return -EBUSY;
		nested_svm_inject_exception_vmexit(vcpu);
		return 0;
	}

	if (vcpu->arch.exception.pending) {
		if (block_nested_exceptions)
			return -EBUSY;
		return 0;
	}

#ifdef CONFIG_KVM_SMM
	if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_smi(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
		return 0;
	}
#endif

	if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_nmi(svm))
			return 0;
		nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
		return 0;
	}

	if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
		if (block_nested_events)
			return -EBUSY;
		if (!nested_exit_on_intr(svm))
			return 0;
		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
		nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
		return 0;
	}

	return 0;
}
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_NPF:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);

		if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
		    excp_bits)
			return NESTED_EXIT_HOST;
		else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
			 svm->vcpu.arch.apf.host_apf_flags)
			/* Trap async PF even if not shadowing */
			return NESTED_EXIT_HOST;
		break;
	}
	case SVM_EXIT_VMMCALL:
		/* Hyper-V L2 TLB flush hypercall is handled by L0 */
		if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
		    nested_svm_l2_tlb_flush_enabled(vcpu) &&
		    kvm_hv_is_tlb_flush_hcall(vcpu))
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	vcpu->arch.tsc_scaling_ratio =
		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
					       svm->tsc_ratio_msr);
	svm_write_tsc_multiplier(vcpu);
}
/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
					      struct vmcb_ctrl_area_cached *from)
{
	unsigned int i;

	memset(dst, 0, sizeof(struct vmcb_control_area));

	for (i = 0; i < MAX_INTERCEPT; i++)
		dst->intercepts[i] = from->intercepts[i];

	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	dst->asid                 = from->asid;
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->next_rip             = from->next_rip;
	dst->nested_cr3           = from->nested_cr3;
	dst->virt_ext             = from->virt_ext;
	dst->pause_filter_count   = from->pause_filter_count;
	dst->pause_filter_thresh  = from->pause_filter_thresh;
	/* 'clean' and 'hv_enlightenments' are not changed by KVM */
}
static int svm_get_nested_state(struct kvm_vcpu *vcpu,
				struct kvm_nested_state __user *user_kvm_nested_state,
				u32 user_data_size)
{
	struct vcpu_svm *svm;
	struct vmcb_control_area *ctl;
	unsigned long r;
	struct kvm_nested_state kvm_state = {
		.flags = 0,
		.format = KVM_STATE_NESTED_FORMAT_SVM,
		.size = sizeof(kvm_state),
	};
	struct vmcb __user *user_vmcb = (struct vmcb __user *)
		&user_kvm_nested_state->data.svm[0];

	if (!vcpu)
		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;

	svm = to_svm(vcpu);

	if (user_data_size < kvm_state.size)
		goto out;

	/* First fill in the header and copy it out.  */
	if (is_guest_mode(vcpu)) {
		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;

		if (svm->nested.nested_run_pending)
			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
	}

	if (gif_set(svm))
		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;

	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
		return -EFAULT;

	if (!is_guest_mode(vcpu))
		goto out;

	/*
	 * Copy over the full size of the VMCB rather than just the size
	 * of the structs.
	 */
	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
		return -EFAULT;

	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
	if (!ctl)
		return -ENOMEM;

	nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
	r = copy_to_user(&user_vmcb->control, ctl,
			 sizeof(user_vmcb->control));
	kfree(ctl);
	if (r)
		return -EFAULT;

	if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
			 sizeof(user_vmcb->save)))
		return -EFAULT;
out:
	return kvm_state.size;
}
static int svm_set_nested_state(struct kvm_vcpu *vcpu,
				struct kvm_nested_state __user *user_kvm_nested_state,
				struct kvm_nested_state *kvm_state)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb __user *user_vmcb = (struct vmcb __user *)
		&user_kvm_nested_state->data.svm[0];
	struct vmcb_control_area *ctl;
	struct vmcb_save_area *save;
	struct vmcb_save_area_cached save_cached;
	struct vmcb_ctrl_area_cached ctl_cached;
	unsigned long cr0;
	int ret;

	BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
		     KVM_STATE_NESTED_SVM_VMCB_SIZE);

	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
		return -EINVAL;

	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
				 KVM_STATE_NESTED_RUN_PENDING |
				 KVM_STATE_NESTED_GIF_SET))
		return -EINVAL;

	/*
	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
	 */
	if (!(vcpu->arch.efer & EFER_SVME)) {
		/* GIF=1 and no guest mode are required if SVME=0.  */
		if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
			return -EINVAL;
	}

	/* SMM temporarily disables SVM, so we cannot be in guest mode.  */
	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
		return -EINVAL;

	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
		svm_leave_nested(vcpu);
		svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
		return 0;
	}

	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
		return -EINVAL;
	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
		return -EINVAL;

	ret  = -ENOMEM;
	ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL_ACCOUNT);
	save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
	if (!ctl || !save)
		goto out_free;

	ret = -EFAULT;
	if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
		goto out_free;
	if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
		goto out_free;

	ret = -EINVAL;
	__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
	if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
		goto out_free;

	/*
	 * Processor state contains L2 state.  Check that it is
	 * valid for guest mode (see nested_vmcb_check_save).
	 */
	cr0 = kvm_read_cr0(vcpu);
	if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
		goto out_free;

	/*
	 * Validate host state saved from before VMRUN (see
	 * nested_svm_check_permissions).
	 */
	__nested_copy_vmcb_save_to_cache(&save_cached, save);
	if (!(save->cr0 & X86_CR0_PG) ||
	    !(save->cr0 & X86_CR0_PE) ||
	    (save->rflags & X86_EFLAGS_VM) ||
	    !__nested_vmcb_check_save(vcpu, &save_cached))
		goto out_free;

	/*
	 * All checks done, we can enter guest mode. Userspace provides
	 * vmcb12.control, which will be combined with L1 and stored into
	 * vmcb02, and the L1 save state which we store in vmcb01.
	 * L2 registers if needed are moved from the current VMCB to VMCB02.
	 */

	if (is_guest_mode(vcpu))
		svm_leave_nested(vcpu);
	else
		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;

	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));

	svm->nested.nested_run_pending =
		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);

	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;

	svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
	nested_copy_vmcb_control_to_cache(svm, ctl);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);
	nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base);

	/*
	 * While the nested guest CR3 is already checked and set by
	 * KVM_SET_SREGS, it was set when nested state was yet loaded,
	 * thus MMU might not be initialized correctly.
	 * Set it again to fix this.
	 */

	ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
				  nested_npt_enabled(svm), false);
	if (WARN_ON_ONCE(ret))
		goto out_free;

	svm->nested.force_msr_bitmap_recalc = true;

	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
	ret = 0;
out_free:
	kfree(save);
	kfree(ctl);

	return ret;
}
static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (WARN_ON(!is_guest_mode(vcpu)))
		return true;

	if (!vcpu->arch.pdptrs_from_userspace &&
	    !nested_npt_enabled(svm) && is_pae_paging(vcpu))
		/*
		 * Reload the guest's PDPTRs since after a migration
		 * the guest CR3 might be restored prior to setting the nested
		 * state which can lead to a load of wrong PDPTRs.
		 */
		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
			return false;

	if (!nested_svm_vmrun_msrpm(svm)) {
		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		vcpu->run->internal.suberror =
			KVM_INTERNAL_ERROR_EMULATION;
		vcpu->run->internal.ndata = 0;
		return false;
	}

	if (kvm_hv_verify_vp_assist(vcpu))
		return false;

	return true;
}
struct kvm_x86_nested_ops svm_nested_ops = {
	.leave_nested = svm_leave_nested,
	.is_exception_vmexit = nested_svm_is_exception_vmexit,
	.check_events = svm_check_nested_events,
	.triple_fault = nested_svm_triple_fault,
	.get_nested_state_pages = svm_get_nested_state_pages,
	.get_state = svm_get_nested_state,
	.set_state = svm_set_nested_state,
	.hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush,
};