// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"
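
/*
 * Reflect a nested page fault that L0 hit while running L2 into an
 * SVM_EXIT_NPF #VMEXIT for L1.  Used as the inject_page_fault callback
 * of the nested NPT MMU.
 */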
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}
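
/*
 * MMU callbacks used while L2 runs with nested paging: read the PDPTEs and
 * the guest page table root from the nested_cr3 that L1 supplied, not from
 * the vCPU's own CR3.
 */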
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}
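
/*
 * Switch the vCPU to the nested-NPT MMU: guest_mmu walks the nested page
 * tables that L1 provided (via the callbacks above), while walk_mmu uses
 * the nested_mmu for the L2 guest's own paging.
 */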
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu               = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
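
/*
 * Recompute the active intercepts.  Outside guest mode there is nothing to
 * merge; in guest mode the L1 (hsave) and L2 (svm->nested) intercepts are
 * combined, minus a few bits that only matter to L0.
 */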
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}
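
/*
 * Field-by-field copy of one VMCB control area into another (intercepts,
 * exit information, interrupt and pause-filter state).
 */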
static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst  = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr         = from->intercept_cr;
	dst->intercept_dr         = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept            = from->intercept;
	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	dst->asid                 = from->asid;
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->nested_cr3           = from->nested_cr3;
	dst->virt_ext             = from->virt_ext;
	dst->pause_filter_count   = from->pause_filter_count;
	dst->pause_filter_thresh  = from->pause_filter_thresh;
}
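
/*
 * Returns false if the merged MSR permission bitmap could not be built
 * (e.g. the guest's bitmap could not be read); the VMRUN emulation then
 * fails the nested entry with SVM_EXIT_ERR (see nested_svm_vmrun).
 */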
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the msr permission bitmaps of kvm and the
	 * nested vmcb. It is optimized in that it only merges the parts where
	 * the kvm msr permission bitmap may contain zero bits
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p      = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}
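
/*
 * Consistency checks on the VMCB provided by L1; a failure here makes the
 * emulated VMRUN report SVM_EXIT_ERR instead of entering L2.
 */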
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}
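
/*
 * Load L2 state from the nested VMCB and switch the vCPU into guest mode.
 * The caller is expected to have saved the relevant L1 state into hsave
 * beforehand (see nested_svm_vmrun).
 */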
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept            = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu, true);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take affect here
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request.  However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	enable_gif(svm);
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}
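
/*
 * Emulate VMRUN: map the VMCB that RAX points to, validate it, save the
 * current L1 state into hsave and enter L2.  An unmappable VMCB address
 * injects #GP; a failed consistency check reports SVM_EXIT_ERR through the
 * guest's VMCB instead of entering L2.
 */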
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	vmcb_gpa = svm->vmcb->save.rax;

	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1  = 0;
		nested_vmcb->control.exit_info_2  = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs
	 */
	hsave->save.es     = vmcb->save.es;
	hsave->save.cs     = vmcb->save.cs;
	hsave->save.ss     = vmcb->save.ss;
	hsave->save.ds     = vmcb->save.ds;
	hsave->save.gdtr   = vmcb->save.gdtr;
	hsave->save.idtr   = vmcb->save.idtr;
	hsave->save.efer   = svm->vcpu.arch.efer;
	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4    = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp    = vmcb->save.rsp;
	hsave->save.rax    = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3    = vmcb->save.cr3;
	else
		hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1  = 0;
		svm->vmcb->control.exit_info_2  = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}
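
/*
 * Copy the state handled by VMLOAD/VMSAVE (FS/GS/TR/LDTR, KERNEL_GS_BASE,
 * SYSCALL and SYSENTER MSRs) from one VMCB to another.
 */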
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
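
/*
 * Emulate #VMEXIT from L2 to L1: copy the current L2 state and exit
 * information into the guest's VMCB, restore the L1 state saved in hsave,
 * and leave guest mode.
 */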
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es     = vmcb->save.es;
	nested_vmcb->save.cs     = vmcb->save.cs;
	nested_vmcb->save.ss     = vmcb->save.ss;
	nested_vmcb->save.ds     = vmcb->save.ds;
	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
	nested_vmcb->save.idtr   = vmcb->save.idtr;
	nested_vmcb->save.efer   = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2    = vmcb->save.cr2;
	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip    = vmcb->save.rip;
	nested_vmcb->save.rsp    = vmcb->save.rsp;
	nested_vmcb->save.rax    = vmcb->save.rax;
	nested_vmcb->save.dr7    = vmcb->save.dr7;
	nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
	nested_vmcb->save.cpl    = vmcb->save.cpl;

	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
	nested_vmcb->control.int_state         = vmcb->control.int_state;
	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip  = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * Exit_int_info and event_inj can't be both valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info     = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl           = 0;
	nested_vmcb->control.event_inj         = 0;
	nested_vmcb->control.event_inj_err     = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}
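
/*
 * Consult the guest (L1) MSR permission bitmap to decide whether an MSR
 * intercept taken while running L2 must be forwarded to L1 or handled by L0.
 */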
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write  = svm->vmcb->control.exit_info_1 & 1;
	mask   = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but need in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6 = svm->vmcb->save.dr6;

	/* Always catch it and pass it to userspace if debugging.  */
	if (svm->vcpu.guest_debug &
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
		return NESTED_EXIT_HOST;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		goto reflected_db;

	/* if it's not a singlestep exception, it's not ours */
	if (!(dr6 & DR6_BS))
		goto reflected_db;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		goto reflected_db;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;

reflected_db:
	/*
	 * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
	 * it will be moved into the nested VMCB by nested_svm_vmexit.  Once
	 * exceptions will be moved to svm_check_nested_events, all this stuff
	 * will just go away and we could just return NESTED_EXIT_HOST
	 * unconditionally.  db_interception will queue the exception, which
	 * will be processed by svm_check_nested_events if a nested vmexit is
	 * required, and we will just use kvm_deliver_exception_payload to copy
	 * the payload to DR6 before vmexit.
	 */
	WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
	svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
	svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
	return NESTED_EXIT_DONE;
}
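
/*
 * Consult the guest (L1) I/O permission bitmap to decide whether an IOIO
 * intercept taken while running L2 must be reflected to L1.
 */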
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
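
/*
 * Decide, based on the intercepts cached from the nested VMCB, whether the
 * current exit belongs to L1 (NESTED_EXIT_DONE) or to L0 (NESTED_EXIT_HOST).
 */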
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
				 svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
				vmexit = NESTED_EXIT_HOST;
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* async page fault always cause vmexit */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}
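
/*
 * Handle an intercept taken while L2 is running: if L1 asked for it,
 * emulate the nested #VMEXIT and report the decision to the caller.
 */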
int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}
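
/*
 * Called when an exception is queued while L2 is active: if L1 intercepts
 * the vector, build the SVM_EXIT_EXCP_BASE + nr exit and defer the actual
 * #VMEXIT to the exit handler via exit_required.
 */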
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;

	return vmexit;
}
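
/*
 * Queue an SVM_EXIT_INTR #VMEXIT to L1; the actual exit is performed later
 * from the exit handler via exit_required.
 */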
static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit this gets called afterwards from handle_exit */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}

static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}
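
/*
 * Check for pending events that must force an exit from L2 to L1 before
 * re-entering the guest; -EBUSY means the check has to be retried once
 * event reinjection or the pending #VMEXIT has completed.
 */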
int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}
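
/*
 * Exits that L0 must look at first, regardless of L1's intercepts: machine
 * checks, NPFs when nested paging is in use, and page faults while shadowing
 * (except async page faults).
 */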
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}