// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}
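/*
 * Layout note on the #NPF exit synthesized above: for SVM_EXIT_NPF the
 * low 32 bits of exit_info_1 carry a #PF-style error code and the high
 * 32 bits encode the cause of the nested fault (which the TODO above
 * only approximates), while exit_info_2 carries the faulting
 * guest-physical address.
 */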
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}
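/*
 * Each PAE PDPTE is 8 bytes, so the read above fetches the 8 bytes at
 * offset "index * 8" within the page containing the nested CR3; e.g.
 * index 3 reads bytes 24..31.  __sme_clr() strips the SME C-bit before
 * the GPA is converted to a GFN.
 */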
static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu               = &vcpu->arch.nested_mmu;
}
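/*
 * With nested NPT active, vcpu->arch.mmu (guest_mmu) handles the
 * GPA->HPA stage by shadowing L1's nested page tables, while
 * vcpu->arch.walk_mmu (nested_mmu) walks L2's own page tables for the
 * GVA->GPA stage.
 */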
static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}
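/*
 * Net effect while L2 runs: the active intercept bitmaps are L0's
 * intercepts (from hsave), minus the CR8/VINTR/VMMCALL bits dropped
 * above, OR-ed with L1's cached intercepts, so a bit set by either
 * hypervisor still forces a #VMEXIT.
 */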
static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst  = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr         = from->intercept_cr;
	dst->intercept_dr         = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept            = from->intercept;
	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	dst->asid                 = from->asid;
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->nested_cr3           = from->nested_cr3;
	dst->virt_ext             = from->virt_ext;
	dst->pause_filter_count   = from->pause_filter_count;
	dst->pause_filter_thresh  = from->pause_filter_thresh;
}
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the msr permission bitmaps of kvm and the
	 * nested vmcb. It is optimized in that it only merges the parts where
	 * the kvm msr permission bitmap may contain zero bits.
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p      = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}
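/*
 * msrpm_offsets[] entries are offsets into the MSR permission map in
 * 32-bit words, so "p * 4" converts to a byte offset.  Each MSR owns two
 * adjacent bits (read intercept, write intercept) in the map, and OR-ing
 * L0's word with L1's guarantees that an intercept requested by either
 * level stays set.
 */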
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}
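/*
 * These mirror a subset of the consistency checks real hardware performs
 * at VMRUN; when one fails, the caller reflects SVM_EXIT_ERR into the
 * nested VMCB instead of entering L2.
 */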
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept            = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu, true);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here.
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request. However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}
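/*
 * VMRUN emulation, in order: map and validate the nested VMCB, stash L1
 * state in hsave, load L2's save/control state via enter_svm_guest_mode(),
 * then merge the MSR permission maps.
 */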
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	vmcb_gpa = svm->vmcb->save.rax;

	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1  = 0;
		nested_vmcb->control.exit_info_2  = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs.
	 */
	hsave->save.es     = vmcb->save.es;
	hsave->save.cs     = vmcb->save.cs;
	hsave->save.ss     = vmcb->save.ss;
	hsave->save.ds     = vmcb->save.ds;
	hsave->save.gdtr   = vmcb->save.gdtr;
	hsave->save.idtr   = vmcb->save.idtr;
	hsave->save.efer   = svm->vcpu.arch.efer;
	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4    = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp    = vmcb->save.rsp;
	hsave->save.rax    = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3    = vmcb->save.cr3;
	else
		hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1  = 0;
		svm->vmcb->control.exit_info_2  = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}
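/*
 * The fields copied below are the ones VMLOAD and VMSAVE move between the
 * VMCB and the processor: the segment registers VMRUN does not handle
 * (FS, GS, TR, LDTR), KERNEL_GS_BASE, the SYSCALL MSRs (STAR, LSTAR,
 * CSTAR, SFMASK) and the SYSENTER MSRs.
 */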
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}
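/*
 * nested_svm_vmexit() is the inverse of enter_svm_guest_mode(): the L2
 * state accumulated in the active VMCB is written back to the nested VMCB
 * for L1 to inspect, then L1's saved state is restored from hsave.
 */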
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es     = vmcb->save.es;
	nested_vmcb->save.cs     = vmcb->save.cs;
	nested_vmcb->save.ss     = vmcb->save.ss;
	nested_vmcb->save.ds     = vmcb->save.ds;
	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
	nested_vmcb->save.idtr   = vmcb->save.idtr;
	nested_vmcb->save.efer   = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2    = vmcb->save.cr2;
	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip    = vmcb->save.rip;
	nested_vmcb->save.rsp    = vmcb->save.rsp;
	nested_vmcb->save.rax    = vmcb->save.rax;
	nested_vmcb->save.dr7    = vmcb->save.dr7;
	nested_vmcb->save.dr6    = vmcb->save.dr6;
	nested_vmcb->save.cpl    = vmcb->save.cpl;

	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
	nested_vmcb->control.int_state         = vmcb->control.int_state;
	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip  = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * Exit_int_info and event_inj can't be both valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info     = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl       = 0;
	nested_vmcb->control.event_inj     = 0;
	nested_vmcb->control.event_inj_err = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);

	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write  = svm->vmcb->control.exit_info_1 & 1;
	mask   = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32-bit units, but we need it in byte units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
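/*
 * Example: a RDMSR of MSR 0xc0000082 (LSTAR) has write = 0 and
 * msr & 0xf = 2, so the code above tests bit 2 * 2 + 0 = 4 of the 32-bit
 * word that svm_msrpm_offset() locates for that MSR.
 */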
/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		return NESTED_EXIT_DONE;

	/* if it's not a singlestep exception, it's not ours */
	if (kvm_get_dr(&svm->vcpu, 6, &dr6))
		return NESTED_EXIT_DONE;
	if (!(dr6 & DR6_BS))
		return NESTED_EXIT_DONE;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		return NESTED_EXIT_DONE;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;
}
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
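/*
 * Example: a one-byte access to port 0x3f8 gives gpa = vmcb_iopm + 0x7f,
 * start_bit = 0, iopm_len = 1 and mask = 0x1, so a single permission bit
 * is tested; a 4-byte access at port 0x3fe straddles a byte boundary
 * (start_bit = 6, iopm_len = 2, mask = 0x3c0).
 */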
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* async page faults always cause a vmexit */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}
int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}
int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;

	return vmexit;
}
static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit() gets called afterwards, from handle_exit() */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}
static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}
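/*
 * INTERCEPT_INTR is bit 0 of the intercept word, so the "& 1ULL" above
 * asks whether L1 wants physical interrupts to cause a #VMEXIT.
 */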
int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}