// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"

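/*
 * Report a page fault to L1 as a nested page fault (#NPF) vmexit, with the
 * error code in the low bits of exit_info_1 and the faulting address in
 * exit_info_2.
 */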
static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

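/* Read a PDPTE from L1's nested page table root, for PAE paging. */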
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}

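/*
 * Point the MMU at L1's nested page tables: guest_mmu shadows the nested CR3
 * provided by L1, while nested_mmu is used to walk L2's own page tables.
 */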
static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

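/*
 * Recompute the intercepts that are active while L2 runs: start from L1's
 * intercepts saved in hsave and OR in the ones requested by the nested VMCB.
 */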
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}

static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst  = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr         = from->intercept_cr;
	dst->intercept_dr         = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept            = from->intercept;
	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	dst->asid                 = from->asid;
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->nested_cr3           = from->nested_cr3;
	dst->virt_ext             = from->virt_ext;
	dst->pause_filter_count   = from->pause_filter_count;
	dst->pause_filter_thresh  = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the MSR permission bitmaps of KVM and the
	 * nested VMCB. It is optimized in that it only merges the parts
	 * where the KVM MSR permission bitmap may contain zero bits.
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p      = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

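/* Basic consistency checks on the nested VMCB before it is used for VMRUN. */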
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}

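/*
 * Load the L2 state described by the nested VMCB into the hardware VMCB and
 * the vcpu, merge the control fields, and switch the vcpu into guest mode.
 */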
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vcpu.arch.dr6  = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept            = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu, true);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request. However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	enable_gif(svm);
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}

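/*
 * Emulate VMRUN: map and validate the nested VMCB, save the current L1 state
 * into hsave, and enter the nested guest.
 */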
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	vmcb_gpa = svm->vmcb->save.rax;

	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1  = 0;
		nested_vmcb->control.exit_info_2  = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs
	 */
	hsave->save.es     = vmcb->save.es;
	hsave->save.cs     = vmcb->save.cs;
	hsave->save.ss     = vmcb->save.ss;
	hsave->save.ds     = vmcb->save.ds;
	hsave->save.gdtr   = vmcb->save.gdtr;
	hsave->save.idtr   = vmcb->save.idtr;
	hsave->save.efer   = svm->vcpu.arch.efer;
	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4    = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip    = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp    = vmcb->save.rsp;
	hsave->save.rax    = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3    = vmcb->save.cr3;
	else
		hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1  = 0;
		svm->vmcb->control.exit_info_2  = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}

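/* Copy the state handled by VMLOAD/VMSAVE from one VMCB to another. */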
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

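/*
 * Emulate #VMEXIT: copy the L2 state and exit information back into the
 * nested VMCB, restore the L1 state saved in hsave, and leave guest mode.
 */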
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es     = vmcb->save.es;
	nested_vmcb->save.cs     = vmcb->save.cs;
	nested_vmcb->save.ss     = vmcb->save.ss;
	nested_vmcb->save.ds     = vmcb->save.ds;
	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
	nested_vmcb->save.idtr   = vmcb->save.idtr;
	nested_vmcb->save.efer   = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2    = vmcb->save.cr2;
	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip    = vmcb->save.rip;
	nested_vmcb->save.rsp    = vmcb->save.rsp;
	nested_vmcb->save.rax    = vmcb->save.rax;
	nested_vmcb->save.dr7    = vmcb->save.dr7;
	nested_vmcb->save.dr6    = svm->vcpu.arch.dr6;
	nested_vmcb->save.cpl    = vmcb->save.cpl;

	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
	nested_vmcb->control.int_state         = vmcb->control.int_state;
	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip  = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * exit_int_info and event_inj can't both be valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info     = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl       = 0;
	nested_vmcb->control.event_inj     = 0;
	nested_vmcb->control.event_inj_err = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}

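/*
 * Consult L1's MSR permission bitmap to decide whether an intercepted MSR
 * access must be reflected to L1 or can be handled by the host.
 */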
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write  = svm->vmcb->control.exit_info_1 & 1;
	mask   = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but we need it in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

/* #DB exceptions for our internal use must not cause a vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6 = svm->vmcb->save.dr6;

	/* Always catch it and pass it to userspace if debugging. */
	if (svm->vcpu.guest_debug &
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
		return NESTED_EXIT_HOST;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		goto reflected_db;

	/* if it's not a singlestep exception, it's not ours */
	if (!(dr6 & DR6_BS))
		goto reflected_db;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		goto reflected_db;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;

reflected_db:
	/*
	 * Synchronize guest DR6 here just like in kvm_deliver_exception_payload;
	 * it will be moved into the nested VMCB by nested_svm_vmexit. Once
	 * exceptions are moved to svm_check_nested_events, all this stuff
	 * will just go away and we can just return NESTED_EXIT_HOST
	 * unconditionally. db_interception will queue the exception, which
	 * will be processed by svm_check_nested_events if a nested vmexit is
	 * required, and we will just use kvm_deliver_exception_payload to copy
	 * the payload to DR6 before vmexit.
	 */
	WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT);
	svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM);
	svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1;
	return NESTED_EXIT_DONE;
}

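/*
 * Consult L1's IO permission bitmap to decide whether an intercepted IO
 * instruction must be reflected to L1 or can be handled by the host.
 */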
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

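/*
 * Decide whether the current vmexit should be reflected to L1
 * (NESTED_EXIT_DONE) or handled by the host (NESTED_EXIT_HOST).
 */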
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR &&
				 svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP)
				vmexit = NESTED_EXIT_HOST;
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* async page fault always causes a vmexit */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

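/*
 * SVM instructions require EFER.SVME, paging and CPL 0; queue #UD or inject
 * #GP and return 1 if any of these checks fails.
 */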
int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}

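/*
 * Check whether a queued exception should cause a vmexit to L1; if so, fill
 * in the exit information and flag that the vmexit is still required.
 */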
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;
	return vmexit;
}

static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit() gets called afterwards from handle_exit */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}

static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}

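/*
 * Check for pending events that should cause a vmexit from L2 to L1, such
 * as an external interrupt that L1 has chosen to intercept.
 */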
int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}

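/*
 * Exits that L0 always handles itself regardless of L1's intercepts, such as
 * physical interrupts, NMIs and machine check exceptions.
 */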
int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}