// SPDX-License-Identifier: GPL-2.0
/*
 * kvm nested virtualization support for s390x
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
 */
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>

#include <asm/gmap.h>
#include <asm/mmu_context.h>
#include <asm/sclp.h>
#include <asm/nmi.h>
#include <asm/dis.h>
#include <asm/fpu/api.h>
#include <asm/facility.h>
#include "kvm-s390.h"
#include "gaccess.h"
struct vsie_page {
	struct kvm_s390_sie_block scb_s;	/* 0x0000 */
	/*
	 * the backup info for machine check. ensure it's at
	 * the same offset as that in struct sie_page!
	 */
	struct mcck_volatile_info mcck_info;	/* 0x0200 */
	/*
	 * The pinned original scb. Be aware that other VCPUs can modify
	 * it while we read from it. Values that are used for conditions or
	 * are reused conditionally, should be accessed via READ_ONCE.
	 */
	struct kvm_s390_sie_block *scb_o;	/* 0x0218 */
	/* the shadow gmap in use by the vsie_page */
	struct gmap *gmap;			/* 0x0220 */
	/* address of the last reported fault to guest2 */
	unsigned long fault_addr;		/* 0x0228 */
	/* calculated guest addresses of satellite control blocks */
	gpa_t sca_gpa;				/* 0x0230 */
	gpa_t itdba_gpa;			/* 0x0238 */
	gpa_t gvrd_gpa;				/* 0x0240 */
	gpa_t riccbd_gpa;			/* 0x0248 */
	gpa_t sdnx_gpa;				/* 0x0250 */
	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
};
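/*
 * Note: struct vsie_page must stay exactly one page in size; this is
 * asserted via BUILD_BUG_ON() in kvm_s390_handle_vsie() below, and the
 * offset comments above document the fixed layout.
 */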
/* trigger a validity icpt for the given scb */
static int set_validity_icpt(struct kvm_s390_sie_block *scb,
			     __u16 reason_code)
{
	scb->ipb = ((__u32) reason_code) << 16;
	scb->icptcode = ICPT_VALIDITY;
	return 1;
}
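/*
 * Note: set_validity_icpt() places the reason code in the upper 16 bits
 * of the ipb, which is where guest 2 expects the validity interception
 * reason to be reported. The return value of 1 follows the convention
 * "> 0: give control back to guest 2" used throughout this file.
 */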
/* mark the prefix as unmapped, this will block the VSIE */
static void prefix_unmapped(struct vsie_page *vsie_page)
{
	atomic_or(PROG_REQUEST, &vsie_page->scb_s.prog20);
}
/* mark the prefix as unmapped and wait until the VSIE has been left */
static void prefix_unmapped_sync(struct vsie_page *vsie_page)
{
	prefix_unmapped(vsie_page);
	if (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		atomic_or(CPUSTAT_STOP_INT, &vsie_page->scb_s.cpuflags);
	while (vsie_page->scb_s.prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* mark the prefix as mapped, this will allow the VSIE to run */
static void prefix_mapped(struct vsie_page *vsie_page)
{
	atomic_andnot(PROG_REQUEST, &vsie_page->scb_s.prog20);
}
/* test if the prefix is mapped into the gmap shadow */
static int prefix_is_mapped(struct vsie_page *vsie_page)
{
	return !(atomic_read(&vsie_page->scb_s.prog20) & PROG_REQUEST);
}
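/*
 * Note: the prefix_*() helpers above implement a small handshake via two
 * bits: PROG_REQUEST (prog20) is set by us to block (re)entry into the
 * vSIE, while PROG_IN_SIE (prog0c) is maintained by the SIE entry code
 * while the shadow scb is actually executing. prefix_unmapped_sync()
 * therefore spins until the hardware has really left SIE.
 */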
/* copy the updated intervention request bits into the shadow scb */
static void update_intervention_requests(struct vsie_page *vsie_page)
{
	const int bits = CPUSTAT_STOP_INT | CPUSTAT_IO_INT | CPUSTAT_EXT_INT;
	int cpuflags;

	cpuflags = atomic_read(&vsie_page->scb_o->cpuflags);
	atomic_andnot(bits, &vsie_page->scb_s.cpuflags);
	atomic_or(cpuflags & bits, &vsie_page->scb_s.cpuflags);
}
/* shadow (filter and validate) the cpuflags */
static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int newflags, cpuflags = atomic_read(&scb_o->cpuflags);

	/* we don't allow ESA/390 guests */
	if (!(cpuflags & CPUSTAT_ZARCH))
		return set_validity_icpt(scb_s, 0x0001U);

	if (cpuflags & (CPUSTAT_RRF | CPUSTAT_MCDS))
		return set_validity_icpt(scb_s, 0x0001U);
	else if (cpuflags & (CPUSTAT_SLSV | CPUSTAT_SLSR))
		return set_validity_icpt(scb_s, 0x0007U);

	/* intervention requests will be set later */
	newflags = CPUSTAT_ZARCH;
	if (cpuflags & CPUSTAT_GED && test_kvm_facility(vcpu->kvm, 8))
		newflags |= CPUSTAT_GED;
	if (cpuflags & CPUSTAT_GED2 && test_kvm_facility(vcpu->kvm, 78)) {
		if (cpuflags & CPUSTAT_GED)
			return set_validity_icpt(scb_s, 0x0001U);
		newflags |= CPUSTAT_GED2;
	}
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GPERE))
		newflags |= cpuflags & CPUSTAT_P;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_GSLS))
		newflags |= cpuflags & CPUSTAT_SM;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
		newflags |= cpuflags & CPUSTAT_IBS;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
		newflags |= cpuflags & CPUSTAT_KSS;

	atomic_set(&scb_s->cpuflags, newflags);
	return 0;
}
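/*
 * Note: prepare_cpuflags() whitelists rather than copies: any flag not
 * explicitly carried over (GED, GED2, P, SM, IBS, KSS) is dropped, so
 * guest 3 never runs with a cpuflag that guest 2's CPU model does not
 * provide.
 */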
/* Copy to APCB FORMAT1 from APCB FORMAT0 */
static int setup_apcb10(struct kvm_vcpu *vcpu, struct kvm_s390_apcb1 *apcb_s,
			unsigned long crycb_gpa, struct kvm_s390_apcb1 *apcb_h)
{
	struct kvm_s390_apcb0 tmp;
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);

	if (read_guest_real(vcpu, apcb_gpa, &tmp,
			    sizeof(struct kvm_s390_apcb0)))
		return -EFAULT;

	apcb_s->apm[0] = apcb_h->apm[0] & tmp.apm[0];
	apcb_s->aqm[0] = apcb_h->aqm[0] & tmp.aqm[0] & 0xffff000000000000UL;
	apcb_s->adm[0] = apcb_h->adm[0] & tmp.adm[0] & 0xffff000000000000UL;

	return 0;
}
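/*
 * Note: a format-0 APCB carries a 64-bit AP mask but only 16 valid
 * queue/domain bits, hence the 0xffff000000000000UL masking of aqm/adm
 * above when widening format-0 into format-1.
 */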
/**
 * setup_apcb00 - Copy to APCB FORMAT0 from APCB FORMAT0
 * @vcpu: pointer to the virtual CPU
 * @apcb_s: pointer to start of apcb in the shadow crycb
 * @crycb_gpa: guest physical address to start of original guest crycb
 * @apcb_h: pointer to start of apcb in the guest1
 *
 * Returns 0, or -EFAULT on error reading the guest apcb
 */
static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
			unsigned long crycb_gpa, unsigned long *apcb_h)
{
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb0);

	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
			    sizeof(struct kvm_s390_apcb0)))
		return -EFAULT;

	bitmap_and(apcb_s, apcb_s, apcb_h,
		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0));

	return 0;
}
/**
 * setup_apcb11 - Copy the FORMAT1 APCB from the guest to the shadow CRYCB
 * @vcpu: pointer to the virtual CPU
 * @apcb_s: pointer to start of apcb in the shadow crycb
 * @crycb_gpa: guest physical address to start of original guest crycb
 * @apcb_h: pointer to start of apcb in the host
 *
 * Returns 0, or -EFAULT on error reading the guest apcb
 */
static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s,
			unsigned long crycb_gpa,
			unsigned long *apcb_h)
{
	unsigned long apcb_gpa;

	apcb_gpa = crycb_gpa + offsetof(struct kvm_s390_crypto_cb, apcb1);

	if (read_guest_real(vcpu, apcb_gpa, apcb_s,
			    sizeof(struct kvm_s390_apcb1)))
		return -EFAULT;

	bitmap_and(apcb_s, apcb_s, apcb_h,
		   BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1));

	return 0;
}
/**
 * setup_apcb - Create a shadow copy of the apcb.
 * @vcpu: pointer to the virtual CPU
 * @crycb_s: pointer to shadow crycb
 * @crycb_gpa: guest physical address of original guest crycb
 * @crycb_h: pointer to the host crycb
 * @fmt_o: format of the original guest crycb.
 * @fmt_h: format of the host crycb.
 *
 * Checks the compatibility between the guest and host crycb and calls the
 * appropriate copy function.
 *
 * Return 0 or an error number if the guest and host crycb are incompatible.
 */
static int setup_apcb(struct kvm_vcpu *vcpu, struct kvm_s390_crypto_cb *crycb_s,
		      const u32 crycb_gpa,
		      struct kvm_s390_crypto_cb *crycb_h,
		      int fmt_o, int fmt_h)
{
	switch (fmt_o) {
	case CRYCB_FORMAT2:
		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 256) & PAGE_MASK))
			return -EACCES;
		if (fmt_h != CRYCB_FORMAT2)
			return -EINVAL;
		return setup_apcb11(vcpu, (unsigned long *)&crycb_s->apcb1,
				    crycb_gpa,
				    (unsigned long *)&crycb_h->apcb1);
	case CRYCB_FORMAT1:
		switch (fmt_h) {
		case CRYCB_FORMAT2:
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    crycb_gpa,
					    &crycb_h->apcb1);
		case CRYCB_FORMAT1:
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    crycb_gpa,
					    (unsigned long *) &crycb_h->apcb0);
		}
		break;
	case CRYCB_FORMAT0:
		if ((crycb_gpa & PAGE_MASK) != ((crycb_gpa + 32) & PAGE_MASK))
			return -EACCES;

		switch (fmt_h) {
		case CRYCB_FORMAT2:
			return setup_apcb10(vcpu, &crycb_s->apcb1,
					    crycb_gpa,
					    &crycb_h->apcb1);
		case CRYCB_FORMAT1:
		case CRYCB_FORMAT0:
			return setup_apcb00(vcpu,
					    (unsigned long *) &crycb_s->apcb0,
					    crycb_gpa,
					    (unsigned long *) &crycb_h->apcb0);
		}
	}
	return -EINVAL;
}
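/*
 * Note: the switch above is a small format matrix. A format-2 guest crycb
 * requires a format-2 host crycb (apcb1 -> apcb1, setup_apcb11()); a
 * format-0/1 guest apcb is either widened into the host's format-1 apcb
 * (setup_apcb10()) or copied as format-0 (setup_apcb00()) - the format is
 * never narrowed. The page-boundary checks reject crycbs the hardware
 * could not access in one piece.
 */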
/**
 * shadow_crycb - Create a shadow copy of the crycb block
 * @vcpu: a pointer to the virtual CPU
 * @vsie_page: a pointer to internal data used for the vSIE
 *
 * Create a shadow copy of the crycb block and setup key wrapping, if
 * requested for guest 3 and enabled for guest 2.
 *
 * We accept format-1 or format-2, but we convert format-1 into format-2
 * in the shadow CRYCB.
 * Using format-2 enables the firmware to choose the right format when
 * scheduling the SIE.
 * There is nothing to do for format-0.
 *
 * This function centralizes the issuing of set_validity_icpt() for all
 * the subfunctions working on the crycb.
 *
 * Returns: - 0 if shadowed or nothing to do
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	const uint32_t crycbd_o = READ_ONCE(scb_o->crycbd);
	const u32 crycb_addr = crycbd_o & 0x7ffffff8U;
	unsigned long *b1, *b2;
	u8 ecb3_flags;
	u32 ecd_flags;
	int apie_h;
	int apie_s;
	int key_msk = test_kvm_facility(vcpu->kvm, 76);
	int fmt_o = crycbd_o & CRYCB_FORMAT_MASK;
	int fmt_h = vcpu->arch.sie_block->crycbd & CRYCB_FORMAT_MASK;
	int ret = 0;

	scb_s->crycbd = 0;

	apie_h = vcpu->arch.sie_block->eca & ECA_APIE;
	apie_s = apie_h & scb_o->eca;
	if (!apie_s && (!key_msk || (fmt_o == CRYCB_FORMAT0)))
		return 0;

	if (!crycb_addr)
		return set_validity_icpt(scb_s, 0x0039U);

	if (fmt_o == CRYCB_FORMAT1)
		if ((crycb_addr & PAGE_MASK) !=
		    ((crycb_addr + 128) & PAGE_MASK))
			return set_validity_icpt(scb_s, 0x003CU);

	if (apie_s) {
		ret = setup_apcb(vcpu, &vsie_page->crycb, crycb_addr,
				 vcpu->kvm->arch.crypto.crycb,
				 fmt_o, fmt_h);
		if (ret)
			goto end;
		scb_s->eca |= scb_o->eca & ECA_APIE;
	}

	/* we may only allow it if enabled for guest 2 */
	ecb3_flags = scb_o->ecb3 & vcpu->arch.sie_block->ecb3 &
		     (ECB3_AES | ECB3_DEA);
	ecd_flags = scb_o->ecd & vcpu->arch.sie_block->ecd & ECD_ECC;
	if (!ecb3_flags && !ecd_flags)
		goto end;

	/* copy only the wrapping keys */
	if (read_guest_real(vcpu, crycb_addr + 72,
			    vsie_page->crycb.dea_wrapping_key_mask, 56))
		return set_validity_icpt(scb_s, 0x0035U);

	scb_s->ecb3 |= ecb3_flags;
	scb_s->ecd |= ecd_flags;

	/* xor both blocks in one run */
	b1 = (unsigned long *) vsie_page->crycb.dea_wrapping_key_mask;
	b2 = (unsigned long *)
	    vcpu->kvm->arch.crypto.crycb->dea_wrapping_key_mask;
	/* as 56%8 == 0, bitmap_xor won't overwrite any data */
	bitmap_xor(b1, b1, b2, BITS_PER_BYTE * 56);
end:
	switch (ret) {
	case -EINVAL:
		return set_validity_icpt(scb_s, 0x0022U);
	case -EFAULT:
		return set_validity_icpt(scb_s, 0x0035U);
	case -EACCES:
		return set_validity_icpt(scb_s, 0x003CU);
	}
	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
	return 0;
}
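/*
 * Note: the bitmap_xor() above combines guest 2's and guest 3's wrapping
 * key masks in the shadow crycb, so each virtualization level contributes
 * its own wrapping pattern to the effective keys used for guest 3.
 */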
/* shadow (round up/down) the ibc to avoid validity icpt */
static void prepare_ibc(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_ibc = scb_o->ibc;
	const uint32_t new_ibc = READ_ONCE(__new_ibc) & 0x0fffU;
	__u64 min_ibc = (sclp.ibc >> 16) & 0x0fffU;

	scb_s->ibc = 0;
	/* ibc installed in g2 and requested for g3 */
	if (vcpu->kvm->arch.model.ibc && new_ibc) {
		scb_s->ibc = new_ibc;
		/* take care of the minimum ibc level of the machine */
		if (scb_s->ibc < min_ibc)
			scb_s->ibc = min_ibc;
		/* take care of the maximum ibc level set for the guest */
		if (scb_s->ibc > vcpu->kvm->arch.model.ibc)
			scb_s->ibc = vcpu->kvm->arch.model.ibc;
	}
}
/* unshadow the scb, copying parameters back to the real scb */
static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;

	/* interception */
	scb_o->icptcode = scb_s->icptcode;
	scb_o->icptstatus = scb_s->icptstatus;
	scb_o->ipa = scb_s->ipa;
	scb_o->ipb = scb_s->ipb;
	scb_o->gbea = scb_s->gbea;

	/* timer */
	scb_o->cputm = scb_s->cputm;
	scb_o->ckc = scb_s->ckc;
	scb_o->todpr = scb_s->todpr;

	/* guest state */
	scb_o->gpsw = scb_s->gpsw;
	scb_o->gg14 = scb_s->gg14;
	scb_o->gg15 = scb_s->gg15;
	memcpy(scb_o->gcr, scb_s->gcr, 128);
	scb_o->pp = scb_s->pp;

	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82)) {
		scb_o->fpf &= ~FPF_BPBC;
		scb_o->fpf |= scb_s->fpf & FPF_BPBC;
	}

	/* interrupt intercept */
	switch (scb_s->icptcode) {
	case ICPT_PROGI:
	case ICPT_INSTPROGI:
	case ICPT_EXTINT:
		memcpy((void *)((u64)scb_o + 0xc0),
		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
		break;
	}

	if (scb_s->ihcpu != 0xffffU)
		scb_o->ihcpu = scb_s->ihcpu;
}
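/*
 * Note: the memcpy() in unshadow_scb() copies the 0xc0-0xf0 range of the
 * scb, which holds the per-interception parameters (program and external
 * interruption data), back to guest 2's scb for the intercept types that
 * deliver such data.
 */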
/*
 * Setup the shadow scb by copying and checking the relevant parts of the g2
 * provided scb.
 *
 * Returns: - 0 if the scb has been shadowed
 *          - > 0 if control has to be given to guest 2
 */
static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	/* READ_ONCE does not work on bitfields - use a temporary variable */
	const uint32_t __new_prefix = scb_o->prefix;
	const uint32_t new_prefix = READ_ONCE(__new_prefix);
	const bool wants_tx = READ_ONCE(scb_o->ecb) & ECB_TE;
	bool had_tx = scb_s->ecb & ECB_TE;
	unsigned long new_mso = 0;
	int rc;

	/* make sure we don't have any leftovers when reusing the scb */
	scb_s->icptcode = 0;
	scb_s->eca = 0;
	scb_s->ecb = 0;
	scb_s->ecb2 = 0;
	scb_s->ecb3 = 0;
	scb_s->ecd = 0;
	scb_s->fac = 0;
	scb_s->fpf = 0;

	rc = prepare_cpuflags(vcpu, vsie_page);
	if (rc)
		goto out;

	/* timer */
	scb_s->cputm = scb_o->cputm;
	scb_s->ckc = scb_o->ckc;
	scb_s->todpr = scb_o->todpr;
	scb_s->epoch = scb_o->epoch;

	/* guest state */
	scb_s->gpsw = scb_o->gpsw;
	scb_s->gg14 = scb_o->gg14;
	scb_s->gg15 = scb_o->gg15;
	memcpy(scb_s->gcr, scb_o->gcr, 128);
	scb_s->pp = scb_o->pp;

	/* interception / execution handling */
	scb_s->gbea = scb_o->gbea;
	scb_s->lctl = scb_o->lctl;
	scb_s->svcc = scb_o->svcc;
	scb_s->ictl = scb_o->ictl;
	/*
	 * SKEY handling functions can't deal with false setting of PTE invalid
	 * bits. Therefore we cannot provide interpretation and would later
	 * have to provide own emulation handlers.
	 */
	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
		scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	scb_s->icpua = scb_o->icpua;

	if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
		new_mso = READ_ONCE(scb_o->mso) & 0xfffffffffff00000UL;
	/* if the hva of the prefix changes, we have to remap the prefix */
	if (scb_s->mso != new_mso || scb_s->prefix != new_prefix)
		prefix_unmapped(vsie_page);
	/* SIE will do mso/msl validity and exception checks for us */
	scb_s->msl = scb_o->msl & 0xfffffffffff00000UL;
	scb_s->mso = new_mso;
	scb_s->prefix = new_prefix;

	/* We have to definitely flush the tlb if this scb never ran */
	if (scb_s->ihcpu != 0xffffU)
		scb_s->ihcpu = scb_o->ihcpu;

	/* MVPG and Protection Exception Interpretation are always available */
	scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
	/* Host-protection-interruption introduced with ESOP */
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
		scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
	/*
	 * CPU Topology
	 * This facility only uses the utility field of the SCA and none of
	 * the cpu entries that are problematic with the other interpretation
	 * facilities so we can pass it through
	 */
	if (test_kvm_facility(vcpu->kvm, 11))
		scb_s->ecb |= scb_o->ecb & ECB_PTF;
	/* transactional execution */
	if (test_kvm_facility(vcpu->kvm, 73) && wants_tx) {
		/* remap the prefix if tx is toggled on */
		if (!had_tx)
			prefix_unmapped(vsie_page);
		scb_s->ecb |= ECB_TE;
	}
	/* specification exception interpretation */
	scb_s->ecb |= scb_o->ecb & ECB_SPECI;
	/* branch prediction */
	if (test_kvm_facility(vcpu->kvm, 82))
		scb_s->fpf |= scb_o->fpf & FPF_BPBC;
	/* SIMD */
	if (test_kvm_facility(vcpu->kvm, 129)) {
		scb_s->eca |= scb_o->eca & ECA_VX;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	/* Run-time-Instrumentation */
	if (test_kvm_facility(vcpu->kvm, 64))
		scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
	/* Instruction Execution Prevention */
	if (test_kvm_facility(vcpu->kvm, 130))
		scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
	/* Guarded Storage */
	if (test_kvm_facility(vcpu->kvm, 133)) {
		scb_s->ecb |= scb_o->ecb & ECB_GS;
		scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
	}
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
		scb_s->eca |= scb_o->eca & ECA_SII;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
		scb_s->eca |= scb_o->eca & ECA_IB;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
		scb_s->eca |= scb_o->eca & ECA_CEI;
	/* Epoch Extension */
	if (test_kvm_facility(vcpu->kvm, 139)) {
		scb_s->ecd |= scb_o->ecd & ECD_MEF;
		scb_s->epdx = scb_o->epdx;
	}

	/* etoken */
	if (test_kvm_facility(vcpu->kvm, 156))
		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;

	scb_s->hpid = HPID_VSIE;
	scb_s->cpnc = scb_o->cpnc;

	prepare_ibc(vcpu, vsie_page);
	rc = shadow_crycb(vcpu, vsie_page);
out:
	if (rc)
		unshadow_scb(vcpu, vsie_page);
	return rc;
}
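/*
 * Called for gmap ranges that get unmapped or protected: if the range
 * overlaps the shadowed prefix of a vsie_page using this shadow gmap,
 * kick that vSIE out of SIE and mark its prefix as unmapped.
 */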
void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
				 unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct vsie_page *cur;
	unsigned long prefix;
	struct page *page;
	int i;

	if (!gmap_is_shadow(gmap))
		return;
	/*
	 * Only new shadow blocks are added to the list during runtime,
	 * therefore we can safely reference them all the time.
	 */
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = READ_ONCE(kvm->arch.vsie.pages[i]);
		if (!page)
			continue;
		cur = page_to_virt(page);
		if (READ_ONCE(cur->gmap) != gmap)
			continue;
		prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT;
		/* with mso/msl, the prefix lies at an offset */
		prefix += cur->scb_s.mso;
		if (prefix <= end && start <= prefix + 2 * PAGE_SIZE - 1)
			prefix_unmapped_sync(cur);
	}
}
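/*
 * Note: the overlap check above spans 2 * PAGE_SIZE because up to two
 * prefix pages can be shadowed (the second one when transactional
 * execution is enabled for guest 3, see map_prefix()).
 */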
/*
 * Map the first prefix page and if tx is enabled also the second prefix page.
 *
 * The prefix will be protected, a gmap notifier will inform about unmaps.
 * The shadow scb must not be executed until the prefix is remapped, this is
 * guaranteed by properly handling PROG_REQUEST.
 *
 * Returns: - 0 if successfully mapped or already mapped
 *          - > 0 if control has to be given to guest 2
 *          - -EAGAIN if the caller can retry immediately
 *          - -ENOMEM if out of memory
 */
static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	u64 prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
	int rc;

	if (prefix_is_mapped(vsie_page))
		return 0;

	/* mark it as mapped so we can catch any concurrent unmappers */
	prefix_mapped(vsie_page);

	/* with mso/msl, the prefix lies at offset *mso* */
	prefix += scb_s->mso;

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
	if (!rc && (scb_s->ecb & ECB_TE))
		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
					   prefix + PAGE_SIZE, NULL);
	/*
	 * We don't have to mprotect, we will be called for all unshadows.
	 * SIE will detect if protection applies and trigger a validity.
	 */
	if (rc)
		prefix_unmapped(vsie_page);
	if (rc > 0 || rc == -EFAULT)
		rc = set_validity_icpt(scb_s, 0x0037U);
	return rc;
}
/*
 * Pin the guest page given by gpa and set hpa to the pinned host address.
 * Will always be pinned writable.
 *
 * Returns: - 0 on success
 *          - -EINVAL if the gpa is not valid guest storage
 */
static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
{
	struct page *page;

	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
	if (is_error_page(page))
		return -EINVAL;
	*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
	return 0;
}
/* Unpins a page previously pinned via pin_guest_page, marking it as dirty. */
static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
{
	kvm_release_pfn_dirty(hpa >> PAGE_SHIFT);
	/* mark the page always as dirty for migration */
	mark_page_dirty(kvm, gpa_to_gfn(gpa));
}
/* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;

	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
		vsie_page->sca_gpa = 0;
		scb_s->scaol = 0;
		scb_s->scaoh = 0;
	}

	hpa = scb_s->itdba;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
		vsie_page->itdba_gpa = 0;
		scb_s->itdba = 0;
	}

	hpa = scb_s->gvrd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
		vsie_page->gvrd_gpa = 0;
		scb_s->gvrd = 0;
	}

	hpa = scb_s->riccbd;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
		vsie_page->riccbd_gpa = 0;
		scb_s->riccbd = 0;
	}

	hpa = scb_s->sdnxo;
	if (hpa) {
		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
		vsie_page->sdnx_gpa = 0;
		scb_s->sdnxo = 0;
	}
}
/*
 * Instead of shadowing some blocks, we can simply forward them because the
 * addresses in the scb are 64 bit long.
 *
 * This works as long as the data lies in one page. If blocks ever exceed one
 * page, we have to fall back to shadowing.
 *
 * As we reuse the sca, the vcpu pointers contained in it are invalid. We must
 * therefore not enable any facilities that access these pointers (e.g. SIGPIF).
 *
 * Returns: - 0 if all blocks were pinned.
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	hpa_t hpa;
	gpa_t gpa;
	int rc = 0;

	gpa = READ_ONCE(scb_o->scaol) & ~0xfUL;
	if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
		gpa |= (u64) READ_ONCE(scb_o->scaoh) << 32;
	if (gpa) {
		if (gpa < 2 * PAGE_SIZE)
			rc = set_validity_icpt(scb_s, 0x0038U);
		else if ((gpa & ~0x1fffUL) == kvm_s390_get_prefix(vcpu))
			rc = set_validity_icpt(scb_s, 0x0011U);
		else if ((gpa & PAGE_MASK) !=
			 ((gpa + sizeof(struct bsca_block) - 1) & PAGE_MASK))
			rc = set_validity_icpt(scb_s, 0x003bU);
		if (!rc) {
			rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
			if (rc)
				rc = set_validity_icpt(scb_s, 0x0034U);
		}
		if (rc)
			goto unpin;
		vsie_page->sca_gpa = gpa;
		scb_s->scaoh = (u32)((u64)hpa >> 32);
		scb_s->scaol = (u32)(u64)hpa;
	}

	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
	if (gpa && (scb_s->ecb & ECB_TE)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		/* 256 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0080U);
			goto unpin;
		}
		vsie_page->itdba_gpa = gpa;
		scb_s->itdba = hpa;
	}

	gpa = READ_ONCE(scb_o->gvrd) & ~0x1ffUL;
	if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		/*
		 * 512 bytes vector registers cannot cross page boundaries
		 * if this block gets bigger, we have to shadow it.
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x1310U);
			goto unpin;
		}
		vsie_page->gvrd_gpa = gpa;
		scb_s->gvrd = hpa;
	}

	gpa = READ_ONCE(scb_o->riccbd) & ~0x3fUL;
	if (gpa && (scb_s->ecb3 & ECB3_RI)) {
		if (gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* 64 bytes cannot cross page boundaries */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x0043U);
			goto unpin;
		}
		/* Validity 0x0044 will be checked by SIE */
		vsie_page->riccbd_gpa = gpa;
		scb_s->riccbd = hpa;
	}
	if (((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) ||
	    (scb_s->ecd & ECD_ETOKENF)) {
		unsigned long sdnxc;

		gpa = READ_ONCE(scb_o->sdnxo) & ~0xfUL;
		sdnxc = READ_ONCE(scb_o->sdnxo) & 0xfUL;
		if (!gpa || gpa < 2 * PAGE_SIZE) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
		if (sdnxc < 6 || sdnxc > 12) {
			rc = set_validity_icpt(scb_s, 0x10b1U);
			goto unpin;
		}
		if (gpa & ((1 << sdnxc) - 1)) {
			rc = set_validity_icpt(scb_s, 0x10b2U);
			goto unpin;
		}
		/* Due to alignment rules (checked above) this cannot
		 * cross page boundaries
		 */
		rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
		if (rc) {
			rc = set_validity_icpt(scb_s, 0x10b0U);
			goto unpin;
		}
		vsie_page->sdnx_gpa = gpa;
		scb_s->sdnxo = hpa | sdnxc;
	}
	return 0;
unpin:
	unpin_blocks(vcpu, vsie_page);
	return rc;
}
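/*
 * Note: pin_blocks() rejects satellite blocks in the first two pages of
 * guest absolute storage (the "gpa < 2 * PAGE_SIZE" validity icpts
 * above); that range is the prefix/low-core area, where prefixing
 * redirects accesses, so a directly forwarded host address would not
 * behave as the guest expects.
 */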
/* unpin the scb provided by guest 2, marking it as dirty */
static void unpin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		      gpa_t gpa)
{
	hpa_t hpa = (hpa_t) vsie_page->scb_o;

	if (hpa)
		unpin_guest_page(vcpu->kvm, gpa, hpa);
	vsie_page->scb_o = NULL;
}
/*
 * Pin the scb at gpa provided by guest 2 at vsie_page->scb_o.
 *
 * Returns: - 0 if the scb was pinned.
 *          - > 0 if control has to be given to guest 2
 */
static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
		   gpa_t gpa)
{
	hpa_t hpa;
	int rc;

	rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
	if (rc) {
		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
		WARN_ON_ONCE(rc);
		return 1;
	}
	vsie_page->scb_o = phys_to_virt(hpa);
	return 0;
}
/*
 * Inject a fault into guest 2.
 *
 * Returns: - > 0 if control has to be given to guest 2
 *            < 0 if an error occurred during injection.
 */
static int inject_fault(struct kvm_vcpu *vcpu, __u16 code, __u64 vaddr,
			bool write_flag)
{
	struct kvm_s390_pgm_info pgm = {
		.code = code,
		.trans_exc_code =
			/* 0-51: virtual address */
			(vaddr & 0xfffffffffffff000UL) |
			/* 52-53: store / fetch */
			(((unsigned int) !write_flag) + 1) << 10,
			/* 62-63: asce id (always primary == 0) */
		.exc_access_id = 0, /* always primary */
		.op_access_id = 0, /* not MVPG */
	};
	int rc;

	if (code == PGM_PROTECTION)
		pgm.trans_exc_code |= 0x4UL;

	rc = kvm_s390_inject_prog_irq(vcpu, &pgm);
	return rc ? rc : 1;
}
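/*
 * Note: 0x4UL sets bit 61 of the translation-exception identification;
 * for protection exceptions this appears to be the additional indication
 * required for the suppression-on-protection/ESOP handling (cf. the TEID
 * construction in gaccess.c).
 */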
/*
 * Handle a fault during vsie execution on a gmap shadow.
 *
 * Returns: - 0 if the fault was resolved
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	int rc;

	if (current->thread.gmap_int_code == PGM_PROTECTION)
		/* we can directly forward all protection exceptions */
		return inject_fault(vcpu, PGM_PROTECTION,
				    current->thread.gmap_addr, 1);

	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				   current->thread.gmap_addr, NULL);
	if (rc > 0) {
		rc = inject_fault(vcpu, rc,
				  current->thread.gmap_addr,
				  current->thread.gmap_write_flag);
		if (rc >= 0)
			vsie_page->fault_addr = current->thread.gmap_addr;
	}
	return rc;
}
/*
 * Retry the previous fault that required guest 2 intervention. This avoids
 * one superfluous SIE re-entry and direct exit.
 *
 * Will ignore any errors. The next SIE fault will do proper fault handling.
 */
static void handle_last_fault(struct kvm_vcpu *vcpu,
			      struct vsie_page *vsie_page)
{
	if (vsie_page->fault_addr)
		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
				      vsie_page->fault_addr, NULL);
	vsie_page->fault_addr = 0;
}
static inline void clear_vsie_icpt(struct vsie_page *vsie_page)
{
	vsie_page->scb_s.icptcode = 0;
}
/* rewind the psw and clear the vsie icpt, so we can retry execution */
static void retry_vsie_icpt(struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int ilen = insn_length(scb_s->ipa >> 8);

	/* take care of EXECUTE instructions */
	if (scb_s->icptstatus & 1) {
		ilen = (scb_s->icptstatus >> 4) & 0x6;
		if (!ilen)
			ilen = 4;
	}
	scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, ilen);
	clear_vsie_icpt(vsie_page);
}
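/*
 * Note: when bit 0 of icptstatus is set, the intercepted instruction was
 * the target of an EXECUTE; the PSW then points past the EXECUTE itself,
 * so the rewind must use the ilc stored in icptstatus rather than the
 * length of the target instruction from the ipa.
 */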
/*
 * Try to shadow + enable the guest 2 provided facility list.
 * Retry instruction execution if enabled for and provided by guest 2.
 *
 * Returns: - 0 if handled (retry or guest 2 icpt)
 *          - > 0 if control has to be given to guest 2
 */
static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	__u32 fac = READ_ONCE(vsie_page->scb_o->fac);

	/*
	 * Alternate-STFLE-Interpretive-Execution facilities are not supported
	 * -> format-0 flcb
	 */
	if (fac && test_kvm_facility(vcpu->kvm, 7)) {
		retry_vsie_icpt(vsie_page);
		/*
		 * The facility list origin (FLO) is in bits 1 - 28 of the FLD
		 * so we need to mask here before reading.
		 */
		fac = fac & 0x7ffffff8U;
		/*
		 * format-0 -> size of nested guest's facility list == guest's size
		 * guest's size == host's size, since STFLE is interpretatively executed
		 * using a format-0 for the guest, too.
		 */
		if (read_guest_real(vcpu, fac, &vsie_page->fac,
				    stfle_size() * sizeof(u64)))
			return set_validity_icpt(scb_s, 0x1090U);
		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
	}
	return 0;
}
/*
 * Get a register for a nested guest.
 * @vcpu the vcpu of the guest
 * @vsie_page the vsie_page for the nested guest
 * @reg the register number, the upper 4 bits are ignored.
 * returns: the value of the register.
 */
static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
{
	/* no need to validate the parameter and/or perform error handling */
	reg &= 0xf;
	switch (reg) {
	case 15:
		return vsie_page->scb_s.gg15;
	case 14:
		return vsie_page->scb_s.gg14;
	default:
		return vcpu->run->s.regs.gprs[reg];
	}
}
static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	unsigned long pei_dest, pei_src, src, dest, mask, prefix;
	u64 *pei_block = &vsie_page->scb_o->mcic;
	int edat, rc_dest, rc_src;
	union ctlreg0 cr0;

	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
	prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;

	dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
	dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
	src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
	src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;

	rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
	rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
	/*
	 * Either everything went well, or something non-critical went wrong
	 * e.g. because of a race. In either case, simply retry.
	 */
	if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
		retry_vsie_icpt(vsie_page);
		return -EAGAIN;
	}
	/* Something more serious went wrong, propagate the error */
	if (rc_dest < 0)
		return rc_dest;
	if (rc_src < 0)
		return rc_src;

	/* The only possible suppressing exception: just deliver it */
	if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
		clear_vsie_icpt(vsie_page);
		rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
		WARN_ON_ONCE(rc_dest);
		return 1;
	}

	/*
	 * Forward the PEI intercept to the guest if it was a page fault, or
	 * also for segment and region table faults if EDAT applies.
	 */
	if (edat) {
		rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
		rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
	} else {
		rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
		rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
	}
	if (!rc_dest && !rc_src) {
		pei_block[0] = pei_dest;
		pei_block[1] = pei_src;
		return 1;
	}

	retry_vsie_icpt(vsie_page);

	/*
	 * The host has edat, and the guest does not, or it was an ASCE type
	 * exception. The host needs to inject the appropriate DAT interrupts
	 * into the guest.
	 */
	if (rc_dest)
		return inject_fault(vcpu, rc_dest, dest, 1);
	return inject_fault(vcpu, rc_src, src, 0);
}
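/*
 * Note: for MVPG the ipb holds the instruction text, so "ipb >> 20" and
 * "ipb >> 16" above extract the R1 (destination) and R2 (source) register
 * numbers; vsie_get_register() masks off the extra bits.
 */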
/*
 * Run the vsie on a shadow scb and a shadow gmap, without any further
 * sanity checks, handling SIE faults.
 *
 * Returns: - 0 everything went fine
 *          - > 0 if control has to be given to guest 2
 *          - < 0 if an error occurred
 */
static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
	__releases(vcpu->kvm->srcu)
	__acquires(vcpu->kvm->srcu)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
	int guest_bp_isolation;
	int rc = 0;

	handle_last_fault(vcpu, vsie_page);

	kvm_vcpu_srcu_read_unlock(vcpu);

	/* save current guest state of bp isolation override */
	guest_bp_isolation = test_thread_flag(TIF_ISOLATE_BP_GUEST);

	/*
	 * The guest is running with BPBC, so we have to force it on for our
	 * nested guest. This is done by enabling BPBC globally, so the BPBC
	 * control in the SCB (which the nested guest can modify) is simply
	 * ignored.
	 */
	if (test_kvm_facility(vcpu->kvm, 82) &&
	    vcpu->arch.sie_block->fpf & FPF_BPBC)
		set_thread_flag(TIF_ISOLATE_BP_GUEST);

	local_irq_disable();
	guest_enter_irqoff();
	local_irq_enable();

	/*
	 * Simulate a SIE entry of the VCPU (see sie64a), so VCPU blocking
	 * and VCPU requests also hinder the vSIE from running and lead
	 * to an immediate exit. kvm_s390_vsie_kick() has to be used to
	 * also kick the vSIE.
	 */
	vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
	barrier();
	if (test_cpu_flag(CIF_FPU))
		load_fpu_regs();
	if (!kvm_s390_vcpu_sie_inhibited(vcpu))
		rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
	barrier();
	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;

	local_irq_disable();
	guest_exit_irqoff();
	local_irq_enable();

	/* restore guest state for bp isolation override */
	if (!guest_bp_isolation)
		clear_thread_flag(TIF_ISOLATE_BP_GUEST);

	kvm_vcpu_srcu_read_lock(vcpu);

	if (rc == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
		return 0;
	}

	if (rc > 0)
		rc = 0; /* we could still have an icpt */
	else if (rc == -EFAULT)
		return handle_fault(vcpu, vsie_page);

	switch (scb_s->icptcode) {
	case ICPT_INST:
		if (scb_s->ipa == 0xb2b0)
			rc = handle_stfle(vcpu, vsie_page);
		break;
	case ICPT_STOP:
		/* stop not requested by g2 - must have been a kick */
		if (!(atomic_read(&scb_o->cpuflags) & CPUSTAT_STOP_INT))
			clear_vsie_icpt(vsie_page);
		break;
	case ICPT_VALIDITY:
		if ((scb_s->ipa & 0xf000) != 0xf000)
			scb_s->ipa += 0x1000;
		break;
	case ICPT_PARTEXEC:
		if (scb_s->ipa == 0xb254)
			rc = vsie_handle_mvpg(vcpu, vsie_page);
		break;
	}
	return rc;
}
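/*
 * Note: a sie64a() return value of -EINTR indicates a machine check that
 * interrupted SIE; its volatile state was saved in vsie_page->mcck_info
 * (placed at the same offset as in struct sie_page, see above) and is
 * reinjected into guest 2 before anything else is handled.
 */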
static void release_gmap_shadow(struct vsie_page *vsie_page)
{
	if (vsie_page->gmap)
		gmap_put(vsie_page->gmap);
	WRITE_ONCE(vsie_page->gmap, NULL);
	prefix_unmapped(vsie_page);
}
static int acquire_gmap_shadow(struct kvm_vcpu *vcpu,
			       struct vsie_page *vsie_page)
{
	unsigned long asce;
	union ctlreg0 cr0;
	struct gmap *gmap;
	int edat;

	asce = vcpu->arch.sie_block->gcr[1];
	cr0.val = vcpu->arch.sie_block->gcr[0];
	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat += edat && test_kvm_facility(vcpu->kvm, 78);

	/*
	 * ASCE or EDAT could have changed since last icpt, or the gmap
	 * we're holding has been unshadowed. If the gmap is still valid,
	 * we can safely reuse it.
	 */
	if (vsie_page->gmap && gmap_shadow_valid(vsie_page->gmap, asce, edat)) {
		vcpu->kvm->stat.gmap_shadow_reuse++;
		return 0;
	}

	/* release the old shadow - if any, and mark the prefix as unmapped */
	release_gmap_shadow(vsie_page);
	gmap = gmap_shadow(vcpu->arch.gmap, asce, edat);
	if (IS_ERR(gmap))
		return PTR_ERR(gmap);
	vcpu->kvm->stat.gmap_shadow_create++;
	WRITE_ONCE(vsie_page->gmap, gmap);
	return 0;
}
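/*
 * Note: edat is an EDAT level, not a bool: 0 (disabled), 1 (EDAT-1,
 * facility 8) or 2 (EDAT-2, facility 78). A shadow gmap is only reusable
 * if both the ASCE and this level are unchanged.
 */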
/*
 * Register the shadow scb at the VCPU, e.g. for kicking out of vsie.
 */
static void register_shadow_scb(struct kvm_vcpu *vcpu,
				struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;

	WRITE_ONCE(vcpu->arch.vsie_block, &vsie_page->scb_s);
	/*
	 * External calls have to lead to a kick of the vcpu and
	 * therefore the vsie -> Simulate Wait state.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
	/*
	 * We have to adjust the g3 epoch by the g2 epoch. The epoch will
	 * automatically be adjusted on tod clock changes via kvm_sync_clock.
	 */
	preempt_disable();
	scb_s->epoch += vcpu->kvm->arch.epoch;

	if (scb_s->ecd & ECD_MEF) {
		scb_s->epdx += vcpu->kvm->arch.epdx;
		if (scb_s->epoch < vcpu->kvm->arch.epoch)
			scb_s->epdx += 1;
	}

	preempt_enable();
}
/*
 * Unregister a shadow scb from a VCPU.
 */
static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
{
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
	WRITE_ONCE(vcpu->arch.vsie_block, NULL);
}
/*
 * Run the vsie on a shadowed scb, managing the gmap shadow, handling
 * prefix pages and faults.
 *
 * Returns: - 0 if no errors occurred
 *          - > 0 if control has to be given to guest 2
 *          - -ENOMEM if out of memory
 */
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
	int rc = 0;

	while (1) {
		rc = acquire_gmap_shadow(vcpu, vsie_page);
		if (!rc)
			rc = map_prefix(vcpu, vsie_page);
		if (!rc) {
			gmap_enable(vsie_page->gmap);
			update_intervention_requests(vsie_page);
			rc = do_vsie_run(vcpu, vsie_page);
			gmap_enable(vcpu->arch.gmap);
		}
		atomic_andnot(PROG_BLOCK_SIE, &scb_s->prog20);

		if (rc == -EAGAIN)
			rc = 0;
		if (rc || scb_s->icptcode || signal_pending(current) ||
		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
		    kvm_s390_vcpu_sie_inhibited(vcpu))
			break;
		cond_resched();
	}

	if (rc == -EFAULT) {
		/*
		 * Addressing exceptions are always presented as intercepts.
		 * As addressing exceptions are suppressing and our guest 3 PSW
		 * points at the responsible instruction, we have to
		 * forward the PSW and set the ilc. If we can't read guest 3
		 * instruction, we can use an arbitrary ilc. Let's always use
		 * ilen = 4 for now, so we can avoid reading in guest 3 virtual
		 * memory. (we could also fake the shadow so the hardware
		 * handles it).
		 */
		scb_s->icptcode = ICPT_PROGI;
		scb_s->iprcc = PGM_ADDRESSING;
		scb_s->pgmilc = 4;
		scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
		rc = 1;
	}
	return rc;
}
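/*
 * Note: PROG_BLOCK_SIE, cleared after every iteration of the loop in
 * vsie_run(), is the request bit set by kvm_s390_vsie_kick() to keep the
 * vSIE from being re-entered until the kick has been processed.
 */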
/*
 * Get or create a vsie page for a scb address.
 *
 * Returns: - address of a vsie page (cached or new one)
 *          - NULL if the same scb address is already used by another VCPU
 *          - ERR_PTR(-ENOMEM) if out of memory
 */
static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int nr_vcpus;

	rcu_read_lock();
	page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9);
	rcu_read_unlock();
	if (page) {
		if (page_ref_inc_return(page) == 2)
			return page_to_virt(page);
		page_ref_dec(page);
	}

	/*
	 * We want at least #online_vcpus shadows, so every VCPU can execute
	 * the VSIE in parallel.
	 */
	nr_vcpus = atomic_read(&kvm->online_vcpus);

	mutex_lock(&kvm->arch.vsie.mutex);
	if (kvm->arch.vsie.page_count < nr_vcpus) {
		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA);
		if (!page) {
			mutex_unlock(&kvm->arch.vsie.mutex);
			return ERR_PTR(-ENOMEM);
		}
		page_ref_inc(page);
		kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page;
		kvm->arch.vsie.page_count++;
	} else {
		/* reuse an existing entry that belongs to nobody */
		while (true) {
			page = kvm->arch.vsie.pages[kvm->arch.vsie.next];
			if (page_ref_inc_return(page) == 2)
				break;
			page_ref_dec(page);
			kvm->arch.vsie.next++;
			kvm->arch.vsie.next %= nr_vcpus;
		}
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
	}
	page->index = addr;
	/* double use of the same address */
	if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) {
		page_ref_dec(page);
		mutex_unlock(&kvm->arch.vsie.mutex);
		return NULL;
	}
	mutex_unlock(&kvm->arch.vsie.mutex);

	vsie_page = page_to_virt(page);
	memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block));
	release_gmap_shadow(vsie_page);
	vsie_page->fault_addr = 0;
	vsie_page->scb_s.ihcpu = 0xffffU;
	return vsie_page;
}
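/*
 * Note: cached vsie pages idle at a page refcount of 1, so
 * page_ref_inc_return() == 2 means we won exclusive use of the entry;
 * any other value means another VCPU holds it. The radix tree is keyed
 * by addr >> 9 because scb addresses are 512-byte aligned.
 */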
/* put a vsie page acquired via get_vsie_page */
static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page)
{
	struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT);

	page_ref_dec(page);
}
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
{
	struct vsie_page *vsie_page;
	unsigned long scb_addr;
	int rc;

	vcpu->stat.instruction_sie++;
	if (!test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIEF2))
		return -EOPNOTSUPP;
	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);

	BUILD_BUG_ON(sizeof(struct vsie_page) != PAGE_SIZE);
	scb_addr = kvm_s390_get_base_disp_s(vcpu, NULL);

	/* 512 byte alignment */
	if (unlikely(scb_addr & 0x1ffUL))
		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);

	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
	    kvm_s390_vcpu_sie_inhibited(vcpu))
		return 0;

	vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
	if (IS_ERR(vsie_page))
		return PTR_ERR(vsie_page);
	else if (!vsie_page)
		/* double use of sie control block - simply do nothing */
		return 0;

	rc = pin_scb(vcpu, vsie_page, scb_addr);
	if (rc)
		goto out_put;
	rc = shadow_scb(vcpu, vsie_page);
	if (rc)
		goto out_unpin_scb;
	rc = pin_blocks(vcpu, vsie_page);
	if (rc)
		goto out_unshadow;
	register_shadow_scb(vcpu, vsie_page);
	rc = vsie_run(vcpu, vsie_page);
	unregister_shadow_scb(vcpu);
	unpin_blocks(vcpu, vsie_page);
out_unshadow:
	unshadow_scb(vcpu, vsie_page);
out_unpin_scb:
	unpin_scb(vcpu, vsie_page, scb_addr);
out_put:
	put_vsie_page(vcpu->kvm, vsie_page);

	return rc < 0 ? rc : 0;
}
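/*
 * Note: a positive rc means the interception was already forwarded to
 * guest 2 through its scb (via unshadow_scb() above), so the handler
 * itself reports success; only real errors (< 0) are propagated.
 */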
/* Init the vsie data structures. To be called when a vm is initialized. */
void kvm_s390_vsie_init(struct kvm *kvm)
{
	mutex_init(&kvm->arch.vsie.mutex);
	INIT_RADIX_TREE(&kvm->arch.vsie.addr_to_page, GFP_KERNEL_ACCOUNT);
}
/* Destroy the vsie data structures. To be called when a vm is destroyed. */
void kvm_s390_vsie_destroy(struct kvm *kvm)
{
	struct vsie_page *vsie_page;
	struct page *page;
	int i;

	mutex_lock(&kvm->arch.vsie.mutex);
	for (i = 0; i < kvm->arch.vsie.page_count; i++) {
		page = kvm->arch.vsie.pages[i];
		kvm->arch.vsie.pages[i] = NULL;
		vsie_page = page_to_virt(page);
		release_gmap_shadow(vsie_page);
		/* free the radix tree entry */
		radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9);
		__free_page(page);
	}
	kvm->arch.vsie.page_count = 0;
	mutex_unlock(&kvm->arch.vsie.mutex);
}
void kvm_s390_vsie_kick(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_sie_block *scb = READ_ONCE(vcpu->arch.vsie_block);

	/*
	 * Even if the VCPU lets go of the shadow sie block reference, it is
	 * still valid in the cache. So we can safely kick it.
	 */
	if (scb) {
		atomic_or(PROG_BLOCK_SIE, &scb->prog20);
		if (scb->prog0c & PROG_IN_SIE)
			atomic_or(CPUSTAT_STOP_INT, &scb->cpuflags);
	}
}