]> git.ipfire.org Git - people/arne_f/kernel.git/blob - arch/s390/kvm/kvm-s390.c
Merge tag 'kvm-s390-next-4.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[people/arne_f/kernel.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
/*
 * Table of named per-VCPU statistics. Every entry pairs a name string with
 * what VCPU_STAT() expands to: the offset of the matching stat.<field>
 * inside struct kvm_vcpu plus the KVM_STAT_VCPU kind. The table is
 * terminated by a { NULL } entry.
 * NOTE(review): judging by the type name this is consumed by the generic
 * KVM debugfs code - confirm against the consumer.
 */
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "userspace_handled", VCPU_STAT(exit_userspace) },
66 { "exit_null", VCPU_STAT(exit_null) },
67 { "exit_validity", VCPU_STAT(exit_validity) },
68 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 { "exit_external_request", VCPU_STAT(exit_external_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 { "instruction_spx", VCPU_STAT(instruction_spx) },
96 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 { "instruction_stap", VCPU_STAT(instruction_stap) },
98 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 { "instruction_essa", VCPU_STAT(instruction_essa) },
103 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 { "instruction_sie", VCPU_STAT(instruction_sie) },
108 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 { "diagnose_10", VCPU_STAT(diagnose_10) },
125 { "diagnose_44", VCPU_STAT(diagnose_44) },
126 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 { "diagnose_258", VCPU_STAT(diagnose_258) },
128 { "diagnose_308", VCPU_STAT(diagnose_308) },
129 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 { NULL }
131 };
132
/*
 * Packed 16 byte buffer (1 + 8 + 7 bytes) that this file hands to
 * get_tod_clock_ext(); afterwards the epoch index is read from the first
 * byte and the TOD value from the following eight bytes. The trailing
 * seven bytes are not looked at here.
 */
133 struct kvm_s390_tod_clock_ext {
134 __u8 epoch_idx;
135 __u64 tod;
136 __u8 reserved[7];
137 } __packed;
138
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161
162 /* Section: not file related */
/* Nothing needs switching on: every s390 is virtualization enabled ;-) */
int kvm_arch_hardware_enable(void)
{
	const int rc = 0;

	return rc;
}
168
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 unsigned long end);
171
172 /*
173 * This callback is executed during stop_machine(). All CPUs are therefore
174 * temporarily stopped. In order not to change guest behavior, we have to
175 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176 * so a CPU won't be stopped while calculating with the epoch.
177 */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179 void *v)
180 {
181 struct kvm *kvm;
182 struct kvm_vcpu *vcpu;
183 int i;
184 unsigned long long *delta = v;
185
186 list_for_each_entry(kvm, &vm_list, vm_list) {
187 kvm->arch.epoch -= *delta;
188 kvm_for_each_vcpu(i, vcpu, kvm) {
189 vcpu->arch.sie_block->epoch -= *delta;
190 if (vcpu->arch.cputm_enabled)
191 vcpu->arch.cputm_start += *delta;
192 if (vcpu->arch.vsie_block)
193 vcpu->arch.vsie_block->epoch -= *delta;
194 }
195 }
196 return NOTIFY_OK;
197 }
198
199 static struct notifier_block kvm_clock_notifier = {
200 .notifier_call = kvm_clock_sync,
201 };
202
203 int kvm_arch_hardware_setup(void)
204 {
205 gmap_notifier.notifier_call = kvm_gmap_notifier;
206 gmap_register_pte_notifier(&gmap_notifier);
207 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208 gmap_register_pte_notifier(&vsie_gmap_notifier);
209 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210 &kvm_clock_notifier);
211 return 0;
212 }
213
214 void kvm_arch_hardware_unsetup(void)
215 {
216 gmap_unregister_pte_notifier(&gmap_notifier);
217 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219 &kvm_clock_notifier);
220 }
221
222 static void allow_cpu_feat(unsigned long nr)
223 {
224 set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226
227 static inline int plo_test_bit(unsigned char nr)
228 {
229 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230 int cc;
231
232 asm volatile(
233 /* Parameter registers are ignored for "test bit" */
234 " plo 0,0,0,0(0)\n"
235 " ipm %0\n"
236 " srl %0,28\n"
237 : "=d" (cc)
238 : "d" (r0)
239 : "cc");
240 return cc == 0;
241 }
242
243 static void kvm_s390_cpu_feat_init(void)
244 {
245 int i;
246
247 for (i = 0; i < 256; ++i) {
248 if (plo_test_bit(i))
249 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250 }
251
252 if (test_facility(28)) /* TOD-clock steering */
253 ptff(kvm_s390_available_subfunc.ptff,
254 sizeof(kvm_s390_available_subfunc.ptff),
255 PTFF_QAF);
256
257 if (test_facility(17)) { /* MSA */
258 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259 kvm_s390_available_subfunc.kmac);
260 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261 kvm_s390_available_subfunc.kmc);
262 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
263 kvm_s390_available_subfunc.km);
264 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265 kvm_s390_available_subfunc.kimd);
266 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267 kvm_s390_available_subfunc.klmd);
268 }
269 if (test_facility(76)) /* MSA3 */
270 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.pckmo);
272 if (test_facility(77)) { /* MSA4 */
273 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274 kvm_s390_available_subfunc.kmctr);
275 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276 kvm_s390_available_subfunc.kmf);
277 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278 kvm_s390_available_subfunc.kmo);
279 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280 kvm_s390_available_subfunc.pcc);
281 }
282 if (test_facility(57)) /* MSA5 */
283 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284 kvm_s390_available_subfunc.ppno);
285
286 if (test_facility(146)) /* MSA8 */
287 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288 kvm_s390_available_subfunc.kma);
289
290 if (MACHINE_HAS_ESOP)
291 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292 /*
293 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295 */
296 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297 !test_facility(3) || !nested)
298 return;
299 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300 if (sclp.has_64bscao)
301 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302 if (sclp.has_siif)
303 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304 if (sclp.has_gpere)
305 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306 if (sclp.has_gsls)
307 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308 if (sclp.has_ib)
309 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310 if (sclp.has_cei)
311 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312 if (sclp.has_ibs)
313 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314 if (sclp.has_kss)
315 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316 /*
317 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318 * all skey handling functions read/set the skey from the PGSTE
319 * instead of the real storage key.
320 *
321 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
322 * pages being detected as preserved although they are resident.
323 *
324 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326 *
327 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330 *
331 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332 * cannot easily shadow the SCA because of the ipte lock.
333 */
334 }
335
336 int kvm_arch_init(void *opaque)
337 {
338 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339 if (!kvm_s390_dbf)
340 return -ENOMEM;
341
342 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343 debug_unregister(kvm_s390_dbf);
344 return -ENOMEM;
345 }
346
347 kvm_s390_cpu_feat_init();
348
349 /* Register floating interrupt controller interface. */
350 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352
353 void kvm_arch_exit(void)
354 {
355 debug_unregister(kvm_s390_dbf);
356 }
357
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360 unsigned int ioctl, unsigned long arg)
361 {
362 if (ioctl == KVM_S390_ENABLE_SIE)
363 return s390_enable_sie();
364 return -EINVAL;
365 }
366
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369 int r;
370
371 switch (ext) {
372 case KVM_CAP_S390_PSW:
373 case KVM_CAP_S390_GMAP:
374 case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376 case KVM_CAP_S390_UCONTROL:
377 #endif
378 case KVM_CAP_ASYNC_PF:
379 case KVM_CAP_SYNC_REGS:
380 case KVM_CAP_ONE_REG:
381 case KVM_CAP_ENABLE_CAP:
382 case KVM_CAP_S390_CSS_SUPPORT:
383 case KVM_CAP_IOEVENTFD:
384 case KVM_CAP_DEVICE_CTRL:
385 case KVM_CAP_ENABLE_CAP_VM:
386 case KVM_CAP_S390_IRQCHIP:
387 case KVM_CAP_VM_ATTRIBUTES:
388 case KVM_CAP_MP_STATE:
389 case KVM_CAP_IMMEDIATE_EXIT:
390 case KVM_CAP_S390_INJECT_IRQ:
391 case KVM_CAP_S390_USER_SIGP:
392 case KVM_CAP_S390_USER_STSI:
393 case KVM_CAP_S390_SKEYS:
394 case KVM_CAP_S390_IRQ_STATE:
395 case KVM_CAP_S390_USER_INSTR0:
396 case KVM_CAP_S390_CMMA_MIGRATION:
397 case KVM_CAP_S390_AIS:
398 r = 1;
399 break;
400 case KVM_CAP_S390_MEM_OP:
401 r = MEM_OP_MAX_SIZE;
402 break;
403 case KVM_CAP_NR_VCPUS:
404 case KVM_CAP_MAX_VCPUS:
405 r = KVM_S390_BSCA_CPU_SLOTS;
406 if (!kvm_s390_use_sca_entries())
407 r = KVM_MAX_VCPUS;
408 else if (sclp.has_esca && sclp.has_64bscao)
409 r = KVM_S390_ESCA_CPU_SLOTS;
410 break;
411 case KVM_CAP_NR_MEMSLOTS:
412 r = KVM_USER_MEM_SLOTS;
413 break;
414 case KVM_CAP_S390_COW:
415 r = MACHINE_HAS_ESOP;
416 break;
417 case KVM_CAP_S390_VECTOR_REGISTERS:
418 r = MACHINE_HAS_VX;
419 break;
420 case KVM_CAP_S390_RI:
421 r = test_facility(64);
422 break;
423 case KVM_CAP_S390_GS:
424 r = test_facility(133);
425 break;
426 default:
427 r = 0;
428 }
429 return r;
430 }
431
432 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
433 struct kvm_memory_slot *memslot)
434 {
435 gfn_t cur_gfn, last_gfn;
436 unsigned long address;
437 struct gmap *gmap = kvm->arch.gmap;
438
439 /* Loop over all guest pages */
440 last_gfn = memslot->base_gfn + memslot->npages;
441 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
442 address = gfn_to_hva_memslot(memslot, cur_gfn);
443
444 if (test_and_clear_guest_dirty(gmap->mm, address))
445 mark_page_dirty(kvm, cur_gfn);
446 if (fatal_signal_pending(current))
447 return;
448 cond_resched();
449 }
450 }
451
452 /* Section: vm related */
453 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
454
455 /*
456 * Get (and clear) the dirty memory log for a memory slot.
457 */
458 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
459 struct kvm_dirty_log *log)
460 {
461 int r;
462 unsigned long n;
463 struct kvm_memslots *slots;
464 struct kvm_memory_slot *memslot;
465 int is_dirty = 0;
466
467 if (kvm_is_ucontrol(kvm))
468 return -EINVAL;
469
470 mutex_lock(&kvm->slots_lock);
471
472 r = -EINVAL;
473 if (log->slot >= KVM_USER_MEM_SLOTS)
474 goto out;
475
476 slots = kvm_memslots(kvm);
477 memslot = id_to_memslot(slots, log->slot);
478 r = -ENOENT;
479 if (!memslot->dirty_bitmap)
480 goto out;
481
482 kvm_s390_sync_dirty_log(kvm, memslot);
483 r = kvm_get_dirty_log(kvm, log, &is_dirty);
484 if (r)
485 goto out;
486
487 /* Clear the dirty log */
488 if (is_dirty) {
489 n = kvm_dirty_bitmap_bytes(memslot);
490 memset(memslot->dirty_bitmap, 0, n);
491 }
492 r = 0;
493 out:
494 mutex_unlock(&kvm->slots_lock);
495 return r;
496 }
497
498 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
499 {
500 unsigned int i;
501 struct kvm_vcpu *vcpu;
502
503 kvm_for_each_vcpu(i, vcpu, kvm) {
504 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
505 }
506 }
507
508 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
509 {
510 int r;
511
512 if (cap->flags)
513 return -EINVAL;
514
515 switch (cap->cap) {
516 case KVM_CAP_S390_IRQCHIP:
517 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
518 kvm->arch.use_irqchip = 1;
519 r = 0;
520 break;
521 case KVM_CAP_S390_USER_SIGP:
522 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
523 kvm->arch.user_sigp = 1;
524 r = 0;
525 break;
526 case KVM_CAP_S390_VECTOR_REGISTERS:
527 mutex_lock(&kvm->lock);
528 if (kvm->created_vcpus) {
529 r = -EBUSY;
530 } else if (MACHINE_HAS_VX) {
531 set_kvm_facility(kvm->arch.model.fac_mask, 129);
532 set_kvm_facility(kvm->arch.model.fac_list, 129);
533 if (test_facility(134)) {
534 set_kvm_facility(kvm->arch.model.fac_mask, 134);
535 set_kvm_facility(kvm->arch.model.fac_list, 134);
536 }
537 if (test_facility(135)) {
538 set_kvm_facility(kvm->arch.model.fac_mask, 135);
539 set_kvm_facility(kvm->arch.model.fac_list, 135);
540 }
541 r = 0;
542 } else
543 r = -EINVAL;
544 mutex_unlock(&kvm->lock);
545 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
546 r ? "(not available)" : "(success)");
547 break;
548 case KVM_CAP_S390_RI:
549 r = -EINVAL;
550 mutex_lock(&kvm->lock);
551 if (kvm->created_vcpus) {
552 r = -EBUSY;
553 } else if (test_facility(64)) {
554 set_kvm_facility(kvm->arch.model.fac_mask, 64);
555 set_kvm_facility(kvm->arch.model.fac_list, 64);
556 r = 0;
557 }
558 mutex_unlock(&kvm->lock);
559 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
560 r ? "(not available)" : "(success)");
561 break;
562 case KVM_CAP_S390_AIS:
563 mutex_lock(&kvm->lock);
564 if (kvm->created_vcpus) {
565 r = -EBUSY;
566 } else {
567 set_kvm_facility(kvm->arch.model.fac_mask, 72);
568 set_kvm_facility(kvm->arch.model.fac_list, 72);
569 r = 0;
570 }
571 mutex_unlock(&kvm->lock);
572 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
573 r ? "(not available)" : "(success)");
574 break;
575 case KVM_CAP_S390_GS:
576 r = -EINVAL;
577 mutex_lock(&kvm->lock);
578 if (atomic_read(&kvm->online_vcpus)) {
579 r = -EBUSY;
580 } else if (test_facility(133)) {
581 set_kvm_facility(kvm->arch.model.fac_mask, 133);
582 set_kvm_facility(kvm->arch.model.fac_list, 133);
583 r = 0;
584 }
585 mutex_unlock(&kvm->lock);
586 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
587 r ? "(not available)" : "(success)");
588 break;
589 case KVM_CAP_S390_USER_STSI:
590 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
591 kvm->arch.user_stsi = 1;
592 r = 0;
593 break;
594 case KVM_CAP_S390_USER_INSTR0:
595 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
596 kvm->arch.user_instr0 = 1;
597 icpt_operexc_on_all_vcpus(kvm);
598 r = 0;
599 break;
600 default:
601 r = -EINVAL;
602 break;
603 }
604 return r;
605 }
606
607 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609 int ret;
610
611 switch (attr->attr) {
612 case KVM_S390_VM_MEM_LIMIT_SIZE:
613 ret = 0;
614 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
615 kvm->arch.mem_limit);
616 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
617 ret = -EFAULT;
618 break;
619 default:
620 ret = -ENXIO;
621 break;
622 }
623 return ret;
624 }
625
626 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
627 {
628 int ret;
629 unsigned int idx;
630 switch (attr->attr) {
631 case KVM_S390_VM_MEM_ENABLE_CMMA:
632 ret = -ENXIO;
633 if (!sclp.has_cmma)
634 break;
635
636 ret = -EBUSY;
637 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
638 mutex_lock(&kvm->lock);
639 if (!kvm->created_vcpus) {
640 kvm->arch.use_cmma = 1;
641 ret = 0;
642 }
643 mutex_unlock(&kvm->lock);
644 break;
645 case KVM_S390_VM_MEM_CLR_CMMA:
646 ret = -ENXIO;
647 if (!sclp.has_cmma)
648 break;
649 ret = -EINVAL;
650 if (!kvm->arch.use_cmma)
651 break;
652
653 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
654 mutex_lock(&kvm->lock);
655 idx = srcu_read_lock(&kvm->srcu);
656 s390_reset_cmma(kvm->arch.gmap->mm);
657 srcu_read_unlock(&kvm->srcu, idx);
658 mutex_unlock(&kvm->lock);
659 ret = 0;
660 break;
661 case KVM_S390_VM_MEM_LIMIT_SIZE: {
662 unsigned long new_limit;
663
664 if (kvm_is_ucontrol(kvm))
665 return -EINVAL;
666
667 if (get_user(new_limit, (u64 __user *)attr->addr))
668 return -EFAULT;
669
670 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
671 new_limit > kvm->arch.mem_limit)
672 return -E2BIG;
673
674 if (!new_limit)
675 return -EINVAL;
676
677 /* gmap_create takes last usable address */
678 if (new_limit != KVM_S390_NO_MEM_LIMIT)
679 new_limit -= 1;
680
681 ret = -EBUSY;
682 mutex_lock(&kvm->lock);
683 if (!kvm->created_vcpus) {
684 /* gmap_create will round the limit up */
685 struct gmap *new = gmap_create(current->mm, new_limit);
686
687 if (!new) {
688 ret = -ENOMEM;
689 } else {
690 gmap_remove(kvm->arch.gmap);
691 new->private = kvm;
692 kvm->arch.gmap = new;
693 ret = 0;
694 }
695 }
696 mutex_unlock(&kvm->lock);
697 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
698 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
699 (void *) kvm->arch.gmap->asce);
700 break;
701 }
702 default:
703 ret = -ENXIO;
704 break;
705 }
706 return ret;
707 }
708
709 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
710
711 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713 struct kvm_vcpu *vcpu;
714 int i;
715
716 if (!test_kvm_facility(kvm, 76))
717 return -EINVAL;
718
719 mutex_lock(&kvm->lock);
720 switch (attr->attr) {
721 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
722 get_random_bytes(
723 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
724 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
725 kvm->arch.crypto.aes_kw = 1;
726 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
727 break;
728 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
729 get_random_bytes(
730 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
731 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
732 kvm->arch.crypto.dea_kw = 1;
733 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
734 break;
735 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
736 kvm->arch.crypto.aes_kw = 0;
737 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
738 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
739 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
740 break;
741 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
742 kvm->arch.crypto.dea_kw = 0;
743 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
744 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
745 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
746 break;
747 default:
748 mutex_unlock(&kvm->lock);
749 return -ENXIO;
750 }
751
752 kvm_for_each_vcpu(i, vcpu, kvm) {
753 kvm_s390_vcpu_crypto_setup(vcpu);
754 exit_sie(vcpu);
755 }
756 mutex_unlock(&kvm->lock);
757 return 0;
758 }
759
760 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
761 {
762 int cx;
763 struct kvm_vcpu *vcpu;
764
765 kvm_for_each_vcpu(cx, vcpu, kvm)
766 kvm_s390_sync_request(req, vcpu);
767 }
768
769 /*
770 * Must be called with kvm->srcu held to avoid races on memslots, and with
771 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
772 */
773 static int kvm_s390_vm_start_migration(struct kvm *kvm)
774 {
775 struct kvm_s390_migration_state *mgs;
776 struct kvm_memory_slot *ms;
777 /* should be the only one */
778 struct kvm_memslots *slots;
779 unsigned long ram_pages;
780 int slotnr;
781
782 /* migration mode already enabled */
783 if (kvm->arch.migration_state)
784 return 0;
785
786 slots = kvm_memslots(kvm);
787 if (!slots || !slots->used_slots)
788 return -EINVAL;
789
790 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
791 if (!mgs)
792 return -ENOMEM;
793 kvm->arch.migration_state = mgs;
794
795 if (kvm->arch.use_cmma) {
796 /*
797 * Get the last slot. They should be sorted by base_gfn, so the
798 * last slot is also the one at the end of the address space.
799 * We have verified above that at least one slot is present.
800 */
801 ms = slots->memslots + slots->used_slots - 1;
802 /* round up so we only use full longs */
803 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
804 /* allocate enough bytes to store all the bits */
805 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
806 if (!mgs->pgste_bitmap) {
807 kfree(mgs);
808 kvm->arch.migration_state = NULL;
809 return -ENOMEM;
810 }
811
812 mgs->bitmap_size = ram_pages;
813 atomic64_set(&mgs->dirty_pages, ram_pages);
814 /* mark all the pages in active slots as dirty */
815 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
816 ms = slots->memslots + slotnr;
817 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
818 }
819
820 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
821 }
822 return 0;
823 }
824
825 /*
826 * Must be called with kvm->lock to avoid races with ourselves and
827 * kvm_s390_vm_start_migration.
828 */
829 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
830 {
831 struct kvm_s390_migration_state *mgs;
832
833 /* migration mode already disabled */
834 if (!kvm->arch.migration_state)
835 return 0;
836 mgs = kvm->arch.migration_state;
837 kvm->arch.migration_state = NULL;
838
839 if (kvm->arch.use_cmma) {
840 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
841 vfree(mgs->pgste_bitmap);
842 }
843 kfree(mgs);
844 return 0;
845 }
846
847 static int kvm_s390_vm_set_migration(struct kvm *kvm,
848 struct kvm_device_attr *attr)
849 {
850 int idx, res = -ENXIO;
851
852 mutex_lock(&kvm->lock);
853 switch (attr->attr) {
854 case KVM_S390_VM_MIGRATION_START:
855 idx = srcu_read_lock(&kvm->srcu);
856 res = kvm_s390_vm_start_migration(kvm);
857 srcu_read_unlock(&kvm->srcu, idx);
858 break;
859 case KVM_S390_VM_MIGRATION_STOP:
860 res = kvm_s390_vm_stop_migration(kvm);
861 break;
862 default:
863 break;
864 }
865 mutex_unlock(&kvm->lock);
866
867 return res;
868 }
869
870 static int kvm_s390_vm_get_migration(struct kvm *kvm,
871 struct kvm_device_attr *attr)
872 {
873 u64 mig = (kvm->arch.migration_state != NULL);
874
875 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
876 return -ENXIO;
877
878 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
879 return -EFAULT;
880 return 0;
881 }
882
883 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
884 {
885 struct kvm_s390_vm_tod_clock gtod;
886
887 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
888 return -EFAULT;
889
890 if (test_kvm_facility(kvm, 139))
891 kvm_s390_set_tod_clock_ext(kvm, &gtod);
892 else if (gtod.epoch_idx == 0)
893 kvm_s390_set_tod_clock(kvm, gtod.tod);
894 else
895 return -EINVAL;
896
897 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
898 gtod.epoch_idx, gtod.tod);
899
900 return 0;
901 }
902
903 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905 u8 gtod_high;
906
907 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
908 sizeof(gtod_high)))
909 return -EFAULT;
910
911 if (gtod_high != 0)
912 return -EINVAL;
913 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
914
915 return 0;
916 }
917
918 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
919 {
920 u64 gtod;
921
922 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
923 return -EFAULT;
924
925 kvm_s390_set_tod_clock(kvm, gtod);
926 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
927 return 0;
928 }
929
930 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932 int ret;
933
934 if (attr->flags)
935 return -EINVAL;
936
937 switch (attr->attr) {
938 case KVM_S390_VM_TOD_EXT:
939 ret = kvm_s390_set_tod_ext(kvm, attr);
940 break;
941 case KVM_S390_VM_TOD_HIGH:
942 ret = kvm_s390_set_tod_high(kvm, attr);
943 break;
944 case KVM_S390_VM_TOD_LOW:
945 ret = kvm_s390_set_tod_low(kvm, attr);
946 break;
947 default:
948 ret = -ENXIO;
949 break;
950 }
951 return ret;
952 }
953
954 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
955 struct kvm_s390_vm_tod_clock *gtod)
956 {
957 struct kvm_s390_tod_clock_ext htod;
958
959 preempt_disable();
960
961 get_tod_clock_ext((char *)&htod);
962
963 gtod->tod = htod.tod + kvm->arch.epoch;
964 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
965
966 if (gtod->tod < htod.tod)
967 gtod->epoch_idx += 1;
968
969 preempt_enable();
970 }
971
972 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
973 {
974 struct kvm_s390_vm_tod_clock gtod;
975
976 memset(&gtod, 0, sizeof(gtod));
977
978 if (test_kvm_facility(kvm, 139))
979 kvm_s390_get_tod_clock_ext(kvm, &gtod);
980 else
981 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
982
983 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
984 return -EFAULT;
985
986 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
987 gtod.epoch_idx, gtod.tod);
988 return 0;
989 }
990
991 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
992 {
993 u8 gtod_high = 0;
994
995 if (copy_to_user((void __user *)attr->addr, &gtod_high,
996 sizeof(gtod_high)))
997 return -EFAULT;
998 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
999
1000 return 0;
1001 }
1002
1003 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1004 {
1005 u64 gtod;
1006
1007 gtod = kvm_s390_get_tod_clock_fast(kvm);
1008 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009 return -EFAULT;
1010 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1011
1012 return 0;
1013 }
1014
1015 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017 int ret;
1018
1019 if (attr->flags)
1020 return -EINVAL;
1021
1022 switch (attr->attr) {
1023 case KVM_S390_VM_TOD_EXT:
1024 ret = kvm_s390_get_tod_ext(kvm, attr);
1025 break;
1026 case KVM_S390_VM_TOD_HIGH:
1027 ret = kvm_s390_get_tod_high(kvm, attr);
1028 break;
1029 case KVM_S390_VM_TOD_LOW:
1030 ret = kvm_s390_get_tod_low(kvm, attr);
1031 break;
1032 default:
1033 ret = -ENXIO;
1034 break;
1035 }
1036 return ret;
1037 }
1038
1039 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041 struct kvm_s390_vm_cpu_processor *proc;
1042 u16 lowest_ibc, unblocked_ibc;
1043 int ret = 0;
1044
1045 mutex_lock(&kvm->lock);
1046 if (kvm->created_vcpus) {
1047 ret = -EBUSY;
1048 goto out;
1049 }
1050 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1051 if (!proc) {
1052 ret = -ENOMEM;
1053 goto out;
1054 }
1055 if (!copy_from_user(proc, (void __user *)attr->addr,
1056 sizeof(*proc))) {
1057 kvm->arch.model.cpuid = proc->cpuid;
1058 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1059 unblocked_ibc = sclp.ibc & 0xfff;
1060 if (lowest_ibc && proc->ibc) {
1061 if (proc->ibc > unblocked_ibc)
1062 kvm->arch.model.ibc = unblocked_ibc;
1063 else if (proc->ibc < lowest_ibc)
1064 kvm->arch.model.ibc = lowest_ibc;
1065 else
1066 kvm->arch.model.ibc = proc->ibc;
1067 }
1068 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1069 S390_ARCH_FAC_LIST_SIZE_BYTE);
1070 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1071 kvm->arch.model.ibc,
1072 kvm->arch.model.cpuid);
1073 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1074 kvm->arch.model.fac_list[0],
1075 kvm->arch.model.fac_list[1],
1076 kvm->arch.model.fac_list[2]);
1077 } else
1078 ret = -EFAULT;
1079 kfree(proc);
1080 out:
1081 mutex_unlock(&kvm->lock);
1082 return ret;
1083 }
1084
1085 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1086 struct kvm_device_attr *attr)
1087 {
1088 struct kvm_s390_vm_cpu_feat data;
1089 int ret = -EBUSY;
1090
1091 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092 return -EFAULT;
1093 if (!bitmap_subset((unsigned long *) data.feat,
1094 kvm_s390_available_cpu_feat,
1095 KVM_S390_VM_CPU_FEAT_NR_BITS))
1096 return -EINVAL;
1097
1098 mutex_lock(&kvm->lock);
1099 if (!atomic_read(&kvm->online_vcpus)) {
1100 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101 KVM_S390_VM_CPU_FEAT_NR_BITS);
1102 ret = 0;
1103 }
1104 mutex_unlock(&kvm->lock);
1105 return ret;
1106 }
1107
/*
 * Configuring the guest's subfunctions is not supported yet, so every
 * request is rejected with -ENXIO.
 *
 * Once kernel and hardware support it, the subfunctions have to be stored in
 * kvm->arch together with a note that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
1117
1118 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1119 {
1120 int ret = -ENXIO;
1121
1122 switch (attr->attr) {
1123 case KVM_S390_VM_CPU_PROCESSOR:
1124 ret = kvm_s390_set_processor(kvm, attr);
1125 break;
1126 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1127 ret = kvm_s390_set_processor_feat(kvm, attr);
1128 break;
1129 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1130 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1131 break;
1132 }
1133 return ret;
1134 }
1135
1136 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137 {
1138 struct kvm_s390_vm_cpu_processor *proc;
1139 int ret = 0;
1140
1141 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1142 if (!proc) {
1143 ret = -ENOMEM;
1144 goto out;
1145 }
1146 proc->cpuid = kvm->arch.model.cpuid;
1147 proc->ibc = kvm->arch.model.ibc;
1148 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1149 S390_ARCH_FAC_LIST_SIZE_BYTE);
1150 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1151 kvm->arch.model.ibc,
1152 kvm->arch.model.cpuid);
1153 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1154 kvm->arch.model.fac_list[0],
1155 kvm->arch.model.fac_list[1],
1156 kvm->arch.model.fac_list[2]);
1157 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1158 ret = -EFAULT;
1159 kfree(proc);
1160 out:
1161 return ret;
1162 }
1163
1164 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165 {
1166 struct kvm_s390_vm_cpu_machine *mach;
1167 int ret = 0;
1168
1169 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1170 if (!mach) {
1171 ret = -ENOMEM;
1172 goto out;
1173 }
1174 get_cpu_id((struct cpuid *) &mach->cpuid);
1175 mach->ibc = sclp.ibc;
1176 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1177 S390_ARCH_FAC_LIST_SIZE_BYTE);
1178 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1179 sizeof(S390_lowcore.stfle_fac_list));
1180 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1181 kvm->arch.model.ibc,
1182 kvm->arch.model.cpuid);
1183 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1184 mach->fac_mask[0],
1185 mach->fac_mask[1],
1186 mach->fac_mask[2]);
1187 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1188 mach->fac_list[0],
1189 mach->fac_list[1],
1190 mach->fac_list[2]);
1191 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1192 ret = -EFAULT;
1193 kfree(mach);
1194 out:
1195 return ret;
1196 }
1197
1198 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1199 struct kvm_device_attr *attr)
1200 {
1201 struct kvm_s390_vm_cpu_feat data;
1202
1203 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1204 KVM_S390_VM_CPU_FEAT_NR_BITS);
1205 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1206 return -EFAULT;
1207 return 0;
1208 }
1209
1210 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1211 struct kvm_device_attr *attr)
1212 {
1213 struct kvm_s390_vm_cpu_feat data;
1214
1215 bitmap_copy((unsigned long *) data.feat,
1216 kvm_s390_available_cpu_feat,
1217 KVM_S390_VM_CPU_FEAT_NR_BITS);
1218 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1219 return -EFAULT;
1220 return 0;
1221 }
1222
/*
 * Querying the guest's configured subfunctions is not supported yet, so
 * every request is rejected with -ENXIO.
 *
 * Once subfunctions can actually be configured (kernel + hw support), this
 * has to check whether user space already set them and, if so, copy them
 * from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
1233
1234 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1235 struct kvm_device_attr *attr)
1236 {
1237 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1238 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1239 return -EFAULT;
1240 return 0;
1241 }
1242 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244 int ret = -ENXIO;
1245
1246 switch (attr->attr) {
1247 case KVM_S390_VM_CPU_PROCESSOR:
1248 ret = kvm_s390_get_processor(kvm, attr);
1249 break;
1250 case KVM_S390_VM_CPU_MACHINE:
1251 ret = kvm_s390_get_machine(kvm, attr);
1252 break;
1253 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1254 ret = kvm_s390_get_processor_feat(kvm, attr);
1255 break;
1256 case KVM_S390_VM_CPU_MACHINE_FEAT:
1257 ret = kvm_s390_get_machine_feat(kvm, attr);
1258 break;
1259 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1260 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261 break;
1262 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1263 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1264 break;
1265 }
1266 return ret;
1267 }
1268
1269 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1270 {
1271 int ret;
1272
1273 switch (attr->group) {
1274 case KVM_S390_VM_MEM_CTRL:
1275 ret = kvm_s390_set_mem_control(kvm, attr);
1276 break;
1277 case KVM_S390_VM_TOD:
1278 ret = kvm_s390_set_tod(kvm, attr);
1279 break;
1280 case KVM_S390_VM_CPU_MODEL:
1281 ret = kvm_s390_set_cpu_model(kvm, attr);
1282 break;
1283 case KVM_S390_VM_CRYPTO:
1284 ret = kvm_s390_vm_set_crypto(kvm, attr);
1285 break;
1286 case KVM_S390_VM_MIGRATION:
1287 ret = kvm_s390_vm_set_migration(kvm, attr);
1288 break;
1289 default:
1290 ret = -ENXIO;
1291 break;
1292 }
1293
1294 return ret;
1295 }
1296
1297 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1298 {
1299 int ret;
1300
1301 switch (attr->group) {
1302 case KVM_S390_VM_MEM_CTRL:
1303 ret = kvm_s390_get_mem_control(kvm, attr);
1304 break;
1305 case KVM_S390_VM_TOD:
1306 ret = kvm_s390_get_tod(kvm, attr);
1307 break;
1308 case KVM_S390_VM_CPU_MODEL:
1309 ret = kvm_s390_get_cpu_model(kvm, attr);
1310 break;
1311 case KVM_S390_VM_MIGRATION:
1312 ret = kvm_s390_vm_get_migration(kvm, attr);
1313 break;
1314 default:
1315 ret = -ENXIO;
1316 break;
1317 }
1318
1319 return ret;
1320 }
1321
1322 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1323 {
1324 int ret;
1325
1326 switch (attr->group) {
1327 case KVM_S390_VM_MEM_CTRL:
1328 switch (attr->attr) {
1329 case KVM_S390_VM_MEM_ENABLE_CMMA:
1330 case KVM_S390_VM_MEM_CLR_CMMA:
1331 ret = sclp.has_cmma ? 0 : -ENXIO;
1332 break;
1333 case KVM_S390_VM_MEM_LIMIT_SIZE:
1334 ret = 0;
1335 break;
1336 default:
1337 ret = -ENXIO;
1338 break;
1339 }
1340 break;
1341 case KVM_S390_VM_TOD:
1342 switch (attr->attr) {
1343 case KVM_S390_VM_TOD_LOW:
1344 case KVM_S390_VM_TOD_HIGH:
1345 ret = 0;
1346 break;
1347 default:
1348 ret = -ENXIO;
1349 break;
1350 }
1351 break;
1352 case KVM_S390_VM_CPU_MODEL:
1353 switch (attr->attr) {
1354 case KVM_S390_VM_CPU_PROCESSOR:
1355 case KVM_S390_VM_CPU_MACHINE:
1356 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1357 case KVM_S390_VM_CPU_MACHINE_FEAT:
1358 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1359 ret = 0;
1360 break;
1361 /* configuring subfunctions is not supported yet */
1362 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1363 default:
1364 ret = -ENXIO;
1365 break;
1366 }
1367 break;
1368 case KVM_S390_VM_CRYPTO:
1369 switch (attr->attr) {
1370 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1371 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1372 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1373 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1374 ret = 0;
1375 break;
1376 default:
1377 ret = -ENXIO;
1378 break;
1379 }
1380 break;
1381 case KVM_S390_VM_MIGRATION:
1382 ret = 0;
1383 break;
1384 default:
1385 ret = -ENXIO;
1386 break;
1387 }
1388
1389 return ret;
1390 }
1391
1392 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1393 {
1394 uint8_t *keys;
1395 uint64_t hva;
1396 int srcu_idx, i, r = 0;
1397
1398 if (args->flags != 0)
1399 return -EINVAL;
1400
1401 /* Is this guest using storage keys? */
1402 if (!mm_use_skey(current->mm))
1403 return KVM_S390_GET_SKEYS_NONE;
1404
1405 /* Enforce sane limit on memory allocation */
1406 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1407 return -EINVAL;
1408
1409 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1410 if (!keys)
1411 return -ENOMEM;
1412
1413 down_read(&current->mm->mmap_sem);
1414 srcu_idx = srcu_read_lock(&kvm->srcu);
1415 for (i = 0; i < args->count; i++) {
1416 hva = gfn_to_hva(kvm, args->start_gfn + i);
1417 if (kvm_is_error_hva(hva)) {
1418 r = -EFAULT;
1419 break;
1420 }
1421
1422 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1423 if (r)
1424 break;
1425 }
1426 srcu_read_unlock(&kvm->srcu, srcu_idx);
1427 up_read(&current->mm->mmap_sem);
1428
1429 if (!r) {
1430 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1431 sizeof(uint8_t) * args->count);
1432 if (r)
1433 r = -EFAULT;
1434 }
1435
1436 kvfree(keys);
1437 return r;
1438 }
1439
1440 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441 {
1442 uint8_t *keys;
1443 uint64_t hva;
1444 int srcu_idx, i, r = 0;
1445
1446 if (args->flags != 0)
1447 return -EINVAL;
1448
1449 /* Enforce sane limit on memory allocation */
1450 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451 return -EINVAL;
1452
1453 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454 if (!keys)
1455 return -ENOMEM;
1456
1457 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458 sizeof(uint8_t) * args->count);
1459 if (r) {
1460 r = -EFAULT;
1461 goto out;
1462 }
1463
1464 /* Enable storage key handling for the guest */
1465 r = s390_enable_skey();
1466 if (r)
1467 goto out;
1468
1469 down_read(&current->mm->mmap_sem);
1470 srcu_idx = srcu_read_lock(&kvm->srcu);
1471 for (i = 0; i < args->count; i++) {
1472 hva = gfn_to_hva(kvm, args->start_gfn + i);
1473 if (kvm_is_error_hva(hva)) {
1474 r = -EFAULT;
1475 break;
1476 }
1477
1478 /* Lowest order bit is reserved */
1479 if (keys[i] & 0x01) {
1480 r = -EINVAL;
1481 break;
1482 }
1483
1484 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485 if (r)
1486 break;
1487 }
1488 srcu_read_unlock(&kvm->srcu, srcu_idx);
1489 up_read(&current->mm->mmap_sem);
1490 out:
1491 kvfree(keys);
1492 return r;
1493 }
1494
/*
 * Each block of CMMA values has to be sent together with its base address
 * and length. Sending a few clean values in between two dirty ones is
 * therefore cheaper than starting a new block, as long as the gap is smaller
 * than the size of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (sizeof(void *) * 2)
/* use the same upper bound as for storage key transfers, for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503
1504 /*
1505 * This function searches for the next page with dirty CMMA attributes, and
1506 * saves the attributes in the buffer up to either the end of the buffer or
1507 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508 * no trailing clean bytes are saved.
1509 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1510 * output buffer will indicate 0 as length.
1511 */
1512 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513 struct kvm_s390_cmma_log *args)
1514 {
1515 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516 unsigned long bufsize, hva, pgstev, i, next, cur;
1517 int srcu_idx, peek, r = 0, rr;
1518 u8 *res;
1519
1520 cur = args->start_gfn;
1521 i = next = pgstev = 0;
1522
1523 if (unlikely(!kvm->arch.use_cmma))
1524 return -ENXIO;
1525 /* Invalid/unsupported flags were specified */
1526 if (args->flags & ~KVM_S390_CMMA_PEEK)
1527 return -EINVAL;
1528 /* Migration mode query, and we are not doing a migration */
1529 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530 if (!peek && !s)
1531 return -EINVAL;
1532 /* CMMA is disabled or was not used, or the buffer has length zero */
1533 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534 if (!bufsize || !kvm->mm->context.use_cmma) {
1535 memset(args, 0, sizeof(*args));
1536 return 0;
1537 }
1538
1539 if (!peek) {
1540 /* We are not peeking, and there are no dirty pages */
1541 if (!atomic64_read(&s->dirty_pages)) {
1542 memset(args, 0, sizeof(*args));
1543 return 0;
1544 }
1545 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546 args->start_gfn);
1547 if (cur >= s->bitmap_size) /* nothing found, loop back */
1548 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1550 memset(args, 0, sizeof(*args));
1551 return 0;
1552 }
1553 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554 }
1555
1556 res = vmalloc(bufsize);
1557 if (!res)
1558 return -ENOMEM;
1559
1560 args->start_gfn = cur;
1561
1562 down_read(&kvm->mm->mmap_sem);
1563 srcu_idx = srcu_read_lock(&kvm->srcu);
1564 while (i < bufsize) {
1565 hva = gfn_to_hva(kvm, cur);
1566 if (kvm_is_error_hva(hva)) {
1567 r = -EFAULT;
1568 break;
1569 }
1570 /* decrement only if we actually flipped the bit to 0 */
1571 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572 atomic64_dec(&s->dirty_pages);
1573 r = get_pgste(kvm->mm, hva, &pgstev);
1574 if (r < 0)
1575 pgstev = 0;
1576 /* save the value */
1577 res[i++] = (pgstev >> 24) & 0x43;
1578 /*
1579 * if the next bit is too far away, stop.
1580 * if we reached the previous "next", find the next one
1581 */
1582 if (!peek) {
1583 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584 break;
1585 if (cur == next)
1586 next = find_next_bit(s->pgste_bitmap,
1587 s->bitmap_size, cur + 1);
1588 /* reached the end of the bitmap or of the buffer, stop */
1589 if ((next >= s->bitmap_size) ||
1590 (next >= args->start_gfn + bufsize))
1591 break;
1592 }
1593 cur++;
1594 }
1595 srcu_read_unlock(&kvm->srcu, srcu_idx);
1596 up_read(&kvm->mm->mmap_sem);
1597 args->count = i;
1598 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599
1600 rr = copy_to_user((void __user *)args->values, res, args->count);
1601 if (rr)
1602 r = -EFAULT;
1603
1604 vfree(res);
1605 return r;
1606 }
1607
1608 /*
1609 * This function sets the CMMA attributes for the given pages. If the input
1610 * buffer has zero length, no action is taken, otherwise the attributes are
1611 * set and the mm->context.use_cmma flag is set.
1612 */
1613 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614 const struct kvm_s390_cmma_log *args)
1615 {
1616 unsigned long hva, mask, pgstev, i;
1617 uint8_t *bits;
1618 int srcu_idx, r = 0;
1619
1620 mask = args->mask;
1621
1622 if (!kvm->arch.use_cmma)
1623 return -ENXIO;
1624 /* invalid/unsupported flags */
1625 if (args->flags != 0)
1626 return -EINVAL;
1627 /* Enforce sane limit on memory allocation */
1628 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629 return -EINVAL;
1630 /* Nothing to do */
1631 if (args->count == 0)
1632 return 0;
1633
1634 bits = vmalloc(sizeof(*bits) * args->count);
1635 if (!bits)
1636 return -ENOMEM;
1637
1638 r = copy_from_user(bits, (void __user *)args->values, args->count);
1639 if (r) {
1640 r = -EFAULT;
1641 goto out;
1642 }
1643
1644 down_read(&kvm->mm->mmap_sem);
1645 srcu_idx = srcu_read_lock(&kvm->srcu);
1646 for (i = 0; i < args->count; i++) {
1647 hva = gfn_to_hva(kvm, args->start_gfn + i);
1648 if (kvm_is_error_hva(hva)) {
1649 r = -EFAULT;
1650 break;
1651 }
1652
1653 pgstev = bits[i];
1654 pgstev = pgstev << 24;
1655 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1656 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657 }
1658 srcu_read_unlock(&kvm->srcu, srcu_idx);
1659 up_read(&kvm->mm->mmap_sem);
1660
1661 if (!kvm->mm->context.use_cmma) {
1662 down_write(&kvm->mm->mmap_sem);
1663 kvm->mm->context.use_cmma = 1;
1664 up_write(&kvm->mm->mmap_sem);
1665 }
1666 out:
1667 vfree(bits);
1668 return r;
1669 }
1670
1671 long kvm_arch_vm_ioctl(struct file *filp,
1672 unsigned int ioctl, unsigned long arg)
1673 {
1674 struct kvm *kvm = filp->private_data;
1675 void __user *argp = (void __user *)arg;
1676 struct kvm_device_attr attr;
1677 int r;
1678
1679 switch (ioctl) {
1680 case KVM_S390_INTERRUPT: {
1681 struct kvm_s390_interrupt s390int;
1682
1683 r = -EFAULT;
1684 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685 break;
1686 r = kvm_s390_inject_vm(kvm, &s390int);
1687 break;
1688 }
1689 case KVM_ENABLE_CAP: {
1690 struct kvm_enable_cap cap;
1691 r = -EFAULT;
1692 if (copy_from_user(&cap, argp, sizeof(cap)))
1693 break;
1694 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695 break;
1696 }
1697 case KVM_CREATE_IRQCHIP: {
1698 struct kvm_irq_routing_entry routing;
1699
1700 r = -EINVAL;
1701 if (kvm->arch.use_irqchip) {
1702 /* Set up dummy routing. */
1703 memset(&routing, 0, sizeof(routing));
1704 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705 }
1706 break;
1707 }
1708 case KVM_SET_DEVICE_ATTR: {
1709 r = -EFAULT;
1710 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711 break;
1712 r = kvm_s390_vm_set_attr(kvm, &attr);
1713 break;
1714 }
1715 case KVM_GET_DEVICE_ATTR: {
1716 r = -EFAULT;
1717 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718 break;
1719 r = kvm_s390_vm_get_attr(kvm, &attr);
1720 break;
1721 }
1722 case KVM_HAS_DEVICE_ATTR: {
1723 r = -EFAULT;
1724 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725 break;
1726 r = kvm_s390_vm_has_attr(kvm, &attr);
1727 break;
1728 }
1729 case KVM_S390_GET_SKEYS: {
1730 struct kvm_s390_skeys args;
1731
1732 r = -EFAULT;
1733 if (copy_from_user(&args, argp,
1734 sizeof(struct kvm_s390_skeys)))
1735 break;
1736 r = kvm_s390_get_skeys(kvm, &args);
1737 break;
1738 }
1739 case KVM_S390_SET_SKEYS: {
1740 struct kvm_s390_skeys args;
1741
1742 r = -EFAULT;
1743 if (copy_from_user(&args, argp,
1744 sizeof(struct kvm_s390_skeys)))
1745 break;
1746 r = kvm_s390_set_skeys(kvm, &args);
1747 break;
1748 }
1749 case KVM_S390_GET_CMMA_BITS: {
1750 struct kvm_s390_cmma_log args;
1751
1752 r = -EFAULT;
1753 if (copy_from_user(&args, argp, sizeof(args)))
1754 break;
1755 r = kvm_s390_get_cmma_bits(kvm, &args);
1756 if (!r) {
1757 r = copy_to_user(argp, &args, sizeof(args));
1758 if (r)
1759 r = -EFAULT;
1760 }
1761 break;
1762 }
1763 case KVM_S390_SET_CMMA_BITS: {
1764 struct kvm_s390_cmma_log args;
1765
1766 r = -EFAULT;
1767 if (copy_from_user(&args, argp, sizeof(args)))
1768 break;
1769 r = kvm_s390_set_cmma_bits(kvm, &args);
1770 break;
1771 }
1772 default:
1773 r = -ENOTTY;
1774 }
1775
1776 return r;
1777 }
1778
1779 static int kvm_s390_query_ap_config(u8 *config)
1780 {
1781 u32 fcn_code = 0x04000000UL;
1782 u32 cc = 0;
1783
1784 memset(config, 0, 128);
1785 asm volatile(
1786 "lgr 0,%1\n"
1787 "lgr 2,%2\n"
1788 ".long 0xb2af0000\n" /* PQAP(QCI) */
1789 "0: ipm %0\n"
1790 "srl %0,28\n"
1791 "1:\n"
1792 EX_TABLE(0b, 1b)
1793 : "+r" (cc)
1794 : "r" (fcn_code), "r" (config)
1795 : "cc", "0", "2", "memory"
1796 );
1797
1798 return cc;
1799 }
1800
1801 static int kvm_s390_apxa_installed(void)
1802 {
1803 u8 config[128];
1804 int cc;
1805
1806 if (test_facility(12)) {
1807 cc = kvm_s390_query_ap_config(config);
1808
1809 if (cc)
1810 pr_err("PQAP(QCI) failed with cc=%d", cc);
1811 else
1812 return config[0] & 0x40;
1813 }
1814
1815 return 0;
1816 }
1817
1818 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819 {
1820 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821
1822 if (kvm_s390_apxa_installed())
1823 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824 else
1825 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826 }
1827
1828 static u64 kvm_s390_get_initial_cpuid(void)
1829 {
1830 struct cpuid cpuid;
1831
1832 get_cpu_id(&cpuid);
1833 cpuid.version = 0xff;
1834 return *((u64 *) &cpuid);
1835 }
1836
1837 static void kvm_s390_crypto_init(struct kvm *kvm)
1838 {
1839 if (!test_kvm_facility(kvm, 76))
1840 return;
1841
1842 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843 kvm_s390_set_crycb_format(kvm);
1844
1845 /* Enable AES/DEA protected key functions by default */
1846 kvm->arch.crypto.aes_kw = 1;
1847 kvm->arch.crypto.dea_kw = 1;
1848 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852 }
1853
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856 if (kvm->arch.use_esca)
1857 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858 else
1859 free_page((unsigned long)(kvm->arch.sca));
1860 kvm->arch.sca = NULL;
1861 }
1862
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865 gfp_t alloc_flags = GFP_KERNEL;
1866 int i, rc;
1867 char debug_name[16];
1868 static unsigned long sca_offset;
1869
1870 rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872 if (type & ~KVM_VM_S390_UCONTROL)
1873 goto out_err;
1874 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875 goto out_err;
1876 #else
1877 if (type)
1878 goto out_err;
1879 #endif
1880
1881 rc = s390_enable_sie();
1882 if (rc)
1883 goto out_err;
1884
1885 rc = -ENOMEM;
1886
1887 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1888
1889 kvm->arch.use_esca = 0; /* start with basic SCA */
1890 if (!sclp.has_64bscao)
1891 alloc_flags |= GFP_DMA;
1892 rwlock_init(&kvm->arch.sca_lock);
1893 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894 if (!kvm->arch.sca)
1895 goto out_err;
1896 spin_lock(&kvm_lock);
1897 sca_offset += 16;
1898 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899 sca_offset = 0;
1900 kvm->arch.sca = (struct bsca_block *)
1901 ((char *) kvm->arch.sca + sca_offset);
1902 spin_unlock(&kvm_lock);
1903
1904 sprintf(debug_name, "kvm-%u", current->pid);
1905
1906 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907 if (!kvm->arch.dbf)
1908 goto out_err;
1909
1910 kvm->arch.sie_page2 =
1911 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912 if (!kvm->arch.sie_page2)
1913 goto out_err;
1914
1915 /* Populate the facility mask initially. */
1916 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917 sizeof(S390_lowcore.stfle_fac_list));
1918 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919 if (i < kvm_s390_fac_list_mask_size())
1920 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921 else
1922 kvm->arch.model.fac_mask[i] = 0UL;
1923 }
1924
1925 /* Populate the facility list initially. */
1926 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928 S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930 /* we are always in czam mode - even on pre z14 machines */
1931 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932 set_kvm_facility(kvm->arch.model.fac_list, 138);
1933 /* we emulate STHYI in kvm */
1934 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935 set_kvm_facility(kvm->arch.model.fac_list, 74);
1936 if (MACHINE_HAS_TLB_GUEST) {
1937 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938 set_kvm_facility(kvm->arch.model.fac_list, 147);
1939 }
1940
1941 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944 kvm_s390_crypto_init(kvm);
1945
1946 mutex_init(&kvm->arch.float_int.ais_lock);
1947 kvm->arch.float_int.simm = 0;
1948 kvm->arch.float_int.nimm = 0;
1949 spin_lock_init(&kvm->arch.float_int.lock);
1950 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952 init_waitqueue_head(&kvm->arch.ipte_wq);
1953 mutex_init(&kvm->arch.ipte_mutex);
1954
1955 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958 if (type & KVM_VM_S390_UCONTROL) {
1959 kvm->arch.gmap = NULL;
1960 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961 } else {
1962 if (sclp.hamax == U64_MAX)
1963 kvm->arch.mem_limit = TASK_SIZE_MAX;
1964 else
1965 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966 sclp.hamax + 1);
1967 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968 if (!kvm->arch.gmap)
1969 goto out_err;
1970 kvm->arch.gmap->private = kvm;
1971 kvm->arch.gmap->pfault_enabled = 0;
1972 }
1973
1974 kvm->arch.css_support = 0;
1975 kvm->arch.use_irqchip = 0;
1976 kvm->arch.epoch = 0;
1977
1978 spin_lock_init(&kvm->arch.start_stop_lock);
1979 kvm_s390_vsie_init(kvm);
1980 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982 return 0;
1983 out_err:
1984 free_page((unsigned long)kvm->arch.sie_page2);
1985 debug_unregister(kvm->arch.dbf);
1986 sca_dispose(kvm);
1987 KVM_EVENT(3, "creation of vm failed: %d", rc);
1988 return rc;
1989 }
1990
/* s390 does not provide per-VCPU debugfs entries. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
1995
/* Nothing to create for a VCPU on s390; always reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
2000
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005 kvm_s390_clear_local_irqs(vcpu);
2006 kvm_clear_async_pf_completion_queue(vcpu);
2007 if (!kvm_is_ucontrol(vcpu->kvm))
2008 sca_del_vcpu(vcpu);
2009
2010 if (kvm_is_ucontrol(vcpu->kvm))
2011 gmap_remove(vcpu->arch.gmap);
2012
2013 if (vcpu->kvm->arch.use_cmma)
2014 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015 free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017 kvm_vcpu_uninit(vcpu);
2018 kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023 unsigned int i;
2024 struct kvm_vcpu *vcpu;
2025
2026 kvm_for_each_vcpu(i, vcpu, kvm)
2027 kvm_arch_vcpu_destroy(vcpu);
2028
2029 mutex_lock(&kvm->lock);
2030 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031 kvm->vcpus[i] = NULL;
2032
2033 atomic_set(&kvm->online_vcpus, 0);
2034 mutex_unlock(&kvm->lock);
2035 }
2036
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039 kvm_free_vcpus(kvm);
2040 sca_dispose(kvm);
2041 debug_unregister(kvm->arch.dbf);
2042 free_page((unsigned long)kvm->arch.sie_page2);
2043 if (!kvm_is_ucontrol(kvm))
2044 gmap_remove(kvm->arch.gmap);
2045 kvm_s390_destroy_adapters(kvm);
2046 kvm_s390_clear_float_irqs(kvm);
2047 kvm_s390_vsie_destroy(kvm);
2048 if (kvm->arch.migration_state) {
2049 vfree(kvm->arch.migration_state->pgste_bitmap);
2050 kfree(kvm->arch.migration_state);
2051 }
2052 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059 if (!vcpu->arch.gmap)
2060 return -ENOMEM;
2061 vcpu->arch.gmap->private = vcpu->kvm;
2062
2063 return 0;
2064 }
2065
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068 if (!kvm_s390_use_sca_entries())
2069 return;
2070 read_lock(&vcpu->kvm->arch.sca_lock);
2071 if (vcpu->kvm->arch.use_esca) {
2072 struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075 sca->cpu[vcpu->vcpu_id].sda = 0;
2076 } else {
2077 struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080 sca->cpu[vcpu->vcpu_id].sda = 0;
2081 }
2082 read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087 if (!kvm_s390_use_sca_entries()) {
2088 struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090 /* we still need the basic sca for the ipte control */
2091 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093 }
2094 read_lock(&vcpu->kvm->arch.sca_lock);
2095 if (vcpu->kvm->arch.use_esca) {
2096 struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103 } else {
2104 struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110 }
2111 read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117 d->sda = s->sda;
2118 d->sigp_ctrl.c = s->sigp_ctrl.c;
2119 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124 int i;
2125
2126 d->ipte_control = s->ipte_control;
2127 d->mcn[0] = s->mcn;
2128 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134 struct bsca_block *old_sca = kvm->arch.sca;
2135 struct esca_block *new_sca;
2136 struct kvm_vcpu *vcpu;
2137 unsigned int vcpu_idx;
2138 u32 scaol, scaoh;
2139
2140 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141 if (!new_sca)
2142 return -ENOMEM;
2143
2144 scaoh = (u32)((u64)(new_sca) >> 32);
2145 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147 kvm_s390_vcpu_block_all(kvm);
2148 write_lock(&kvm->arch.sca_lock);
2149
2150 sca_copy_b_to_e(new_sca, old_sca);
2151
2152 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153 vcpu->arch.sie_block->scaoh = scaoh;
2154 vcpu->arch.sie_block->scaol = scaol;
2155 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156 }
2157 kvm->arch.sca = new_sca;
2158 kvm->arch.use_esca = 1;
2159
2160 write_unlock(&kvm->arch.sca_lock);
2161 kvm_s390_vcpu_unblock_all(kvm);
2162
2163 free_page((unsigned long)old_sca);
2164
2165 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166 old_sca, kvm->arch.sca);
2167 return 0;
2168 }
2169
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172 int rc;
2173
2174 if (!kvm_s390_use_sca_entries()) {
2175 if (id < KVM_MAX_VCPUS)
2176 return true;
2177 return false;
2178 }
2179 if (id < KVM_S390_BSCA_CPU_SLOTS)
2180 return true;
2181 if (!sclp.has_esca || !sclp.has_64bscao)
2182 return false;
2183
2184 mutex_lock(&kvm->lock);
2185 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186 mutex_unlock(&kvm->lock);
2187
2188 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
2190
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194 kvm_clear_async_pf_completion_queue(vcpu);
2195 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196 KVM_SYNC_GPRS |
2197 KVM_SYNC_ACRS |
2198 KVM_SYNC_CRS |
2199 KVM_SYNC_ARCH0 |
2200 KVM_SYNC_PFAULT;
2201 kvm_s390_set_prefix(vcpu, 0);
2202 if (test_kvm_facility(vcpu->kvm, 64))
2203 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204 if (test_kvm_facility(vcpu->kvm, 133))
2205 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2207 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2208 */
2209 if (MACHINE_HAS_VX)
2210 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2211 else
2212 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2213
2214 if (kvm_is_ucontrol(vcpu->kvm))
2215 return __kvm_ucontrol_vcpu_init(vcpu);
2216
2217 return 0;
2218 }
2219
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2222 {
2223 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225 vcpu->arch.cputm_start = get_tod_clock_fast();
2226 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2227 }
2228
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2231 {
2232 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235 vcpu->arch.cputm_start = 0;
2236 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2237 }
2238
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241 {
2242 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243 vcpu->arch.cputm_enabled = true;
2244 __start_cpu_timer_accounting(vcpu);
2245 }
2246
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249 {
2250 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251 __stop_cpu_timer_accounting(vcpu);
2252 vcpu->arch.cputm_enabled = false;
2253 }
2254
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2261
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2268
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2271 {
2272 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274 if (vcpu->arch.cputm_enabled)
2275 vcpu->arch.cputm_start = get_tod_clock_fast();
2276 vcpu->arch.sie_block->cputm = cputm;
2277 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2278 preempt_enable();
2279 }
2280
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2283 {
2284 unsigned int seq;
2285 __u64 value;
2286
2287 if (unlikely(!vcpu->arch.cputm_enabled))
2288 return vcpu->arch.sie_block->cputm;
2289
2290 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2291 do {
2292 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2293 /*
2294 * If the writer would ever execute a read in the critical
2295 * section, e.g. in irq context, we have a deadlock.
2296 */
2297 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298 value = vcpu->arch.sie_block->cputm;
2299 /* if cputm_start is 0, accounting is being started/stopped */
2300 if (likely(vcpu->arch.cputm_start))
2301 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2303 preempt_enable();
2304 return value;
2305 }
2306
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2308 {
2309
2310 gmap_enable(vcpu->arch.enabled_gmap);
2311 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313 __start_cpu_timer_accounting(vcpu);
2314 vcpu->cpu = cpu;
2315 }
2316
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2318 {
2319 vcpu->cpu = -1;
2320 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321 __stop_cpu_timer_accounting(vcpu);
2322 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323 vcpu->arch.enabled_gmap = gmap_get_enabled();
2324 gmap_disable(vcpu->arch.enabled_gmap);
2325
2326 }
2327
2328 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2329 {
2330 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2331 vcpu->arch.sie_block->gpsw.mask = 0UL;
2332 vcpu->arch.sie_block->gpsw.addr = 0UL;
2333 kvm_s390_set_prefix(vcpu, 0);
2334 kvm_s390_set_cpu_timer(vcpu, 0);
2335 vcpu->arch.sie_block->ckc = 0UL;
2336 vcpu->arch.sie_block->todpr = 0;
2337 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2339 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340 /* make sure the new fpc will be lazily loaded */
2341 save_fpu_regs();
2342 current->thread.fpu.fpc = 0;
2343 vcpu->arch.sie_block->gbea = 1;
2344 vcpu->arch.sie_block->pp = 0;
2345 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346 kvm_clear_async_pf_completion_queue(vcpu);
2347 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348 kvm_s390_vcpu_stop(vcpu);
2349 kvm_s390_clear_local_irqs(vcpu);
2350 }
2351
2352 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2353 {
2354 mutex_lock(&vcpu->kvm->lock);
2355 preempt_disable();
2356 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2357 preempt_enable();
2358 mutex_unlock(&vcpu->kvm->lock);
2359 if (!kvm_is_ucontrol(vcpu->kvm)) {
2360 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2361 sca_add_vcpu(vcpu);
2362 }
2363 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365 /* make vcpu_load load the right gmap on the first trigger */
2366 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2367 }
2368
2369 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2370 {
2371 if (!test_kvm_facility(vcpu->kvm, 76))
2372 return;
2373
2374 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2375
2376 if (vcpu->kvm->arch.crypto.aes_kw)
2377 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378 if (vcpu->kvm->arch.crypto.dea_kw)
2379 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2380
2381 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2382 }
2383
2384 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2385 {
2386 free_page(vcpu->arch.sie_block->cbrlo);
2387 vcpu->arch.sie_block->cbrlo = 0;
2388 }
2389
2390 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2391 {
2392 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393 if (!vcpu->arch.sie_block->cbrlo)
2394 return -ENOMEM;
2395
2396 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2397 return 0;
2398 }
2399
2400 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2401 {
2402 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2403
2404 vcpu->arch.sie_block->ibc = model->ibc;
2405 if (test_kvm_facility(vcpu->kvm, 7))
2406 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2407 }
2408
2409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2410 {
2411 int rc = 0;
2412
2413 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2414 CPUSTAT_SM |
2415 CPUSTAT_STOPPED);
2416
2417 if (test_kvm_facility(vcpu->kvm, 78))
2418 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419 else if (test_kvm_facility(vcpu->kvm, 8))
2420 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2421
2422 kvm_s390_vcpu_setup_model(vcpu);
2423
2424 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425 if (MACHINE_HAS_ESOP)
2426 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427 if (test_kvm_facility(vcpu->kvm, 9))
2428 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429 if (test_kvm_facility(vcpu->kvm, 73))
2430 vcpu->arch.sie_block->ecb |= ECB_TE;
2431
2432 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434 if (test_kvm_facility(vcpu->kvm, 130))
2435 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2437 if (sclp.has_cei)
2438 vcpu->arch.sie_block->eca |= ECA_CEI;
2439 if (sclp.has_ib)
2440 vcpu->arch.sie_block->eca |= ECA_IB;
2441 if (sclp.has_siif)
2442 vcpu->arch.sie_block->eca |= ECA_SII;
2443 if (sclp.has_sigpif)
2444 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445 if (test_kvm_facility(vcpu->kvm, 129)) {
2446 vcpu->arch.sie_block->eca |= ECA_VX;
2447 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2448 }
2449 if (test_kvm_facility(vcpu->kvm, 139))
2450 vcpu->arch.sie_block->ecd |= ECD_MEF;
2451
2452 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2453 | SDNXC;
2454 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2455
2456 if (sclp.has_kss)
2457 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2458 else
2459 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2460
2461 if (vcpu->kvm->arch.use_cmma) {
2462 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2463 if (rc)
2464 return rc;
2465 }
2466 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2468
2469 kvm_s390_vcpu_crypto_setup(vcpu);
2470
2471 return rc;
2472 }
2473
2474 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2475 unsigned int id)
2476 {
2477 struct kvm_vcpu *vcpu;
2478 struct sie_page *sie_page;
2479 int rc = -EINVAL;
2480
2481 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2482 goto out;
2483
2484 rc = -ENOMEM;
2485
2486 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2487 if (!vcpu)
2488 goto out;
2489
2490 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2492 if (!sie_page)
2493 goto out_free_cpu;
2494
2495 vcpu->arch.sie_block = &sie_page->sie_block;
2496 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2497
2498 /* the real guest size will always be smaller than msl */
2499 vcpu->arch.sie_block->mso = 0;
2500 vcpu->arch.sie_block->msl = sclp.hamax;
2501
2502 vcpu->arch.sie_block->icpua = id;
2503 spin_lock_init(&vcpu->arch.local_int.lock);
2504 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505 vcpu->arch.local_int.wq = &vcpu->wq;
2506 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507 seqcount_init(&vcpu->arch.cputm_seqcount);
2508
2509 rc = kvm_vcpu_init(vcpu, kvm, id);
2510 if (rc)
2511 goto out_free_sie_block;
2512 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513 vcpu->arch.sie_block);
2514 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2515
2516 return vcpu;
2517 out_free_sie_block:
2518 free_page((unsigned long)(vcpu->arch.sie_block));
2519 out_free_cpu:
2520 kmem_cache_free(kvm_vcpu_cache, vcpu);
2521 out:
2522 return ERR_PTR(rc);
2523 }
2524
/* Report the result of kvm_s390_vcpu_has_irq() as the runnable state. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2529
2530 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2531 {
2532 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2533 }
2534
2535 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2536 {
2537 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2538 exit_sie(vcpu);
2539 }
2540
2541 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2542 {
2543 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2544 }
2545
2546 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2547 {
2548 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2549 exit_sie(vcpu);
2550 }
2551
2552 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2553 {
2554 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2555 }
2556
2557 /*
2558 * Kick a guest cpu out of SIE and wait until SIE is not running.
2559 * If the CPU is not running (e.g. waiting as idle) the function will
2560 * return immediately. */
2561 void exit_sie(struct kvm_vcpu *vcpu)
2562 {
2563 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2564 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2565 cpu_relax();
2566 }
2567
/*
 * Post request @req for @vcpu and kick the guest cpu out of SIE so that
 * the request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	/* the kick only returns once the cpu has left SIE */
	kvm_s390_vcpu_request(vcpu);
}
2574
2575 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2576 unsigned long end)
2577 {
2578 struct kvm *kvm = gmap->private;
2579 struct kvm_vcpu *vcpu;
2580 unsigned long prefix;
2581 int i;
2582
2583 if (gmap_is_shadow(gmap))
2584 return;
2585 if (start >= 1UL << 31)
2586 /* We are only interested in prefix pages */
2587 return;
2588 kvm_for_each_vcpu(i, vcpu, kvm) {
2589 /* match against both prefix pages */
2590 prefix = kvm_s390_get_prefix(vcpu);
2591 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2592 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2593 start, end);
2594 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2595 }
2596 }
2597 }
2598
/*
 * Only present because kvm common code refers to it; it is never called
 * there, so reaching this function is a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2605
2606 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2607 struct kvm_one_reg *reg)
2608 {
2609 int r = -EINVAL;
2610
2611 switch (reg->id) {
2612 case KVM_REG_S390_TODPR:
2613 r = put_user(vcpu->arch.sie_block->todpr,
2614 (u32 __user *)reg->addr);
2615 break;
2616 case KVM_REG_S390_EPOCHDIFF:
2617 r = put_user(vcpu->arch.sie_block->epoch,
2618 (u64 __user *)reg->addr);
2619 break;
2620 case KVM_REG_S390_CPU_TIMER:
2621 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2622 (u64 __user *)reg->addr);
2623 break;
2624 case KVM_REG_S390_CLOCK_COMP:
2625 r = put_user(vcpu->arch.sie_block->ckc,
2626 (u64 __user *)reg->addr);
2627 break;
2628 case KVM_REG_S390_PFTOKEN:
2629 r = put_user(vcpu->arch.pfault_token,
2630 (u64 __user *)reg->addr);
2631 break;
2632 case KVM_REG_S390_PFCOMPARE:
2633 r = put_user(vcpu->arch.pfault_compare,
2634 (u64 __user *)reg->addr);
2635 break;
2636 case KVM_REG_S390_PFSELECT:
2637 r = put_user(vcpu->arch.pfault_select,
2638 (u64 __user *)reg->addr);
2639 break;
2640 case KVM_REG_S390_PP:
2641 r = put_user(vcpu->arch.sie_block->pp,
2642 (u64 __user *)reg->addr);
2643 break;
2644 case KVM_REG_S390_GBEA:
2645 r = put_user(vcpu->arch.sie_block->gbea,
2646 (u64 __user *)reg->addr);
2647 break;
2648 default:
2649 break;
2650 }
2651
2652 return r;
2653 }
2654
2655 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2656 struct kvm_one_reg *reg)
2657 {
2658 int r = -EINVAL;
2659 __u64 val;
2660
2661 switch (reg->id) {
2662 case KVM_REG_S390_TODPR:
2663 r = get_user(vcpu->arch.sie_block->todpr,
2664 (u32 __user *)reg->addr);
2665 break;
2666 case KVM_REG_S390_EPOCHDIFF:
2667 r = get_user(vcpu->arch.sie_block->epoch,
2668 (u64 __user *)reg->addr);
2669 break;
2670 case KVM_REG_S390_CPU_TIMER:
2671 r = get_user(val, (u64 __user *)reg->addr);
2672 if (!r)
2673 kvm_s390_set_cpu_timer(vcpu, val);
2674 break;
2675 case KVM_REG_S390_CLOCK_COMP:
2676 r = get_user(vcpu->arch.sie_block->ckc,
2677 (u64 __user *)reg->addr);
2678 break;
2679 case KVM_REG_S390_PFTOKEN:
2680 r = get_user(vcpu->arch.pfault_token,
2681 (u64 __user *)reg->addr);
2682 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2683 kvm_clear_async_pf_completion_queue(vcpu);
2684 break;
2685 case KVM_REG_S390_PFCOMPARE:
2686 r = get_user(vcpu->arch.pfault_compare,
2687 (u64 __user *)reg->addr);
2688 break;
2689 case KVM_REG_S390_PFSELECT:
2690 r = get_user(vcpu->arch.pfault_select,
2691 (u64 __user *)reg->addr);
2692 break;
2693 case KVM_REG_S390_PP:
2694 r = get_user(vcpu->arch.sie_block->pp,
2695 (u64 __user *)reg->addr);
2696 break;
2697 case KVM_REG_S390_GBEA:
2698 r = get_user(vcpu->arch.sie_block->gbea,
2699 (u64 __user *)reg->addr);
2700 break;
2701 default:
2702 break;
2703 }
2704
2705 return r;
2706 }
2707
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2713
2714 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2715 {
2716 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2717 return 0;
2718 }
2719
2720 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2721 {
2722 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2723 return 0;
2724 }
2725
2726 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2727 struct kvm_sregs *sregs)
2728 {
2729 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2730 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2731 return 0;
2732 }
2733
2734 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2735 struct kvm_sregs *sregs)
2736 {
2737 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2738 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2739 return 0;
2740 }
2741
2742 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743 {
2744 if (test_fp_ctl(fpu->fpc))
2745 return -EINVAL;
2746 vcpu->run->s.regs.fpc = fpu->fpc;
2747 if (MACHINE_HAS_VX)
2748 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2749 (freg_t *) fpu->fprs);
2750 else
2751 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2752 return 0;
2753 }
2754
2755 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2756 {
2757 /* make sure we have the latest values */
2758 save_fpu_regs();
2759 if (MACHINE_HAS_VX)
2760 convert_vx_to_fp((freg_t *) fpu->fprs,
2761 (__vector128 *) vcpu->run->s.regs.vrs);
2762 else
2763 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2764 fpu->fpc = vcpu->run->s.regs.fpc;
2765 return 0;
2766 }
2767
2768 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2769 {
2770 int rc = 0;
2771
2772 if (!is_vcpu_stopped(vcpu))
2773 rc = -EBUSY;
2774 else {
2775 vcpu->run->psw_mask = psw.mask;
2776 vcpu->run->psw_addr = psw.addr;
2777 }
2778 return rc;
2779 }
2780
2781 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2782 struct kvm_translation *tr)
2783 {
2784 return -EINVAL; /* not implemented yet */
2785 }
2786
2787 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2788 KVM_GUESTDBG_USE_HW_BP | \
2789 KVM_GUESTDBG_ENABLE)
2790
2791 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2792 struct kvm_guest_debug *dbg)
2793 {
2794 int rc = 0;
2795
2796 vcpu->guest_debug = 0;
2797 kvm_s390_clear_bp_data(vcpu);
2798
2799 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2800 return -EINVAL;
2801 if (!sclp.has_gpere)
2802 return -EINVAL;
2803
2804 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2805 vcpu->guest_debug = dbg->control;
2806 /* enforce guest PER */
2807 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2808
2809 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2810 rc = kvm_s390_import_bp_data(vcpu, dbg);
2811 } else {
2812 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2813 vcpu->arch.guestdbg.last_bp = 0;
2814 }
2815
2816 if (rc) {
2817 vcpu->guest_debug = 0;
2818 kvm_s390_clear_bp_data(vcpu);
2819 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820 }
2821
2822 return rc;
2823 }
2824
2825 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2826 struct kvm_mp_state *mp_state)
2827 {
2828 /* CHECK_STOP and LOAD are not supported yet */
2829 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2830 KVM_MP_STATE_OPERATING;
2831 }
2832
2833 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2834 struct kvm_mp_state *mp_state)
2835 {
2836 int rc = 0;
2837
2838 /* user space knows about this interface - let it control the state */
2839 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2840
2841 switch (mp_state->mp_state) {
2842 case KVM_MP_STATE_STOPPED:
2843 kvm_s390_vcpu_stop(vcpu);
2844 break;
2845 case KVM_MP_STATE_OPERATING:
2846 kvm_s390_vcpu_start(vcpu);
2847 break;
2848 case KVM_MP_STATE_LOAD:
2849 case KVM_MP_STATE_CHECK_STOP:
2850 /* fall through - CHECK_STOP and LOAD are not supported yet */
2851 default:
2852 rc = -ENXIO;
2853 }
2854
2855 return rc;
2856 }
2857
2858 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2859 {
2860 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2861 }
2862
2863 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2864 {
2865 retry:
2866 kvm_s390_vcpu_request_handled(vcpu);
2867 if (!kvm_request_pending(vcpu))
2868 return 0;
2869 /*
2870 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2871 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2872 * This ensures that the ipte instruction for this request has
2873 * already finished. We might race against a second unmapper that
2874 * wants to set the blocking bit. Lets just retry the request loop.
2875 */
2876 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2877 int rc;
2878 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2879 kvm_s390_get_prefix(vcpu),
2880 PAGE_SIZE * 2, PROT_WRITE);
2881 if (rc) {
2882 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2883 return rc;
2884 }
2885 goto retry;
2886 }
2887
2888 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2889 vcpu->arch.sie_block->ihcpu = 0xffff;
2890 goto retry;
2891 }
2892
2893 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2894 if (!ibs_enabled(vcpu)) {
2895 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2896 atomic_or(CPUSTAT_IBS,
2897 &vcpu->arch.sie_block->cpuflags);
2898 }
2899 goto retry;
2900 }
2901
2902 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2903 if (ibs_enabled(vcpu)) {
2904 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2905 atomic_andnot(CPUSTAT_IBS,
2906 &vcpu->arch.sie_block->cpuflags);
2907 }
2908 goto retry;
2909 }
2910
2911 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2912 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2913 goto retry;
2914 }
2915
2916 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2917 /*
2918 * Disable CMMA virtualization; we will emulate the ESSA
2919 * instruction manually, in order to provide additional
2920 * functionalities needed for live migration.
2921 */
2922 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2923 goto retry;
2924 }
2925
2926 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2927 /*
2928 * Re-enable CMMA virtualization if CMMA is available and
2929 * was used.
2930 */
2931 if ((vcpu->kvm->arch.use_cmma) &&
2932 (vcpu->kvm->mm->context.use_cmma))
2933 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2934 goto retry;
2935 }
2936
2937 /* nothing to do, just clear the request */
2938 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2939
2940 return 0;
2941 }
2942
2943 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2944 const struct kvm_s390_vm_tod_clock *gtod)
2945 {
2946 struct kvm_vcpu *vcpu;
2947 struct kvm_s390_tod_clock_ext htod;
2948 int i;
2949
2950 mutex_lock(&kvm->lock);
2951 preempt_disable();
2952
2953 get_tod_clock_ext((char *)&htod);
2954
2955 kvm->arch.epoch = gtod->tod - htod.tod;
2956 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2957
2958 if (kvm->arch.epoch > gtod->tod)
2959 kvm->arch.epdx -= 1;
2960
2961 kvm_s390_vcpu_block_all(kvm);
2962 kvm_for_each_vcpu(i, vcpu, kvm) {
2963 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2964 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2965 }
2966
2967 kvm_s390_vcpu_unblock_all(kvm);
2968 preempt_enable();
2969 mutex_unlock(&kvm->lock);
2970 }
2971
2972 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2973 {
2974 struct kvm_vcpu *vcpu;
2975 int i;
2976
2977 mutex_lock(&kvm->lock);
2978 preempt_disable();
2979 kvm->arch.epoch = tod - get_tod_clock();
2980 kvm_s390_vcpu_block_all(kvm);
2981 kvm_for_each_vcpu(i, vcpu, kvm)
2982 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2983 kvm_s390_vcpu_unblock_all(kvm);
2984 preempt_enable();
2985 mutex_unlock(&kvm->lock);
2986 }
2987
2988 /**
2989 * kvm_arch_fault_in_page - fault-in guest page if necessary
2990 * @vcpu: The corresponding virtual cpu
2991 * @gpa: Guest physical address
2992 * @writable: Whether the page should be writable or not
2993 *
2994 * Make sure that a guest page has been faulted-in on the host.
2995 *
2996 * Return: Zero on success, negative error code otherwise.
2997 */
2998 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2999 {
3000 return gmap_fault(vcpu->arch.gmap, gpa,
3001 writable ? FAULT_FLAG_WRITE : 0);
3002 }
3003
3004 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3005 unsigned long token)
3006 {
3007 struct kvm_s390_interrupt inti;
3008 struct kvm_s390_irq irq;
3009
3010 if (start_token) {
3011 irq.u.ext.ext_params2 = token;
3012 irq.type = KVM_S390_INT_PFAULT_INIT;
3013 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3014 } else {
3015 inti.type = KVM_S390_INT_PFAULT_DONE;
3016 inti.parm64 = token;
3017 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3018 }
3019 }
3020
3021 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3022 struct kvm_async_pf *work)
3023 {
3024 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3025 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3026 }
3027
3028 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3029 struct kvm_async_pf *work)
3030 {
3031 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3032 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3033 }
3034
/* Intentionally empty: s390 will always inject the page directly. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3040
3041 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3042 {
3043 /*
3044 * s390 will always inject the page directly,
3045 * but we still want check_async_completion to cleanup
3046 */
3047 return true;
3048 }
3049
3050 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3051 {
3052 hva_t hva;
3053 struct kvm_arch_async_pf arch;
3054 int rc;
3055
3056 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3057 return 0;
3058 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3059 vcpu->arch.pfault_compare)
3060 return 0;
3061 if (psw_extint_disabled(vcpu))
3062 return 0;
3063 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3064 return 0;
3065 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3066 return 0;
3067 if (!vcpu->arch.gmap->pfault_enabled)
3068 return 0;
3069
3070 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3071 hva += current->thread.gmap_addr & ~PAGE_MASK;
3072 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3073 return 0;
3074
3075 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3076 return rc;
3077 }
3078
3079 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3080 {
3081 int rc, cpuflags;
3082
3083 /*
3084 * On s390 notifications for arriving pages will be delivered directly
3085 * to the guest but the house keeping for completed pfaults is
3086 * handled outside the worker.
3087 */
3088 kvm_check_async_pf_completion(vcpu);
3089
3090 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3091 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3092
3093 if (need_resched())
3094 schedule();
3095
3096 if (test_cpu_flag(CIF_MCCK_PENDING))
3097 s390_handle_mcck();
3098
3099 if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3101 if (rc)
3102 return rc;
3103 }
3104
3105 rc = kvm_s390_handle_requests(vcpu);
3106 if (rc)
3107 return rc;
3108
3109 if (guestdbg_enabled(vcpu)) {
3110 kvm_s390_backup_guest_per_regs(vcpu);
3111 kvm_s390_patch_guest_per_regs(vcpu);
3112 }
3113
3114 vcpu->arch.sie_block->icptcode = 0;
3115 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3116 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3117 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3118
3119 return 0;
3120 }
3121
3122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3123 {
3124 struct kvm_s390_pgm_info pgm_info = {
3125 .code = PGM_ADDRESSING,
3126 };
3127 u8 opcode, ilen;
3128 int rc;
3129
3130 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3131 trace_kvm_s390_sie_fault(vcpu);
3132
3133 /*
3134 * We want to inject an addressing exception, which is defined as a
3135 * suppressing or terminating exception. However, since we came here
3136 * by a DAT access exception, the PSW still points to the faulting
3137 * instruction since DAT exceptions are nullifying. So we've got
3138 * to look up the current opcode to get the length of the instruction
3139 * to be able to forward the PSW.
3140 */
3141 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3142 ilen = insn_length(opcode);
3143 if (rc < 0) {
3144 return rc;
3145 } else if (rc) {
3146 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3147 * Forward by arbitrary ilc, injection will take care of
3148 * nullification if necessary.
3149 */
3150 pgm_info = vcpu->arch.pgm;
3151 ilen = 4;
3152 }
3153 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3154 kvm_s390_forward_psw(vcpu, ilen);
3155 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3156 }
3157
3158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3159 {
3160 struct mcck_volatile_info *mcck_info;
3161 struct sie_page *sie_page;
3162
3163 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3164 vcpu->arch.sie_block->icptcode);
3165 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3166
3167 if (guestdbg_enabled(vcpu))
3168 kvm_s390_restore_guest_per_regs(vcpu);
3169
3170 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3171 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3172
3173 if (exit_reason == -EINTR) {
3174 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3175 sie_page = container_of(vcpu->arch.sie_block,
3176 struct sie_page, sie_block);
3177 mcck_info = &sie_page->mcck_info;
3178 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3179 return 0;
3180 }
3181
3182 if (vcpu->arch.sie_block->icptcode > 0) {
3183 int rc = kvm_handle_sie_intercept(vcpu);
3184
3185 if (rc != -EOPNOTSUPP)
3186 return rc;
3187 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3188 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3189 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3190 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3191 return -EREMOTE;
3192 } else if (exit_reason != -EFAULT) {
3193 vcpu->stat.exit_null++;
3194 return 0;
3195 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3196 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3197 vcpu->run->s390_ucontrol.trans_exc_code =
3198 current->thread.gmap_addr;
3199 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3200 return -EREMOTE;
3201 } else if (current->thread.gmap_pfault) {
3202 trace_kvm_s390_major_guest_pfault(vcpu);
3203 current->thread.gmap_pfault = 0;
3204 if (kvm_arch_setup_async_pf(vcpu))
3205 return 0;
3206 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3207 }
3208 return vcpu_post_run_fault_in_sie(vcpu);
3209 }
3210
3211 static int __vcpu_run(struct kvm_vcpu *vcpu)
3212 {
3213 int rc, exit_reason;
3214
3215 /*
3216 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3217 * ning the guest), so that memslots (and other stuff) are protected
3218 */
3219 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3220
3221 do {
3222 rc = vcpu_pre_run(vcpu);
3223 if (rc)
3224 break;
3225
3226 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3227 /*
3228 * As PF_VCPU will be used in fault handler, between
3229 * guest_enter and guest_exit should be no uaccess.
3230 */
3231 local_irq_disable();
3232 guest_enter_irqoff();
3233 __disable_cpu_timer_accounting(vcpu);
3234 local_irq_enable();
3235 exit_reason = sie64a(vcpu->arch.sie_block,
3236 vcpu->run->s.regs.gprs);
3237 local_irq_disable();
3238 __enable_cpu_timer_accounting(vcpu);
3239 guest_exit_irqoff();
3240 local_irq_enable();
3241 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3242
3243 rc = vcpu_post_run(vcpu, exit_reason);
3244 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3245
3246 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3247 return rc;
3248 }
3249
3250 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3251 {
3252 struct runtime_instr_cb *riccb;
3253 struct gs_cb *gscb;
3254
3255 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3256 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3257 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3258 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3259 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3260 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3261 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3262 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3263 /* some control register changes require a tlb flush */
3264 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3265 }
3266 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3267 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3268 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3269 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3270 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3271 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3272 }
3273 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3274 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3275 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3276 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3277 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3278 kvm_clear_async_pf_completion_queue(vcpu);
3279 }
3280 /*
3281 * If userspace sets the riccb (e.g. after migration) to a valid state,
3282 * we should enable RI here instead of doing the lazy enablement.
3283 */
3284 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3285 test_kvm_facility(vcpu->kvm, 64) &&
3286 riccb->valid &&
3287 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3288 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3289 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3290 }
3291 /*
3292 * If userspace sets the gscb (e.g. after migration) to non-zero,
3293 * we should enable GS here instead of doing the lazy enablement.
3294 */
3295 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3296 test_kvm_facility(vcpu->kvm, 133) &&
3297 gscb->gssm &&
3298 !vcpu->arch.gs_enabled) {
3299 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3300 vcpu->arch.sie_block->ecb |= ECB_GS;
3301 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3302 vcpu->arch.gs_enabled = 1;
3303 }
3304 save_access_regs(vcpu->arch.host_acrs);
3305 restore_access_regs(vcpu->run->s.regs.acrs);
3306 /* save host (userspace) fprs/vrs */
3307 save_fpu_regs();
3308 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3309 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3310 if (MACHINE_HAS_VX)
3311 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3312 else
3313 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3314 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3315 if (test_fp_ctl(current->thread.fpu.fpc))
3316 /* User space provided an invalid FPC, let's clear it */
3317 current->thread.fpu.fpc = 0;
3318 if (MACHINE_HAS_GS) {
3319 preempt_disable();
3320 __ctl_set_bit(2, 4);
3321 if (current->thread.gs_cb) {
3322 vcpu->arch.host_gscb = current->thread.gs_cb;
3323 save_gs_cb(vcpu->arch.host_gscb);
3324 }
3325 if (vcpu->arch.gs_enabled) {
3326 current->thread.gs_cb = (struct gs_cb *)
3327 &vcpu->run->s.regs.gscb;
3328 restore_gs_cb(current->thread.gs_cb);
3329 }
3330 preempt_enable();
3331 }
3332
3333 kvm_run->kvm_dirty_regs = 0;
3334 }
3335
3336 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 {
3338 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3339 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3340 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3341 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3342 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3343 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3344 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3345 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3346 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3347 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3348 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3349 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3350 save_access_regs(vcpu->run->s.regs.acrs);
3351 restore_access_regs(vcpu->arch.host_acrs);
3352 /* Save guest register state */
3353 save_fpu_regs();
3354 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3355 /* Restore will be done lazily at return */
3356 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3357 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3358 if (MACHINE_HAS_GS) {
3359 __ctl_set_bit(2, 4);
3360 if (vcpu->arch.gs_enabled)
3361 save_gs_cb(current->thread.gs_cb);
3362 preempt_disable();
3363 current->thread.gs_cb = vcpu->arch.host_gscb;
3364 restore_gs_cb(vcpu->arch.host_gscb);
3365 preempt_enable();
3366 if (!vcpu->arch.host_gscb)
3367 __ctl_clear_bit(2, 4);
3368 vcpu->arch.host_gscb = NULL;
3369 }
3370
3371 }
3372
3373 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3374 {
3375 int rc;
3376 sigset_t sigsaved;
3377
3378 if (kvm_run->immediate_exit)
3379 return -EINTR;
3380
3381 if (guestdbg_exit_pending(vcpu)) {
3382 kvm_s390_prepare_debug_exit(vcpu);
3383 return 0;
3384 }
3385
3386 if (vcpu->sigset_active)
3387 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3388
3389 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3390 kvm_s390_vcpu_start(vcpu);
3391 } else if (is_vcpu_stopped(vcpu)) {
3392 pr_err_ratelimited("can't run stopped vcpu %d\n",
3393 vcpu->vcpu_id);
3394 return -EINVAL;
3395 }
3396
3397 sync_regs(vcpu, kvm_run);
3398 enable_cpu_timer_accounting(vcpu);
3399
3400 might_fault();
3401 rc = __vcpu_run(vcpu);
3402
3403 if (signal_pending(current) && !rc) {
3404 kvm_run->exit_reason = KVM_EXIT_INTR;
3405 rc = -EINTR;
3406 }
3407
3408 if (guestdbg_exit_pending(vcpu) && !rc) {
3409 kvm_s390_prepare_debug_exit(vcpu);
3410 rc = 0;
3411 }
3412
3413 if (rc == -EREMOTE) {
3414 /* userspace support is needed, kvm_run has been prepared */
3415 rc = 0;
3416 }
3417
3418 disable_cpu_timer_accounting(vcpu);
3419 store_regs(vcpu, kvm_run);
3420
3421 if (vcpu->sigset_active)
3422 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3423
3424 vcpu->stat.exit_userspace++;
3425 return rc;
3426 }
3427
3428 /*
3429 * store status at address
3430 * we use have two special cases:
3431 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3432 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3433 */
3434 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3435 {
3436 unsigned char archmode = 1;
3437 freg_t fprs[NUM_FPRS];
3438 unsigned int px;
3439 u64 clkcomp, cputm;
3440 int rc;
3441
3442 px = kvm_s390_get_prefix(vcpu);
3443 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3444 if (write_guest_abs(vcpu, 163, &archmode, 1))
3445 return -EFAULT;
3446 gpa = 0;
3447 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3448 if (write_guest_real(vcpu, 163, &archmode, 1))
3449 return -EFAULT;
3450 gpa = px;
3451 } else
3452 gpa -= __LC_FPREGS_SAVE_AREA;
3453
3454 /* manually convert vector registers if necessary */
3455 if (MACHINE_HAS_VX) {
3456 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3457 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3458 fprs, 128);
3459 } else {
3460 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3461 vcpu->run->s.regs.fprs, 128);
3462 }
3463 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3464 vcpu->run->s.regs.gprs, 128);
3465 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3466 &vcpu->arch.sie_block->gpsw, 16);
3467 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3468 &px, 4);
3469 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3470 &vcpu->run->s.regs.fpc, 4);
3471 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3472 &vcpu->arch.sie_block->todpr, 4);
3473 cputm = kvm_s390_get_cpu_timer(vcpu);
3474 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3475 &cputm, 8);
3476 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3477 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3478 &clkcomp, 8);
3479 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3480 &vcpu->run->s.regs.acrs, 64);
3481 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3482 &vcpu->arch.sie_block->gcr, 128);
3483 return rc ? -EFAULT : 0;
3484 }
3485
3486 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3487 {
3488 /*
3489 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3490 * switch in the run ioctl. Let's update our copies before we save
3491 * it into the save area
3492 */
3493 save_fpu_regs();
3494 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3495 save_access_regs(vcpu->run->s.regs.acrs);
3496
3497 return kvm_s390_store_status_unloaded(vcpu, addr);
3498 }
3499
3500 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3501 {
3502 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3503 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3504 }
3505
3506 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3507 {
3508 unsigned int i;
3509 struct kvm_vcpu *vcpu;
3510
3511 kvm_for_each_vcpu(i, vcpu, kvm) {
3512 __disable_ibs_on_vcpu(vcpu);
3513 }
3514 }
3515
3516 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3517 {
3518 if (!sclp.has_ibs)
3519 return;
3520 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3521 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3522 }
3523
3524 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3525 {
3526 int i, online_vcpus, started_vcpus = 0;
3527
3528 if (!is_vcpu_stopped(vcpu))
3529 return;
3530
3531 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3532 /* Only one cpu at a time may enter/leave the STOPPED state. */
3533 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3534 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3535
3536 for (i = 0; i < online_vcpus; i++) {
3537 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3538 started_vcpus++;
3539 }
3540
3541 if (started_vcpus == 0) {
3542 /* we're the only active VCPU -> speed it up */
3543 __enable_ibs_on_vcpu(vcpu);
3544 } else if (started_vcpus == 1) {
3545 /*
3546 * As we are starting a second VCPU, we have to disable
3547 * the IBS facility on all VCPUs to remove potentially
3548 * oustanding ENABLE requests.
3549 */
3550 __disable_ibs_on_all_vcpus(vcpu->kvm);
3551 }
3552
3553 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3554 /*
3555 * Another VCPU might have used IBS while we were offline.
3556 * Let's play safe and flush the VCPU at startup.
3557 */
3558 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3559 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3560 return;
3561 }
3562
3563 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3564 {
3565 int i, online_vcpus, started_vcpus = 0;
3566 struct kvm_vcpu *started_vcpu = NULL;
3567
3568 if (is_vcpu_stopped(vcpu))
3569 return;
3570
3571 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3572 /* Only one cpu at a time may enter/leave the STOPPED state. */
3573 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3574 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3575
3576 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3577 kvm_s390_clear_stop_irq(vcpu);
3578
3579 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3580 __disable_ibs_on_vcpu(vcpu);
3581
3582 for (i = 0; i < online_vcpus; i++) {
3583 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3584 started_vcpus++;
3585 started_vcpu = vcpu->kvm->vcpus[i];
3586 }
3587 }
3588
3589 if (started_vcpus == 1) {
3590 /*
3591 * As we only have one VCPU left, we want to enable the
3592 * IBS facility for that VCPU to speed it up.
3593 */
3594 __enable_ibs_on_vcpu(started_vcpu);
3595 }
3596
3597 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3598 return;
3599 }
3600
3601 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3602 struct kvm_enable_cap *cap)
3603 {
3604 int r;
3605
3606 if (cap->flags)
3607 return -EINVAL;
3608
3609 switch (cap->cap) {
3610 case KVM_CAP_S390_CSS_SUPPORT:
3611 if (!vcpu->kvm->arch.css_support) {
3612 vcpu->kvm->arch.css_support = 1;
3613 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3614 trace_kvm_s390_enable_css(vcpu->kvm);
3615 }
3616 r = 0;
3617 break;
3618 default:
3619 r = -EINVAL;
3620 break;
3621 }
3622 return r;
3623 }
3624
3625 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3626 struct kvm_s390_mem_op *mop)
3627 {
3628 void __user *uaddr = (void __user *)mop->buf;
3629 void *tmpbuf = NULL;
3630 int r, srcu_idx;
3631 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3632 | KVM_S390_MEMOP_F_CHECK_ONLY;
3633
3634 if (mop->flags & ~supported_flags)
3635 return -EINVAL;
3636
3637 if (mop->size > MEM_OP_MAX_SIZE)
3638 return -E2BIG;
3639
3640 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3641 tmpbuf = vmalloc(mop->size);
3642 if (!tmpbuf)
3643 return -ENOMEM;
3644 }
3645
3646 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3647
3648 switch (mop->op) {
3649 case KVM_S390_MEMOP_LOGICAL_READ:
3650 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3651 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3652 mop->size, GACC_FETCH);
3653 break;
3654 }
3655 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3656 if (r == 0) {
3657 if (copy_to_user(uaddr, tmpbuf, mop->size))
3658 r = -EFAULT;
3659 }
3660 break;
3661 case KVM_S390_MEMOP_LOGICAL_WRITE:
3662 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3663 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3664 mop->size, GACC_STORE);
3665 break;
3666 }
3667 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3668 r = -EFAULT;
3669 break;
3670 }
3671 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3672 break;
3673 default:
3674 r = -EINVAL;
3675 }
3676
3677 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3678
3679 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3680 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3681
3682 vfree(tmpbuf);
3683 return r;
3684 }
3685
3686 long kvm_arch_vcpu_ioctl(struct file *filp,
3687 unsigned int ioctl, unsigned long arg)
3688 {
3689 struct kvm_vcpu *vcpu = filp->private_data;
3690 void __user *argp = (void __user *)arg;
3691 int idx;
3692 long r;
3693
3694 switch (ioctl) {
3695 case KVM_S390_IRQ: {
3696 struct kvm_s390_irq s390irq;
3697
3698 r = -EFAULT;
3699 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3700 break;
3701 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3702 break;
3703 }
3704 case KVM_S390_INTERRUPT: {
3705 struct kvm_s390_interrupt s390int;
3706 struct kvm_s390_irq s390irq;
3707
3708 r = -EFAULT;
3709 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3710 break;
3711 if (s390int_to_s390irq(&s390int, &s390irq))
3712 return -EINVAL;
3713 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3714 break;
3715 }
3716 case KVM_S390_STORE_STATUS:
3717 idx = srcu_read_lock(&vcpu->kvm->srcu);
3718 r = kvm_s390_vcpu_store_status(vcpu, arg);
3719 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3720 break;
3721 case KVM_S390_SET_INITIAL_PSW: {
3722 psw_t psw;
3723
3724 r = -EFAULT;
3725 if (copy_from_user(&psw, argp, sizeof(psw)))
3726 break;
3727 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3728 break;
3729 }
3730 case KVM_S390_INITIAL_RESET:
3731 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3732 break;
3733 case KVM_SET_ONE_REG:
3734 case KVM_GET_ONE_REG: {
3735 struct kvm_one_reg reg;
3736 r = -EFAULT;
3737 if (copy_from_user(&reg, argp, sizeof(reg)))
3738 break;
3739 if (ioctl == KVM_SET_ONE_REG)
3740 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3741 else
3742 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3743 break;
3744 }
3745 #ifdef CONFIG_KVM_S390_UCONTROL
3746 case KVM_S390_UCAS_MAP: {
3747 struct kvm_s390_ucas_mapping ucasmap;
3748
3749 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3750 r = -EFAULT;
3751 break;
3752 }
3753
3754 if (!kvm_is_ucontrol(vcpu->kvm)) {
3755 r = -EINVAL;
3756 break;
3757 }
3758
3759 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3760 ucasmap.vcpu_addr, ucasmap.length);
3761 break;
3762 }
3763 case KVM_S390_UCAS_UNMAP: {
3764 struct kvm_s390_ucas_mapping ucasmap;
3765
3766 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3767 r = -EFAULT;
3768 break;
3769 }
3770
3771 if (!kvm_is_ucontrol(vcpu->kvm)) {
3772 r = -EINVAL;
3773 break;
3774 }
3775
3776 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3777 ucasmap.length);
3778 break;
3779 }
3780 #endif
3781 case KVM_S390_VCPU_FAULT: {
3782 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3783 break;
3784 }
3785 case KVM_ENABLE_CAP:
3786 {
3787 struct kvm_enable_cap cap;
3788 r = -EFAULT;
3789 if (copy_from_user(&cap, argp, sizeof(cap)))
3790 break;
3791 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3792 break;
3793 }
3794 case KVM_S390_MEM_OP: {
3795 struct kvm_s390_mem_op mem_op;
3796
3797 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3798 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3799 else
3800 r = -EFAULT;
3801 break;
3802 }
3803 case KVM_S390_SET_IRQ_STATE: {
3804 struct kvm_s390_irq_state irq_state;
3805
3806 r = -EFAULT;
3807 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3808 break;
3809 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3810 irq_state.len == 0 ||
3811 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3812 r = -EINVAL;
3813 break;
3814 }
3815 r = kvm_s390_set_irq_state(vcpu,
3816 (void __user *) irq_state.buf,
3817 irq_state.len);
3818 break;
3819 }
3820 case KVM_S390_GET_IRQ_STATE: {
3821 struct kvm_s390_irq_state irq_state;
3822
3823 r = -EFAULT;
3824 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3825 break;
3826 if (irq_state.len == 0) {
3827 r = -EINVAL;
3828 break;
3829 }
3830 r = kvm_s390_get_irq_state(vcpu,
3831 (__u8 __user *) irq_state.buf,
3832 irq_state.len);
3833 break;
3834 }
3835 default:
3836 r = -ENOTTY;
3837 }
3838 return r;
3839 }
3840
3841 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3842 {
3843 #ifdef CONFIG_KVM_S390_UCONTROL
3844 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3845 && (kvm_is_ucontrol(vcpu->kvm))) {
3846 vmf->page = virt_to_page(vcpu->arch.sie_block);
3847 get_page(vmf->page);
3848 return 0;
3849 }
3850 #endif
3851 return VM_FAULT_SIGBUS;
3852 }
3853
/*
 * Memslot creation hook: nothing is done here for a new slot, the
 * function unconditionally reports success (0).
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3859
3860 /* Section: memory related */
3861 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3862 struct kvm_memory_slot *memslot,
3863 const struct kvm_userspace_memory_region *mem,
3864 enum kvm_mr_change change)
3865 {
3866 /* A few sanity checks. We can have memory slots which have to be
3867 located/ended at a segment boundary (1MB). The memory in userland is
3868 ok to be fragmented into various different vmas. It is okay to mmap()
3869 and munmap() stuff in this slot after doing this call at any time */
3870
3871 if (mem->userspace_addr & 0xffffful)
3872 return -EINVAL;
3873
3874 if (mem->memory_size & 0xffffful)
3875 return -EINVAL;
3876
3877 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3878 return -EINVAL;
3879
3880 return 0;
3881 }
3882
3883 void kvm_arch_commit_memory_region(struct kvm *kvm,
3884 const struct kvm_userspace_memory_region *mem,
3885 const struct kvm_memory_slot *old,
3886 const struct kvm_memory_slot *new,
3887 enum kvm_mr_change change)
3888 {
3889 int rc;
3890
3891 /* If the basics of the memslot do not change, we do not want
3892 * to update the gmap. Every update causes several unnecessary
3893 * segment translation exceptions. This is usually handled just
3894 * fine by the normal fault handler + gmap, but it will also
3895 * cause faults on the prefix page of running guest CPUs.
3896 */
3897 if (old->userspace_addr == mem->userspace_addr &&
3898 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3899 old->npages * PAGE_SIZE == mem->memory_size)
3900 return;
3901
3902 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3903 mem->guest_phys_addr, mem->memory_size);
3904 if (rc)
3905 pr_warn("failed to commit memory region\n");
3906 return;
3907 }
3908
3909 static inline unsigned long nonhyp_mask(int i)
3910 {
3911 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3912
3913 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3914 }
3915
3916 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3917 {
3918 vcpu->valid_wakeup = false;
3919 }
3920
3921 static int __init kvm_s390_init(void)
3922 {
3923 int i;
3924
3925 if (!sclp.has_sief2) {
3926 pr_info("SIE not available\n");
3927 return -ENODEV;
3928 }
3929
3930 for (i = 0; i < 16; i++)
3931 kvm_s390_fac_list_mask[i] |=
3932 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3933
3934 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3935 }
3936
3937 static void __exit kvm_s390_exit(void)
3938 {
3939 kvm_exit();
3940 }
3941
3942 module_init(kvm_s390_init);
3943 module_exit(kvm_s390_exit);
3944
3945 /*
3946 * Enable autoloading of the kvm module.
3947 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3948 * since x86 takes a different approach.
3949 */
3950 #include <linux/miscdevice.h>
3951 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3952 MODULE_ALIAS("devname:kvm");