arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
/*
 * Log prefix for this file: pr_fmt() expands to its format string prefixed
 * with "kvm-s390: ". It is defined ahead of the includes, presumably so that
 * the pr_*() printk helpers used below pick it up.
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34
  35 #include <asm/asm-offsets.h>
  36 #include <asm/lowcore.h>
  37 #include <asm/stp.h>
  38 #include <asm/pgtable.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include "kvm-s390.h"
  49 #include "gaccess.h"
  50
  51 #define CREATE_TRACE_POINTS
  52 #include "trace.h"
  53 #include "trace-s390.h"
  54
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of local (per-vcpu) irqs - confirm */
#define LOCAL_IRQS 32
/* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  59
/*
 * Helpers for the table below: each expands to two initializers, the byte
 * offset of counter @x inside the stat member of struct kvm_vcpu (or of
 * struct kvm) followed by the matching KVM_STAT_VCPU / KVM_STAT_VM kind.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

/*
 * Statistics table: one { name, offset, kind } entry per counter, ended by a
 * { NULL } sentinel. Most counters are per vcpu; the floating interrupt
 * injection counters (inject_float_mchk, inject_io, inject_pfault_done,
 * inject_service_signal, inject_virtio) are per VM.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }        /* end-of-table sentinel */
};
 166
/*
 * Packed 16-byte layout: a one-byte epoch index directly followed by a 64-bit
 * TOD value and seven reserved bytes. __packed keeps the compiler from
 * inserting padding between epoch_idx and tod.
 */
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
 172
 173 /* allow nested virtualization in KVM (if enabled by user space) */
 174 static int nested;
 175 module_param(nested, int, S_IRUGO);
 176 MODULE_PARM_DESC(nested, "Nested virtualization support");
 177
/*
 * allow 1m huge page guest backing, if !nested
 * Read-only parameter (0444); consulted when KVM_CAP_S390_HPAGE_1M is
 * checked or enabled.
 */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * maximum percentage of steal time for polling.  >100 is treated like 100
 * Default is 10; writable by root at runtime (0644).
 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/*
 * if set to true, the GISA will be initialized and used if available
 * Default is true; writable by root at runtime (0644).
 */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 192
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 *
 * kvm_s390_fac_size() checks at build time that this value fits the uapi
 * array sizes and the lowcore facility list.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 *
 * Array elements not covered by the FACILITIES_KVM initializer start out as
 * zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 *
 * Array elements not covered by the initializer start out as zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 210
 211 static unsigned long kvm_s390_fac_size(void)
 212 {
 213         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 214         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 215         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 216                 sizeof(S390_lowcore.stfle_fac_list));
 217
 218         return SIZE_INTERNAL;
 219 }
 220
/*
 * available cpu features supported by kvm
 * (bits are set through allow_cpu_feat() from kvm_s390_cpu_feat_init())
 */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit"
 * (filled in once by kvm_s390_cpu_feat_init())
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers; registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature areas "kvm-trace" and "kvm-uv"; see kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
 230
 231 /* Section: not file related */
 232 int kvm_arch_hardware_enable(void)
 233 {
 234         /* every s390 is virtualization enabled ;-) */
 235         return 0;
 236 }
 237
 238 int kvm_arch_check_processor_compat(void *opaque)
 239 {
 240         return 0;
 241 }
 242
/* forward declarations */
/* installed as gmap_notifier.notifier_call in kvm_arch_hardware_setup() */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
 247
 248 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 249 {
 250         u8 delta_idx = 0;
 251
 252         /*
 253          * The TOD jumps by delta, we have to compensate this by adding
 254          * -delta to the epoch.
 255          */
 256         delta = -delta;
 257
 258         /* sign-extension - we're adding to signed values below */
 259         if ((s64)delta < 0)
 260                 delta_idx = -1;
 261
 262         scb->epoch += delta;
 263         if (scb->ecd & ECD_MEF) {
 264                 scb->epdx += delta_idx;
 265                 if (scb->epoch < delta)
 266                         scb->epdx += 1;
 267         }
 268 }
 269
 270 /*
 271  * This callback is executed during stop_machine(). All CPUs are therefore
 272  * temporarily stopped. In order not to change guest behavior, we have to
 273  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 274  * so a CPU won't be stopped while calculating with the epoch.
 275  */
 276 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 277                           void *v)
 278 {
 279         struct kvm *kvm;
 280         struct kvm_vcpu *vcpu;
 281         int i;
 282         unsigned long long *delta = v;
 283
 284         list_for_each_entry(kvm, &vm_list, vm_list) {
 285                 kvm_for_each_vcpu(i, vcpu, kvm) {
 286                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 287                         if (i == 0) {
 288                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 289                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 290                         }
 291                         if (vcpu->arch.cputm_enabled)
 292                                 vcpu->arch.cputm_start += *delta;
 293                         if (vcpu->arch.vsie_block)
 294                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 295                                                    *delta);
 296                 }
 297         }
 298         return NOTIFY_OK;
 299 }
 300
/*
 * Notifier block that routes epoch delta notifications to kvm_clock_sync();
 * added to s390_epoch_delta_notifier in kvm_arch_hardware_setup() and removed
 * again in kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
 304
 305 int kvm_arch_hardware_setup(void *opaque)
 306 {
 307         gmap_notifier.notifier_call = kvm_gmap_notifier;
 308         gmap_register_pte_notifier(&gmap_notifier);
 309         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 310         gmap_register_pte_notifier(&vsie_gmap_notifier);
 311         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 312                                        &kvm_clock_notifier);
 313         return 0;
 314 }
 315
 316 void kvm_arch_hardware_unsetup(void)
 317 {
 318         gmap_unregister_pte_notifier(&gmap_notifier);
 319         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 320         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 321                                          &kvm_clock_notifier);
 322 }
 323
 324 static void allow_cpu_feat(unsigned long nr)
 325 {
 326         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 327 }
 328
/*
 * Execute PLO in its "test bit" form for function code @nr: general register
 * 0 is loaded with @nr | 0x100 and the remaining operands are ignored.
 *
 * Returns 1 if the instruction set condition code 0, otherwise 0.
 * kvm_s390_cpu_feat_init() records a 1 as "function code @nr available".
 */
static inline int plo_test_bit(unsigned char nr)
{
        /* 0x100 selects "test bit"; the low byte is the function code */
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                /* fetch the condition code and shift it down into cc */
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
 344
/*
 * Issue the RRF-format instruction whose 16-bit opcode is @opcode, with
 * general register 0 set to 0 (the query function) and general register 1
 * holding the address of @query.
 *
 * @opcode has to be a compile-time constant ("i" constraint), which is why
 * the function is __always_inline.
 *
 * NOTE(review): the instruction presumably stores its query result at @query
 * (hence the "memory" clobber); how many bytes it writes is not visible
 * here, so confirm that callers pass a large enough buffer.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        register unsigned long r0 asm("0") = 0; /* query function */
        register unsigned long r1 asm("1") = (unsigned long) query;

        asm volatile(
                /* Parameter regs are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : "d" (r0), "a" (r1), [opc] "i" (opcode)
                : "cc", "memory");
}

/* opcodes passed to __insn32_query() by kvm_s390_cpu_feat_init() */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
 360
/*
 * Probe the host once and record what can be offered to guests:
 *
 * - kvm_s390_available_subfunc: the PLO function codes (tested one by one)
 *   and the query results of PTFF, the CPACF instructions, SORTL and DFLTCC.
 *   Each query is only issued if the corresponding facility bit is set.
 * - kvm_s390_available_cpu_feat: ESOP if the machine has it, plus the
 *   SIE-related features. The latter are only offered if the "nested" module
 *   parameter is set and the host has SIEF2, ESOP, 64-bit SCAO and
 *   facility 3.
 *
 * Called from kvm_arch_init().
 */
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        /* one bit per PLO function code, stored MSB first within each byte */
        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        /* the CPACF function is named PRNO, the subfunc field still ppno */
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        /* ESOP is offered independently of nested virtualization */
        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        /* everything below is only offered with nested virtualization */
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        /*
         * NOTE(review): has_64bscao was already required by the check above,
         * so this condition is always true when reached.
         */
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * The features below are deliberately NOT offered:
         *
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages being detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}
 463
 464 int kvm_arch_init(void *opaque)
 465 {
 466         int rc = -ENOMEM;
 467
 468         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 469         if (!kvm_s390_dbf)
 470                 return -ENOMEM;
 471
 472         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 473         if (!kvm_s390_dbf_uv)
 474                 goto out;
 475
 476         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 477             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 478                 goto out;
 479
 480         kvm_s390_cpu_feat_init();
 481
 482         /* Register floating interrupt controller interface. */
 483         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 484         if (rc) {
 485                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 486                 goto out;
 487         }
 488
 489         rc = kvm_s390_gib_init(GAL_ISC);
 490         if (rc)
 491                 goto out;
 492
 493         return 0;
 494
 495 out:
 496         kvm_arch_exit();
 497         return rc;
 498 }
 499
 500 void kvm_arch_exit(void)
 501 {
 502         kvm_s390_gib_destroy();
 503         debug_unregister(kvm_s390_dbf);
 504         debug_unregister(kvm_s390_dbf_uv);
 505 }
 506
 507 /* Section: device related */
 508 long kvm_arch_dev_ioctl(struct file *filp,
 509                         unsigned int ioctl, unsigned long arg)
 510 {
 511         if (ioctl == KVM_S390_ENABLE_SIE)
 512                 return s390_enable_sie();
 513         return -EINVAL;
 514 }
 515
 516 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 517 {
 518         int r;
 519
 520         switch (ext) {
 521         case KVM_CAP_S390_PSW:
 522         case KVM_CAP_S390_GMAP:
 523         case KVM_CAP_SYNC_MMU:
 524 #ifdef CONFIG_KVM_S390_UCONTROL
 525         case KVM_CAP_S390_UCONTROL:
 526 #endif
 527         case KVM_CAP_ASYNC_PF:
 528         case KVM_CAP_SYNC_REGS:
 529         case KVM_CAP_ONE_REG:
 530         case KVM_CAP_ENABLE_CAP:
 531         case KVM_CAP_S390_CSS_SUPPORT:
 532         case KVM_CAP_IOEVENTFD:
 533         case KVM_CAP_DEVICE_CTRL:
 534         case KVM_CAP_S390_IRQCHIP:
 535         case KVM_CAP_VM_ATTRIBUTES:
 536         case KVM_CAP_MP_STATE:
 537         case KVM_CAP_IMMEDIATE_EXIT:
 538         case KVM_CAP_S390_INJECT_IRQ:
 539         case KVM_CAP_S390_USER_SIGP:
 540         case KVM_CAP_S390_USER_STSI:
 541         case KVM_CAP_S390_SKEYS:
 542         case KVM_CAP_S390_IRQ_STATE:
 543         case KVM_CAP_S390_USER_INSTR0:
 544         case KVM_CAP_S390_CMMA_MIGRATION:
 545         case KVM_CAP_S390_AIS:
 546         case KVM_CAP_S390_AIS_MIGRATION:
 547         case KVM_CAP_S390_VCPU_RESETS:
 548                 r = 1;
 549                 break;
 550         case KVM_CAP_S390_HPAGE_1M:
 551                 r = 0;
 552                 if (hpage && !kvm_is_ucontrol(kvm))
 553                         r = 1;
 554                 break;
 555         case KVM_CAP_S390_MEM_OP:
 556                 r = MEM_OP_MAX_SIZE;
 557                 break;
 558         case KVM_CAP_NR_VCPUS:
 559         case KVM_CAP_MAX_VCPUS:
 560         case KVM_CAP_MAX_VCPU_ID:
 561                 r = KVM_S390_BSCA_CPU_SLOTS;
 562                 if (!kvm_s390_use_sca_entries())
 563                         r = KVM_MAX_VCPUS;
 564                 else if (sclp.has_esca && sclp.has_64bscao)
 565                         r = KVM_S390_ESCA_CPU_SLOTS;
 566                 break;
 567         case KVM_CAP_S390_COW:
 568                 r = MACHINE_HAS_ESOP;
 569                 break;
 570         case KVM_CAP_S390_VECTOR_REGISTERS:
 571                 r = MACHINE_HAS_VX;
 572                 break;
 573         case KVM_CAP_S390_RI:
 574                 r = test_facility(64);
 575                 break;
 576         case KVM_CAP_S390_GS:
 577                 r = test_facility(133);
 578                 break;
 579         case KVM_CAP_S390_BPB:
 580                 r = test_facility(82);
 581                 break;
 582         case KVM_CAP_S390_PROTECTED:
 583                 r = is_prot_virt_host();
 584                 break;
 585         default:
 586                 r = 0;
 587         }
 588         return r;
 589 }
 590
 591 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 592 {
 593         int i;
 594         gfn_t cur_gfn, last_gfn;
 595         unsigned long gaddr, vmaddr;
 596         struct gmap *gmap = kvm->arch.gmap;
 597         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 598
 599         /* Loop over all guest segments */
 600         cur_gfn = memslot->base_gfn;
 601         last_gfn = memslot->base_gfn + memslot->npages;
 602         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 603                 gaddr = gfn_to_gpa(cur_gfn);
 604                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 605                 if (kvm_is_error_hva(vmaddr))
 606                         continue;
 607
 608                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 609                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 610                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 611                         if (test_bit(i, bitmap))
 612                                 mark_page_dirty(kvm, cur_gfn + i);
 613                 }
 614
 615                 if (fatal_signal_pending(current))
 616                         return;
 617                 cond_resched();
 618         }
 619 }
 620
/* Section: vm related */
/* forward declaration */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 623
 624 /*
 625  * Get (and clear) the dirty memory log for a memory slot.
 626  */
 627 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 628                                struct kvm_dirty_log *log)
 629 {
 630         int r;
 631         unsigned long n;
 632         struct kvm_memory_slot *memslot;
 633         int is_dirty;
 634
 635         if (kvm_is_ucontrol(kvm))
 636                 return -EINVAL;
 637
 638         mutex_lock(&kvm->slots_lock);
 639
 640         r = -EINVAL;
 641         if (log->slot >= KVM_USER_MEM_SLOTS)
 642                 goto out;
 643
 644         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 645         if (r)
 646                 goto out;
 647
 648         /* Clear the dirty log */
 649         if (is_dirty) {
 650                 n = kvm_dirty_bitmap_bytes(memslot);
 651                 memset(memslot->dirty_bitmap, 0, n);
 652         }
 653         r = 0;
 654 out:
 655         mutex_unlock(&kvm->slots_lock);
 656         return r;
 657 }
 658
 659 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 660 {
 661         unsigned int i;
 662         struct kvm_vcpu *vcpu;
 663
 664         kvm_for_each_vcpu(i, vcpu, kvm) {
 665                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 666         }
 667 }
 668
 669 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 670 {
 671         int r;
 672
 673         if (cap->flags)
 674                 return -EINVAL;
 675
 676         switch (cap->cap) {
 677         case KVM_CAP_S390_IRQCHIP:
 678                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 679                 kvm->arch.use_irqchip = 1;
 680                 r = 0;
 681                 break;
 682         case KVM_CAP_S390_USER_SIGP:
 683                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 684                 kvm->arch.user_sigp = 1;
 685                 r = 0;
 686                 break;
 687         case KVM_CAP_S390_VECTOR_REGISTERS:
 688                 mutex_lock(&kvm->lock);
 689                 if (kvm->created_vcpus) {
 690                         r = -EBUSY;
 691                 } else if (MACHINE_HAS_VX) {
 692                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 693                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 694                         if (test_facility(134)) {
 695                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 696                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 697                         }
 698                         if (test_facility(135)) {
 699                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 700                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 701                         }
 702                         if (test_facility(148)) {
 703                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 704                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 705                         }
 706                         if (test_facility(152)) {
 707                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 708                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 709                         }
 710                         r = 0;
 711                 } else
 712                         r = -EINVAL;
 713                 mutex_unlock(&kvm->lock);
 714                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 715                          r ? "(not available)" : "(success)");
 716                 break;
 717         case KVM_CAP_S390_RI:
 718                 r = -EINVAL;
 719                 mutex_lock(&kvm->lock);
 720                 if (kvm->created_vcpus) {
 721                         r = -EBUSY;
 722                 } else if (test_facility(64)) {
 723                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 724                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 725                         r = 0;
 726                 }
 727                 mutex_unlock(&kvm->lock);
 728                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 729                          r ? "(not available)" : "(success)");
 730                 break;
 731         case KVM_CAP_S390_AIS:
 732                 mutex_lock(&kvm->lock);
 733                 if (kvm->created_vcpus) {
 734                         r = -EBUSY;
 735                 } else {
 736                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 737                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 738                         r = 0;
 739                 }
 740                 mutex_unlock(&kvm->lock);
 741                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 742                          r ? "(not available)" : "(success)");
 743                 break;
 744         case KVM_CAP_S390_GS:
 745                 r = -EINVAL;
 746                 mutex_lock(&kvm->lock);
 747                 if (kvm->created_vcpus) {
 748                         r = -EBUSY;
 749                 } else if (test_facility(133)) {
 750                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 751                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 752                         r = 0;
 753                 }
 754                 mutex_unlock(&kvm->lock);
 755                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 756                          r ? "(not available)" : "(success)");
 757                 break;
 758         case KVM_CAP_S390_HPAGE_1M:
 759                 mutex_lock(&kvm->lock);
 760                 if (kvm->created_vcpus)
 761                         r = -EBUSY;
 762                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 763                         r = -EINVAL;
 764                 else {
 765                         r = 0;
 766                         down_write(&kvm->mm->mmap_sem);
 767                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 768                         up_write(&kvm->mm->mmap_sem);
 769                         /*
 770                          * We might have to create fake 4k page
 771                          * tables. To avoid that the hardware works on
 772                          * stale PGSTEs, we emulate these instructions.
 773                          */
 774                         kvm->arch.use_skf = 0;
 775                         kvm->arch.use_pfmfi = 0;
 776                 }
 777                 mutex_unlock(&kvm->lock);
 778                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 779                          r ? "(not available)" : "(success)");
 780                 break;
 781         case KVM_CAP_S390_USER_STSI:
 782                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 783                 kvm->arch.user_stsi = 1;
 784                 r = 0;
 785                 break;
 786         case KVM_CAP_S390_USER_INSTR0:
 787                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 788                 kvm->arch.user_instr0 = 1;
 789                 icpt_operexc_on_all_vcpus(kvm);
 790                 r = 0;
 791                 break;
 792         default:
 793                 r = -EINVAL;
 794                 break;
 795         }
 796         return r;
 797 }
 798
 799 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 800 {
 801         int ret;
 802
 803         switch (attr->attr) {
 804         case KVM_S390_VM_MEM_LIMIT_SIZE:
 805                 ret = 0;
 806                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 807                          kvm->arch.mem_limit);
 808                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 809                         ret = -EFAULT;
 810                 break;
 811         default:
 812                 ret = -ENXIO;
 813                 break;
 814         }
 815         return ret;
 816 }
 817
 818 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 819 {
 820         int ret;
 821         unsigned int idx;
 822         switch (attr->attr) {
 823         case KVM_S390_VM_MEM_ENABLE_CMMA:
 824                 ret = -ENXIO;
 825                 if (!sclp.has_cmma)
 826                         break;
 827
 828                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 829                 mutex_lock(&kvm->lock);
 830                 if (kvm->created_vcpus)
 831                         ret = -EBUSY;
 832                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 833                         ret = -EINVAL;
 834                 else {
 835                         kvm->arch.use_cmma = 1;
 836                         /* Not compatible with cmma. */
 837                         kvm->arch.use_pfmfi = 0;
 838                         ret = 0;
 839                 }
 840                 mutex_unlock(&kvm->lock);
 841                 break;
 842         case KVM_S390_VM_MEM_CLR_CMMA:
 843                 ret = -ENXIO;
 844                 if (!sclp.has_cmma)
 845                         break;
 846                 ret = -EINVAL;
 847                 if (!kvm->arch.use_cmma)
 848                         break;
 849
 850                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 851                 mutex_lock(&kvm->lock);
 852                 idx = srcu_read_lock(&kvm->srcu);
 853                 s390_reset_cmma(kvm->arch.gmap->mm);
 854                 srcu_read_unlock(&kvm->srcu, idx);
 855                 mutex_unlock(&kvm->lock);
 856                 ret = 0;
 857                 break;
 858         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 859                 unsigned long new_limit;
 860
 861                 if (kvm_is_ucontrol(kvm))
 862                         return -EINVAL;
 863
 864                 if (get_user(new_limit, (u64 __user *)attr->addr))
 865                         return -EFAULT;
 866
 867                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 868                     new_limit > kvm->arch.mem_limit)
 869                         return -E2BIG;
 870
 871                 if (!new_limit)
 872                         return -EINVAL;
 873
 874                 /* gmap_create takes last usable address */
 875                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 876                         new_limit -= 1;
 877
 878                 ret = -EBUSY;
 879                 mutex_lock(&kvm->lock);
 880                 if (!kvm->created_vcpus) {
 881                         /* gmap_create will round the limit up */
 882                         struct gmap *new = gmap_create(current->mm, new_limit);
 883
 884                         if (!new) {
 885                                 ret = -ENOMEM;
 886                         } else {
 887                                 gmap_remove(kvm->arch.gmap);
 888                                 new->private = kvm;
 889                                 kvm->arch.gmap = new;
 890                                 ret = 0;
 891                         }
 892                 }
 893                 mutex_unlock(&kvm->lock);
 894                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 895                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 896                          (void *) kvm->arch.gmap->asce);
 897                 break;
 898         }
 899         default:
 900                 ret = -ENXIO;
 901                 break;
 902         }
 903         return ret;
 904 }
 905
 906 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 907
 908 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 909 {
 910         struct kvm_vcpu *vcpu;
 911         int i;
 912
 913         kvm_s390_vcpu_block_all(kvm);
 914
 915         kvm_for_each_vcpu(i, vcpu, kvm) {
 916                 kvm_s390_vcpu_crypto_setup(vcpu);
 917                 /* recreate the shadow crycb by leaving the VSIE handler */
 918                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 919         }
 920
 921         kvm_s390_vcpu_unblock_all(kvm);
 922 }
 923
 924 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 925 {
 926         mutex_lock(&kvm->lock);
 927         switch (attr->attr) {
 928         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 929                 if (!test_kvm_facility(kvm, 76)) {
 930                         mutex_unlock(&kvm->lock);
 931                         return -EINVAL;
 932                 }
 933                 get_random_bytes(
 934                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 935                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 936                 kvm->arch.crypto.aes_kw = 1;
 937                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 938                 break;
 939         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 940                 if (!test_kvm_facility(kvm, 76)) {
 941                         mutex_unlock(&kvm->lock);
 942                         return -EINVAL;
 943                 }
 944                 get_random_bytes(
 945                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 946                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 947                 kvm->arch.crypto.dea_kw = 1;
 948                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 949                 break;
 950         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 951                 if (!test_kvm_facility(kvm, 76)) {
 952                         mutex_unlock(&kvm->lock);
 953                         return -EINVAL;
 954                 }
 955                 kvm->arch.crypto.aes_kw = 0;
 956                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 957                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 958                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 959                 break;
 960         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 961                 if (!test_kvm_facility(kvm, 76)) {
 962                         mutex_unlock(&kvm->lock);
 963                         return -EINVAL;
 964                 }
 965                 kvm->arch.crypto.dea_kw = 0;
 966                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 967                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 968                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 969                 break;
 970         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 971                 if (!ap_instructions_available()) {
 972                         mutex_unlock(&kvm->lock);
 973                         return -EOPNOTSUPP;
 974                 }
 975                 kvm->arch.crypto.apie = 1;
 976                 break;
 977         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 978                 if (!ap_instructions_available()) {
 979                         mutex_unlock(&kvm->lock);
 980                         return -EOPNOTSUPP;
 981                 }
 982                 kvm->arch.crypto.apie = 0;
 983                 break;
 984         default:
 985                 mutex_unlock(&kvm->lock);
 986                 return -ENXIO;
 987         }
 988
 989         kvm_s390_vcpu_crypto_reset_all(kvm);
 990         mutex_unlock(&kvm->lock);
 991         return 0;
 992 }
 993
 994 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 995 {
 996         int cx;
 997         struct kvm_vcpu *vcpu;
 998
 999         kvm_for_each_vcpu(cx, vcpu, kvm)
1000                 kvm_s390_sync_request(req, vcpu);
1001 }
1002
1003 /*
1004  * Must be called with kvm->srcu held to avoid races on memslots, and with
1005  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1006  */
1007 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1008 {
1009         struct kvm_memory_slot *ms;
1010         struct kvm_memslots *slots;
1011         unsigned long ram_pages = 0;
1012         int slotnr;
1013
1014         /* migration mode already enabled */
1015         if (kvm->arch.migration_mode)
1016                 return 0;
1017         slots = kvm_memslots(kvm);
1018         if (!slots || !slots->used_slots)
1019                 return -EINVAL;
1020
1021         if (!kvm->arch.use_cmma) {
1022                 kvm->arch.migration_mode = 1;
1023                 return 0;
1024         }
1025         /* mark all the pages in active slots as dirty */
1026         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1027                 ms = slots->memslots + slotnr;
1028                 if (!ms->dirty_bitmap)
1029                         return -EINVAL;
1030                 /*
1031                  * The second half of the bitmap is only used on x86,
1032                  * and would be wasted otherwise, so we put it to good
1033                  * use here to keep track of the state of the storage
1034                  * attributes.
1035                  */
1036                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1037                 ram_pages += ms->npages;
1038         }
1039         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1040         kvm->arch.migration_mode = 1;
1041         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1042         return 0;
1043 }
1044
1045 /*
1046  * Must be called with kvm->slots_lock to avoid races with ourselves and
1047  * kvm_s390_vm_start_migration.
1048  */
1049 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1050 {
1051         /* migration mode already disabled */
1052         if (!kvm->arch.migration_mode)
1053                 return 0;
1054         kvm->arch.migration_mode = 0;
1055         if (kvm->arch.use_cmma)
1056                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1057         return 0;
1058 }
1059
1060 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1061                                      struct kvm_device_attr *attr)
1062 {
1063         int res = -ENXIO;
1064
1065         mutex_lock(&kvm->slots_lock);
1066         switch (attr->attr) {
1067         case KVM_S390_VM_MIGRATION_START:
1068                 res = kvm_s390_vm_start_migration(kvm);
1069                 break;
1070         case KVM_S390_VM_MIGRATION_STOP:
1071                 res = kvm_s390_vm_stop_migration(kvm);
1072                 break;
1073         default:
1074                 break;
1075         }
1076         mutex_unlock(&kvm->slots_lock);
1077
1078         return res;
1079 }
1080
1081 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1082                                      struct kvm_device_attr *attr)
1083 {
1084         u64 mig = kvm->arch.migration_mode;
1085
1086         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1087                 return -ENXIO;
1088
1089         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1090                 return -EFAULT;
1091         return 0;
1092 }
1093
1094 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1095 {
1096         struct kvm_s390_vm_tod_clock gtod;
1097
1098         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1099                 return -EFAULT;
1100
1101         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1102                 return -EINVAL;
1103         kvm_s390_set_tod_clock(kvm, &gtod);
1104
1105         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1106                 gtod.epoch_idx, gtod.tod);
1107
1108         return 0;
1109 }
1110
1111 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1112 {
1113         u8 gtod_high;
1114
1115         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1116                                            sizeof(gtod_high)))
1117                 return -EFAULT;
1118
1119         if (gtod_high != 0)
1120                 return -EINVAL;
1121         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1122
1123         return 0;
1124 }
1125
1126 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1127 {
1128         struct kvm_s390_vm_tod_clock gtod = { 0 };
1129
1130         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1131                            sizeof(gtod.tod)))
1132                 return -EFAULT;
1133
1134         kvm_s390_set_tod_clock(kvm, &gtod);
1135         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1136         return 0;
1137 }
1138
1139 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1140 {
1141         int ret;
1142
1143         if (attr->flags)
1144                 return -EINVAL;
1145
1146         switch (attr->attr) {
1147         case KVM_S390_VM_TOD_EXT:
1148                 ret = kvm_s390_set_tod_ext(kvm, attr);
1149                 break;
1150         case KVM_S390_VM_TOD_HIGH:
1151                 ret = kvm_s390_set_tod_high(kvm, attr);
1152                 break;
1153         case KVM_S390_VM_TOD_LOW:
1154                 ret = kvm_s390_set_tod_low(kvm, attr);
1155                 break;
1156         default:
1157                 ret = -ENXIO;
1158                 break;
1159         }
1160         return ret;
1161 }
1162
1163 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1164                                    struct kvm_s390_vm_tod_clock *gtod)
1165 {
1166         struct kvm_s390_tod_clock_ext htod;
1167
1168         preempt_disable();
1169
1170         get_tod_clock_ext((char *)&htod);
1171
1172         gtod->tod = htod.tod + kvm->arch.epoch;
1173         gtod->epoch_idx = 0;
1174         if (test_kvm_facility(kvm, 139)) {
1175                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1176                 if (gtod->tod < htod.tod)
1177                         gtod->epoch_idx += 1;
1178         }
1179
1180         preempt_enable();
1181 }
1182
1183 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1184 {
1185         struct kvm_s390_vm_tod_clock gtod;
1186
1187         memset(&gtod, 0, sizeof(gtod));
1188         kvm_s390_get_tod_clock(kvm, &gtod);
1189         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1190                 return -EFAULT;
1191
1192         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1193                 gtod.epoch_idx, gtod.tod);
1194         return 0;
1195 }
1196
1197 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1198 {
1199         u8 gtod_high = 0;
1200
1201         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1202                                          sizeof(gtod_high)))
1203                 return -EFAULT;
1204         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1205
1206         return 0;
1207 }
1208
1209 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1210 {
1211         u64 gtod;
1212
1213         gtod = kvm_s390_get_tod_clock_fast(kvm);
1214         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1215                 return -EFAULT;
1216         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1217
1218         return 0;
1219 }
1220
1221 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1222 {
1223         int ret;
1224
1225         if (attr->flags)
1226                 return -EINVAL;
1227
1228         switch (attr->attr) {
1229         case KVM_S390_VM_TOD_EXT:
1230                 ret = kvm_s390_get_tod_ext(kvm, attr);
1231                 break;
1232         case KVM_S390_VM_TOD_HIGH:
1233                 ret = kvm_s390_get_tod_high(kvm, attr);
1234                 break;
1235         case KVM_S390_VM_TOD_LOW:
1236                 ret = kvm_s390_get_tod_low(kvm, attr);
1237                 break;
1238         default:
1239                 ret = -ENXIO;
1240                 break;
1241         }
1242         return ret;
1243 }
1244
1245 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1246 {
1247         struct kvm_s390_vm_cpu_processor *proc;
1248         u16 lowest_ibc, unblocked_ibc;
1249         int ret = 0;
1250
1251         mutex_lock(&kvm->lock);
1252         if (kvm->created_vcpus) {
1253                 ret = -EBUSY;
1254                 goto out;
1255         }
1256         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1257         if (!proc) {
1258                 ret = -ENOMEM;
1259                 goto out;
1260         }
1261         if (!copy_from_user(proc, (void __user *)attr->addr,
1262                             sizeof(*proc))) {
1263                 kvm->arch.model.cpuid = proc->cpuid;
1264                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1265                 unblocked_ibc = sclp.ibc & 0xfff;
1266                 if (lowest_ibc && proc->ibc) {
1267                         if (proc->ibc > unblocked_ibc)
1268                                 kvm->arch.model.ibc = unblocked_ibc;
1269                         else if (proc->ibc < lowest_ibc)
1270                                 kvm->arch.model.ibc = lowest_ibc;
1271                         else
1272                                 kvm->arch.model.ibc = proc->ibc;
1273                 }
1274                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1275                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1276                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1277                          kvm->arch.model.ibc,
1278                          kvm->arch.model.cpuid);
1279                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1280                          kvm->arch.model.fac_list[0],
1281                          kvm->arch.model.fac_list[1],
1282                          kvm->arch.model.fac_list[2]);
1283         } else
1284                 ret = -EFAULT;
1285         kfree(proc);
1286 out:
1287         mutex_unlock(&kvm->lock);
1288         return ret;
1289 }
1290
1291 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1292                                        struct kvm_device_attr *attr)
1293 {
1294         struct kvm_s390_vm_cpu_feat data;
1295
1296         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1297                 return -EFAULT;
1298         if (!bitmap_subset((unsigned long *) data.feat,
1299                            kvm_s390_available_cpu_feat,
1300                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1301                 return -EINVAL;
1302
1303         mutex_lock(&kvm->lock);
1304         if (kvm->created_vcpus) {
1305                 mutex_unlock(&kvm->lock);
1306                 return -EBUSY;
1307         }
1308         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1309                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1310         mutex_unlock(&kvm->lock);
1311         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1312                          data.feat[0],
1313                          data.feat[1],
1314                          data.feat[2]);
1315         return 0;
1316 }
1317
1318 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1319                                           struct kvm_device_attr *attr)
1320 {
1321         mutex_lock(&kvm->lock);
1322         if (kvm->created_vcpus) {
1323                 mutex_unlock(&kvm->lock);
1324                 return -EBUSY;
1325         }
1326
1327         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1328                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1329                 mutex_unlock(&kvm->lock);
1330                 return -EFAULT;
1331         }
1332         mutex_unlock(&kvm->lock);
1333
1334         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1339         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1342         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1345         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1386         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1391
1392         return 0;
1393 }
1394
1395 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1396 {
1397         int ret = -ENXIO;
1398
1399         switch (attr->attr) {
1400         case KVM_S390_VM_CPU_PROCESSOR:
1401                 ret = kvm_s390_set_processor(kvm, attr);
1402                 break;
1403         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1404                 ret = kvm_s390_set_processor_feat(kvm, attr);
1405                 break;
1406         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1407                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1408                 break;
1409         }
1410         return ret;
1411 }
1412
1413 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1414 {
1415         struct kvm_s390_vm_cpu_processor *proc;
1416         int ret = 0;
1417
1418         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1419         if (!proc) {
1420                 ret = -ENOMEM;
1421                 goto out;
1422         }
1423         proc->cpuid = kvm->arch.model.cpuid;
1424         proc->ibc = kvm->arch.model.ibc;
1425         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1426                S390_ARCH_FAC_LIST_SIZE_BYTE);
1427         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1428                  kvm->arch.model.ibc,
1429                  kvm->arch.model.cpuid);
1430         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1431                  kvm->arch.model.fac_list[0],
1432                  kvm->arch.model.fac_list[1],
1433                  kvm->arch.model.fac_list[2]);
1434         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1435                 ret = -EFAULT;
1436         kfree(proc);
1437 out:
1438         return ret;
1439 }
1440
1441 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443         struct kvm_s390_vm_cpu_machine *mach;
1444         int ret = 0;
1445
1446         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1447         if (!mach) {
1448                 ret = -ENOMEM;
1449                 goto out;
1450         }
1451         get_cpu_id((struct cpuid *) &mach->cpuid);
1452         mach->ibc = sclp.ibc;
1453         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1454                S390_ARCH_FAC_LIST_SIZE_BYTE);
1455         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1456                sizeof(S390_lowcore.stfle_fac_list));
1457         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1458                  kvm->arch.model.ibc,
1459                  kvm->arch.model.cpuid);
1460         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1461                  mach->fac_mask[0],
1462                  mach->fac_mask[1],
1463                  mach->fac_mask[2]);
1464         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1465                  mach->fac_list[0],
1466                  mach->fac_list[1],
1467                  mach->fac_list[2]);
1468         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1469                 ret = -EFAULT;
1470         kfree(mach);
1471 out:
1472         return ret;
1473 }
1474
1475 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1476                                        struct kvm_device_attr *attr)
1477 {
1478         struct kvm_s390_vm_cpu_feat data;
1479
1480         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1481                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1482         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1483                 return -EFAULT;
1484         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1485                          data.feat[0],
1486                          data.feat[1],
1487                          data.feat[2]);
1488         return 0;
1489 }
1490
1491 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1492                                      struct kvm_device_attr *attr)
1493 {
1494         struct kvm_s390_vm_cpu_feat data;
1495
1496         bitmap_copy((unsigned long *) data.feat,
1497                     kvm_s390_available_cpu_feat,
1498                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1499         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1500                 return -EFAULT;
1501         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1502                          data.feat[0],
1503                          data.feat[1],
1504                          data.feat[2]);
1505         return 0;
1506 }
1507
1508 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1509                                           struct kvm_device_attr *attr)
1510 {
1511         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1512             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1513                 return -EFAULT;
1514
1515         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1520         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1523         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1526         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1567         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1572
1573         return 0;
1574 }
1575
1576 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1577                                         struct kvm_device_attr *attr)
1578 {
1579         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1580             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1581                 return -EFAULT;
1582
1583         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1585                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1588         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1590                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1591         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1594         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1635         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1636                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1637                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1640
1641         return 0;
1642 }
1643
1644 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1645 {
1646         int ret = -ENXIO;
1647
1648         switch (attr->attr) {
1649         case KVM_S390_VM_CPU_PROCESSOR:
1650                 ret = kvm_s390_get_processor(kvm, attr);
1651                 break;
1652         case KVM_S390_VM_CPU_MACHINE:
1653                 ret = kvm_s390_get_machine(kvm, attr);
1654                 break;
1655         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1656                 ret = kvm_s390_get_processor_feat(kvm, attr);
1657                 break;
1658         case KVM_S390_VM_CPU_MACHINE_FEAT:
1659                 ret = kvm_s390_get_machine_feat(kvm, attr);
1660                 break;
1661         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1662                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1663                 break;
1664         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1665                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1666                 break;
1667         }
1668         return ret;
1669 }
1670
1671 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673         int ret;
1674
1675         switch (attr->group) {
1676         case KVM_S390_VM_MEM_CTRL:
1677                 ret = kvm_s390_set_mem_control(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_TOD:
1680                 ret = kvm_s390_set_tod(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_MODEL:
1683                 ret = kvm_s390_set_cpu_model(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CRYPTO:
1686                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_MIGRATION:
1689                 ret = kvm_s390_vm_set_migration(kvm, attr);
1690                 break;
1691         default:
1692                 ret = -ENXIO;
1693                 break;
1694         }
1695
1696         return ret;
1697 }
1698
1699 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701         int ret;
1702
1703         switch (attr->group) {
1704         case KVM_S390_VM_MEM_CTRL:
1705                 ret = kvm_s390_get_mem_control(kvm, attr);
1706                 break;
1707         case KVM_S390_VM_TOD:
1708                 ret = kvm_s390_get_tod(kvm, attr);
1709                 break;
1710         case KVM_S390_VM_CPU_MODEL:
1711                 ret = kvm_s390_get_cpu_model(kvm, attr);
1712                 break;
1713         case KVM_S390_VM_MIGRATION:
1714                 ret = kvm_s390_vm_get_migration(kvm, attr);
1715                 break;
1716         default:
1717                 ret = -ENXIO;
1718                 break;
1719         }
1720
1721         return ret;
1722 }
1723
1724 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1725 {
1726         int ret;
1727
1728         switch (attr->group) {
1729         case KVM_S390_VM_MEM_CTRL:
1730                 switch (attr->attr) {
1731                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1732                 case KVM_S390_VM_MEM_CLR_CMMA:
1733                         ret = sclp.has_cmma ? 0 : -ENXIO;
1734                         break;
1735                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1736                         ret = 0;
1737                         break;
1738                 default:
1739                         ret = -ENXIO;
1740                         break;
1741                 }
1742                 break;
1743         case KVM_S390_VM_TOD:
1744                 switch (attr->attr) {
1745                 case KVM_S390_VM_TOD_LOW:
1746                 case KVM_S390_VM_TOD_HIGH:
1747                         ret = 0;
1748                         break;
1749                 default:
1750                         ret = -ENXIO;
1751                         break;
1752                 }
1753                 break;
1754         case KVM_S390_VM_CPU_MODEL:
1755                 switch (attr->attr) {
1756                 case KVM_S390_VM_CPU_PROCESSOR:
1757                 case KVM_S390_VM_CPU_MACHINE:
1758                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1759                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1760                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1761                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1762                         ret = 0;
1763                         break;
1764                 default:
1765                         ret = -ENXIO;
1766                         break;
1767                 }
1768                 break;
1769         case KVM_S390_VM_CRYPTO:
1770                 switch (attr->attr) {
1771                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1772                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1773                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1774                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1775                         ret = 0;
1776                         break;
1777                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1778                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1779                         ret = ap_instructions_available() ? 0 : -ENXIO;
1780                         break;
1781                 default:
1782                         ret = -ENXIO;
1783                         break;
1784                 }
1785                 break;
1786         case KVM_S390_VM_MIGRATION:
1787                 ret = 0;
1788                 break;
1789         default:
1790                 ret = -ENXIO;
1791                 break;
1792         }
1793
1794         return ret;
1795 }
1796
1797 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1798 {
1799         uint8_t *keys;
1800         uint64_t hva;
1801         int srcu_idx, i, r = 0;
1802
1803         if (args->flags != 0)
1804                 return -EINVAL;
1805
1806         /* Is this guest using storage keys? */
1807         if (!mm_uses_skeys(current->mm))
1808                 return KVM_S390_GET_SKEYS_NONE;
1809
1810         /* Enforce sane limit on memory allocation */
1811         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1812                 return -EINVAL;
1813
1814         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1815         if (!keys)
1816                 return -ENOMEM;
1817
1818         down_read(&current->mm->mmap_sem);
1819         srcu_idx = srcu_read_lock(&kvm->srcu);
1820         for (i = 0; i < args->count; i++) {
1821                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1822                 if (kvm_is_error_hva(hva)) {
1823                         r = -EFAULT;
1824                         break;
1825                 }
1826
1827                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1828                 if (r)
1829                         break;
1830         }
1831         srcu_read_unlock(&kvm->srcu, srcu_idx);
1832         up_read(&current->mm->mmap_sem);
1833
1834         if (!r) {
1835                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1836                                  sizeof(uint8_t) * args->count);
1837                 if (r)
1838                         r = -EFAULT;
1839         }
1840
1841         kvfree(keys);
1842         return r;
1843 }
1844
1845 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1846 {
1847         uint8_t *keys;
1848         uint64_t hva;
1849         int srcu_idx, i, r = 0;
1850         bool unlocked;
1851
1852         if (args->flags != 0)
1853                 return -EINVAL;
1854
1855         /* Enforce sane limit on memory allocation */
1856         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1857                 return -EINVAL;
1858
1859         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1860         if (!keys)
1861                 return -ENOMEM;
1862
1863         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1864                            sizeof(uint8_t) * args->count);
1865         if (r) {
1866                 r = -EFAULT;
1867                 goto out;
1868         }
1869
1870         /* Enable storage key handling for the guest */
1871         r = s390_enable_skey();
1872         if (r)
1873                 goto out;
1874
1875         i = 0;
1876         down_read(&current->mm->mmap_sem);
1877         srcu_idx = srcu_read_lock(&kvm->srcu);
1878         while (i < args->count) {
1879                 unlocked = false;
1880                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1881                 if (kvm_is_error_hva(hva)) {
1882                         r = -EFAULT;
1883                         break;
1884                 }
1885
1886                 /* Lowest order bit is reserved */
1887                 if (keys[i] & 0x01) {
1888                         r = -EINVAL;
1889                         break;
1890                 }
1891
1892                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1893                 if (r) {
1894                         r = fixup_user_fault(current, current->mm, hva,
1895                                              FAULT_FLAG_WRITE, &unlocked);
1896                         if (r)
1897                                 break;
1898                 }
1899                 if (!r)
1900                         i++;
1901         }
1902         srcu_read_unlock(&kvm->srcu, srcu_idx);
1903         up_read(&current->mm->mmap_sem);
1904 out:
1905         kvfree(keys);
1906         return r;
1907 }
1908
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 *
 * kvm_s390_get_cmma() stops filling its buffer once the next dirty page is
 * more than this many pages past the current one.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/*
 * Upper bound on the number of CMMA values handled per request. Defined in
 * terms of KVM_S390_SKEYS_MAX for consistency with the limit applied to
 * storage key requests.
 */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1917
1918 /*
1919  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1920  * address falls in a hole. In that case the index of one of the memslots
1921  * bordering the hole is returned.
1922  */
1923 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1924 {
1925         int start = 0, end = slots->used_slots;
1926         int slot = atomic_read(&slots->lru_slot);
1927         struct kvm_memory_slot *memslots = slots->memslots;
1928
1929         if (gfn >= memslots[slot].base_gfn &&
1930             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1931                 return slot;
1932
1933         while (start < end) {
1934                 slot = start + (end - start) / 2;
1935
1936                 if (gfn >= memslots[slot].base_gfn)
1937                         end = slot;
1938                 else
1939                         start = slot + 1;
1940         }
1941
1942         if (start >= slots->used_slots)
1943                 return slots->used_slots - 1;
1944
1945         if (gfn >= memslots[start].base_gfn &&
1946             gfn < memslots[start].base_gfn + memslots[start].npages) {
1947                 atomic_set(&slots->lru_slot, start);
1948         }
1949
1950         return start;
1951 }
1952
1953 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1954                               u8 *res, unsigned long bufsize)
1955 {
1956         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1957
1958         args->count = 0;
1959         while (args->count < bufsize) {
1960                 hva = gfn_to_hva(kvm, cur_gfn);
1961                 /*
1962                  * We return an error if the first value was invalid, but we
1963                  * return successfully if at least one value was copied.
1964                  */
1965                 if (kvm_is_error_hva(hva))
1966                         return args->count ? 0 : -EFAULT;
1967                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1968                         pgstev = 0;
1969                 res[args->count++] = (pgstev >> 24) & 0x43;
1970                 cur_gfn++;
1971         }
1972
1973         return 0;
1974 }
1975
1976 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1977                                               unsigned long cur_gfn)
1978 {
1979         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1980         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1981         unsigned long ofs = cur_gfn - ms->base_gfn;
1982
1983         if (ms->base_gfn + ms->npages <= cur_gfn) {
1984                 slotidx--;
1985                 /* If we are above the highest slot, wrap around */
1986                 if (slotidx < 0)
1987                         slotidx = slots->used_slots - 1;
1988
1989                 ms = slots->memslots + slotidx;
1990                 ofs = 0;
1991         }
1992         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1993         while ((slotidx > 0) && (ofs >= ms->npages)) {
1994                 slotidx--;
1995                 ms = slots->memslots + slotidx;
1996                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1997         }
1998         return ms->base_gfn + ofs;
1999 }
2000
2001 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2002                              u8 *res, unsigned long bufsize)
2003 {
2004         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2005         struct kvm_memslots *slots = kvm_memslots(kvm);
2006         struct kvm_memory_slot *ms;
2007
2008         if (unlikely(!slots->used_slots))
2009                 return 0;
2010
2011         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2012         ms = gfn_to_memslot(kvm, cur_gfn);
2013         args->count = 0;
2014         args->start_gfn = cur_gfn;
2015         if (!ms)
2016                 return 0;
2017         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2018         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2019
2020         while (args->count < bufsize) {
2021                 hva = gfn_to_hva(kvm, cur_gfn);
2022                 if (kvm_is_error_hva(hva))
2023                         return 0;
2024                 /* Decrement only if we actually flipped the bit to 0 */
2025                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2026                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2027                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2028                         pgstev = 0;
2029                 /* Save the value */
2030                 res[args->count++] = (pgstev >> 24) & 0x43;
2031                 /* If the next bit is too far away, stop. */
2032                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2033                         return 0;
2034                 /* If we reached the previous "next", find the next one */
2035                 if (cur_gfn == next_gfn)
2036                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2037                 /* Reached the end of memory or of the buffer, stop */
2038                 if ((next_gfn >= mem_end) ||
2039                     (next_gfn - args->start_gfn >= bufsize))
2040                         return 0;
2041                 cur_gfn++;
2042                 /* Reached the end of the current memslot, take the next one. */
2043                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2044                         ms = gfn_to_memslot(kvm, cur_gfn);
2045                         if (!ms)
2046                                 return 0;
2047                 }
2048         }
2049         return 0;
2050 }
2051
2052 /*
2053  * This function searches for the next page with dirty CMMA attributes, and
2054  * saves the attributes in the buffer up to either the end of the buffer or
2055  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2056  * no trailing clean bytes are saved.
2057  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2058  * output buffer will indicate 0 as length.
2059  */
2060 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2061                                   struct kvm_s390_cmma_log *args)
2062 {
2063         unsigned long bufsize;
2064         int srcu_idx, peek, ret;
2065         u8 *values;
2066
2067         if (!kvm->arch.use_cmma)
2068                 return -ENXIO;
2069         /* Invalid/unsupported flags were specified */
2070         if (args->flags & ~KVM_S390_CMMA_PEEK)
2071                 return -EINVAL;
2072         /* Migration mode query, and we are not doing a migration */
2073         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2074         if (!peek && !kvm->arch.migration_mode)
2075                 return -EINVAL;
2076         /* CMMA is disabled or was not used, or the buffer has length zero */
2077         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2078         if (!bufsize || !kvm->mm->context.uses_cmm) {
2079                 memset(args, 0, sizeof(*args));
2080                 return 0;
2081         }
2082         /* We are not peeking, and there are no dirty pages */
2083         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2084                 memset(args, 0, sizeof(*args));
2085                 return 0;
2086         }
2087
2088         values = vmalloc(bufsize);
2089         if (!values)
2090                 return -ENOMEM;
2091
2092         down_read(&kvm->mm->mmap_sem);
2093         srcu_idx = srcu_read_lock(&kvm->srcu);
2094         if (peek)
2095                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2096         else
2097                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2098         srcu_read_unlock(&kvm->srcu, srcu_idx);
2099         up_read(&kvm->mm->mmap_sem);
2100
2101         if (kvm->arch.migration_mode)
2102                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2103         else
2104                 args->remaining = 0;
2105
2106         if (copy_to_user((void __user *)args->values, values, args->count))
2107                 ret = -EFAULT;
2108
2109         vfree(values);
2110         return ret;
2111 }
2112
2113 /*
2114  * This function sets the CMMA attributes for the given pages. If the input
2115  * buffer has zero length, no action is taken, otherwise the attributes are
2116  * set and the mm->context.uses_cmm flag is set.
2117  */
2118 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2119                                   const struct kvm_s390_cmma_log *args)
2120 {
2121         unsigned long hva, mask, pgstev, i;
2122         uint8_t *bits;
2123         int srcu_idx, r = 0;
2124
2125         mask = args->mask;
2126
2127         if (!kvm->arch.use_cmma)
2128                 return -ENXIO;
2129         /* invalid/unsupported flags */
2130         if (args->flags != 0)
2131                 return -EINVAL;
2132         /* Enforce sane limit on memory allocation */
2133         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2134                 return -EINVAL;
2135         /* Nothing to do */
2136         if (args->count == 0)
2137                 return 0;
2138
2139         bits = vmalloc(array_size(sizeof(*bits), args->count));
2140         if (!bits)
2141                 return -ENOMEM;
2142
2143         r = copy_from_user(bits, (void __user *)args->values, args->count);
2144         if (r) {
2145                 r = -EFAULT;
2146                 goto out;
2147         }
2148
2149         down_read(&kvm->mm->mmap_sem);
2150         srcu_idx = srcu_read_lock(&kvm->srcu);
2151         for (i = 0; i < args->count; i++) {
2152                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2153                 if (kvm_is_error_hva(hva)) {
2154                         r = -EFAULT;
2155                         break;
2156                 }
2157
2158                 pgstev = bits[i];
2159                 pgstev = pgstev << 24;
2160                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2161                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2162         }
2163         srcu_read_unlock(&kvm->srcu, srcu_idx);
2164         up_read(&kvm->mm->mmap_sem);
2165
2166         if (!kvm->mm->context.uses_cmm) {
2167                 down_write(&kvm->mm->mmap_sem);
2168                 kvm->mm->context.uses_cmm = 1;
2169                 up_write(&kvm->mm->mmap_sem);
2170         }
2171 out:
2172         vfree(bits);
2173         return r;
2174 }
2175
2176 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2177 {
2178         struct kvm_vcpu *vcpu;
2179         u16 rc, rrc;
2180         int ret = 0;
2181         int i;
2182
2183         /*
2184          * We ignore failures and try to destroy as many CPUs as possible.
2185          * At the same time we must not free the assigned resources when
2186          * this fails, as the ultravisor has still access to that memory.
2187          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2188          * behind.
2189          * We want to return the first failure rc and rrc, though.
2190          */
2191         kvm_for_each_vcpu(i, vcpu, kvm) {
2192                 mutex_lock(&vcpu->mutex);
2193                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2194                         *rcp = rc;
2195                         *rrcp = rrc;
2196                         ret = -EIO;
2197                 }
2198                 mutex_unlock(&vcpu->mutex);
2199         }
2200         return ret;
2201 }
2202
2203 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2204 {
2205         int i, r = 0;
2206         u16 dummy;
2207
2208         struct kvm_vcpu *vcpu;
2209
2210         kvm_for_each_vcpu(i, vcpu, kvm) {
2211                 mutex_lock(&vcpu->mutex);
2212                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2213                 mutex_unlock(&vcpu->mutex);
2214                 if (r)
2215                         break;
2216         }
2217         if (r)
2218                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2219         return r;
2220 }
2221
2222 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2223 {
2224         int r = 0;
2225         u16 dummy;
2226         void __user *argp = (void __user *)cmd->data;
2227
2228         switch (cmd->cmd) {
2229         case KVM_PV_ENABLE: {
2230                 r = -EINVAL;
2231                 if (kvm_s390_pv_is_protected(kvm))
2232                         break;
2233
2234                 /*
2235                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2236                  *  esca, we need no cleanup in the error cases below
2237                  */
2238                 r = sca_switch_to_extended(kvm);
2239                 if (r)
2240                         break;
2241
2242                 down_write(&current->mm->mmap_sem);
2243                 r = gmap_mark_unmergeable();
2244                 up_write(&current->mm->mmap_sem);
2245                 if (r)
2246                         break;
2247
2248                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2249                 if (r)
2250                         break;
2251
2252                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2253                 if (r)
2254                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2255
2256                 /* we need to block service interrupts from now on */
2257                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2258                 break;
2259         }
2260         case KVM_PV_DISABLE: {
2261                 r = -EINVAL;
2262                 if (!kvm_s390_pv_is_protected(kvm))
2263                         break;
2264
2265                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2266                 /*
2267                  * If a CPU could not be destroyed, destroy VM will also fail.
2268                  * There is no point in trying to destroy it. Instead return
2269                  * the rc and rrc from the first CPU that failed destroying.
2270                  */
2271                 if (r)
2272                         break;
2273                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2274
2275                 /* no need to block service interrupts any more */
2276                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2277                 break;
2278         }
2279         case KVM_PV_SET_SEC_PARMS: {
2280                 struct kvm_s390_pv_sec_parm parms = {};
2281                 void *hdr;
2282
2283                 r = -EINVAL;
2284                 if (!kvm_s390_pv_is_protected(kvm))
2285                         break;
2286
2287                 r = -EFAULT;
2288                 if (copy_from_user(&parms, argp, sizeof(parms)))
2289                         break;
2290
2291                 /* Currently restricted to 8KB */
2292                 r = -EINVAL;
2293                 if (parms.length > PAGE_SIZE * 2)
2294                         break;
2295
2296                 r = -ENOMEM;
2297                 hdr = vmalloc(parms.length);
2298                 if (!hdr)
2299                         break;
2300
2301                 r = -EFAULT;
2302                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2303                                     parms.length))
2304                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2305                                                       &cmd->rc, &cmd->rrc);
2306
2307                 vfree(hdr);
2308                 break;
2309         }
2310         case KVM_PV_UNPACK: {
2311                 struct kvm_s390_pv_unp unp = {};
2312
2313                 r = -EINVAL;
2314                 if (!kvm_s390_pv_is_protected(kvm))
2315                         break;
2316
2317                 r = -EFAULT;
2318                 if (copy_from_user(&unp, argp, sizeof(unp)))
2319                         break;
2320
2321                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2322                                        &cmd->rc, &cmd->rrc);
2323                 break;
2324         }
2325         case KVM_PV_VERIFY: {
2326                 r = -EINVAL;
2327                 if (!kvm_s390_pv_is_protected(kvm))
2328                         break;
2329
2330                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2331                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2332                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2333                              cmd->rrc);
2334                 break;
2335         }
2336         case KVM_PV_PREP_RESET: {
2337                 r = -EINVAL;
2338                 if (!kvm_s390_pv_is_protected(kvm))
2339                         break;
2340
2341                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2342                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2343                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2344                              cmd->rc, cmd->rrc);
2345                 break;
2346         }
2347         case KVM_PV_UNSHARE_ALL: {
2348                 r = -EINVAL;
2349                 if (!kvm_s390_pv_is_protected(kvm))
2350                         break;
2351
2352                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2353                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2354                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2355                              cmd->rc, cmd->rrc);
2356                 break;
2357         }
2358         default:
2359                 r = -ENOTTY;
2360         }
2361         return r;
2362 }
2363
2364 long kvm_arch_vm_ioctl(struct file *filp,
2365                        unsigned int ioctl, unsigned long arg)
2366 {
2367         struct kvm *kvm = filp->private_data;
2368         void __user *argp = (void __user *)arg;
2369         struct kvm_device_attr attr;
2370         int r;
2371
2372         switch (ioctl) {
2373         case KVM_S390_INTERRUPT: {
2374                 struct kvm_s390_interrupt s390int;
2375
2376                 r = -EFAULT;
2377                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2378                         break;
2379                 r = kvm_s390_inject_vm(kvm, &s390int);
2380                 break;
2381         }
2382         case KVM_CREATE_IRQCHIP: {
2383                 struct kvm_irq_routing_entry routing;
2384
2385                 r = -EINVAL;
2386                 if (kvm->arch.use_irqchip) {
2387                         /* Set up dummy routing. */
2388                         memset(&routing, 0, sizeof(routing));
2389                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2390                 }
2391                 break;
2392         }
2393         case KVM_SET_DEVICE_ATTR: {
2394                 r = -EFAULT;
2395                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2396                         break;
2397                 r = kvm_s390_vm_set_attr(kvm, &attr);
2398                 break;
2399         }
2400         case KVM_GET_DEVICE_ATTR: {
2401                 r = -EFAULT;
2402                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2403                         break;
2404                 r = kvm_s390_vm_get_attr(kvm, &attr);
2405                 break;
2406         }
2407         case KVM_HAS_DEVICE_ATTR: {
2408                 r = -EFAULT;
2409                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2410                         break;
2411                 r = kvm_s390_vm_has_attr(kvm, &attr);
2412                 break;
2413         }
2414         case KVM_S390_GET_SKEYS: {
2415                 struct kvm_s390_skeys args;
2416
2417                 r = -EFAULT;
2418                 if (copy_from_user(&args, argp,
2419                                    sizeof(struct kvm_s390_skeys)))
2420                         break;
2421                 r = kvm_s390_get_skeys(kvm, &args);
2422                 break;
2423         }
2424         case KVM_S390_SET_SKEYS: {
2425                 struct kvm_s390_skeys args;
2426
2427                 r = -EFAULT;
2428                 if (copy_from_user(&args, argp,
2429                                    sizeof(struct kvm_s390_skeys)))
2430                         break;
2431                 r = kvm_s390_set_skeys(kvm, &args);
2432                 break;
2433         }
2434         case KVM_S390_GET_CMMA_BITS: {
2435                 struct kvm_s390_cmma_log args;
2436
2437                 r = -EFAULT;
2438                 if (copy_from_user(&args, argp, sizeof(args)))
2439                         break;
2440                 mutex_lock(&kvm->slots_lock);
2441                 r = kvm_s390_get_cmma_bits(kvm, &args);
2442                 mutex_unlock(&kvm->slots_lock);
2443                 if (!r) {
2444                         r = copy_to_user(argp, &args, sizeof(args));
2445                         if (r)
2446                                 r = -EFAULT;
2447                 }
2448                 break;
2449         }
2450         case KVM_S390_SET_CMMA_BITS: {
2451                 struct kvm_s390_cmma_log args;
2452
2453                 r = -EFAULT;
2454                 if (copy_from_user(&args, argp, sizeof(args)))
2455                         break;
2456                 mutex_lock(&kvm->slots_lock);
2457                 r = kvm_s390_set_cmma_bits(kvm, &args);
2458                 mutex_unlock(&kvm->slots_lock);
2459                 break;
2460         }
2461         case KVM_S390_PV_COMMAND: {
2462                 struct kvm_pv_cmd args;
2463
2464                 /* protvirt means user sigp */
2465                 kvm->arch.user_cpu_state_ctrl = 1;
2466                 r = 0;
2467                 if (!is_prot_virt_host()) {
2468                         r = -EINVAL;
2469                         break;
2470                 }
2471                 if (copy_from_user(&args, argp, sizeof(args))) {
2472                         r = -EFAULT;
2473                         break;
2474                 }
2475                 if (args.flags) {
2476                         r = -EINVAL;
2477                         break;
2478                 }
2479                 mutex_lock(&kvm->lock);
2480                 r = kvm_s390_handle_pv(kvm, &args);
2481                 mutex_unlock(&kvm->lock);
2482                 if (copy_to_user(argp, &args, sizeof(args))) {
2483                         r = -EFAULT;
2484                         break;
2485                 }
2486                 break;
2487         }
2488         default:
2489                 r = -ENOTTY;
2490         }
2491
2492         return r;
2493 }
2494
2495 static int kvm_s390_apxa_installed(void)
2496 {
2497         struct ap_config_info info;
2498
2499         if (ap_instructions_available()) {
2500                 if (ap_qci(&info) == 0)
2501                         return info.apxa;
2502         }
2503
2504         return 0;
2505 }
2506
2507 /*
2508  * The format of the crypto control block (CRYCB) is specified in the 3 low
2509  * order bits of the CRYCB designation (CRYCBD) field as follows:
2510  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2511  *           AP extended addressing (APXA) facility are installed.
2512  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2513  * Format 2: Both the APXA and MSAX3 facilities are installed
2514  */
2515 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2516 {
2517         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2518
2519         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2520         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2521
2522         /* Check whether MSAX3 is installed */
2523         if (!test_kvm_facility(kvm, 76))
2524                 return;
2525
2526         if (kvm_s390_apxa_installed())
2527                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2528         else
2529                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2530 }
2531
2532 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2533                                unsigned long *aqm, unsigned long *adm)
2534 {
2535         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2536
2537         mutex_lock(&kvm->lock);
2538         kvm_s390_vcpu_block_all(kvm);
2539
2540         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2541         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2542                 memcpy(crycb->apcb1.apm, apm, 32);
2543                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2544                          apm[0], apm[1], apm[2], apm[3]);
2545                 memcpy(crycb->apcb1.aqm, aqm, 32);
2546                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2547                          aqm[0], aqm[1], aqm[2], aqm[3]);
2548                 memcpy(crycb->apcb1.adm, adm, 32);
2549                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2550                          adm[0], adm[1], adm[2], adm[3]);
2551                 break;
2552         case CRYCB_FORMAT1:
2553         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2554                 memcpy(crycb->apcb0.apm, apm, 8);
2555                 memcpy(crycb->apcb0.aqm, aqm, 2);
2556                 memcpy(crycb->apcb0.adm, adm, 2);
2557                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2558                          apm[0], *((unsigned short *)aqm),
2559                          *((unsigned short *)adm));
2560                 break;
2561         default:        /* Can not happen */
2562                 break;
2563         }
2564
2565         /* recreate the shadow crycb for each vcpu */
2566         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2567         kvm_s390_vcpu_unblock_all(kvm);
2568         mutex_unlock(&kvm->lock);
2569 }
2570 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2571
2572 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2573 {
2574         mutex_lock(&kvm->lock);
2575         kvm_s390_vcpu_block_all(kvm);
2576
2577         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2578                sizeof(kvm->arch.crypto.crycb->apcb0));
2579         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2580                sizeof(kvm->arch.crypto.crycb->apcb1));
2581
2582         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2583         /* recreate the shadow crycb for each vcpu */
2584         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2585         kvm_s390_vcpu_unblock_all(kvm);
2586         mutex_unlock(&kvm->lock);
2587 }
2588 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2589
2590 static u64 kvm_s390_get_initial_cpuid(void)
2591 {
2592         struct cpuid cpuid;
2593
2594         get_cpu_id(&cpuid);
2595         cpuid.version = 0xff;
2596         return *((u64 *) &cpuid);
2597 }
2598
2599 static void kvm_s390_crypto_init(struct kvm *kvm)
2600 {
2601         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2602         kvm_s390_set_crycb_format(kvm);
2603
2604         if (!test_kvm_facility(kvm, 76))
2605                 return;
2606
2607         /* Enable AES/DEA protected key functions by default */
2608         kvm->arch.crypto.aes_kw = 1;
2609         kvm->arch.crypto.dea_kw = 1;
2610         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2611                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2612         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2613                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2614 }
2615
2616 static void sca_dispose(struct kvm *kvm)
2617 {
2618         if (kvm->arch.use_esca)
2619                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2620         else
2621                 free_page((unsigned long)(kvm->arch.sca));
2622         kvm->arch.sca = NULL;
2623 }
2624
2625 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2626 {
2627         gfp_t alloc_flags = GFP_KERNEL;
2628         int i, rc;
2629         char debug_name[16];
2630         static unsigned long sca_offset;
2631
2632         rc = -EINVAL;
2633 #ifdef CONFIG_KVM_S390_UCONTROL
2634         if (type & ~KVM_VM_S390_UCONTROL)
2635                 goto out_err;
2636         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2637                 goto out_err;
2638 #else
2639         if (type)
2640                 goto out_err;
2641 #endif
2642
2643         rc = s390_enable_sie();
2644         if (rc)
2645                 goto out_err;
2646
2647         rc = -ENOMEM;
2648
2649         if (!sclp.has_64bscao)
2650                 alloc_flags |= GFP_DMA;
2651         rwlock_init(&kvm->arch.sca_lock);
2652         /* start with basic SCA */
2653         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2654         if (!kvm->arch.sca)
2655                 goto out_err;
2656         mutex_lock(&kvm_lock);
2657         sca_offset += 16;
2658         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2659                 sca_offset = 0;
2660         kvm->arch.sca = (struct bsca_block *)
2661                         ((char *) kvm->arch.sca + sca_offset);
2662         mutex_unlock(&kvm_lock);
2663
2664         sprintf(debug_name, "kvm-%u", current->pid);
2665
2666         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2667         if (!kvm->arch.dbf)
2668                 goto out_err;
2669
2670         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2671         kvm->arch.sie_page2 =
2672              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2673         if (!kvm->arch.sie_page2)
2674                 goto out_err;
2675
2676         kvm->arch.sie_page2->kvm = kvm;
2677         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2678
2679         for (i = 0; i < kvm_s390_fac_size(); i++) {
2680                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2681                                               (kvm_s390_fac_base[i] |
2682                                                kvm_s390_fac_ext[i]);
2683                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2684                                               kvm_s390_fac_base[i];
2685         }
2686         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2687
2688         /* we are always in czam mode - even on pre z14 machines */
2689         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2690         set_kvm_facility(kvm->arch.model.fac_list, 138);
2691         /* we emulate STHYI in kvm */
2692         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2693         set_kvm_facility(kvm->arch.model.fac_list, 74);
2694         if (MACHINE_HAS_TLB_GUEST) {
2695                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2696                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2697         }
2698
2699         if (css_general_characteristics.aiv && test_facility(65))
2700                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2701
2702         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2703         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2704
2705         kvm_s390_crypto_init(kvm);
2706
2707         mutex_init(&kvm->arch.float_int.ais_lock);
2708         spin_lock_init(&kvm->arch.float_int.lock);
2709         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2710                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2711         init_waitqueue_head(&kvm->arch.ipte_wq);
2712         mutex_init(&kvm->arch.ipte_mutex);
2713
2714         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2715         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2716
2717         if (type & KVM_VM_S390_UCONTROL) {
2718                 kvm->arch.gmap = NULL;
2719                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2720         } else {
2721                 if (sclp.hamax == U64_MAX)
2722                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2723                 else
2724                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2725                                                     sclp.hamax + 1);
2726                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2727                 if (!kvm->arch.gmap)
2728                         goto out_err;
2729                 kvm->arch.gmap->private = kvm;
2730                 kvm->arch.gmap->pfault_enabled = 0;
2731         }
2732
2733         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2734         kvm->arch.use_skf = sclp.has_skey;
2735         spin_lock_init(&kvm->arch.start_stop_lock);
2736         kvm_s390_vsie_init(kvm);
2737         if (use_gisa)
2738                 kvm_s390_gisa_init(kvm);
2739         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2740
2741         return 0;
2742 out_err:
2743         free_page((unsigned long)kvm->arch.sie_page2);
2744         debug_unregister(kvm->arch.dbf);
2745         sca_dispose(kvm);
2746         KVM_EVENT(3, "creation of vm failed: %d", rc);
2747         return rc;
2748 }
2749
2750 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2751 {
2752         u16 rc, rrc;
2753
2754         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2755         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2756         kvm_s390_clear_local_irqs(vcpu);
2757         kvm_clear_async_pf_completion_queue(vcpu);
2758         if (!kvm_is_ucontrol(vcpu->kvm))
2759                 sca_del_vcpu(vcpu);
2760
2761         if (kvm_is_ucontrol(vcpu->kvm))
2762                 gmap_remove(vcpu->arch.gmap);
2763
2764         if (vcpu->kvm->arch.use_cmma)
2765                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2766         /* We can not hold the vcpu mutex here, we are already dying */
2767         if (kvm_s390_pv_cpu_get_handle(vcpu))
2768                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2769         free_page((unsigned long)(vcpu->arch.sie_block));
2770 }
2771
2772 static void kvm_free_vcpus(struct kvm *kvm)
2773 {
2774         unsigned int i;
2775         struct kvm_vcpu *vcpu;
2776
2777         kvm_for_each_vcpu(i, vcpu, kvm)
2778                 kvm_vcpu_destroy(vcpu);
2779
2780         mutex_lock(&kvm->lock);
2781         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2782                 kvm->vcpus[i] = NULL;
2783
2784         atomic_set(&kvm->online_vcpus, 0);
2785         mutex_unlock(&kvm->lock);
2786 }
2787
2788 void kvm_arch_destroy_vm(struct kvm *kvm)
2789 {
2790         u16 rc, rrc;
2791
2792         kvm_free_vcpus(kvm);
2793         sca_dispose(kvm);
2794         kvm_s390_gisa_destroy(kvm);
2795         /*
2796          * We are already at the end of life and kvm->lock is not taken.
2797          * This is ok as the file descriptor is closed by now and nobody
2798          * can mess with the pv state. To avoid lockdep_assert_held from
2799          * complaining we do not use kvm_s390_pv_is_protected.
2800          */
2801         if (kvm_s390_pv_get_handle(kvm))
2802                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2803         debug_unregister(kvm->arch.dbf);
2804         free_page((unsigned long)kvm->arch.sie_page2);
2805         if (!kvm_is_ucontrol(kvm))
2806                 gmap_remove(kvm->arch.gmap);
2807         kvm_s390_destroy_adapters(kvm);
2808         kvm_s390_clear_float_irqs(kvm);
2809         kvm_s390_vsie_destroy(kvm);
2810         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2811 }
2812
2813 /* Section: vcpu related */
2814 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2815 {
2816         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2817         if (!vcpu->arch.gmap)
2818                 return -ENOMEM;
2819         vcpu->arch.gmap->private = vcpu->kvm;
2820
2821         return 0;
2822 }
2823
2824 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2825 {
2826         if (!kvm_s390_use_sca_entries())
2827                 return;
2828         read_lock(&vcpu->kvm->arch.sca_lock);
2829         if (vcpu->kvm->arch.use_esca) {
2830                 struct esca_block *sca = vcpu->kvm->arch.sca;
2831
2832                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2833                 sca->cpu[vcpu->vcpu_id].sda = 0;
2834         } else {
2835                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2836
2837                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2838                 sca->cpu[vcpu->vcpu_id].sda = 0;
2839         }
2840         read_unlock(&vcpu->kvm->arch.sca_lock);
2841 }
2842
2843 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2844 {
2845         if (!kvm_s390_use_sca_entries()) {
2846                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2847
2848                 /* we still need the basic sca for the ipte control */
2849                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2850                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2851                 return;
2852         }
2853         read_lock(&vcpu->kvm->arch.sca_lock);
2854         if (vcpu->kvm->arch.use_esca) {
2855                 struct esca_block *sca = vcpu->kvm->arch.sca;
2856
2857                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2858                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2859                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2860                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2861                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2862         } else {
2863                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2864
2865                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2866                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2867                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2868                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2869         }
2870         read_unlock(&vcpu->kvm->arch.sca_lock);
2871 }
2872
2873 /* Basic SCA to Extended SCA data copy routines */
2874 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2875 {
2876         d->sda = s->sda;
2877         d->sigp_ctrl.c = s->sigp_ctrl.c;
2878         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2879 }
2880
2881 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2882 {
2883         int i;
2884
2885         d->ipte_control = s->ipte_control;
2886         d->mcn[0] = s->mcn;
2887         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2888                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2889 }
2890
2891 static int sca_switch_to_extended(struct kvm *kvm)
2892 {
2893         struct bsca_block *old_sca = kvm->arch.sca;
2894         struct esca_block *new_sca;
2895         struct kvm_vcpu *vcpu;
2896         unsigned int vcpu_idx;
2897         u32 scaol, scaoh;
2898
2899         if (kvm->arch.use_esca)
2900                 return 0;
2901
2902         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2903         if (!new_sca)
2904                 return -ENOMEM;
2905
2906         scaoh = (u32)((u64)(new_sca) >> 32);
2907         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2908
2909         kvm_s390_vcpu_block_all(kvm);
2910         write_lock(&kvm->arch.sca_lock);
2911
2912         sca_copy_b_to_e(new_sca, old_sca);
2913
2914         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2915                 vcpu->arch.sie_block->scaoh = scaoh;
2916                 vcpu->arch.sie_block->scaol = scaol;
2917                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2918         }
2919         kvm->arch.sca = new_sca;
2920         kvm->arch.use_esca = 1;
2921
2922         write_unlock(&kvm->arch.sca_lock);
2923         kvm_s390_vcpu_unblock_all(kvm);
2924
2925         free_page((unsigned long)old_sca);
2926
2927         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2928                  old_sca, kvm->arch.sca);
2929         return 0;
2930 }
2931
2932 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2933 {
2934         int rc;
2935
2936         if (!kvm_s390_use_sca_entries()) {
2937                 if (id < KVM_MAX_VCPUS)
2938                         return true;
2939                 return false;
2940         }
2941         if (id < KVM_S390_BSCA_CPU_SLOTS)
2942                 return true;
2943         if (!sclp.has_esca || !sclp.has_64bscao)
2944                 return false;
2945
2946         mutex_lock(&kvm->lock);
2947         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2948         mutex_unlock(&kvm->lock);
2949
2950         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2951 }
2952
2953 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2954 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2955 {
2956         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2957         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2958         vcpu->arch.cputm_start = get_tod_clock_fast();
2959         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2960 }
2961
2962 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2963 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2964 {
2965         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2966         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2967         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2968         vcpu->arch.cputm_start = 0;
2969         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2970 }
2971
2972 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2973 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2974 {
2975         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2976         vcpu->arch.cputm_enabled = true;
2977         __start_cpu_timer_accounting(vcpu);
2978 }
2979
2980 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2981 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2982 {
2983         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2984         __stop_cpu_timer_accounting(vcpu);
2985         vcpu->arch.cputm_enabled = false;
2986 }
2987
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2994
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3001
3002 /* set the cpu timer - may only be called from the VCPU thread itself */
3003 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3004 {
3005         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3006         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3007         if (vcpu->arch.cputm_enabled)
3008                 vcpu->arch.cputm_start = get_tod_clock_fast();
3009         vcpu->arch.sie_block->cputm = cputm;
3010         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3011         preempt_enable();
3012 }
3013
3014 /* update and get the cpu timer - can also be called from other VCPU threads */
3015 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3016 {
3017         unsigned int seq;
3018         __u64 value;
3019
3020         if (unlikely(!vcpu->arch.cputm_enabled))
3021                 return vcpu->arch.sie_block->cputm;
3022
3023         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3024         do {
3025                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3026                 /*
3027                  * If the writer would ever execute a read in the critical
3028                  * section, e.g. in irq context, we have a deadlock.
3029                  */
3030                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3031                 value = vcpu->arch.sie_block->cputm;
3032                 /* if cputm_start is 0, accounting is being started/stopped */
3033                 if (likely(vcpu->arch.cputm_start))
3034                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3035         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3036         preempt_enable();
3037         return value;
3038 }
3039
3040 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3041 {
3042
3043         gmap_enable(vcpu->arch.enabled_gmap);
3044         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3045         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3046                 __start_cpu_timer_accounting(vcpu);
3047         vcpu->cpu = cpu;
3048 }
3049
3050 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3051 {
3052         vcpu->cpu = -1;
3053         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3054                 __stop_cpu_timer_accounting(vcpu);
3055         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3056         vcpu->arch.enabled_gmap = gmap_get_enabled();
3057         gmap_disable(vcpu->arch.enabled_gmap);
3058
3059 }
3060
3061 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3062 {
3063         mutex_lock(&vcpu->kvm->lock);
3064         preempt_disable();
3065         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3066         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3067         preempt_enable();
3068         mutex_unlock(&vcpu->kvm->lock);
3069         if (!kvm_is_ucontrol(vcpu->kvm)) {
3070                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3071                 sca_add_vcpu(vcpu);
3072         }
3073         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3074                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3075         /* make vcpu_load load the right gmap on the first trigger */
3076         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3077 }
3078
3079 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3080 {
3081         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3082             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3083                 return true;
3084         return false;
3085 }
3086
3087 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3088 {
3089         /* At least one ECC subfunction must be present */
3090         return kvm_has_pckmo_subfunc(kvm, 32) ||
3091                kvm_has_pckmo_subfunc(kvm, 33) ||
3092                kvm_has_pckmo_subfunc(kvm, 34) ||
3093                kvm_has_pckmo_subfunc(kvm, 40) ||
3094                kvm_has_pckmo_subfunc(kvm, 41);
3095
3096 }
3097
3098 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3099 {
3100         /*
3101          * If the AP instructions are not being interpreted and the MSAX3
3102          * facility is not configured for the guest, there is nothing to set up.
3103          */
3104         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3105                 return;
3106
3107         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3108         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3109         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3110         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3111
3112         if (vcpu->kvm->arch.crypto.apie)
3113                 vcpu->arch.sie_block->eca |= ECA_APIE;
3114
3115         /* Set up protected key support */
3116         if (vcpu->kvm->arch.crypto.aes_kw) {
3117                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3118                 /* ecc is also wrapped with AES key */
3119                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3120                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3121         }
3122
3123         if (vcpu->kvm->arch.crypto.dea_kw)
3124                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3125 }
3126
3127 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3128 {
3129         free_page(vcpu->arch.sie_block->cbrlo);
3130         vcpu->arch.sie_block->cbrlo = 0;
3131 }
3132
3133 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3134 {
3135         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3136         if (!vcpu->arch.sie_block->cbrlo)
3137                 return -ENOMEM;
3138         return 0;
3139 }
3140
3141 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3142 {
3143         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3144
3145         vcpu->arch.sie_block->ibc = model->ibc;
3146         if (test_kvm_facility(vcpu->kvm, 7))
3147                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3148 }
3149
3150 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3151 {
3152         int rc = 0;
3153         u16 uvrc, uvrrc;
3154
3155         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3156                                                     CPUSTAT_SM |
3157                                                     CPUSTAT_STOPPED);
3158
3159         if (test_kvm_facility(vcpu->kvm, 78))
3160                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3161         else if (test_kvm_facility(vcpu->kvm, 8))
3162                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3163
3164         kvm_s390_vcpu_setup_model(vcpu);
3165
3166         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3167         if (MACHINE_HAS_ESOP)
3168                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3169         if (test_kvm_facility(vcpu->kvm, 9))
3170                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3171         if (test_kvm_facility(vcpu->kvm, 73))
3172                 vcpu->arch.sie_block->ecb |= ECB_TE;
3173
3174         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3175                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3176         if (test_kvm_facility(vcpu->kvm, 130))
3177                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3178         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3179         if (sclp.has_cei)
3180                 vcpu->arch.sie_block->eca |= ECA_CEI;
3181         if (sclp.has_ib)
3182                 vcpu->arch.sie_block->eca |= ECA_IB;
3183         if (sclp.has_siif)
3184                 vcpu->arch.sie_block->eca |= ECA_SII;
3185         if (sclp.has_sigpif)
3186                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3187         if (test_kvm_facility(vcpu->kvm, 129)) {
3188                 vcpu->arch.sie_block->eca |= ECA_VX;
3189                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3190         }
3191         if (test_kvm_facility(vcpu->kvm, 139))
3192                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3193         if (test_kvm_facility(vcpu->kvm, 156))
3194                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3195         if (vcpu->arch.sie_block->gd) {
3196                 vcpu->arch.sie_block->eca |= ECA_AIV;
3197                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3198                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3199         }
3200         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3201                                         | SDNXC;
3202         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3203
3204         if (sclp.has_kss)
3205                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3206         else
3207                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3208
3209         if (vcpu->kvm->arch.use_cmma) {
3210                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3211                 if (rc)
3212                         return rc;
3213         }
3214         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3215         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3216
3217         vcpu->arch.sie_block->hpid = HPID_KVM;
3218
3219         kvm_s390_vcpu_crypto_setup(vcpu);
3220
3221         mutex_lock(&vcpu->kvm->lock);
3222         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3223                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3224                 if (rc)
3225                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3226         }
3227         mutex_unlock(&vcpu->kvm->lock);
3228
3229         return rc;
3230 }
3231
3232 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3233 {
3234         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3235                 return -EINVAL;
3236         return 0;
3237 }
3238
3239 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3240 {
3241         struct sie_page *sie_page;
3242         int rc;
3243
3244         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3245         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3246         if (!sie_page)
3247                 return -ENOMEM;
3248
3249         vcpu->arch.sie_block = &sie_page->sie_block;
3250         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3251
3252         /* the real guest size will always be smaller than msl */
3253         vcpu->arch.sie_block->mso = 0;
3254         vcpu->arch.sie_block->msl = sclp.hamax;
3255
3256         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3257         spin_lock_init(&vcpu->arch.local_int.lock);
3258         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3259         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3260                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3261         seqcount_init(&vcpu->arch.cputm_seqcount);
3262
3263         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3264         kvm_clear_async_pf_completion_queue(vcpu);
3265         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3266                                     KVM_SYNC_GPRS |
3267                                     KVM_SYNC_ACRS |
3268                                     KVM_SYNC_CRS |
3269                                     KVM_SYNC_ARCH0 |
3270                                     KVM_SYNC_PFAULT;
3271         kvm_s390_set_prefix(vcpu, 0);
3272         if (test_kvm_facility(vcpu->kvm, 64))
3273                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3274         if (test_kvm_facility(vcpu->kvm, 82))
3275                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3276         if (test_kvm_facility(vcpu->kvm, 133))
3277                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3278         if (test_kvm_facility(vcpu->kvm, 156))
3279                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3280         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3281          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3282          */
3283         if (MACHINE_HAS_VX)
3284                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3285         else
3286                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3287
3288         if (kvm_is_ucontrol(vcpu->kvm)) {
3289                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3290                 if (rc)
3291                         goto out_free_sie_block;
3292         }
3293
3294         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3295                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3296         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3297
3298         rc = kvm_s390_vcpu_setup(vcpu);
3299         if (rc)
3300                 goto out_ucontrol_uninit;
3301         return 0;
3302
3303 out_ucontrol_uninit:
3304         if (kvm_is_ucontrol(vcpu->kvm))
3305                 gmap_remove(vcpu->arch.gmap);
3306 out_free_sie_block:
3307         free_page((unsigned long)(vcpu->arch.sie_block));
3308         return rc;
3309 }
3310
/* Report the vcpu as runnable based on kvm_s390_vcpu_has_irq(). */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
3315
3316 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3317 {
3318         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3319 }
3320
3321 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3322 {
3323         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3324         exit_sie(vcpu);
3325 }
3326
3327 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3328 {
3329         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3330 }
3331
3332 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3333 {
3334         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3335         exit_sie(vcpu);
3336 }
3337
3338 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3339 {
3340         return atomic_read(&vcpu->arch.sie_block->prog20) &
3341                (PROG_BLOCK_SIE | PROG_REQUEST);
3342 }
3343
3344 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3345 {
3346         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3347 }
3348
3349 /*
3350  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3351  * If the CPU is not running (e.g. waiting as idle) the function will
3352  * return immediately. */
3353 void exit_sie(struct kvm_vcpu *vcpu)
3354 {
3355         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3356         kvm_s390_vsie_kick(vcpu);
3357         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3358                 cpu_relax();
3359 }
3360
/*
 * Kick a guest cpu out of SIE to process a request synchronously:
 * post request @req for the vcpu, then force it out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);

	kvm_s390_vcpu_request(vcpu);
}
3367
3368 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3369                               unsigned long end)
3370 {
3371         struct kvm *kvm = gmap->private;
3372         struct kvm_vcpu *vcpu;
3373         unsigned long prefix;
3374         int i;
3375
3376         if (gmap_is_shadow(gmap))
3377                 return;
3378         if (start >= 1UL << 31)
3379                 /* We are only interested in prefix pages */
3380                 return;
3381         kvm_for_each_vcpu(i, vcpu, kvm) {
3382                 /* match against both prefix pages */
3383                 prefix = kvm_s390_get_prefix(vcpu);
3384                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3385                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3386                                    start, end);
3387                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3388                 }
3389         }
3390 }
3391
3392 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3393 {
3394         /* do not poll with more than halt_poll_max_steal percent of steal time */
3395         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3396             halt_poll_max_steal) {
3397                 vcpu->stat.halt_no_poll_steal++;
3398                 return true;
3399         }
3400         return false;
3401 }
3402
/*
 * kvm common code refers to this, but never calls it; reaching this
 * function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
3409
3410 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3411                                            struct kvm_one_reg *reg)
3412 {
3413         int r = -EINVAL;
3414
3415         switch (reg->id) {
3416         case KVM_REG_S390_TODPR:
3417                 r = put_user(vcpu->arch.sie_block->todpr,
3418                              (u32 __user *)reg->addr);
3419                 break;
3420         case KVM_REG_S390_EPOCHDIFF:
3421                 r = put_user(vcpu->arch.sie_block->epoch,
3422                              (u64 __user *)reg->addr);
3423                 break;
3424         case KVM_REG_S390_CPU_TIMER:
3425                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3426                              (u64 __user *)reg->addr);
3427                 break;
3428         case KVM_REG_S390_CLOCK_COMP:
3429                 r = put_user(vcpu->arch.sie_block->ckc,
3430                              (u64 __user *)reg->addr);
3431                 break;
3432         case KVM_REG_S390_PFTOKEN:
3433                 r = put_user(vcpu->arch.pfault_token,
3434                              (u64 __user *)reg->addr);
3435                 break;
3436         case KVM_REG_S390_PFCOMPARE:
3437                 r = put_user(vcpu->arch.pfault_compare,
3438                              (u64 __user *)reg->addr);
3439                 break;
3440         case KVM_REG_S390_PFSELECT:
3441                 r = put_user(vcpu->arch.pfault_select,
3442                              (u64 __user *)reg->addr);
3443                 break;
3444         case KVM_REG_S390_PP:
3445                 r = put_user(vcpu->arch.sie_block->pp,
3446                              (u64 __user *)reg->addr);
3447                 break;
3448         case KVM_REG_S390_GBEA:
3449                 r = put_user(vcpu->arch.sie_block->gbea,
3450                              (u64 __user *)reg->addr);
3451                 break;
3452         default:
3453                 break;
3454         }
3455
3456         return r;
3457 }
3458
3459 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3460                                            struct kvm_one_reg *reg)
3461 {
3462         int r = -EINVAL;
3463         __u64 val;
3464
3465         switch (reg->id) {
3466         case KVM_REG_S390_TODPR:
3467                 r = get_user(vcpu->arch.sie_block->todpr,
3468                              (u32 __user *)reg->addr);
3469                 break;
3470         case KVM_REG_S390_EPOCHDIFF:
3471                 r = get_user(vcpu->arch.sie_block->epoch,
3472                              (u64 __user *)reg->addr);
3473                 break;
3474         case KVM_REG_S390_CPU_TIMER:
3475                 r = get_user(val, (u64 __user *)reg->addr);
3476                 if (!r)
3477                         kvm_s390_set_cpu_timer(vcpu, val);
3478                 break;
3479         case KVM_REG_S390_CLOCK_COMP:
3480                 r = get_user(vcpu->arch.sie_block->ckc,
3481                              (u64 __user *)reg->addr);
3482                 break;
3483         case KVM_REG_S390_PFTOKEN:
3484                 r = get_user(vcpu->arch.pfault_token,
3485                              (u64 __user *)reg->addr);
3486                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3487                         kvm_clear_async_pf_completion_queue(vcpu);
3488                 break;
3489         case KVM_REG_S390_PFCOMPARE:
3490                 r = get_user(vcpu->arch.pfault_compare,
3491                              (u64 __user *)reg->addr);
3492                 break;
3493         case KVM_REG_S390_PFSELECT:
3494                 r = get_user(vcpu->arch.pfault_select,
3495                              (u64 __user *)reg->addr);
3496                 break;
3497         case KVM_REG_S390_PP:
3498                 r = get_user(vcpu->arch.sie_block->pp,
3499                              (u64 __user *)reg->addr);
3500                 break;
3501         case KVM_REG_S390_GBEA:
3502                 r = get_user(vcpu->arch.sie_block->gbea,
3503                              (u64 __user *)reg->addr);
3504                 break;
3505         default:
3506                 break;
3507         }
3508
3509         return r;
3510 }
3511
3512 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3513 {
3514         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3515         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3516         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3517
3518         kvm_clear_async_pf_completion_queue(vcpu);
3519         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3520                 kvm_s390_vcpu_stop(vcpu);
3521         kvm_s390_clear_local_irqs(vcpu);
3522 }
3523
3524 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3525 {
3526         /* Initial reset is a superset of the normal reset */
3527         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3528
3529         /*
3530          * This equals initial cpu reset in pop, but we don't switch to ESA.
3531          * We do not only reset the internal data, but also ...
3532          */
3533         vcpu->arch.sie_block->gpsw.mask = 0;
3534         vcpu->arch.sie_block->gpsw.addr = 0;
3535         kvm_s390_set_prefix(vcpu, 0);
3536         kvm_s390_set_cpu_timer(vcpu, 0);
3537         vcpu->arch.sie_block->ckc = 0;
3538         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3539         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3540         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3541
3542         /* ... the data in sync regs */
3543         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3544         vcpu->run->s.regs.ckc = 0;
3545         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3546         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3547         vcpu->run->psw_addr = 0;
3548         vcpu->run->psw_mask = 0;
3549         vcpu->run->s.regs.todpr = 0;
3550         vcpu->run->s.regs.cputm = 0;
3551         vcpu->run->s.regs.ckc = 0;
3552         vcpu->run->s.regs.pp = 0;
3553         vcpu->run->s.regs.gbea = 1;
3554         vcpu->run->s.regs.fpc = 0;
3555         /*
3556          * Do not reset these registers in the protected case, as some of
3557          * them are overlayed and they are not accessible in this case
3558          * anyway.
3559          */
3560         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3561                 vcpu->arch.sie_block->gbea = 1;
3562                 vcpu->arch.sie_block->pp = 0;
3563                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3564                 vcpu->arch.sie_block->todpr = 0;
3565         }
3566 }
3567
3568 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3569 {
3570         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3571
3572         /* Clear reset is a superset of the initial reset */
3573         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3574
3575         memset(&regs->gprs, 0, sizeof(regs->gprs));
3576         memset(&regs->vrs, 0, sizeof(regs->vrs));
3577         memset(&regs->acrs, 0, sizeof(regs->acrs));
3578         memset(&regs->gscb, 0, sizeof(regs->gscb));
3579
3580         regs->etoken = 0;
3581         regs->etoken_extension = 0;
3582 }
3583
3584 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3585 {
3586         vcpu_load(vcpu);
3587         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3588         vcpu_put(vcpu);
3589         return 0;
3590 }
3591
3592 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3593 {
3594         vcpu_load(vcpu);
3595         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3596         vcpu_put(vcpu);
3597         return 0;
3598 }
3599
3600 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3601                                   struct kvm_sregs *sregs)
3602 {
3603         vcpu_load(vcpu);
3604
3605         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3606         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3607
3608         vcpu_put(vcpu);
3609         return 0;
3610 }
3611
3612 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3613                                   struct kvm_sregs *sregs)
3614 {
3615         vcpu_load(vcpu);
3616
3617         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3618         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3619
3620         vcpu_put(vcpu);
3621         return 0;
3622 }
3623
3624 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3625 {
3626         int ret = 0;
3627
3628         vcpu_load(vcpu);
3629
3630         if (test_fp_ctl(fpu->fpc)) {
3631                 ret = -EINVAL;
3632                 goto out;
3633         }
3634         vcpu->run->s.regs.fpc = fpu->fpc;
3635         if (MACHINE_HAS_VX)
3636                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3637                                  (freg_t *) fpu->fprs);
3638         else
3639                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3640
3641 out:
3642         vcpu_put(vcpu);
3643         return ret;
3644 }
3645
3646 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3647 {
3648         vcpu_load(vcpu);
3649
3650         /* make sure we have the latest values */
3651         save_fpu_regs();
3652         if (MACHINE_HAS_VX)
3653                 convert_vx_to_fp((freg_t *) fpu->fprs,
3654                                  (__vector128 *) vcpu->run->s.regs.vrs);
3655         else
3656                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3657         fpu->fpc = vcpu->run->s.regs.fpc;
3658
3659         vcpu_put(vcpu);
3660         return 0;
3661 }
3662
3663 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3664 {
3665         int rc = 0;
3666
3667         if (!is_vcpu_stopped(vcpu))
3668                 rc = -EBUSY;
3669         else {
3670                 vcpu->run->psw_mask = psw.mask;
3671                 vcpu->run->psw_addr = psw.addr;
3672         }
3673         return rc;
3674 }
3675
/* Address translation is not implemented yet; always returns -EINVAL. */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	const int not_implemented = -EINVAL;

	return not_implemented;
}
3681
3682 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3683                               KVM_GUESTDBG_USE_HW_BP | \
3684                               KVM_GUESTDBG_ENABLE)
3685
3686 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3687                                         struct kvm_guest_debug *dbg)
3688 {
3689         int rc = 0;
3690
3691         vcpu_load(vcpu);
3692
3693         vcpu->guest_debug = 0;
3694         kvm_s390_clear_bp_data(vcpu);
3695
3696         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3697                 rc = -EINVAL;
3698                 goto out;
3699         }
3700         if (!sclp.has_gpere) {
3701                 rc = -EINVAL;
3702                 goto out;
3703         }
3704
3705         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3706                 vcpu->guest_debug = dbg->control;
3707                 /* enforce guest PER */
3708                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3709
3710                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3711                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3712         } else {
3713                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3714                 vcpu->arch.guestdbg.last_bp = 0;
3715         }
3716
3717         if (rc) {
3718                 vcpu->guest_debug = 0;
3719                 kvm_s390_clear_bp_data(vcpu);
3720                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3721         }
3722
3723 out:
3724         vcpu_put(vcpu);
3725         return rc;
3726 }
3727
3728 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3729                                     struct kvm_mp_state *mp_state)
3730 {
3731         int ret;
3732
3733         vcpu_load(vcpu);
3734
3735         /* CHECK_STOP and LOAD are not supported yet */
3736         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3737                                       KVM_MP_STATE_OPERATING;
3738
3739         vcpu_put(vcpu);
3740         return ret;
3741 }
3742
3743 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3744                                     struct kvm_mp_state *mp_state)
3745 {
3746         int rc = 0;
3747
3748         vcpu_load(vcpu);
3749
3750         /* user space knows about this interface - let it control the state */
3751         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3752
3753         switch (mp_state->mp_state) {
3754         case KVM_MP_STATE_STOPPED:
3755                 rc = kvm_s390_vcpu_stop(vcpu);
3756                 break;
3757         case KVM_MP_STATE_OPERATING:
3758                 rc = kvm_s390_vcpu_start(vcpu);
3759                 break;
3760         case KVM_MP_STATE_LOAD:
3761                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3762                         rc = -ENXIO;
3763                         break;
3764                 }
3765                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3766                 break;
3767         case KVM_MP_STATE_CHECK_STOP:
3768                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3769         default:
3770                 rc = -ENXIO;
3771         }
3772
3773         vcpu_put(vcpu);
3774         return rc;
3775 }
3776
3777 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3778 {
3779         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3780 }
3781
3782 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3783 {
3784 retry:
3785         kvm_s390_vcpu_request_handled(vcpu);
3786         if (!kvm_request_pending(vcpu))
3787                 return 0;
3788         /*
3789          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3790          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3791          * This ensures that the ipte instruction for this request has
3792          * already finished. We might race against a second unmapper that
3793          * wants to set the blocking bit. Lets just retry the request loop.
3794          */
3795         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3796                 int rc;
3797                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3798                                           kvm_s390_get_prefix(vcpu),
3799                                           PAGE_SIZE * 2, PROT_WRITE);
3800                 if (rc) {
3801                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3802                         return rc;
3803                 }
3804                 goto retry;
3805         }
3806
3807         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3808                 vcpu->arch.sie_block->ihcpu = 0xffff;
3809                 goto retry;
3810         }
3811
3812         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3813                 if (!ibs_enabled(vcpu)) {
3814                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3815                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3816                 }
3817                 goto retry;
3818         }
3819
3820         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3821                 if (ibs_enabled(vcpu)) {
3822                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3823                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3824                 }
3825                 goto retry;
3826         }
3827
3828         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3829                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3830                 goto retry;
3831         }
3832
3833         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3834                 /*
3835                  * Disable CMM virtualization; we will emulate the ESSA
3836                  * instruction manually, in order to provide additional
3837                  * functionalities needed for live migration.
3838                  */
3839                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3840                 goto retry;
3841         }
3842
3843         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3844                 /*
3845                  * Re-enable CMM virtualization if CMMA is available and
3846                  * CMM has been used.
3847                  */
3848                 if ((vcpu->kvm->arch.use_cmma) &&
3849                     (vcpu->kvm->mm->context.uses_cmm))
3850                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3851                 goto retry;
3852         }
3853
3854         /* nothing to do, just clear the request */
3855         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3856         /* we left the vsie handler, nothing to do, just clear the request */
3857         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3858
3859         return 0;
3860 }
3861
3862 void kvm_s390_set_tod_clock(struct kvm *kvm,
3863                             const struct kvm_s390_vm_tod_clock *gtod)
3864 {
3865         struct kvm_vcpu *vcpu;
3866         struct kvm_s390_tod_clock_ext htod;
3867         int i;
3868
3869         mutex_lock(&kvm->lock);
3870         preempt_disable();
3871
3872         get_tod_clock_ext((char *)&htod);
3873
3874         kvm->arch.epoch = gtod->tod - htod.tod;
3875         kvm->arch.epdx = 0;
3876         if (test_kvm_facility(kvm, 139)) {
3877                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3878                 if (kvm->arch.epoch > gtod->tod)
3879                         kvm->arch.epdx -= 1;
3880         }
3881
3882         kvm_s390_vcpu_block_all(kvm);
3883         kvm_for_each_vcpu(i, vcpu, kvm) {
3884                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3885                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3886         }
3887
3888         kvm_s390_vcpu_unblock_all(kvm);
3889         preempt_enable();
3890         mutex_unlock(&kvm->lock);
3891 }
3892
3893 /**
3894  * kvm_arch_fault_in_page - fault-in guest page if necessary
3895  * @vcpu: The corresponding virtual cpu
3896  * @gpa: Guest physical address
3897  * @writable: Whether the page should be writable or not
3898  *
3899  * Make sure that a guest page has been faulted-in on the host.
3900  *
3901  * Return: Zero on success, negative error code otherwise.
3902  */
3903 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3904 {
3905         return gmap_fault(vcpu->arch.gmap, gpa,
3906                           writable ? FAULT_FLAG_WRITE : 0);
3907 }
3908
3909 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3910                                       unsigned long token)
3911 {
3912         struct kvm_s390_interrupt inti;
3913         struct kvm_s390_irq irq;
3914
3915         if (start_token) {
3916                 irq.u.ext.ext_params2 = token;
3917                 irq.type = KVM_S390_INT_PFAULT_INIT;
3918                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3919         } else {
3920                 inti.type = KVM_S390_INT_PFAULT_DONE;
3921                 inti.parm64 = token;
3922                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3923         }
3924 }
3925
3926 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3927                                      struct kvm_async_pf *work)
3928 {
3929         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3930         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3931 }
3932
3933 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3934                                  struct kvm_async_pf *work)
3935 {
3936         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3937         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3938 }
3939
/*
 * Async-pf "page ready" hook. Intentionally empty on s390; neither @vcpu
 * nor @work is used.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
        /* s390 will always inject the page directly */
}
3945
/*
 * Tell the generic async-pf code whether a "page present" notification can
 * be injected for @vcpu right now. Unconditionally true on s390.
 */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
        /*
         * s390 will always inject the page directly,
         * but we still want check_async_completion to cleanup
         */
        return true;
}
3954
3955 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3956 {
3957         hva_t hva;
3958         struct kvm_arch_async_pf arch;
3959         int rc;
3960
3961         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3962                 return 0;
3963         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3964             vcpu->arch.pfault_compare)
3965                 return 0;
3966         if (psw_extint_disabled(vcpu))
3967                 return 0;
3968         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3969                 return 0;
3970         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3971                 return 0;
3972         if (!vcpu->arch.gmap->pfault_enabled)
3973                 return 0;
3974
3975         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3976         hva += current->thread.gmap_addr & ~PAGE_MASK;
3977         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3978                 return 0;
3979
3980         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3981         return rc;
3982 }
3983
/*
 * Prepare @vcpu for the next SIE entry.
 *
 * Return: 0 if the vcpu may enter SIE, otherwise the non-zero return code of
 * kvm_s390_deliver_pending_interrupts() or kvm_s390_handle_requests().
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
        int rc, cpuflags;

        /*
         * On s390 notifications for arriving pages will be delivered directly
         * to the guest but the house keeping for completed pfaults is
         * handled outside the worker.
         */
        kvm_check_async_pf_completion(vcpu);

        /* hand guest gprs 14 and 15 from kvm_run over to the SIE block */
        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

        if (need_resched())
                schedule();

        if (test_cpu_flag(CIF_MCCK_PENDING))
                s390_handle_mcck();

        /* interrupt delivery is skipped for ucontrol VMs */
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
                if (rc)
                        return rc;
        }

        rc = kvm_s390_handle_requests(vcpu);
        if (rc)
                return rc;

        if (guestdbg_enabled(vcpu)) {
                kvm_s390_backup_guest_per_regs(vcpu);
                kvm_s390_patch_guest_per_regs(vcpu);
        }

        /* drop this vcpu's bit from the GISA kicked mask */
        clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);

        /* start with a clean intercept code; it is evaluated after SIE exit */
        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
        trace_kvm_s390_sie_enter(vcpu, cpuflags);

        return 0;
}
4028
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4030 {
4031         struct kvm_s390_pgm_info pgm_info = {
4032                 .code = PGM_ADDRESSING,
4033         };
4034         u8 opcode, ilen;
4035         int rc;
4036
4037         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038         trace_kvm_s390_sie_fault(vcpu);
4039
4040         /*
4041          * We want to inject an addressing exception, which is defined as a
4042          * suppressing or terminating exception. However, since we came here
4043          * by a DAT access exception, the PSW still points to the faulting
4044          * instruction since DAT exceptions are nullifying. So we've got
4045          * to look up the current opcode to get the length of the instruction
4046          * to be able to forward the PSW.
4047          */
4048         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049         ilen = insn_length(opcode);
4050         if (rc < 0) {
4051                 return rc;
4052         } else if (rc) {
4053                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4054                  * Forward by arbitrary ilc, injection will take care of
4055                  * nullification if necessary.
4056                  */
4057                 pgm_info = vcpu->arch.pgm;
4058                 ilen = 4;
4059         }
4060         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061         kvm_s390_forward_psw(vcpu, ilen);
4062         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4063 }
4064
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4066 {
4067         struct mcck_volatile_info *mcck_info;
4068         struct sie_page *sie_page;
4069
4070         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071                    vcpu->arch.sie_block->icptcode);
4072         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4073
4074         if (guestdbg_enabled(vcpu))
4075                 kvm_s390_restore_guest_per_regs(vcpu);
4076
4077         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4079
4080         if (exit_reason == -EINTR) {
4081                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082                 sie_page = container_of(vcpu->arch.sie_block,
4083                                         struct sie_page, sie_block);
4084                 mcck_info = &sie_page->mcck_info;
4085                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086                 return 0;
4087         }
4088
4089         if (vcpu->arch.sie_block->icptcode > 0) {
4090                 int rc = kvm_handle_sie_intercept(vcpu);
4091
4092                 if (rc != -EOPNOTSUPP)
4093                         return rc;
4094                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098                 return -EREMOTE;
4099         } else if (exit_reason != -EFAULT) {
4100                 vcpu->stat.exit_null++;
4101                 return 0;
4102         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4103                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104                 vcpu->run->s390_ucontrol.trans_exc_code =
4105                                                 current->thread.gmap_addr;
4106                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107                 return -EREMOTE;
4108         } else if (current->thread.gmap_pfault) {
4109                 trace_kvm_s390_major_guest_pfault(vcpu);
4110                 current->thread.gmap_pfault = 0;
4111                 if (kvm_arch_setup_async_pf(vcpu))
4112                         return 0;
4113                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4114         }
4115         return vcpu_post_run_fault_in_sie(vcpu);
4116 }
4117
/* PSW mask bits for external, I/O and machine-check interrupts */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Run loop of a vcpu: repeat vcpu_pre_run(), sie64a() and vcpu_post_run()
 * until a signal is pending for the current task, a guest-debug exit is
 * pending, or one of the pre/post steps returns non-zero.
 *
 * Return: the last return code of vcpu_pre_run() or vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;
        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
         * ning the guest), so that memslots (and other stuff) are protected
         */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                /*
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                /* protected vcpus: copy the gprs into the sie_page before entry */
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        memcpy(sie_page->pv_grregs,
                               vcpu->run->s.regs.gprs,
                               sizeof(sie_page->pv_grregs));
                }
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                /* protected vcpus: copy the gprs back out of the sie_page */
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        memcpy(vcpu->run->s.regs.gprs,
                               sie_page->pv_grregs,
                               sizeof(sie_page->pv_grregs));
                        /*
                         * We're not allowed to inject interrupts on intercepts
                         * that leave the guest state in an "in-between" state
                         * where the next SIE entry will do a continuation.
                         * Fence interrupts in our "internal" PSW.
                         */
                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
                        }
                }
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}
4178
/*
 * Transfer the part of the register state from @kvm_run into @vcpu that
 * sync_regs() only handles for vcpus that are not protected.
 *
 * The PSW is always taken over; the other register groups are only copied
 * if the corresponding KVM_SYNC_* bit is set in kvm_run->kvm_dirty_regs.
 */
static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        struct runtime_instr_cb *riccb;
        struct gs_cb *gscb;

        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
                /* an invalid token disables pfault: drop queued completions */
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
        /*
         * If userspace sets the riccb (e.g. after migration) to a valid state,
         * we should enable RI here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
            test_kvm_facility(vcpu->kvm, 64) &&
            riccb->v &&
            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
        }
        /*
         * If userspace sets the gscb (e.g. after migration) to non-zero,
         * we should enable GS here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
            test_kvm_facility(vcpu->kvm, 133) &&
            gscb->gssm &&
            !vcpu->arch.gs_enabled) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
                vcpu->arch.sie_block->ecb |= ECB_GS;
                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
                vcpu->arch.gs_enabled = 1;
        }
        /* mirror kvm_run's bpbc value into the FPF_BPBC bit of the SIE block */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
            test_kvm_facility(vcpu->kvm, 82)) {
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
        }
        if (MACHINE_HAS_GS) {
                /* the whole guarded-storage switch runs with preemption off */
                preempt_disable();
                __ctl_set_bit(2, 4);
                /* remember and save the host's control block, if it has one */
                if (current->thread.gs_cb) {
                        vcpu->arch.host_gscb = current->thread.gs_cb;
                        save_gs_cb(vcpu->arch.host_gscb);
                }
                /* make the guest's control block in kvm_run the current one */
                if (vcpu->arch.gs_enabled) {
                        current->thread.gs_cb = (struct gs_cb *)
                                                &vcpu->run->s.regs.gscb;
                        restore_gs_cb(current->thread.gs_cb);
                }
                preempt_enable();
        }
        /* SIE will load etoken directly from SDNX and therefore kvm_run */
}
4245
/*
 * Transfer register state from @kvm_run into @vcpu before running it and
 * switch the access and floating point/vector registers from host to guest.
 *
 * Register groups are only copied if the corresponding KVM_SYNC_* bit is set
 * in kvm_run->kvm_dirty_regs; the dirty mask is cleared at the end.
 */
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                /* some control register changes require a tlb flush */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
        }
        /* swap access registers: stash the host's, load the guest's */
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        /* save host (userspace) fprs/vrs */
        save_fpu_regs();
        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
        /* point the thread's FPU state at the guest registers in kvm_run */
        if (MACHINE_HAS_VX)
                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
        else
                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;

        /* Sync fmt2 only data */
        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
                sync_regs_fmt2(vcpu, kvm_run);
        } else {
                /*
                 * In several places we have to modify our internal view to
                 * not do things that are disallowed by the ultravisor. For
                 * example we must not inject interrupts after specific exits
                 * (e.g. 112 prefix page not secure). We do this by turning
                 * off the machine check, external and I/O interrupt bits
                 * of our PSW copy. To avoid getting validity intercepts, we
                 * do only accept the condition code from userspace.
                 */
                vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
                vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
                                                   PSW_MASK_CC;
        }

        /* everything userspace marked dirty has been consumed */
        kvm_run->kvm_dirty_regs = 0;
}
4294
4295 static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4296 {
4297         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4298         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4299         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4300         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4301         if (MACHINE_HAS_GS) {
4302                 __ctl_set_bit(2, 4);
4303                 if (vcpu->arch.gs_enabled)
4304                         save_gs_cb(current->thread.gs_cb);
4305                 preempt_disable();
4306                 current->thread.gs_cb = vcpu->arch.host_gscb;
4307                 restore_gs_cb(vcpu->arch.host_gscb);
4308                 preempt_enable();
4309                 if (!vcpu->arch.host_gscb)
4310                         __ctl_clear_bit(2, 4);
4311                 vcpu->arch.host_gscb = NULL;
4312         }
4313         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4314 }
4315
/*
 * Copy the guest register state from @vcpu back into @kvm_run after running
 * it and switch the access and floating point registers back to the host
 * values saved by sync_regs().
 */
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
        /* swap access registers: store the guest's, reload the host's */
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
        /* Save guest register state */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        /* Restore will be done lazily at return */
        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
        /* fmt2-only state is not stored for protected vcpus */
        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
                store_regs_fmt2(vcpu, kvm_run);
}
4338
4339 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4340 {
4341         int rc;
4342
4343         if (kvm_run->immediate_exit)
4344                 return -EINTR;
4345
4346         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4347             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4348                 return -EINVAL;
4349
4350         vcpu_load(vcpu);
4351
4352         if (guestdbg_exit_pending(vcpu)) {
4353                 kvm_s390_prepare_debug_exit(vcpu);
4354                 rc = 0;
4355                 goto out;
4356         }
4357
4358         kvm_sigset_activate(vcpu);
4359
4360         /*
4361          * no need to check the return value of vcpu_start as it can only have
4362          * an error for protvirt, but protvirt means user cpu state
4363          */
4364         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4365                 kvm_s390_vcpu_start(vcpu);
4366         } else if (is_vcpu_stopped(vcpu)) {
4367                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4368                                    vcpu->vcpu_id);
4369                 rc = -EINVAL;
4370                 goto out;
4371         }
4372
4373         sync_regs(vcpu, kvm_run);
4374         enable_cpu_timer_accounting(vcpu);
4375
4376         might_fault();
4377         rc = __vcpu_run(vcpu);
4378
4379         if (signal_pending(current) && !rc) {
4380                 kvm_run->exit_reason = KVM_EXIT_INTR;
4381                 rc = -EINTR;
4382         }
4383
4384         if (guestdbg_exit_pending(vcpu) && !rc)  {
4385                 kvm_s390_prepare_debug_exit(vcpu);
4386                 rc = 0;
4387         }
4388
4389         if (rc == -EREMOTE) {
4390                 /* userspace support is needed, kvm_run has been prepared */
4391                 rc = 0;
4392         }
4393
4394         disable_cpu_timer_accounting(vcpu);
4395         store_regs(vcpu, kvm_run);
4396
4397         kvm_sigset_deactivate(vcpu);
4398
4399         vcpu->stat.exit_userspace++;
4400 out:
4401         vcpu_put(vcpu);
4402         return rc;
4403 }
4404
4405 /*
4406  * store status at address
4407  * we use have two special cases:
4408  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4409  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4410  */
4411 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4412 {
4413         unsigned char archmode = 1;
4414         freg_t fprs[NUM_FPRS];
4415         unsigned int px;
4416         u64 clkcomp, cputm;
4417         int rc;
4418
4419         px = kvm_s390_get_prefix(vcpu);
4420         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4421                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4422                         return -EFAULT;
4423                 gpa = 0;
4424         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4425                 if (write_guest_real(vcpu, 163, &archmode, 1))
4426                         return -EFAULT;
4427                 gpa = px;
4428         } else
4429                 gpa -= __LC_FPREGS_SAVE_AREA;
4430
4431         /* manually convert vector registers if necessary */
4432         if (MACHINE_HAS_VX) {
4433                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4434                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4435                                      fprs, 128);
4436         } else {
4437                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4438                                      vcpu->run->s.regs.fprs, 128);
4439         }
4440         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4441                               vcpu->run->s.regs.gprs, 128);
4442         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4443                               &vcpu->arch.sie_block->gpsw, 16);
4444         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4445                               &px, 4);
4446         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4447                               &vcpu->run->s.regs.fpc, 4);
4448         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4449                               &vcpu->arch.sie_block->todpr, 4);
4450         cputm = kvm_s390_get_cpu_timer(vcpu);
4451         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4452                               &cputm, 8);
4453         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4454         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4455                               &clkcomp, 8);
4456         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4457                               &vcpu->run->s.regs.acrs, 64);
4458         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4459                               &vcpu->arch.sie_block->gcr, 128);
4460         return rc ? -EFAULT : 0;
4461 }
4462
4463 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4464 {
4465         /*
4466          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4467          * switch in the run ioctl. Let's update our copies before we save
4468          * it into the save area
4469          */
4470         save_fpu_regs();
4471         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4472         save_access_regs(vcpu->run->s.regs.acrs);
4473
4474         return kvm_s390_store_status_unloaded(vcpu, addr);
4475 }
4476
/*
 * Request that IBS gets disabled on @vcpu.
 *
 * The result of kvm_check_request() is deliberately ignored: it is only
 * called to consume an ENABLE_IBS request that may still be outstanding,
 * so that it cannot take effect after the DISABLE_IBS request made here.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4482
4483 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4484 {
4485         unsigned int i;
4486         struct kvm_vcpu *vcpu;
4487
4488         kvm_for_each_vcpu(i, vcpu, kvm) {
4489                 __disable_ibs_on_vcpu(vcpu);
4490         }
4491 }
4492
4493 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4494 {
4495         if (!sclp.has_ibs)
4496                 return;
4497         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4498         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4499 }
4500
4501 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4502 {
4503         int i, online_vcpus, r = 0, started_vcpus = 0;
4504
4505         if (!is_vcpu_stopped(vcpu))
4506                 return 0;
4507
4508         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4509         /* Only one cpu at a time may enter/leave the STOPPED state. */
4510         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4511         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4512
4513         /* Let's tell the UV that we want to change into the operating state */
4514         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4515                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4516                 if (r) {
4517                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4518                         return r;
4519                 }
4520         }
4521
4522         for (i = 0; i < online_vcpus; i++) {
4523                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4524                         started_vcpus++;
4525         }
4526
4527         if (started_vcpus == 0) {
4528                 /* we're the only active VCPU -> speed it up */
4529                 __enable_ibs_on_vcpu(vcpu);
4530         } else if (started_vcpus == 1) {
4531                 /*
4532                  * As we are starting a second VCPU, we have to disable
4533                  * the IBS facility on all VCPUs to remove potentially
4534                  * oustanding ENABLE requests.
4535                  */
4536                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4537         }
4538
4539         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4540         /*
4541          * The real PSW might have changed due to a RESTART interpreted by the
4542          * ultravisor. We block all interrupts and let the next sie exit
4543          * refresh our view.
4544          */
4545         if (kvm_s390_pv_cpu_is_protected(vcpu))
4546                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4547         /*
4548          * Another VCPU might have used IBS while we were offline.
4549          * Let's play safe and flush the VCPU at startup.
4550          */
4551         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4552         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4553         return 0;
4554 }
4555
4556 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4557 {
4558         int i, online_vcpus, r = 0, started_vcpus = 0;
4559         struct kvm_vcpu *started_vcpu = NULL;
4560
4561         if (is_vcpu_stopped(vcpu))
4562                 return 0;
4563
4564         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4565         /* Only one cpu at a time may enter/leave the STOPPED state. */
4566         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4567         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4568
4569         /* Let's tell the UV that we want to change into the stopped state */
4570         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4571                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4572                 if (r) {
4573                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4574                         return r;
4575                 }
4576         }
4577
4578         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4579         kvm_s390_clear_stop_irq(vcpu);
4580
4581         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4582         __disable_ibs_on_vcpu(vcpu);
4583
4584         for (i = 0; i < online_vcpus; i++) {
4585                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4586                         started_vcpus++;
4587                         started_vcpu = vcpu->kvm->vcpus[i];
4588                 }
4589         }
4590
4591         if (started_vcpus == 1) {
4592                 /*
4593                  * As we only have one VCPU left, we want to enable the
4594                  * IBS facility for that VCPU to speed it up.
4595                  */
4596                 __enable_ibs_on_vcpu(started_vcpu);
4597         }
4598
4599         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4600         return 0;
4601 }
4602
4603 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4604                                      struct kvm_enable_cap *cap)
4605 {
4606         int r;
4607
4608         if (cap->flags)
4609                 return -EINVAL;
4610
4611         switch (cap->cap) {
4612         case KVM_CAP_S390_CSS_SUPPORT:
4613                 if (!vcpu->kvm->arch.css_support) {
4614                         vcpu->kvm->arch.css_support = 1;
4615                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4616                         trace_kvm_s390_enable_css(vcpu->kvm);
4617                 }
4618                 r = 0;
4619                 break;
4620         default:
4621                 r = -EINVAL;
4622                 break;
4623         }
4624         return r;
4625 }
4626
4627 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4628                                    struct kvm_s390_mem_op *mop)
4629 {
4630         void __user *uaddr = (void __user *)mop->buf;
4631         int r = 0;
4632
4633         if (mop->flags || !mop->size)
4634                 return -EINVAL;
4635         if (mop->size + mop->sida_offset < mop->size)
4636                 return -EINVAL;
4637         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4638                 return -E2BIG;
4639
4640         switch (mop->op) {
4641         case KVM_S390_MEMOP_SIDA_READ:
4642                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4643                                  mop->sida_offset), mop->size))
4644                         r = -EFAULT;
4645
4646                 break;
4647         case KVM_S390_MEMOP_SIDA_WRITE:
4648                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4649                                    mop->sida_offset), uaddr, mop->size))
4650                         r = -EFAULT;
4651                 break;
4652         }
4653         return r;
4654 }
4655 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4656                                   struct kvm_s390_mem_op *mop)
4657 {
4658         void __user *uaddr = (void __user *)mop->buf;
4659         void *tmpbuf = NULL;
4660         int r = 0;
4661         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4662                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4663
4664         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4665                 return -EINVAL;
4666
4667         if (mop->size > MEM_OP_MAX_SIZE)
4668                 return -E2BIG;
4669
4670         if (kvm_s390_pv_cpu_is_protected(vcpu))
4671                 return -EINVAL;
4672
4673         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4674                 tmpbuf = vmalloc(mop->size);
4675                 if (!tmpbuf)
4676                         return -ENOMEM;
4677         }
4678
4679         switch (mop->op) {
4680         case KVM_S390_MEMOP_LOGICAL_READ:
4681                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4682                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4683                                             mop->size, GACC_FETCH);
4684                         break;
4685                 }
4686                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4687                 if (r == 0) {
4688                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4689                                 r = -EFAULT;
4690                 }
4691                 break;
4692         case KVM_S390_MEMOP_LOGICAL_WRITE:
4693                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4694                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4695                                             mop->size, GACC_STORE);
4696                         break;
4697                 }
4698                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4699                         r = -EFAULT;
4700                         break;
4701                 }
4702                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4703                 break;
4704         }
4705
4706         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4707                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4708
4709         vfree(tmpbuf);
4710         return r;
4711 }
4712
4713 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4714                                       struct kvm_s390_mem_op *mop)
4715 {
4716         int r, srcu_idx;
4717
4718         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4719
4720         switch (mop->op) {
4721         case KVM_S390_MEMOP_LOGICAL_READ:
4722         case KVM_S390_MEMOP_LOGICAL_WRITE:
4723                 r = kvm_s390_guest_mem_op(vcpu, mop);
4724                 break;
4725         case KVM_S390_MEMOP_SIDA_READ:
4726         case KVM_S390_MEMOP_SIDA_WRITE:
4727                 /* we are locked against sida going away by the vcpu->mutex */
4728                 r = kvm_s390_guest_sida_op(vcpu, mop);
4729                 break;
4730         default:
4731                 r = -EINVAL;
4732         }
4733
4734         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4735         return r;
4736 }
4737
4738 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4739                                unsigned int ioctl, unsigned long arg)
4740 {
4741         struct kvm_vcpu *vcpu = filp->private_data;
4742         void __user *argp = (void __user *)arg;
4743
4744         switch (ioctl) {
4745         case KVM_S390_IRQ: {
4746                 struct kvm_s390_irq s390irq;
4747
4748                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4749                         return -EFAULT;
4750                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4751         }
4752         case KVM_S390_INTERRUPT: {
4753                 struct kvm_s390_interrupt s390int;
4754                 struct kvm_s390_irq s390irq = {};
4755
4756                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4757                         return -EFAULT;
4758                 if (s390int_to_s390irq(&s390int, &s390irq))
4759                         return -EINVAL;
4760                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4761         }
4762         }
4763         return -ENOIOCTLCMD;
4764 }
4765
4766 long kvm_arch_vcpu_ioctl(struct file *filp,
4767                          unsigned int ioctl, unsigned long arg)
4768 {
4769         struct kvm_vcpu *vcpu = filp->private_data;
4770         void __user *argp = (void __user *)arg;
4771         int idx;
4772         long r;
4773         u16 rc, rrc;
4774
4775         vcpu_load(vcpu);
4776
4777         switch (ioctl) {
4778         case KVM_S390_STORE_STATUS:
4779                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4780                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4781                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4782                 break;
4783         case KVM_S390_SET_INITIAL_PSW: {
4784                 psw_t psw;
4785
4786                 r = -EFAULT;
4787                 if (copy_from_user(&psw, argp, sizeof(psw)))
4788                         break;
4789                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4790                 break;
4791         }
4792         case KVM_S390_CLEAR_RESET:
4793                 r = 0;
4794                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4795                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4796                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4797                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4798                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4799                                    rc, rrc);
4800                 }
4801                 break;
4802         case KVM_S390_INITIAL_RESET:
4803                 r = 0;
4804                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4805                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4806                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4807                                           UVC_CMD_CPU_RESET_INITIAL,
4808                                           &rc, &rrc);
4809                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4810                                    rc, rrc);
4811                 }
4812                 break;
4813         case KVM_S390_NORMAL_RESET:
4814                 r = 0;
4815                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4816                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4817                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4818                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4819                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4820                                    rc, rrc);
4821                 }
4822                 break;
4823         case KVM_SET_ONE_REG:
4824         case KVM_GET_ONE_REG: {
4825                 struct kvm_one_reg reg;
4826                 r = -EINVAL;
4827                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4828                         break;
4829                 r = -EFAULT;
4830                 if (copy_from_user(&reg, argp, sizeof(reg)))
4831                         break;
4832                 if (ioctl == KVM_SET_ONE_REG)
4833                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4834                 else
4835                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4836                 break;
4837         }
4838 #ifdef CONFIG_KVM_S390_UCONTROL
4839         case KVM_S390_UCAS_MAP: {
4840                 struct kvm_s390_ucas_mapping ucasmap;
4841
4842                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4843                         r = -EFAULT;
4844                         break;
4845                 }
4846
4847                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4848                         r = -EINVAL;
4849                         break;
4850                 }
4851
4852                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4853                                      ucasmap.vcpu_addr, ucasmap.length);
4854                 break;
4855         }
4856         case KVM_S390_UCAS_UNMAP: {
4857                 struct kvm_s390_ucas_mapping ucasmap;
4858
4859                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4860                         r = -EFAULT;
4861                         break;
4862                 }
4863
4864                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4865                         r = -EINVAL;
4866                         break;
4867                 }
4868
4869                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4870                         ucasmap.length);
4871                 break;
4872         }
4873 #endif
4874         case KVM_S390_VCPU_FAULT: {
4875                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4876                 break;
4877         }
4878         case KVM_ENABLE_CAP:
4879         {
4880                 struct kvm_enable_cap cap;
4881                 r = -EFAULT;
4882                 if (copy_from_user(&cap, argp, sizeof(cap)))
4883                         break;
4884                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4885                 break;
4886         }
4887         case KVM_S390_MEM_OP: {
4888                 struct kvm_s390_mem_op mem_op;
4889
4890                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4891                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4892                 else
4893                         r = -EFAULT;
4894                 break;
4895         }
4896         case KVM_S390_SET_IRQ_STATE: {
4897                 struct kvm_s390_irq_state irq_state;
4898
4899                 r = -EFAULT;
4900                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4901                         break;
4902                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4903                     irq_state.len == 0 ||
4904                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4905                         r = -EINVAL;
4906                         break;
4907                 }
4908                 /* do not use irq_state.flags, it will break old QEMUs */
4909                 r = kvm_s390_set_irq_state(vcpu,
4910                                            (void __user *) irq_state.buf,
4911                                            irq_state.len);
4912                 break;
4913         }
4914         case KVM_S390_GET_IRQ_STATE: {
4915                 struct kvm_s390_irq_state irq_state;
4916
4917                 r = -EFAULT;
4918                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4919                         break;
4920                 if (irq_state.len == 0) {
4921                         r = -EINVAL;
4922                         break;
4923                 }
4924                 /* do not use irq_state.flags, it will break old QEMUs */
4925                 r = kvm_s390_get_irq_state(vcpu,
4926                                            (__u8 __user *)  irq_state.buf,
4927                                            irq_state.len);
4928                 break;
4929         }
4930         default:
4931                 r = -ENOTTY;
4932         }
4933
4934         vcpu_put(vcpu);
4935         return r;
4936 }
4937
4938 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4939 {
4940 #ifdef CONFIG_KVM_S390_UCONTROL
4941         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4942                  && (kvm_is_ucontrol(vcpu->kvm))) {
4943                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4944                 get_page(vmf->page);
4945                 return 0;
4946         }
4947 #endif
4948         return VM_FAULT_SIGBUS;
4949 }
4950
4951 /* Section: memory related */
4952 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4953                                    struct kvm_memory_slot *memslot,
4954                                    const struct kvm_userspace_memory_region *mem,
4955                                    enum kvm_mr_change change)
4956 {
4957         /* A few sanity checks. We can have memory slots which have to be
4958            located/ended at a segment boundary (1MB). The memory in userland is
4959            ok to be fragmented into various different vmas. It is okay to mmap()
4960            and munmap() stuff in this slot after doing this call at any time */
4961
4962         if (mem->userspace_addr & 0xffffful)
4963                 return -EINVAL;
4964
4965         if (mem->memory_size & 0xffffful)
4966                 return -EINVAL;
4967
4968         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4969                 return -EINVAL;
4970
4971         /* When we are protected, we should not change the memory slots */
4972         if (kvm_s390_pv_get_handle(kvm))
4973                 return -EINVAL;
4974         return 0;
4975 }
4976
4977 void kvm_arch_commit_memory_region(struct kvm *kvm,
4978                                 const struct kvm_userspace_memory_region *mem,
4979                                 struct kvm_memory_slot *old,
4980                                 const struct kvm_memory_slot *new,
4981                                 enum kvm_mr_change change)
4982 {
4983         int rc = 0;
4984
4985         switch (change) {
4986         case KVM_MR_DELETE:
4987                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4988                                         old->npages * PAGE_SIZE);
4989                 break;
4990         case KVM_MR_MOVE:
4991                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4992                                         old->npages * PAGE_SIZE);
4993                 if (rc)
4994                         break;
4995                 fallthrough;
4996         case KVM_MR_CREATE:
4997                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4998                                       mem->guest_phys_addr, mem->memory_size);
4999                 break;
5000         case KVM_MR_FLAGS_ONLY:
5001                 break;
5002         default:
5003                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5004         }
5005         if (rc)
5006                 pr_warn("failed to commit memory region\n");
5007         return;
5008 }
5009
5010 static inline unsigned long nonhyp_mask(int i)
5011 {
5012         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5013
5014         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5015 }
5016
5017 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5018 {
5019         vcpu->valid_wakeup = false;
5020 }
5021
5022 static int __init kvm_s390_init(void)
5023 {
5024         int i;
5025
5026         if (!sclp.has_sief2) {
5027                 pr_info("SIE is not available\n");
5028                 return -ENODEV;
5029         }
5030
5031         if (nested && hpage) {
5032                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5033                 return -EINVAL;
5034         }
5035
5036         for (i = 0; i < 16; i++)
5037                 kvm_s390_fac_base[i] |=
5038                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5039
5040         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5041 }
5042
5043 static void __exit kvm_s390_exit(void)
5044 {
5045         kvm_exit();
5046 }
5047
/* Register kvm_s390_init()/kvm_s390_exit() as the module load/unload hooks. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
/* <linux/miscdevice.h> is included here for MODULE_ALIAS_MISCDEV(). */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");