Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * hosting IBM Z kernel virtual machines (s390x)
0004  *
0005  * Copyright IBM Corp. 2008, 2020
0006  *
0007  *    Author(s): Carsten Otte <cotte@de.ibm.com>
0008  *               Christian Borntraeger <borntraeger@de.ibm.com>
0009  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
0010  *               Jason J. Herne <jjherne@us.ibm.com>
0011  */
0012 
0013 #define KMSG_COMPONENT "kvm-s390"
0014 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
0015 
0016 #include <linux/compiler.h>
0017 #include <linux/err.h>
0018 #include <linux/fs.h>
0019 #include <linux/hrtimer.h>
0020 #include <linux/init.h>
0021 #include <linux/kvm.h>
0022 #include <linux/kvm_host.h>
0023 #include <linux/mman.h>
0024 #include <linux/module.h>
0025 #include <linux/moduleparam.h>
0026 #include <linux/random.h>
0027 #include <linux/slab.h>
0028 #include <linux/timer.h>
0029 #include <linux/vmalloc.h>
0030 #include <linux/bitmap.h>
0031 #include <linux/sched/signal.h>
0032 #include <linux/string.h>
0033 #include <linux/pgtable.h>
0034 #include <linux/mmu_notifier.h>
0035 
0036 #include <asm/asm-offsets.h>
0037 #include <asm/lowcore.h>
0038 #include <asm/stp.h>
0039 #include <asm/gmap.h>
0040 #include <asm/nmi.h>
0041 #include <asm/switch_to.h>
0042 #include <asm/isc.h>
0043 #include <asm/sclp.h>
0044 #include <asm/cpacf.h>
0045 #include <asm/timex.h>
0046 #include <asm/ap.h>
0047 #include <asm/uv.h>
0048 #include <asm/fpu/api.h>
0049 #include "kvm-s390.h"
0050 #include "gaccess.h"
0051 #include "pci.h"
0052 
0053 #define CREATE_TRACE_POINTS
0054 #include "trace.h"
0055 #include "trace-s390.h"
0056 
0057 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
0058 #define LOCAL_IRQS 32
0059 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
0060                (KVM_MAX_VCPUS + LOCAL_IRQS))
0061 
0062 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
0063     KVM_GENERIC_VM_STATS(),
0064     STATS_DESC_COUNTER(VM, inject_io),
0065     STATS_DESC_COUNTER(VM, inject_float_mchk),
0066     STATS_DESC_COUNTER(VM, inject_pfault_done),
0067     STATS_DESC_COUNTER(VM, inject_service_signal),
0068     STATS_DESC_COUNTER(VM, inject_virtio),
0069     STATS_DESC_COUNTER(VM, aen_forward)
0070 };
0071 
0072 const struct kvm_stats_header kvm_vm_stats_header = {
0073     .name_size = KVM_STATS_NAME_SIZE,
0074     .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
0075     .id_offset = sizeof(struct kvm_stats_header),
0076     .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
0077     .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
0078                sizeof(kvm_vm_stats_desc),
0079 };
0080 
0081 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
0082     KVM_GENERIC_VCPU_STATS(),
0083     STATS_DESC_COUNTER(VCPU, exit_userspace),
0084     STATS_DESC_COUNTER(VCPU, exit_null),
0085     STATS_DESC_COUNTER(VCPU, exit_external_request),
0086     STATS_DESC_COUNTER(VCPU, exit_io_request),
0087     STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
0088     STATS_DESC_COUNTER(VCPU, exit_stop_request),
0089     STATS_DESC_COUNTER(VCPU, exit_validity),
0090     STATS_DESC_COUNTER(VCPU, exit_instruction),
0091     STATS_DESC_COUNTER(VCPU, exit_pei),
0092     STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
0093     STATS_DESC_COUNTER(VCPU, instruction_lctl),
0094     STATS_DESC_COUNTER(VCPU, instruction_lctlg),
0095     STATS_DESC_COUNTER(VCPU, instruction_stctl),
0096     STATS_DESC_COUNTER(VCPU, instruction_stctg),
0097     STATS_DESC_COUNTER(VCPU, exit_program_interruption),
0098     STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
0099     STATS_DESC_COUNTER(VCPU, exit_operation_exception),
0100     STATS_DESC_COUNTER(VCPU, deliver_ckc),
0101     STATS_DESC_COUNTER(VCPU, deliver_cputm),
0102     STATS_DESC_COUNTER(VCPU, deliver_external_call),
0103     STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
0104     STATS_DESC_COUNTER(VCPU, deliver_service_signal),
0105     STATS_DESC_COUNTER(VCPU, deliver_virtio),
0106     STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
0107     STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
0108     STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
0109     STATS_DESC_COUNTER(VCPU, deliver_program),
0110     STATS_DESC_COUNTER(VCPU, deliver_io),
0111     STATS_DESC_COUNTER(VCPU, deliver_machine_check),
0112     STATS_DESC_COUNTER(VCPU, exit_wait_state),
0113     STATS_DESC_COUNTER(VCPU, inject_ckc),
0114     STATS_DESC_COUNTER(VCPU, inject_cputm),
0115     STATS_DESC_COUNTER(VCPU, inject_external_call),
0116     STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
0117     STATS_DESC_COUNTER(VCPU, inject_mchk),
0118     STATS_DESC_COUNTER(VCPU, inject_pfault_init),
0119     STATS_DESC_COUNTER(VCPU, inject_program),
0120     STATS_DESC_COUNTER(VCPU, inject_restart),
0121     STATS_DESC_COUNTER(VCPU, inject_set_prefix),
0122     STATS_DESC_COUNTER(VCPU, inject_stop_signal),
0123     STATS_DESC_COUNTER(VCPU, instruction_epsw),
0124     STATS_DESC_COUNTER(VCPU, instruction_gs),
0125     STATS_DESC_COUNTER(VCPU, instruction_io_other),
0126     STATS_DESC_COUNTER(VCPU, instruction_lpsw),
0127     STATS_DESC_COUNTER(VCPU, instruction_lpswe),
0128     STATS_DESC_COUNTER(VCPU, instruction_pfmf),
0129     STATS_DESC_COUNTER(VCPU, instruction_ptff),
0130     STATS_DESC_COUNTER(VCPU, instruction_sck),
0131     STATS_DESC_COUNTER(VCPU, instruction_sckpf),
0132     STATS_DESC_COUNTER(VCPU, instruction_stidp),
0133     STATS_DESC_COUNTER(VCPU, instruction_spx),
0134     STATS_DESC_COUNTER(VCPU, instruction_stpx),
0135     STATS_DESC_COUNTER(VCPU, instruction_stap),
0136     STATS_DESC_COUNTER(VCPU, instruction_iske),
0137     STATS_DESC_COUNTER(VCPU, instruction_ri),
0138     STATS_DESC_COUNTER(VCPU, instruction_rrbe),
0139     STATS_DESC_COUNTER(VCPU, instruction_sske),
0140     STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
0141     STATS_DESC_COUNTER(VCPU, instruction_stsi),
0142     STATS_DESC_COUNTER(VCPU, instruction_stfl),
0143     STATS_DESC_COUNTER(VCPU, instruction_tb),
0144     STATS_DESC_COUNTER(VCPU, instruction_tpi),
0145     STATS_DESC_COUNTER(VCPU, instruction_tprot),
0146     STATS_DESC_COUNTER(VCPU, instruction_tsch),
0147     STATS_DESC_COUNTER(VCPU, instruction_sie),
0148     STATS_DESC_COUNTER(VCPU, instruction_essa),
0149     STATS_DESC_COUNTER(VCPU, instruction_sthyi),
0150     STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
0151     STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
0152     STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
0153     STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
0154     STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
0155     STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
0156     STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
0157     STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
0158     STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
0159     STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
0160     STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
0161     STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
0162     STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
0163     STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
0164     STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
0165     STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
0166     STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
0167     STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
0168     STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
0169     STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
0170     STATS_DESC_COUNTER(VCPU, diag_9c_forward),
0171     STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
0172     STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
0173     STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
0174     STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
0175     STATS_DESC_COUNTER(VCPU, pfault_sync)
0176 };
0177 
0178 const struct kvm_stats_header kvm_vcpu_stats_header = {
0179     .name_size = KVM_STATS_NAME_SIZE,
0180     .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
0181     .id_offset = sizeof(struct kvm_stats_header),
0182     .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
0183     .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
0184                sizeof(kvm_vcpu_stats_desc),
0185 };
0186 
0187 /* allow nested virtualization in KVM (if enabled by user space) */
0188 static int nested;
0189 module_param(nested, int, S_IRUGO);
0190 MODULE_PARM_DESC(nested, "Nested virtualization support");
0191 
0192 /* allow 1m huge page guest backing, if !nested */
0193 static int hpage;
0194 module_param(hpage, int, 0444);
0195 MODULE_PARM_DESC(hpage, "1m huge page backing support");
0196 
0197 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
0198 static u8 halt_poll_max_steal = 10;
0199 module_param(halt_poll_max_steal, byte, 0644);
0200 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
0201 
0202 /* if set to true, the GISA will be initialized and used if available */
0203 static bool use_gisa  = true;
0204 module_param(use_gisa, bool, 0644);
0205 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
0206 
0207 /* maximum diag9c forwarding per second */
0208 unsigned int diag9c_forwarding_hz;
0209 module_param(diag9c_forwarding_hz, uint, 0644);
0210 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
0211 
0212 /*
0213  * For now we handle at most 16 double words as this is what the s390 base
0214  * kernel handles and stores in the prefix page. If we ever need to go beyond
0215  * this, this requires changes to code, but the external uapi can stay.
0216  */
0217 #define SIZE_INTERNAL 16
0218 
0219 /*
0220  * Base feature mask that defines default mask for facilities. Consists of the
0221  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
0222  */
0223 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
0224 /*
0225  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
0226  * and defines the facilities that can be enabled via a cpu model.
0227  */
0228 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
0229 
0230 static unsigned long kvm_s390_fac_size(void)
0231 {
0232     BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
0233     BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
0234     BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
0235         sizeof(stfle_fac_list));
0236 
0237     return SIZE_INTERNAL;
0238 }
0239 
0240 /* available cpu features supported by kvm */
0241 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
0242 /* available subfunctions indicated via query / "test bit" */
0243 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
0244 
0245 static struct gmap_notifier gmap_notifier;
0246 static struct gmap_notifier vsie_gmap_notifier;
0247 debug_info_t *kvm_s390_dbf;
0248 debug_info_t *kvm_s390_dbf_uv;
0249 
0250 /* Section: not file related */
0251 int kvm_arch_hardware_enable(void)
0252 {
0253     /* every s390 is virtualization enabled ;-) */
0254     return 0;
0255 }
0256 
0257 int kvm_arch_check_processor_compat(void *opaque)
0258 {
0259     return 0;
0260 }
0261 
0262 /* forward declarations */
0263 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
0264                   unsigned long end);
0265 static int sca_switch_to_extended(struct kvm *kvm);
0266 
0267 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
0268 {
0269     u8 delta_idx = 0;
0270 
0271     /*
0272      * The TOD jumps by delta, we have to compensate this by adding
0273      * -delta to the epoch.
0274      */
0275     delta = -delta;
0276 
0277     /* sign-extension - we're adding to signed values below */
0278     if ((s64)delta < 0)
0279         delta_idx = -1;
0280 
0281     scb->epoch += delta;
0282     if (scb->ecd & ECD_MEF) {
0283         scb->epdx += delta_idx;
0284         if (scb->epoch < delta)
0285             scb->epdx += 1;
0286     }
0287 }
0288 
0289 /*
0290  * This callback is executed during stop_machine(). All CPUs are therefore
0291  * temporarily stopped. In order not to change guest behavior, we have to
0292  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
0293  * so a CPU won't be stopped while calculating with the epoch.
0294  */
0295 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
0296               void *v)
0297 {
0298     struct kvm *kvm;
0299     struct kvm_vcpu *vcpu;
0300     unsigned long i;
0301     unsigned long long *delta = v;
0302 
0303     list_for_each_entry(kvm, &vm_list, vm_list) {
0304         kvm_for_each_vcpu(i, vcpu, kvm) {
0305             kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
0306             if (i == 0) {
0307                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
0308                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
0309             }
0310             if (vcpu->arch.cputm_enabled)
0311                 vcpu->arch.cputm_start += *delta;
0312             if (vcpu->arch.vsie_block)
0313                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
0314                            *delta);
0315         }
0316     }
0317     return NOTIFY_OK;
0318 }
0319 
0320 static struct notifier_block kvm_clock_notifier = {
0321     .notifier_call = kvm_clock_sync,
0322 };
0323 
0324 int kvm_arch_hardware_setup(void *opaque)
0325 {
0326     gmap_notifier.notifier_call = kvm_gmap_notifier;
0327     gmap_register_pte_notifier(&gmap_notifier);
0328     vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
0329     gmap_register_pte_notifier(&vsie_gmap_notifier);
0330     atomic_notifier_chain_register(&s390_epoch_delta_notifier,
0331                        &kvm_clock_notifier);
0332     return 0;
0333 }
0334 
0335 void kvm_arch_hardware_unsetup(void)
0336 {
0337     gmap_unregister_pte_notifier(&gmap_notifier);
0338     gmap_unregister_pte_notifier(&vsie_gmap_notifier);
0339     atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
0340                      &kvm_clock_notifier);
0341 }
0342 
0343 static void allow_cpu_feat(unsigned long nr)
0344 {
0345     set_bit_inv(nr, kvm_s390_available_cpu_feat);
0346 }
0347 
0348 static inline int plo_test_bit(unsigned char nr)
0349 {
0350     unsigned long function = (unsigned long)nr | 0x100;
0351     int cc;
0352 
0353     asm volatile(
0354         "   lgr 0,%[function]\n"
0355         /* Parameter registers are ignored for "test bit" */
0356         "   plo 0,0,0,0(0)\n"
0357         "   ipm %0\n"
0358         "   srl %0,28\n"
0359         : "=d" (cc)
0360         : [function] "d" (function)
0361         : "cc", "0");
0362     return cc == 0;
0363 }
0364 
0365 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
0366 {
0367     asm volatile(
0368         "   lghi    0,0\n"
0369         "   lgr 1,%[query]\n"
0370         /* Parameter registers are ignored */
0371         "   .insn   rrf,%[opc] << 16,2,4,6,0\n"
0372         :
0373         : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
0374         : "cc", "memory", "0", "1");
0375 }
0376 
0377 #define INSN_SORTL 0xb938
0378 #define INSN_DFLTCC 0xb939
0379 
0380 static void kvm_s390_cpu_feat_init(void)
0381 {
0382     int i;
0383 
0384     for (i = 0; i < 256; ++i) {
0385         if (plo_test_bit(i))
0386             kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
0387     }
0388 
0389     if (test_facility(28)) /* TOD-clock steering */
0390         ptff(kvm_s390_available_subfunc.ptff,
0391              sizeof(kvm_s390_available_subfunc.ptff),
0392              PTFF_QAF);
0393 
0394     if (test_facility(17)) { /* MSA */
0395         __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
0396                   kvm_s390_available_subfunc.kmac);
0397         __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
0398                   kvm_s390_available_subfunc.kmc);
0399         __cpacf_query(CPACF_KM, (cpacf_mask_t *)
0400                   kvm_s390_available_subfunc.km);
0401         __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
0402                   kvm_s390_available_subfunc.kimd);
0403         __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
0404                   kvm_s390_available_subfunc.klmd);
0405     }
0406     if (test_facility(76)) /* MSA3 */
0407         __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
0408                   kvm_s390_available_subfunc.pckmo);
0409     if (test_facility(77)) { /* MSA4 */
0410         __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
0411                   kvm_s390_available_subfunc.kmctr);
0412         __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
0413                   kvm_s390_available_subfunc.kmf);
0414         __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
0415                   kvm_s390_available_subfunc.kmo);
0416         __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
0417                   kvm_s390_available_subfunc.pcc);
0418     }
0419     if (test_facility(57)) /* MSA5 */
0420         __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
0421                   kvm_s390_available_subfunc.ppno);
0422 
0423     if (test_facility(146)) /* MSA8 */
0424         __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
0425                   kvm_s390_available_subfunc.kma);
0426 
0427     if (test_facility(155)) /* MSA9 */
0428         __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
0429                   kvm_s390_available_subfunc.kdsa);
0430 
0431     if (test_facility(150)) /* SORTL */
0432         __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
0433 
0434     if (test_facility(151)) /* DFLTCC */
0435         __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
0436 
0437     if (MACHINE_HAS_ESOP)
0438         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
0439     /*
0440      * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
0441      * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
0442      */
0443     if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
0444         !test_facility(3) || !nested)
0445         return;
0446     allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
0447     if (sclp.has_64bscao)
0448         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
0449     if (sclp.has_siif)
0450         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
0451     if (sclp.has_gpere)
0452         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
0453     if (sclp.has_gsls)
0454         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
0455     if (sclp.has_ib)
0456         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
0457     if (sclp.has_cei)
0458         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
0459     if (sclp.has_ibs)
0460         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
0461     if (sclp.has_kss)
0462         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
0463     /*
0464      * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
0465      * all skey handling functions read/set the skey from the PGSTE
0466      * instead of the real storage key.
0467      *
0468      * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
0469      * pages being detected as preserved although they are resident.
0470      *
0471      * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
0472      * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
0473      *
0474      * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
0475      * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
0476      * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
0477      *
0478      * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
0479      * cannot easily shadow the SCA because of the ipte lock.
0480      */
0481 }
0482 
0483 int kvm_arch_init(void *opaque)
0484 {
0485     int rc = -ENOMEM;
0486 
0487     kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
0488     if (!kvm_s390_dbf)
0489         return -ENOMEM;
0490 
0491     kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
0492     if (!kvm_s390_dbf_uv)
0493         goto out;
0494 
0495     if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
0496         debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
0497         goto out;
0498 
0499     kvm_s390_cpu_feat_init();
0500 
0501     /* Register floating interrupt controller interface. */
0502     rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
0503     if (rc) {
0504         pr_err("A FLIC registration call failed with rc=%d\n", rc);
0505         goto out;
0506     }
0507 
0508     if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
0509         rc = kvm_s390_pci_init();
0510         if (rc) {
0511             pr_err("Unable to allocate AIFT for PCI\n");
0512             goto out;
0513         }
0514     }
0515 
0516     rc = kvm_s390_gib_init(GAL_ISC);
0517     if (rc)
0518         goto out;
0519 
0520     return 0;
0521 
0522 out:
0523     kvm_arch_exit();
0524     return rc;
0525 }
0526 
0527 void kvm_arch_exit(void)
0528 {
0529     kvm_s390_gib_destroy();
0530     if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
0531         kvm_s390_pci_exit();
0532     debug_unregister(kvm_s390_dbf);
0533     debug_unregister(kvm_s390_dbf_uv);
0534 }
0535 
0536 /* Section: device related */
0537 long kvm_arch_dev_ioctl(struct file *filp,
0538             unsigned int ioctl, unsigned long arg)
0539 {
0540     if (ioctl == KVM_S390_ENABLE_SIE)
0541         return s390_enable_sie();
0542     return -EINVAL;
0543 }
0544 
0545 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
0546 {
0547     int r;
0548 
0549     switch (ext) {
0550     case KVM_CAP_S390_PSW:
0551     case KVM_CAP_S390_GMAP:
0552     case KVM_CAP_SYNC_MMU:
0553 #ifdef CONFIG_KVM_S390_UCONTROL
0554     case KVM_CAP_S390_UCONTROL:
0555 #endif
0556     case KVM_CAP_ASYNC_PF:
0557     case KVM_CAP_SYNC_REGS:
0558     case KVM_CAP_ONE_REG:
0559     case KVM_CAP_ENABLE_CAP:
0560     case KVM_CAP_S390_CSS_SUPPORT:
0561     case KVM_CAP_IOEVENTFD:
0562     case KVM_CAP_DEVICE_CTRL:
0563     case KVM_CAP_S390_IRQCHIP:
0564     case KVM_CAP_VM_ATTRIBUTES:
0565     case KVM_CAP_MP_STATE:
0566     case KVM_CAP_IMMEDIATE_EXIT:
0567     case KVM_CAP_S390_INJECT_IRQ:
0568     case KVM_CAP_S390_USER_SIGP:
0569     case KVM_CAP_S390_USER_STSI:
0570     case KVM_CAP_S390_SKEYS:
0571     case KVM_CAP_S390_IRQ_STATE:
0572     case KVM_CAP_S390_USER_INSTR0:
0573     case KVM_CAP_S390_CMMA_MIGRATION:
0574     case KVM_CAP_S390_AIS:
0575     case KVM_CAP_S390_AIS_MIGRATION:
0576     case KVM_CAP_S390_VCPU_RESETS:
0577     case KVM_CAP_SET_GUEST_DEBUG:
0578     case KVM_CAP_S390_DIAG318:
0579     case KVM_CAP_S390_MEM_OP_EXTENSION:
0580         r = 1;
0581         break;
0582     case KVM_CAP_SET_GUEST_DEBUG2:
0583         r = KVM_GUESTDBG_VALID_MASK;
0584         break;
0585     case KVM_CAP_S390_HPAGE_1M:
0586         r = 0;
0587         if (hpage && !kvm_is_ucontrol(kvm))
0588             r = 1;
0589         break;
0590     case KVM_CAP_S390_MEM_OP:
0591         r = MEM_OP_MAX_SIZE;
0592         break;
0593     case KVM_CAP_NR_VCPUS:
0594     case KVM_CAP_MAX_VCPUS:
0595     case KVM_CAP_MAX_VCPU_ID:
0596         r = KVM_S390_BSCA_CPU_SLOTS;
0597         if (!kvm_s390_use_sca_entries())
0598             r = KVM_MAX_VCPUS;
0599         else if (sclp.has_esca && sclp.has_64bscao)
0600             r = KVM_S390_ESCA_CPU_SLOTS;
0601         if (ext == KVM_CAP_NR_VCPUS)
0602             r = min_t(unsigned int, num_online_cpus(), r);
0603         break;
0604     case KVM_CAP_S390_COW:
0605         r = MACHINE_HAS_ESOP;
0606         break;
0607     case KVM_CAP_S390_VECTOR_REGISTERS:
0608         r = MACHINE_HAS_VX;
0609         break;
0610     case KVM_CAP_S390_RI:
0611         r = test_facility(64);
0612         break;
0613     case KVM_CAP_S390_GS:
0614         r = test_facility(133);
0615         break;
0616     case KVM_CAP_S390_BPB:
0617         r = test_facility(82);
0618         break;
0619     case KVM_CAP_S390_PROTECTED:
0620         r = is_prot_virt_host();
0621         break;
0622     case KVM_CAP_S390_PROTECTED_DUMP: {
0623         u64 pv_cmds_dump[] = {
0624             BIT_UVC_CMD_DUMP_INIT,
0625             BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
0626             BIT_UVC_CMD_DUMP_CPU,
0627             BIT_UVC_CMD_DUMP_COMPLETE,
0628         };
0629         int i;
0630 
0631         r = is_prot_virt_host();
0632 
0633         for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) {
0634             if (!test_bit_inv(pv_cmds_dump[i],
0635                       (unsigned long *)&uv_info.inst_calls_list)) {
0636                 r = 0;
0637                 break;
0638             }
0639         }
0640         break;
0641     }
0642     case KVM_CAP_S390_ZPCI_OP:
0643         r = kvm_s390_pci_interp_allowed();
0644         break;
0645     case KVM_CAP_S390_CPU_TOPOLOGY:
0646         r = test_facility(11);
0647         break;
0648     default:
0649         r = 0;
0650     }
0651     return r;
0652 }
0653 
0654 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
0655 {
0656     int i;
0657     gfn_t cur_gfn, last_gfn;
0658     unsigned long gaddr, vmaddr;
0659     struct gmap *gmap = kvm->arch.gmap;
0660     DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
0661 
0662     /* Loop over all guest segments */
0663     cur_gfn = memslot->base_gfn;
0664     last_gfn = memslot->base_gfn + memslot->npages;
0665     for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
0666         gaddr = gfn_to_gpa(cur_gfn);
0667         vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
0668         if (kvm_is_error_hva(vmaddr))
0669             continue;
0670 
0671         bitmap_zero(bitmap, _PAGE_ENTRIES);
0672         gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
0673         for (i = 0; i < _PAGE_ENTRIES; i++) {
0674             if (test_bit(i, bitmap))
0675                 mark_page_dirty(kvm, cur_gfn + i);
0676         }
0677 
0678         if (fatal_signal_pending(current))
0679             return;
0680         cond_resched();
0681     }
0682 }
0683 
0684 /* Section: vm related */
0685 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
0686 
0687 /*
0688  * Get (and clear) the dirty memory log for a memory slot.
0689  */
0690 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
0691                    struct kvm_dirty_log *log)
0692 {
0693     int r;
0694     unsigned long n;
0695     struct kvm_memory_slot *memslot;
0696     int is_dirty;
0697 
0698     if (kvm_is_ucontrol(kvm))
0699         return -EINVAL;
0700 
0701     mutex_lock(&kvm->slots_lock);
0702 
0703     r = -EINVAL;
0704     if (log->slot >= KVM_USER_MEM_SLOTS)
0705         goto out;
0706 
0707     r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
0708     if (r)
0709         goto out;
0710 
0711     /* Clear the dirty log */
0712     if (is_dirty) {
0713         n = kvm_dirty_bitmap_bytes(memslot);
0714         memset(memslot->dirty_bitmap, 0, n);
0715     }
0716     r = 0;
0717 out:
0718     mutex_unlock(&kvm->slots_lock);
0719     return r;
0720 }
0721 
0722 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
0723 {
0724     unsigned long i;
0725     struct kvm_vcpu *vcpu;
0726 
0727     kvm_for_each_vcpu(i, vcpu, kvm) {
0728         kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
0729     }
0730 }
0731 
0732 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
0733 {
0734     int r;
0735 
0736     if (cap->flags)
0737         return -EINVAL;
0738 
0739     switch (cap->cap) {
0740     case KVM_CAP_S390_IRQCHIP:
0741         VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
0742         kvm->arch.use_irqchip = 1;
0743         r = 0;
0744         break;
0745     case KVM_CAP_S390_USER_SIGP:
0746         VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
0747         kvm->arch.user_sigp = 1;
0748         r = 0;
0749         break;
0750     case KVM_CAP_S390_VECTOR_REGISTERS:
0751         mutex_lock(&kvm->lock);
0752         if (kvm->created_vcpus) {
0753             r = -EBUSY;
0754         } else if (MACHINE_HAS_VX) {
0755             set_kvm_facility(kvm->arch.model.fac_mask, 129);
0756             set_kvm_facility(kvm->arch.model.fac_list, 129);
0757             if (test_facility(134)) {
0758                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
0759                 set_kvm_facility(kvm->arch.model.fac_list, 134);
0760             }
0761             if (test_facility(135)) {
0762                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
0763                 set_kvm_facility(kvm->arch.model.fac_list, 135);
0764             }
0765             if (test_facility(148)) {
0766                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
0767                 set_kvm_facility(kvm->arch.model.fac_list, 148);
0768             }
0769             if (test_facility(152)) {
0770                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
0771                 set_kvm_facility(kvm->arch.model.fac_list, 152);
0772             }
0773             if (test_facility(192)) {
0774                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
0775                 set_kvm_facility(kvm->arch.model.fac_list, 192);
0776             }
0777             r = 0;
0778         } else
0779             r = -EINVAL;
0780         mutex_unlock(&kvm->lock);
0781         VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
0782              r ? "(not available)" : "(success)");
0783         break;
0784     case KVM_CAP_S390_RI:
0785         r = -EINVAL;
0786         mutex_lock(&kvm->lock);
0787         if (kvm->created_vcpus) {
0788             r = -EBUSY;
0789         } else if (test_facility(64)) {
0790             set_kvm_facility(kvm->arch.model.fac_mask, 64);
0791             set_kvm_facility(kvm->arch.model.fac_list, 64);
0792             r = 0;
0793         }
0794         mutex_unlock(&kvm->lock);
0795         VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
0796              r ? "(not available)" : "(success)");
0797         break;
0798     case KVM_CAP_S390_AIS:
0799         mutex_lock(&kvm->lock);
0800         if (kvm->created_vcpus) {
0801             r = -EBUSY;
0802         } else {
0803             set_kvm_facility(kvm->arch.model.fac_mask, 72);
0804             set_kvm_facility(kvm->arch.model.fac_list, 72);
0805             r = 0;
0806         }
0807         mutex_unlock(&kvm->lock);
0808         VM_EVENT(kvm, 3, "ENABLE: AIS %s",
0809              r ? "(not available)" : "(success)");
0810         break;
0811     case KVM_CAP_S390_GS:
0812         r = -EINVAL;
0813         mutex_lock(&kvm->lock);
0814         if (kvm->created_vcpus) {
0815             r = -EBUSY;
0816         } else if (test_facility(133)) {
0817             set_kvm_facility(kvm->arch.model.fac_mask, 133);
0818             set_kvm_facility(kvm->arch.model.fac_list, 133);
0819             r = 0;
0820         }
0821         mutex_unlock(&kvm->lock);
0822         VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
0823              r ? "(not available)" : "(success)");
0824         break;
0825     case KVM_CAP_S390_HPAGE_1M:
0826         mutex_lock(&kvm->lock);
0827         if (kvm->created_vcpus)
0828             r = -EBUSY;
0829         else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
0830             r = -EINVAL;
0831         else {
0832             r = 0;
0833             mmap_write_lock(kvm->mm);
0834             kvm->mm->context.allow_gmap_hpage_1m = 1;
0835             mmap_write_unlock(kvm->mm);
0836             /*
0837              * We might have to create fake 4k page
0838              * tables. To avoid that the hardware works on
0839              * stale PGSTEs, we emulate these instructions.
0840              */
0841             kvm->arch.use_skf = 0;
0842             kvm->arch.use_pfmfi = 0;
0843         }
0844         mutex_unlock(&kvm->lock);
0845         VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
0846              r ? "(not available)" : "(success)");
0847         break;
0848     case KVM_CAP_S390_USER_STSI:
0849         VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
0850         kvm->arch.user_stsi = 1;
0851         r = 0;
0852         break;
0853     case KVM_CAP_S390_USER_INSTR0:
0854         VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
0855         kvm->arch.user_instr0 = 1;
0856         icpt_operexc_on_all_vcpus(kvm);
0857         r = 0;
0858         break;
0859     case KVM_CAP_S390_CPU_TOPOLOGY:
0860         r = -EINVAL;
0861         mutex_lock(&kvm->lock);
0862         if (kvm->created_vcpus) {
0863             r = -EBUSY;
0864         } else if (test_facility(11)) {
0865             set_kvm_facility(kvm->arch.model.fac_mask, 11);
0866             set_kvm_facility(kvm->arch.model.fac_list, 11);
0867             r = 0;
0868         }
0869         mutex_unlock(&kvm->lock);
0870         VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
0871              r ? "(not available)" : "(success)");
0872         break;
0873     default:
0874         r = -EINVAL;
0875         break;
0876     }
0877     return r;
0878 }
0879 
0880 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
0881 {
0882     int ret;
0883 
0884     switch (attr->attr) {
0885     case KVM_S390_VM_MEM_LIMIT_SIZE:
0886         ret = 0;
0887         VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
0888              kvm->arch.mem_limit);
0889         if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
0890             ret = -EFAULT;
0891         break;
0892     default:
0893         ret = -ENXIO;
0894         break;
0895     }
0896     return ret;
0897 }
0898 
0899 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
0900 {
0901     int ret;
0902     unsigned int idx;
0903     switch (attr->attr) {
0904     case KVM_S390_VM_MEM_ENABLE_CMMA:
0905         ret = -ENXIO;
0906         if (!sclp.has_cmma)
0907             break;
0908 
0909         VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
0910         mutex_lock(&kvm->lock);
0911         if (kvm->created_vcpus)
0912             ret = -EBUSY;
0913         else if (kvm->mm->context.allow_gmap_hpage_1m)
0914             ret = -EINVAL;
0915         else {
0916             kvm->arch.use_cmma = 1;
0917             /* Not compatible with cmma. */
0918             kvm->arch.use_pfmfi = 0;
0919             ret = 0;
0920         }
0921         mutex_unlock(&kvm->lock);
0922         break;
0923     case KVM_S390_VM_MEM_CLR_CMMA:
0924         ret = -ENXIO;
0925         if (!sclp.has_cmma)
0926             break;
0927         ret = -EINVAL;
0928         if (!kvm->arch.use_cmma)
0929             break;
0930 
0931         VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
0932         mutex_lock(&kvm->lock);
0933         idx = srcu_read_lock(&kvm->srcu);
0934         s390_reset_cmma(kvm->arch.gmap->mm);
0935         srcu_read_unlock(&kvm->srcu, idx);
0936         mutex_unlock(&kvm->lock);
0937         ret = 0;
0938         break;
0939     case KVM_S390_VM_MEM_LIMIT_SIZE: {
0940         unsigned long new_limit;
0941 
0942         if (kvm_is_ucontrol(kvm))
0943             return -EINVAL;
0944 
0945         if (get_user(new_limit, (u64 __user *)attr->addr))
0946             return -EFAULT;
0947 
0948         if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
0949             new_limit > kvm->arch.mem_limit)
0950             return -E2BIG;
0951 
0952         if (!new_limit)
0953             return -EINVAL;
0954 
0955         /* gmap_create takes last usable address */
0956         if (new_limit != KVM_S390_NO_MEM_LIMIT)
0957             new_limit -= 1;
0958 
0959         ret = -EBUSY;
0960         mutex_lock(&kvm->lock);
0961         if (!kvm->created_vcpus) {
0962             /* gmap_create will round the limit up */
0963             struct gmap *new = gmap_create(current->mm, new_limit);
0964 
0965             if (!new) {
0966                 ret = -ENOMEM;
0967             } else {
0968                 gmap_remove(kvm->arch.gmap);
0969                 new->private = kvm;
0970                 kvm->arch.gmap = new;
0971                 ret = 0;
0972             }
0973         }
0974         mutex_unlock(&kvm->lock);
0975         VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
0976         VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
0977              (void *) kvm->arch.gmap->asce);
0978         break;
0979     }
0980     default:
0981         ret = -ENXIO;
0982         break;
0983     }
0984     return ret;
0985 }
0986 
0987 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
0988 
0989 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
0990 {
0991     struct kvm_vcpu *vcpu;
0992     unsigned long i;
0993 
0994     kvm_s390_vcpu_block_all(kvm);
0995 
0996     kvm_for_each_vcpu(i, vcpu, kvm) {
0997         kvm_s390_vcpu_crypto_setup(vcpu);
0998         /* recreate the shadow crycb by leaving the VSIE handler */
0999         kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1000     }
1001 
1002     kvm_s390_vcpu_unblock_all(kvm);
1003 }
1004 
1005 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007     mutex_lock(&kvm->lock);
1008     switch (attr->attr) {
1009     case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010         if (!test_kvm_facility(kvm, 76)) {
1011             mutex_unlock(&kvm->lock);
1012             return -EINVAL;
1013         }
1014         get_random_bytes(
1015             kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1016             sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1017         kvm->arch.crypto.aes_kw = 1;
1018         VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
1019         break;
1020     case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1021         if (!test_kvm_facility(kvm, 76)) {
1022             mutex_unlock(&kvm->lock);
1023             return -EINVAL;
1024         }
1025         get_random_bytes(
1026             kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1027             sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1028         kvm->arch.crypto.dea_kw = 1;
1029         VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
1030         break;
1031     case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1032         if (!test_kvm_facility(kvm, 76)) {
1033             mutex_unlock(&kvm->lock);
1034             return -EINVAL;
1035         }
1036         kvm->arch.crypto.aes_kw = 0;
1037         memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
1038             sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1039         VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
1040         break;
1041     case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1042         if (!test_kvm_facility(kvm, 76)) {
1043             mutex_unlock(&kvm->lock);
1044             return -EINVAL;
1045         }
1046         kvm->arch.crypto.dea_kw = 0;
1047         memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
1048             sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1049         VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1050         break;
1051     case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1052         if (!ap_instructions_available()) {
1053             mutex_unlock(&kvm->lock);
1054             return -EOPNOTSUPP;
1055         }
1056         kvm->arch.crypto.apie = 1;
1057         break;
1058     case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1059         if (!ap_instructions_available()) {
1060             mutex_unlock(&kvm->lock);
1061             return -EOPNOTSUPP;
1062         }
1063         kvm->arch.crypto.apie = 0;
1064         break;
1065     default:
1066         mutex_unlock(&kvm->lock);
1067         return -ENXIO;
1068     }
1069 
1070     kvm_s390_vcpu_crypto_reset_all(kvm);
1071     mutex_unlock(&kvm->lock);
1072     return 0;
1073 }
1074 
1075 static void kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
1076 {
1077     /* Only set the ECB bits after guest requests zPCI interpretation */
1078     if (!vcpu->kvm->arch.use_zpci_interp)
1079         return;
1080 
1081     vcpu->arch.sie_block->ecb2 |= ECB2_ZPCI_LSI;
1082     vcpu->arch.sie_block->ecb3 |= ECB3_AISII + ECB3_AISI;
1083 }
1084 
1085 void kvm_s390_vcpu_pci_enable_interp(struct kvm *kvm)
1086 {
1087     struct kvm_vcpu *vcpu;
1088     unsigned long i;
1089 
1090     lockdep_assert_held(&kvm->lock);
1091 
1092     if (!kvm_s390_pci_interp_allowed())
1093         return;
1094 
1095     /*
1096      * If host is configured for PCI and the necessary facilities are
1097      * available, turn on interpretation for the life of this guest
1098      */
1099     kvm->arch.use_zpci_interp = 1;
1100 
1101     kvm_s390_vcpu_block_all(kvm);
1102 
1103     kvm_for_each_vcpu(i, vcpu, kvm) {
1104         kvm_s390_vcpu_pci_setup(vcpu);
1105         kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
1106     }
1107 
1108     kvm_s390_vcpu_unblock_all(kvm);
1109 }
1110 
1111 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1112 {
1113     unsigned long cx;
1114     struct kvm_vcpu *vcpu;
1115 
1116     kvm_for_each_vcpu(cx, vcpu, kvm)
1117         kvm_s390_sync_request(req, vcpu);
1118 }
1119 
1120 /*
1121  * Must be called with kvm->srcu held to avoid races on memslots, and with
1122  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1123  */
1124 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1125 {
1126     struct kvm_memory_slot *ms;
1127     struct kvm_memslots *slots;
1128     unsigned long ram_pages = 0;
1129     int bkt;
1130 
1131     /* migration mode already enabled */
1132     if (kvm->arch.migration_mode)
1133         return 0;
1134     slots = kvm_memslots(kvm);
1135     if (!slots || kvm_memslots_empty(slots))
1136         return -EINVAL;
1137 
1138     if (!kvm->arch.use_cmma) {
1139         kvm->arch.migration_mode = 1;
1140         return 0;
1141     }
1142     /* mark all the pages in active slots as dirty */
1143     kvm_for_each_memslot(ms, bkt, slots) {
1144         if (!ms->dirty_bitmap)
1145             return -EINVAL;
1146         /*
1147          * The second half of the bitmap is only used on x86,
1148          * and would be wasted otherwise, so we put it to good
1149          * use here to keep track of the state of the storage
1150          * attributes.
1151          */
1152         memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1153         ram_pages += ms->npages;
1154     }
1155     atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1156     kvm->arch.migration_mode = 1;
1157     kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1158     return 0;
1159 }
1160 
1161 /*
1162  * Must be called with kvm->slots_lock to avoid races with ourselves and
1163  * kvm_s390_vm_start_migration.
1164  */
1165 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1166 {
1167     /* migration mode already disabled */
1168     if (!kvm->arch.migration_mode)
1169         return 0;
1170     kvm->arch.migration_mode = 0;
1171     if (kvm->arch.use_cmma)
1172         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1173     return 0;
1174 }
1175 
1176 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1177                      struct kvm_device_attr *attr)
1178 {
1179     int res = -ENXIO;
1180 
1181     mutex_lock(&kvm->slots_lock);
1182     switch (attr->attr) {
1183     case KVM_S390_VM_MIGRATION_START:
1184         res = kvm_s390_vm_start_migration(kvm);
1185         break;
1186     case KVM_S390_VM_MIGRATION_STOP:
1187         res = kvm_s390_vm_stop_migration(kvm);
1188         break;
1189     default:
1190         break;
1191     }
1192     mutex_unlock(&kvm->slots_lock);
1193 
1194     return res;
1195 }
1196 
1197 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1198                      struct kvm_device_attr *attr)
1199 {
1200     u64 mig = kvm->arch.migration_mode;
1201 
1202     if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1203         return -ENXIO;
1204 
1205     if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1206         return -EFAULT;
1207     return 0;
1208 }
1209 
1210 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212     struct kvm_s390_vm_tod_clock gtod;
1213 
1214     if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1215         return -EFAULT;
1216 
1217     if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1218         return -EINVAL;
1219     kvm_s390_set_tod_clock(kvm, &gtod);
1220 
1221     VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1222         gtod.epoch_idx, gtod.tod);
1223 
1224     return 0;
1225 }
1226 
1227 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229     u8 gtod_high;
1230 
1231     if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1232                        sizeof(gtod_high)))
1233         return -EFAULT;
1234 
1235     if (gtod_high != 0)
1236         return -EINVAL;
1237     VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1238 
1239     return 0;
1240 }
1241 
1242 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244     struct kvm_s390_vm_tod_clock gtod = { 0 };
1245 
1246     if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1247                sizeof(gtod.tod)))
1248         return -EFAULT;
1249 
1250     kvm_s390_set_tod_clock(kvm, &gtod);
1251     VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1252     return 0;
1253 }
1254 
1255 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1256 {
1257     int ret;
1258 
1259     if (attr->flags)
1260         return -EINVAL;
1261 
1262     switch (attr->attr) {
1263     case KVM_S390_VM_TOD_EXT:
1264         ret = kvm_s390_set_tod_ext(kvm, attr);
1265         break;
1266     case KVM_S390_VM_TOD_HIGH:
1267         ret = kvm_s390_set_tod_high(kvm, attr);
1268         break;
1269     case KVM_S390_VM_TOD_LOW:
1270         ret = kvm_s390_set_tod_low(kvm, attr);
1271         break;
1272     default:
1273         ret = -ENXIO;
1274         break;
1275     }
1276     return ret;
1277 }
1278 
1279 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1280                    struct kvm_s390_vm_tod_clock *gtod)
1281 {
1282     union tod_clock clk;
1283 
1284     preempt_disable();
1285 
1286     store_tod_clock_ext(&clk);
1287 
1288     gtod->tod = clk.tod + kvm->arch.epoch;
1289     gtod->epoch_idx = 0;
1290     if (test_kvm_facility(kvm, 139)) {
1291         gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1292         if (gtod->tod < clk.tod)
1293             gtod->epoch_idx += 1;
1294     }
1295 
1296     preempt_enable();
1297 }
1298 
1299 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301     struct kvm_s390_vm_tod_clock gtod;
1302 
1303     memset(&gtod, 0, sizeof(gtod));
1304     kvm_s390_get_tod_clock(kvm, &gtod);
1305     if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1306         return -EFAULT;
1307 
1308     VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1309         gtod.epoch_idx, gtod.tod);
1310     return 0;
1311 }
1312 
1313 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1314 {
1315     u8 gtod_high = 0;
1316 
1317     if (copy_to_user((void __user *)attr->addr, &gtod_high,
1318                      sizeof(gtod_high)))
1319         return -EFAULT;
1320     VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1321 
1322     return 0;
1323 }
1324 
1325 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1326 {
1327     u64 gtod;
1328 
1329     gtod = kvm_s390_get_tod_clock_fast(kvm);
1330     if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1331         return -EFAULT;
1332     VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1333 
1334     return 0;
1335 }
1336 
1337 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1338 {
1339     int ret;
1340 
1341     if (attr->flags)
1342         return -EINVAL;
1343 
1344     switch (attr->attr) {
1345     case KVM_S390_VM_TOD_EXT:
1346         ret = kvm_s390_get_tod_ext(kvm, attr);
1347         break;
1348     case KVM_S390_VM_TOD_HIGH:
1349         ret = kvm_s390_get_tod_high(kvm, attr);
1350         break;
1351     case KVM_S390_VM_TOD_LOW:
1352         ret = kvm_s390_get_tod_low(kvm, attr);
1353         break;
1354     default:
1355         ret = -ENXIO;
1356         break;
1357     }
1358     return ret;
1359 }
1360 
1361 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1362 {
1363     struct kvm_s390_vm_cpu_processor *proc;
1364     u16 lowest_ibc, unblocked_ibc;
1365     int ret = 0;
1366 
1367     mutex_lock(&kvm->lock);
1368     if (kvm->created_vcpus) {
1369         ret = -EBUSY;
1370         goto out;
1371     }
1372     proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1373     if (!proc) {
1374         ret = -ENOMEM;
1375         goto out;
1376     }
1377     if (!copy_from_user(proc, (void __user *)attr->addr,
1378                 sizeof(*proc))) {
1379         kvm->arch.model.cpuid = proc->cpuid;
1380         lowest_ibc = sclp.ibc >> 16 & 0xfff;
1381         unblocked_ibc = sclp.ibc & 0xfff;
1382         if (lowest_ibc && proc->ibc) {
1383             if (proc->ibc > unblocked_ibc)
1384                 kvm->arch.model.ibc = unblocked_ibc;
1385             else if (proc->ibc < lowest_ibc)
1386                 kvm->arch.model.ibc = lowest_ibc;
1387             else
1388                 kvm->arch.model.ibc = proc->ibc;
1389         }
1390         memcpy(kvm->arch.model.fac_list, proc->fac_list,
1391                S390_ARCH_FAC_LIST_SIZE_BYTE);
1392         VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1393              kvm->arch.model.ibc,
1394              kvm->arch.model.cpuid);
1395         VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1396              kvm->arch.model.fac_list[0],
1397              kvm->arch.model.fac_list[1],
1398              kvm->arch.model.fac_list[2]);
1399     } else
1400         ret = -EFAULT;
1401     kfree(proc);
1402 out:
1403     mutex_unlock(&kvm->lock);
1404     return ret;
1405 }
1406 
1407 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1408                        struct kvm_device_attr *attr)
1409 {
1410     struct kvm_s390_vm_cpu_feat data;
1411 
1412     if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1413         return -EFAULT;
1414     if (!bitmap_subset((unsigned long *) data.feat,
1415                kvm_s390_available_cpu_feat,
1416                KVM_S390_VM_CPU_FEAT_NR_BITS))
1417         return -EINVAL;
1418 
1419     mutex_lock(&kvm->lock);
1420     if (kvm->created_vcpus) {
1421         mutex_unlock(&kvm->lock);
1422         return -EBUSY;
1423     }
1424     bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1425     mutex_unlock(&kvm->lock);
1426     VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1427              data.feat[0],
1428              data.feat[1],
1429              data.feat[2]);
1430     return 0;
1431 }
1432 
1433 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1434                       struct kvm_device_attr *attr)
1435 {
1436     mutex_lock(&kvm->lock);
1437     if (kvm->created_vcpus) {
1438         mutex_unlock(&kvm->lock);
1439         return -EBUSY;
1440     }
1441 
1442     if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1443                sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1444         mutex_unlock(&kvm->lock);
1445         return -EFAULT;
1446     }
1447     mutex_unlock(&kvm->lock);
1448 
1449     VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1450          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1451          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1452          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1453          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1454     VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1455          ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1456          ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1457     VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1458          ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1459          ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1460     VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1461          ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1462          ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1463     VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1464          ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1465          ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1466     VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1467          ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1468          ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1469     VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1470          ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1471          ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1472     VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1473          ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1474          ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1475     VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1476          ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1477          ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1478     VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1479          ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1480          ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1481     VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1482          ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1483          ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1484     VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1485          ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1486          ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1487     VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1488          ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1489          ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1490     VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1491          ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1492          ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1493     VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1494          ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1495          ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1496     VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1497          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1498          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1499          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1500          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1501     VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1502          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1503          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1504          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1505          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1506 
1507     return 0;
1508 }
1509 
1510 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1511 {
1512     int ret = -ENXIO;
1513 
1514     switch (attr->attr) {
1515     case KVM_S390_VM_CPU_PROCESSOR:
1516         ret = kvm_s390_set_processor(kvm, attr);
1517         break;
1518     case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1519         ret = kvm_s390_set_processor_feat(kvm, attr);
1520         break;
1521     case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1522         ret = kvm_s390_set_processor_subfunc(kvm, attr);
1523         break;
1524     }
1525     return ret;
1526 }
1527 
1528 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1529 {
1530     struct kvm_s390_vm_cpu_processor *proc;
1531     int ret = 0;
1532 
1533     proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1534     if (!proc) {
1535         ret = -ENOMEM;
1536         goto out;
1537     }
1538     proc->cpuid = kvm->arch.model.cpuid;
1539     proc->ibc = kvm->arch.model.ibc;
1540     memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1541            S390_ARCH_FAC_LIST_SIZE_BYTE);
1542     VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1543          kvm->arch.model.ibc,
1544          kvm->arch.model.cpuid);
1545     VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1546          kvm->arch.model.fac_list[0],
1547          kvm->arch.model.fac_list[1],
1548          kvm->arch.model.fac_list[2]);
1549     if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1550         ret = -EFAULT;
1551     kfree(proc);
1552 out:
1553     return ret;
1554 }
1555 
1556 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1557 {
1558     struct kvm_s390_vm_cpu_machine *mach;
1559     int ret = 0;
1560 
1561     mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1562     if (!mach) {
1563         ret = -ENOMEM;
1564         goto out;
1565     }
1566     get_cpu_id((struct cpuid *) &mach->cpuid);
1567     mach->ibc = sclp.ibc;
1568     memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1569            S390_ARCH_FAC_LIST_SIZE_BYTE);
1570     memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1571            sizeof(stfle_fac_list));
1572     VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1573          kvm->arch.model.ibc,
1574          kvm->arch.model.cpuid);
1575     VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1576          mach->fac_mask[0],
1577          mach->fac_mask[1],
1578          mach->fac_mask[2]);
1579     VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1580          mach->fac_list[0],
1581          mach->fac_list[1],
1582          mach->fac_list[2]);
1583     if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1584         ret = -EFAULT;
1585     kfree(mach);
1586 out:
1587     return ret;
1588 }
1589 
1590 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1591                        struct kvm_device_attr *attr)
1592 {
1593     struct kvm_s390_vm_cpu_feat data;
1594 
1595     bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1596     if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1597         return -EFAULT;
1598     VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1599              data.feat[0],
1600              data.feat[1],
1601              data.feat[2]);
1602     return 0;
1603 }
1604 
1605 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1606                      struct kvm_device_attr *attr)
1607 {
1608     struct kvm_s390_vm_cpu_feat data;
1609 
1610     bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1611     if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1612         return -EFAULT;
1613     VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1614              data.feat[0],
1615              data.feat[1],
1616              data.feat[2]);
1617     return 0;
1618 }
1619 
1620 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1621                       struct kvm_device_attr *attr)
1622 {
1623     if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1624         sizeof(struct kvm_s390_vm_cpu_subfunc)))
1625         return -EFAULT;
1626 
1627     VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1628          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1629          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1630          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1631          ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1632     VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1633          ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1634          ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1635     VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1636          ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1637          ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1638     VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1639          ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1640          ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1641     VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1642          ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1643          ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1644     VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1645          ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1646          ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1647     VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1648          ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1649          ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1650     VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1651          ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1652          ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1653     VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1654          ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1655          ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1656     VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1657          ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1658          ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1659     VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1660          ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1661          ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1662     VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1663          ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1664          ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1665     VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1666          ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1667          ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1668     VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1669          ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1670          ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1671     VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1672          ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1673          ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1674     VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1675          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1676          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1677          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1678          ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1679     VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1680          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1681          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1682          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1683          ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1684 
1685     return 0;
1686 }
1687 
1688 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1689                     struct kvm_device_attr *attr)
1690 {
1691     if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1692         sizeof(struct kvm_s390_vm_cpu_subfunc)))
1693         return -EFAULT;
1694 
1695     VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1696          ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1697          ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1698          ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1699          ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1700     VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1701          ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1702          ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1703     VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1704          ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1705          ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1706     VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1707          ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1708          ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1709     VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1710          ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1711          ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1712     VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1713          ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1714          ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1715     VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1716          ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1717          ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1718     VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1719          ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1720          ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1721     VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1722          ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1723          ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1724     VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1725          ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1726          ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1727     VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1728          ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1729          ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1730     VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1731          ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1732          ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1733     VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1734          ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1735          ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1736     VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1737          ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1738          ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1739     VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1740          ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1741          ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1742     VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1743          ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1744          ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1745          ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1746          ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1747     VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1748          ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1749          ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1750          ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1751          ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1752 
1753     return 0;
1754 }
1755 
1756 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1757 {
1758     int ret = -ENXIO;
1759 
1760     switch (attr->attr) {
1761     case KVM_S390_VM_CPU_PROCESSOR:
1762         ret = kvm_s390_get_processor(kvm, attr);
1763         break;
1764     case KVM_S390_VM_CPU_MACHINE:
1765         ret = kvm_s390_get_machine(kvm, attr);
1766         break;
1767     case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1768         ret = kvm_s390_get_processor_feat(kvm, attr);
1769         break;
1770     case KVM_S390_VM_CPU_MACHINE_FEAT:
1771         ret = kvm_s390_get_machine_feat(kvm, attr);
1772         break;
1773     case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1774         ret = kvm_s390_get_processor_subfunc(kvm, attr);
1775         break;
1776     case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1777         ret = kvm_s390_get_machine_subfunc(kvm, attr);
1778         break;
1779     }
1780     return ret;
1781 }
1782 
1783 /**
1784  * kvm_s390_update_topology_change_report - update CPU topology change report
1785  * @kvm: guest KVM description
1786  * @val: set or clear the MTCR bit
1787  *
1788  * Updates the Multiprocessor Topology-Change-Report bit to signal
1789  * the guest with a topology change.
1790  * This is only relevant if the topology facility is present.
1791  *
1792  * The SCA version, bsca or esca, doesn't matter as offset is the same.
1793  */
1794 static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
1795 {
1796     union sca_utility new, old;
1797     struct bsca_block *sca;
1798 
1799     read_lock(&kvm->arch.sca_lock);
1800     sca = kvm->arch.sca;
1801     do {
1802         old = READ_ONCE(sca->utility);
1803         new = old;
1804         new.mtcr = val;
1805     } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
1806     read_unlock(&kvm->arch.sca_lock);
1807 }
1808 
1809 static int kvm_s390_set_topo_change_indication(struct kvm *kvm,
1810                            struct kvm_device_attr *attr)
1811 {
1812     if (!test_kvm_facility(kvm, 11))
1813         return -ENXIO;
1814 
1815     kvm_s390_update_topology_change_report(kvm, !!attr->attr);
1816     return 0;
1817 }
1818 
1819 static int kvm_s390_get_topo_change_indication(struct kvm *kvm,
1820                            struct kvm_device_attr *attr)
1821 {
1822     u8 topo;
1823 
1824     if (!test_kvm_facility(kvm, 11))
1825         return -ENXIO;
1826 
1827     read_lock(&kvm->arch.sca_lock);
1828     topo = ((struct bsca_block *)kvm->arch.sca)->utility.mtcr;
1829     read_unlock(&kvm->arch.sca_lock);
1830 
1831     return put_user(topo, (u8 __user *)attr->addr);
1832 }
1833 
1834 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1835 {
1836     int ret;
1837 
1838     switch (attr->group) {
1839     case KVM_S390_VM_MEM_CTRL:
1840         ret = kvm_s390_set_mem_control(kvm, attr);
1841         break;
1842     case KVM_S390_VM_TOD:
1843         ret = kvm_s390_set_tod(kvm, attr);
1844         break;
1845     case KVM_S390_VM_CPU_MODEL:
1846         ret = kvm_s390_set_cpu_model(kvm, attr);
1847         break;
1848     case KVM_S390_VM_CRYPTO:
1849         ret = kvm_s390_vm_set_crypto(kvm, attr);
1850         break;
1851     case KVM_S390_VM_MIGRATION:
1852         ret = kvm_s390_vm_set_migration(kvm, attr);
1853         break;
1854     case KVM_S390_VM_CPU_TOPOLOGY:
1855         ret = kvm_s390_set_topo_change_indication(kvm, attr);
1856         break;
1857     default:
1858         ret = -ENXIO;
1859         break;
1860     }
1861 
1862     return ret;
1863 }
1864 
1865 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1866 {
1867     int ret;
1868 
1869     switch (attr->group) {
1870     case KVM_S390_VM_MEM_CTRL:
1871         ret = kvm_s390_get_mem_control(kvm, attr);
1872         break;
1873     case KVM_S390_VM_TOD:
1874         ret = kvm_s390_get_tod(kvm, attr);
1875         break;
1876     case KVM_S390_VM_CPU_MODEL:
1877         ret = kvm_s390_get_cpu_model(kvm, attr);
1878         break;
1879     case KVM_S390_VM_MIGRATION:
1880         ret = kvm_s390_vm_get_migration(kvm, attr);
1881         break;
1882     case KVM_S390_VM_CPU_TOPOLOGY:
1883         ret = kvm_s390_get_topo_change_indication(kvm, attr);
1884         break;
1885     default:
1886         ret = -ENXIO;
1887         break;
1888     }
1889 
1890     return ret;
1891 }
1892 
1893 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1894 {
1895     int ret;
1896 
1897     switch (attr->group) {
1898     case KVM_S390_VM_MEM_CTRL:
1899         switch (attr->attr) {
1900         case KVM_S390_VM_MEM_ENABLE_CMMA:
1901         case KVM_S390_VM_MEM_CLR_CMMA:
1902             ret = sclp.has_cmma ? 0 : -ENXIO;
1903             break;
1904         case KVM_S390_VM_MEM_LIMIT_SIZE:
1905             ret = 0;
1906             break;
1907         default:
1908             ret = -ENXIO;
1909             break;
1910         }
1911         break;
1912     case KVM_S390_VM_TOD:
1913         switch (attr->attr) {
1914         case KVM_S390_VM_TOD_LOW:
1915         case KVM_S390_VM_TOD_HIGH:
1916             ret = 0;
1917             break;
1918         default:
1919             ret = -ENXIO;
1920             break;
1921         }
1922         break;
1923     case KVM_S390_VM_CPU_MODEL:
1924         switch (attr->attr) {
1925         case KVM_S390_VM_CPU_PROCESSOR:
1926         case KVM_S390_VM_CPU_MACHINE:
1927         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1928         case KVM_S390_VM_CPU_MACHINE_FEAT:
1929         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1930         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1931             ret = 0;
1932             break;
1933         default:
1934             ret = -ENXIO;
1935             break;
1936         }
1937         break;
1938     case KVM_S390_VM_CRYPTO:
1939         switch (attr->attr) {
1940         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1941         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1942         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1943         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1944             ret = 0;
1945             break;
1946         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1947         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1948             ret = ap_instructions_available() ? 0 : -ENXIO;
1949             break;
1950         default:
1951             ret = -ENXIO;
1952             break;
1953         }
1954         break;
1955     case KVM_S390_VM_MIGRATION:
1956         ret = 0;
1957         break;
1958     case KVM_S390_VM_CPU_TOPOLOGY:
1959         ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
1960         break;
1961     default:
1962         ret = -ENXIO;
1963         break;
1964     }
1965 
1966     return ret;
1967 }
1968 
1969 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1970 {
1971     uint8_t *keys;
1972     uint64_t hva;
1973     int srcu_idx, i, r = 0;
1974 
1975     if (args->flags != 0)
1976         return -EINVAL;
1977 
1978     /* Is this guest using storage keys? */
1979     if (!mm_uses_skeys(current->mm))
1980         return KVM_S390_GET_SKEYS_NONE;
1981 
1982     /* Enforce sane limit on memory allocation */
1983     if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1984         return -EINVAL;
1985 
1986     keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1987     if (!keys)
1988         return -ENOMEM;
1989 
1990     mmap_read_lock(current->mm);
1991     srcu_idx = srcu_read_lock(&kvm->srcu);
1992     for (i = 0; i < args->count; i++) {
1993         hva = gfn_to_hva(kvm, args->start_gfn + i);
1994         if (kvm_is_error_hva(hva)) {
1995             r = -EFAULT;
1996             break;
1997         }
1998 
1999         r = get_guest_storage_key(current->mm, hva, &keys[i]);
2000         if (r)
2001             break;
2002     }
2003     srcu_read_unlock(&kvm->srcu, srcu_idx);
2004     mmap_read_unlock(current->mm);
2005 
2006     if (!r) {
2007         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
2008                  sizeof(uint8_t) * args->count);
2009         if (r)
2010             r = -EFAULT;
2011     }
2012 
2013     kvfree(keys);
2014     return r;
2015 }
2016 
2017 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
2018 {
2019     uint8_t *keys;
2020     uint64_t hva;
2021     int srcu_idx, i, r = 0;
2022     bool unlocked;
2023 
2024     if (args->flags != 0)
2025         return -EINVAL;
2026 
2027     /* Enforce sane limit on memory allocation */
2028     if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
2029         return -EINVAL;
2030 
2031     keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
2032     if (!keys)
2033         return -ENOMEM;
2034 
2035     r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
2036                sizeof(uint8_t) * args->count);
2037     if (r) {
2038         r = -EFAULT;
2039         goto out;
2040     }
2041 
2042     /* Enable storage key handling for the guest */
2043     r = s390_enable_skey();
2044     if (r)
2045         goto out;
2046 
2047     i = 0;
2048     mmap_read_lock(current->mm);
2049     srcu_idx = srcu_read_lock(&kvm->srcu);
2050         while (i < args->count) {
2051         unlocked = false;
2052         hva = gfn_to_hva(kvm, args->start_gfn + i);
2053         if (kvm_is_error_hva(hva)) {
2054             r = -EFAULT;
2055             break;
2056         }
2057 
2058         /* Lowest order bit is reserved */
2059         if (keys[i] & 0x01) {
2060             r = -EINVAL;
2061             break;
2062         }
2063 
2064         r = set_guest_storage_key(current->mm, hva, keys[i], 0);
2065         if (r) {
2066             r = fixup_user_fault(current->mm, hva,
2067                          FAULT_FLAG_WRITE, &unlocked);
2068             if (r)
2069                 break;
2070         }
2071         if (!r)
2072             i++;
2073     }
2074     srcu_read_unlock(&kvm->srcu, srcu_idx);
2075     mmap_read_unlock(current->mm);
2076 out:
2077     kvfree(keys);
2078     return r;
2079 }
2080 
2081 /*
2082  * Base address and length must be sent at the start of each block, therefore
2083  * it's cheaper to send some clean data, as long as it's less than the size of
2084  * two longs.
2085  */
2086 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
2087 /* for consistency */
2088 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
2089 
2090 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2091                   u8 *res, unsigned long bufsize)
2092 {
2093     unsigned long pgstev, hva, cur_gfn = args->start_gfn;
2094 
2095     args->count = 0;
2096     while (args->count < bufsize) {
2097         hva = gfn_to_hva(kvm, cur_gfn);
2098         /*
2099          * We return an error if the first value was invalid, but we
2100          * return successfully if at least one value was copied.
2101          */
2102         if (kvm_is_error_hva(hva))
2103             return args->count ? 0 : -EFAULT;
2104         if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2105             pgstev = 0;
2106         res[args->count++] = (pgstev >> 24) & 0x43;
2107         cur_gfn++;
2108     }
2109 
2110     return 0;
2111 }
2112 
2113 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
2114                              gfn_t gfn)
2115 {
2116     return ____gfn_to_memslot(slots, gfn, true);
2117 }
2118 
2119 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2120                           unsigned long cur_gfn)
2121 {
2122     struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
2123     unsigned long ofs = cur_gfn - ms->base_gfn;
2124     struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
2125 
2126     if (ms->base_gfn + ms->npages <= cur_gfn) {
2127         mnode = rb_next(mnode);
2128         /* If we are above the highest slot, wrap around */
2129         if (!mnode)
2130             mnode = rb_first(&slots->gfn_tree);
2131 
2132         ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2133         ofs = 0;
2134     }
2135     ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2136     while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
2137         ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
2138         ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
2139     }
2140     return ms->base_gfn + ofs;
2141 }
2142 
2143 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2144                  u8 *res, unsigned long bufsize)
2145 {
2146     unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2147     struct kvm_memslots *slots = kvm_memslots(kvm);
2148     struct kvm_memory_slot *ms;
2149 
2150     if (unlikely(kvm_memslots_empty(slots)))
2151         return 0;
2152 
2153     cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2154     ms = gfn_to_memslot(kvm, cur_gfn);
2155     args->count = 0;
2156     args->start_gfn = cur_gfn;
2157     if (!ms)
2158         return 0;
2159     next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2160     mem_end = kvm_s390_get_gfn_end(slots);
2161 
2162     while (args->count < bufsize) {
2163         hva = gfn_to_hva(kvm, cur_gfn);
2164         if (kvm_is_error_hva(hva))
2165             return 0;
2166         /* Decrement only if we actually flipped the bit to 0 */
2167         if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2168             atomic64_dec(&kvm->arch.cmma_dirty_pages);
2169         if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2170             pgstev = 0;
2171         /* Save the value */
2172         res[args->count++] = (pgstev >> 24) & 0x43;
2173         /* If the next bit is too far away, stop. */
2174         if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2175             return 0;
2176         /* If we reached the previous "next", find the next one */
2177         if (cur_gfn == next_gfn)
2178             next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2179         /* Reached the end of memory or of the buffer, stop */
2180         if ((next_gfn >= mem_end) ||
2181             (next_gfn - args->start_gfn >= bufsize))
2182             return 0;
2183         cur_gfn++;
2184         /* Reached the end of the current memslot, take the next one. */
2185         if (cur_gfn - ms->base_gfn >= ms->npages) {
2186             ms = gfn_to_memslot(kvm, cur_gfn);
2187             if (!ms)
2188                 return 0;
2189         }
2190     }
2191     return 0;
2192 }
2193 
2194 /*
2195  * This function searches for the next page with dirty CMMA attributes, and
2196  * saves the attributes in the buffer up to either the end of the buffer or
2197  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2198  * no trailing clean bytes are saved.
2199  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2200  * output buffer will indicate 0 as length.
2201  */
2202 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2203                   struct kvm_s390_cmma_log *args)
2204 {
2205     unsigned long bufsize;
2206     int srcu_idx, peek, ret;
2207     u8 *values;
2208 
2209     if (!kvm->arch.use_cmma)
2210         return -ENXIO;
2211     /* Invalid/unsupported flags were specified */
2212     if (args->flags & ~KVM_S390_CMMA_PEEK)
2213         return -EINVAL;
2214     /* Migration mode query, and we are not doing a migration */
2215     peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2216     if (!peek && !kvm->arch.migration_mode)
2217         return -EINVAL;
2218     /* CMMA is disabled or was not used, or the buffer has length zero */
2219     bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2220     if (!bufsize || !kvm->mm->context.uses_cmm) {
2221         memset(args, 0, sizeof(*args));
2222         return 0;
2223     }
2224     /* We are not peeking, and there are no dirty pages */
2225     if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2226         memset(args, 0, sizeof(*args));
2227         return 0;
2228     }
2229 
2230     values = vmalloc(bufsize);
2231     if (!values)
2232         return -ENOMEM;
2233 
2234     mmap_read_lock(kvm->mm);
2235     srcu_idx = srcu_read_lock(&kvm->srcu);
2236     if (peek)
2237         ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2238     else
2239         ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2240     srcu_read_unlock(&kvm->srcu, srcu_idx);
2241     mmap_read_unlock(kvm->mm);
2242 
2243     if (kvm->arch.migration_mode)
2244         args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2245     else
2246         args->remaining = 0;
2247 
2248     if (copy_to_user((void __user *)args->values, values, args->count))
2249         ret = -EFAULT;
2250 
2251     vfree(values);
2252     return ret;
2253 }
2254 
2255 /*
2256  * This function sets the CMMA attributes for the given pages. If the input
2257  * buffer has zero length, no action is taken, otherwise the attributes are
2258  * set and the mm->context.uses_cmm flag is set.
2259  */
2260 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2261                   const struct kvm_s390_cmma_log *args)
2262 {
2263     unsigned long hva, mask, pgstev, i;
2264     uint8_t *bits;
2265     int srcu_idx, r = 0;
2266 
2267     mask = args->mask;
2268 
2269     if (!kvm->arch.use_cmma)
2270         return -ENXIO;
2271     /* invalid/unsupported flags */
2272     if (args->flags != 0)
2273         return -EINVAL;
2274     /* Enforce sane limit on memory allocation */
2275     if (args->count > KVM_S390_CMMA_SIZE_MAX)
2276         return -EINVAL;
2277     /* Nothing to do */
2278     if (args->count == 0)
2279         return 0;
2280 
2281     bits = vmalloc(array_size(sizeof(*bits), args->count));
2282     if (!bits)
2283         return -ENOMEM;
2284 
2285     r = copy_from_user(bits, (void __user *)args->values, args->count);
2286     if (r) {
2287         r = -EFAULT;
2288         goto out;
2289     }
2290 
2291     mmap_read_lock(kvm->mm);
2292     srcu_idx = srcu_read_lock(&kvm->srcu);
2293     for (i = 0; i < args->count; i++) {
2294         hva = gfn_to_hva(kvm, args->start_gfn + i);
2295         if (kvm_is_error_hva(hva)) {
2296             r = -EFAULT;
2297             break;
2298         }
2299 
2300         pgstev = bits[i];
2301         pgstev = pgstev << 24;
2302         mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2303         set_pgste_bits(kvm->mm, hva, mask, pgstev);
2304     }
2305     srcu_read_unlock(&kvm->srcu, srcu_idx);
2306     mmap_read_unlock(kvm->mm);
2307 
2308     if (!kvm->mm->context.uses_cmm) {
2309         mmap_write_lock(kvm->mm);
2310         kvm->mm->context.uses_cmm = 1;
2311         mmap_write_unlock(kvm->mm);
2312     }
2313 out:
2314     vfree(bits);
2315     return r;
2316 }
2317 
2318 /**
2319  * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
2320  * non protected.
2321  * @kvm: the VM whose protected vCPUs are to be converted
2322  * @rc: return value for the RC field of the UVC (in case of error)
2323  * @rrc: return value for the RRC field of the UVC (in case of error)
2324  *
2325  * Does not stop in case of error, tries to convert as many
2326  * CPUs as possible. In case of error, the RC and RRC of the last error are
2327  * returned.
2328  *
2329  * Return: 0 in case of success, otherwise -EIO
2330  */
2331 int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2332 {
2333     struct kvm_vcpu *vcpu;
2334     unsigned long i;
2335     u16 _rc, _rrc;
2336     int ret = 0;
2337 
2338     /*
2339      * We ignore failures and try to destroy as many CPUs as possible.
2340      * At the same time we must not free the assigned resources when
2341      * this fails, as the ultravisor has still access to that memory.
2342      * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2343      * behind.
2344      * We want to return the first failure rc and rrc, though.
2345      */
2346     kvm_for_each_vcpu(i, vcpu, kvm) {
2347         mutex_lock(&vcpu->mutex);
2348         if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
2349             *rc = _rc;
2350             *rrc = _rrc;
2351             ret = -EIO;
2352         }
2353         mutex_unlock(&vcpu->mutex);
2354     }
2355     /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2356     if (use_gisa)
2357         kvm_s390_gisa_enable(kvm);
2358     return ret;
2359 }
2360 
2361 /**
2362  * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
2363  * to protected.
2364  * @kvm: the VM whose protected vCPUs are to be converted
2365  * @rc: return value for the RC field of the UVC (in case of error)
2366  * @rrc: return value for the RRC field of the UVC (in case of error)
2367  *
2368  * Tries to undo the conversion in case of error.
2369  *
2370  * Return: 0 in case of success, otherwise -EIO
2371  */
2372 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2373 {
2374     unsigned long i;
2375     int r = 0;
2376     u16 dummy;
2377 
2378     struct kvm_vcpu *vcpu;
2379 
2380     /* Disable the GISA if the ultravisor does not support AIV. */
2381     if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2382         kvm_s390_gisa_disable(kvm);
2383 
2384     kvm_for_each_vcpu(i, vcpu, kvm) {
2385         mutex_lock(&vcpu->mutex);
2386         r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2387         mutex_unlock(&vcpu->mutex);
2388         if (r)
2389             break;
2390     }
2391     if (r)
2392         kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2393     return r;
2394 }
2395 
2396 /*
2397  * Here we provide user space with a direct interface to query UV
2398  * related data like UV maxima and available features as well as
2399  * feature specific data.
2400  *
2401  * To facilitate future extension of the data structures we'll try to
2402  * write data up to the maximum requested length.
2403  */
2404 static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
2405 {
2406     ssize_t len_min;
2407 
2408     switch (info->header.id) {
2409     case KVM_PV_INFO_VM: {
2410         len_min =  sizeof(info->header) + sizeof(info->vm);
2411 
2412         if (info->header.len_max < len_min)
2413             return -EINVAL;
2414 
2415         memcpy(info->vm.inst_calls_list,
2416                uv_info.inst_calls_list,
2417                sizeof(uv_info.inst_calls_list));
2418 
2419         /* It's max cpuid not max cpus, so it's off by one */
2420         info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
2421         info->vm.max_guests = uv_info.max_num_sec_conf;
2422         info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
2423         info->vm.feature_indication = uv_info.uv_feature_indications;
2424 
2425         return len_min;
2426     }
2427     case KVM_PV_INFO_DUMP: {
2428         len_min =  sizeof(info->header) + sizeof(info->dump);
2429 
2430         if (info->header.len_max < len_min)
2431             return -EINVAL;
2432 
2433         info->dump.dump_cpu_buffer_len = uv_info.guest_cpu_stor_len;
2434         info->dump.dump_config_mem_buffer_per_1m = uv_info.conf_dump_storage_state_len;
2435         info->dump.dump_config_finalize_len = uv_info.conf_dump_finalize_len;
2436         return len_min;
2437     }
2438     default:
2439         return -EINVAL;
2440     }
2441 }
2442 
2443 static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
2444                struct kvm_s390_pv_dmp dmp)
2445 {
2446     int r = -EINVAL;
2447     void __user *result_buff = (void __user *)dmp.buff_addr;
2448 
2449     switch (dmp.subcmd) {
2450     case KVM_PV_DUMP_INIT: {
2451         if (kvm->arch.pv.dumping)
2452             break;
2453 
2454         /*
2455          * Block SIE entry as concurrent dump UVCs could lead
2456          * to validities.
2457          */
2458         kvm_s390_vcpu_block_all(kvm);
2459 
2460         r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2461                   UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
2462         KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
2463                  cmd->rc, cmd->rrc);
2464         if (!r) {
2465             kvm->arch.pv.dumping = true;
2466         } else {
2467             kvm_s390_vcpu_unblock_all(kvm);
2468             r = -EINVAL;
2469         }
2470         break;
2471     }
2472     case KVM_PV_DUMP_CONFIG_STOR_STATE: {
2473         if (!kvm->arch.pv.dumping)
2474             break;
2475 
2476         /*
2477          * gaddr is an output parameter since we might stop
2478          * early. As dmp will be copied back in our caller, we
2479          * don't need to do it ourselves.
2480          */
2481         r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
2482                         &cmd->rc, &cmd->rrc);
2483         break;
2484     }
2485     case KVM_PV_DUMP_COMPLETE: {
2486         if (!kvm->arch.pv.dumping)
2487             break;
2488 
2489         r = -EINVAL;
2490         if (dmp.buff_len < uv_info.conf_dump_finalize_len)
2491             break;
2492 
2493         r = kvm_s390_pv_dump_complete(kvm, result_buff,
2494                           &cmd->rc, &cmd->rrc);
2495         break;
2496     }
2497     default:
2498         r = -ENOTTY;
2499         break;
2500     }
2501 
2502     return r;
2503 }
2504 
2505 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2506 {
2507     int r = 0;
2508     u16 dummy;
2509     void __user *argp = (void __user *)cmd->data;
2510 
2511     switch (cmd->cmd) {
2512     case KVM_PV_ENABLE: {
2513         r = -EINVAL;
2514         if (kvm_s390_pv_is_protected(kvm))
2515             break;
2516 
2517         /*
2518          *  FMT 4 SIE needs esca. As we never switch back to bsca from
2519          *  esca, we need no cleanup in the error cases below
2520          */
2521         r = sca_switch_to_extended(kvm);
2522         if (r)
2523             break;
2524 
2525         mmap_write_lock(current->mm);
2526         r = gmap_mark_unmergeable();
2527         mmap_write_unlock(current->mm);
2528         if (r)
2529             break;
2530 
2531         r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2532         if (r)
2533             break;
2534 
2535         r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2536         if (r)
2537             kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2538 
2539         /* we need to block service interrupts from now on */
2540         set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2541         break;
2542     }
2543     case KVM_PV_DISABLE: {
2544         r = -EINVAL;
2545         if (!kvm_s390_pv_is_protected(kvm))
2546             break;
2547 
2548         r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2549         /*
2550          * If a CPU could not be destroyed, destroy VM will also fail.
2551          * There is no point in trying to destroy it. Instead return
2552          * the rc and rrc from the first CPU that failed destroying.
2553          */
2554         if (r)
2555             break;
2556         r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2557 
2558         /* no need to block service interrupts any more */
2559         clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2560         break;
2561     }
2562     case KVM_PV_SET_SEC_PARMS: {
2563         struct kvm_s390_pv_sec_parm parms = {};
2564         void *hdr;
2565 
2566         r = -EINVAL;
2567         if (!kvm_s390_pv_is_protected(kvm))
2568             break;
2569 
2570         r = -EFAULT;
2571         if (copy_from_user(&parms, argp, sizeof(parms)))
2572             break;
2573 
2574         /* Currently restricted to 8KB */
2575         r = -EINVAL;
2576         if (parms.length > PAGE_SIZE * 2)
2577             break;
2578 
2579         r = -ENOMEM;
2580         hdr = vmalloc(parms.length);
2581         if (!hdr)
2582             break;
2583 
2584         r = -EFAULT;
2585         if (!copy_from_user(hdr, (void __user *)parms.origin,
2586                     parms.length))
2587             r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2588                               &cmd->rc, &cmd->rrc);
2589 
2590         vfree(hdr);
2591         break;
2592     }
2593     case KVM_PV_UNPACK: {
2594         struct kvm_s390_pv_unp unp = {};
2595 
2596         r = -EINVAL;
2597         if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2598             break;
2599 
2600         r = -EFAULT;
2601         if (copy_from_user(&unp, argp, sizeof(unp)))
2602             break;
2603 
2604         r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2605                        &cmd->rc, &cmd->rrc);
2606         break;
2607     }
2608     case KVM_PV_VERIFY: {
2609         r = -EINVAL;
2610         if (!kvm_s390_pv_is_protected(kvm))
2611             break;
2612 
2613         r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2614                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2615         KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2616                  cmd->rrc);
2617         break;
2618     }
2619     case KVM_PV_PREP_RESET: {
2620         r = -EINVAL;
2621         if (!kvm_s390_pv_is_protected(kvm))
2622             break;
2623 
2624         r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2625                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2626         KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2627                  cmd->rc, cmd->rrc);
2628         break;
2629     }
2630     case KVM_PV_UNSHARE_ALL: {
2631         r = -EINVAL;
2632         if (!kvm_s390_pv_is_protected(kvm))
2633             break;
2634 
2635         r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2636                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2637         KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2638                  cmd->rc, cmd->rrc);
2639         break;
2640     }
2641     case KVM_PV_INFO: {
2642         struct kvm_s390_pv_info info = {};
2643         ssize_t data_len;
2644 
2645         /*
2646          * No need to check the VM protection here.
2647          *
2648          * Maybe user space wants to query some of the data
2649          * when the VM is still unprotected. If we see the
2650          * need to fence a new data command we can still
2651          * return an error in the info handler.
2652          */
2653 
2654         r = -EFAULT;
2655         if (copy_from_user(&info, argp, sizeof(info.header)))
2656             break;
2657 
2658         r = -EINVAL;
2659         if (info.header.len_max < sizeof(info.header))
2660             break;
2661 
2662         data_len = kvm_s390_handle_pv_info(&info);
2663         if (data_len < 0) {
2664             r = data_len;
2665             break;
2666         }
2667         /*
2668          * If a data command struct is extended (multiple
2669          * times) this can be used to determine how much of it
2670          * is valid.
2671          */
2672         info.header.len_written = data_len;
2673 
2674         r = -EFAULT;
2675         if (copy_to_user(argp, &info, data_len))
2676             break;
2677 
2678         r = 0;
2679         break;
2680     }
2681     case KVM_PV_DUMP: {
2682         struct kvm_s390_pv_dmp dmp;
2683 
2684         r = -EINVAL;
2685         if (!kvm_s390_pv_is_protected(kvm))
2686             break;
2687 
2688         r = -EFAULT;
2689         if (copy_from_user(&dmp, argp, sizeof(dmp)))
2690             break;
2691 
2692         r = kvm_s390_pv_dmp(kvm, cmd, dmp);
2693         if (r)
2694             break;
2695 
2696         if (copy_to_user(argp, &dmp, sizeof(dmp))) {
2697             r = -EFAULT;
2698             break;
2699         }
2700 
2701         break;
2702     }
2703     default:
2704         r = -ENOTTY;
2705     }
2706     return r;
2707 }
2708 
2709 static bool access_key_invalid(u8 access_key)
2710 {
2711     return access_key > 0xf;
2712 }
2713 
2714 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2715 {
2716     void __user *uaddr = (void __user *)mop->buf;
2717     u64 supported_flags;
2718     void *tmpbuf = NULL;
2719     int r, srcu_idx;
2720 
2721     supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2722               | KVM_S390_MEMOP_F_CHECK_ONLY;
2723     if (mop->flags & ~supported_flags || !mop->size)
2724         return -EINVAL;
2725     if (mop->size > MEM_OP_MAX_SIZE)
2726         return -E2BIG;
2727     /*
2728      * This is technically a heuristic only, if the kvm->lock is not
2729      * taken, it is not guaranteed that the vm is/remains non-protected.
2730      * This is ok from a kernel perspective, wrongdoing is detected
2731      * on the access, -EFAULT is returned and the vm may crash the
2732      * next time it accesses the memory in question.
2733      * There is no sane usecase to do switching and a memop on two
2734      * different CPUs at the same time.
2735      */
2736     if (kvm_s390_pv_get_handle(kvm))
2737         return -EINVAL;
2738     if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2739         if (access_key_invalid(mop->key))
2740             return -EINVAL;
2741     } else {
2742         mop->key = 0;
2743     }
2744     if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2745         tmpbuf = vmalloc(mop->size);
2746         if (!tmpbuf)
2747             return -ENOMEM;
2748     }
2749 
2750     srcu_idx = srcu_read_lock(&kvm->srcu);
2751 
2752     if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2753         r = PGM_ADDRESSING;
2754         goto out_unlock;
2755     }
2756 
2757     switch (mop->op) {
2758     case KVM_S390_MEMOP_ABSOLUTE_READ: {
2759         if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2760             r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2761         } else {
2762             r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2763                               mop->size, GACC_FETCH, mop->key);
2764             if (r == 0) {
2765                 if (copy_to_user(uaddr, tmpbuf, mop->size))
2766                     r = -EFAULT;
2767             }
2768         }
2769         break;
2770     }
2771     case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2772         if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2773             r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2774         } else {
2775             if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2776                 r = -EFAULT;
2777                 break;
2778             }
2779             r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2780                               mop->size, GACC_STORE, mop->key);
2781         }
2782         break;
2783     }
2784     default:
2785         r = -EINVAL;
2786     }
2787 
2788 out_unlock:
2789     srcu_read_unlock(&kvm->srcu, srcu_idx);
2790 
2791     vfree(tmpbuf);
2792     return r;
2793 }
2794 
2795 long kvm_arch_vm_ioctl(struct file *filp,
2796                unsigned int ioctl, unsigned long arg)
2797 {
2798     struct kvm *kvm = filp->private_data;
2799     void __user *argp = (void __user *)arg;
2800     struct kvm_device_attr attr;
2801     int r;
2802 
2803     switch (ioctl) {
2804     case KVM_S390_INTERRUPT: {
2805         struct kvm_s390_interrupt s390int;
2806 
2807         r = -EFAULT;
2808         if (copy_from_user(&s390int, argp, sizeof(s390int)))
2809             break;
2810         r = kvm_s390_inject_vm(kvm, &s390int);
2811         break;
2812     }
2813     case KVM_CREATE_IRQCHIP: {
2814         struct kvm_irq_routing_entry routing;
2815 
2816         r = -EINVAL;
2817         if (kvm->arch.use_irqchip) {
2818             /* Set up dummy routing. */
2819             memset(&routing, 0, sizeof(routing));
2820             r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2821         }
2822         break;
2823     }
2824     case KVM_SET_DEVICE_ATTR: {
2825         r = -EFAULT;
2826         if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2827             break;
2828         r = kvm_s390_vm_set_attr(kvm, &attr);
2829         break;
2830     }
2831     case KVM_GET_DEVICE_ATTR: {
2832         r = -EFAULT;
2833         if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2834             break;
2835         r = kvm_s390_vm_get_attr(kvm, &attr);
2836         break;
2837     }
2838     case KVM_HAS_DEVICE_ATTR: {
2839         r = -EFAULT;
2840         if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2841             break;
2842         r = kvm_s390_vm_has_attr(kvm, &attr);
2843         break;
2844     }
2845     case KVM_S390_GET_SKEYS: {
2846         struct kvm_s390_skeys args;
2847 
2848         r = -EFAULT;
2849         if (copy_from_user(&args, argp,
2850                    sizeof(struct kvm_s390_skeys)))
2851             break;
2852         r = kvm_s390_get_skeys(kvm, &args);
2853         break;
2854     }
2855     case KVM_S390_SET_SKEYS: {
2856         struct kvm_s390_skeys args;
2857 
2858         r = -EFAULT;
2859         if (copy_from_user(&args, argp,
2860                    sizeof(struct kvm_s390_skeys)))
2861             break;
2862         r = kvm_s390_set_skeys(kvm, &args);
2863         break;
2864     }
2865     case KVM_S390_GET_CMMA_BITS: {
2866         struct kvm_s390_cmma_log args;
2867 
2868         r = -EFAULT;
2869         if (copy_from_user(&args, argp, sizeof(args)))
2870             break;
2871         mutex_lock(&kvm->slots_lock);
2872         r = kvm_s390_get_cmma_bits(kvm, &args);
2873         mutex_unlock(&kvm->slots_lock);
2874         if (!r) {
2875             r = copy_to_user(argp, &args, sizeof(args));
2876             if (r)
2877                 r = -EFAULT;
2878         }
2879         break;
2880     }
2881     case KVM_S390_SET_CMMA_BITS: {
2882         struct kvm_s390_cmma_log args;
2883 
2884         r = -EFAULT;
2885         if (copy_from_user(&args, argp, sizeof(args)))
2886             break;
2887         mutex_lock(&kvm->slots_lock);
2888         r = kvm_s390_set_cmma_bits(kvm, &args);
2889         mutex_unlock(&kvm->slots_lock);
2890         break;
2891     }
2892     case KVM_S390_PV_COMMAND: {
2893         struct kvm_pv_cmd args;
2894 
2895         /* protvirt means user cpu state */
2896         kvm_s390_set_user_cpu_state_ctrl(kvm);
2897         r = 0;
2898         if (!is_prot_virt_host()) {
2899             r = -EINVAL;
2900             break;
2901         }
2902         if (copy_from_user(&args, argp, sizeof(args))) {
2903             r = -EFAULT;
2904             break;
2905         }
2906         if (args.flags) {
2907             r = -EINVAL;
2908             break;
2909         }
2910         mutex_lock(&kvm->lock);
2911         r = kvm_s390_handle_pv(kvm, &args);
2912         mutex_unlock(&kvm->lock);
2913         if (copy_to_user(argp, &args, sizeof(args))) {
2914             r = -EFAULT;
2915             break;
2916         }
2917         break;
2918     }
2919     case KVM_S390_MEM_OP: {
2920         struct kvm_s390_mem_op mem_op;
2921 
2922         if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2923             r = kvm_s390_vm_mem_op(kvm, &mem_op);
2924         else
2925             r = -EFAULT;
2926         break;
2927     }
2928     case KVM_S390_ZPCI_OP: {
2929         struct kvm_s390_zpci_op args;
2930 
2931         r = -EINVAL;
2932         if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
2933             break;
2934         if (copy_from_user(&args, argp, sizeof(args))) {
2935             r = -EFAULT;
2936             break;
2937         }
2938         r = kvm_s390_pci_zpci_op(kvm, &args);
2939         break;
2940     }
2941     default:
2942         r = -ENOTTY;
2943     }
2944 
2945     return r;
2946 }
2947 
2948 static int kvm_s390_apxa_installed(void)
2949 {
2950     struct ap_config_info info;
2951 
2952     if (ap_instructions_available()) {
2953         if (ap_qci(&info) == 0)
2954             return info.apxa;
2955     }
2956 
2957     return 0;
2958 }
2959 
2960 /*
2961  * The format of the crypto control block (CRYCB) is specified in the 3 low
2962  * order bits of the CRYCB designation (CRYCBD) field as follows:
2963  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2964  *       AP extended addressing (APXA) facility are installed.
2965  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2966  * Format 2: Both the APXA and MSAX3 facilities are installed
2967  */
2968 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2969 {
2970     kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2971 
2972     /* Clear the CRYCB format bits - i.e., set format 0 by default */
2973     kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2974 
2975     /* Check whether MSAX3 is installed */
2976     if (!test_kvm_facility(kvm, 76))
2977         return;
2978 
2979     if (kvm_s390_apxa_installed())
2980         kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2981     else
2982         kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2983 }
2984 
2985 /*
2986  * kvm_arch_crypto_set_masks
2987  *
2988  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2989  *   to be set.
2990  * @apm: the mask identifying the accessible AP adapters
2991  * @aqm: the mask identifying the accessible AP domains
2992  * @adm: the mask identifying the accessible AP control domains
2993  *
2994  * Set the masks that identify the adapters, domains and control domains to
2995  * which the KVM guest is granted access.
2996  *
2997  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2998  *   function.
2999  */
3000 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
3001                    unsigned long *aqm, unsigned long *adm)
3002 {
3003     struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
3004 
3005     kvm_s390_vcpu_block_all(kvm);
3006 
3007     switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
3008     case CRYCB_FORMAT2: /* APCB1 use 256 bits */
3009         memcpy(crycb->apcb1.apm, apm, 32);
3010         VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
3011              apm[0], apm[1], apm[2], apm[3]);
3012         memcpy(crycb->apcb1.aqm, aqm, 32);
3013         VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
3014              aqm[0], aqm[1], aqm[2], aqm[3]);
3015         memcpy(crycb->apcb1.adm, adm, 32);
3016         VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
3017              adm[0], adm[1], adm[2], adm[3]);
3018         break;
3019     case CRYCB_FORMAT1:
3020     case CRYCB_FORMAT0: /* Fall through both use APCB0 */
3021         memcpy(crycb->apcb0.apm, apm, 8);
3022         memcpy(crycb->apcb0.aqm, aqm, 2);
3023         memcpy(crycb->apcb0.adm, adm, 2);
3024         VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
3025              apm[0], *((unsigned short *)aqm),
3026              *((unsigned short *)adm));
3027         break;
3028     default:    /* Can not happen */
3029         break;
3030     }
3031 
3032     /* recreate the shadow crycb for each vcpu */
3033     kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3034     kvm_s390_vcpu_unblock_all(kvm);
3035 }
3036 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
3037 
3038 /*
3039  * kvm_arch_crypto_clear_masks
3040  *
3041  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
3042  *   to be cleared.
3043  *
3044  * Clear the masks that identify the adapters, domains and control domains to
3045  * which the KVM guest is granted access.
3046  *
3047  * Note: The kvm->lock mutex must be locked by the caller before invoking this
3048  *   function.
3049  */
3050 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
3051 {
3052     kvm_s390_vcpu_block_all(kvm);
3053 
3054     memset(&kvm->arch.crypto.crycb->apcb0, 0,
3055            sizeof(kvm->arch.crypto.crycb->apcb0));
3056     memset(&kvm->arch.crypto.crycb->apcb1, 0,
3057            sizeof(kvm->arch.crypto.crycb->apcb1));
3058 
3059     VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
3060     /* recreate the shadow crycb for each vcpu */
3061     kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
3062     kvm_s390_vcpu_unblock_all(kvm);
3063 }
3064 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
3065 
3066 static u64 kvm_s390_get_initial_cpuid(void)
3067 {
3068     struct cpuid cpuid;
3069 
3070     get_cpu_id(&cpuid);
3071     cpuid.version = 0xff;
3072     return *((u64 *) &cpuid);
3073 }
3074 
3075 static void kvm_s390_crypto_init(struct kvm *kvm)
3076 {
3077     kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
3078     kvm_s390_set_crycb_format(kvm);
3079     init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
3080 
3081     if (!test_kvm_facility(kvm, 76))
3082         return;
3083 
3084     /* Enable AES/DEA protected key functions by default */
3085     kvm->arch.crypto.aes_kw = 1;
3086     kvm->arch.crypto.dea_kw = 1;
3087     get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
3088              sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
3089     get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
3090              sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
3091 }
3092 
3093 static void sca_dispose(struct kvm *kvm)
3094 {
3095     if (kvm->arch.use_esca)
3096         free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
3097     else
3098         free_page((unsigned long)(kvm->arch.sca));
3099     kvm->arch.sca = NULL;
3100 }
3101 
3102 void kvm_arch_free_vm(struct kvm *kvm)
3103 {
3104     if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
3105         kvm_s390_pci_clear_list(kvm);
3106 
3107     __kvm_arch_free_vm(kvm);
3108 }
3109 
3110 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
3111 {
3112     gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
3113     int i, rc;
3114     char debug_name[16];
3115     static unsigned long sca_offset;
3116 
3117     rc = -EINVAL;
3118 #ifdef CONFIG_KVM_S390_UCONTROL
3119     if (type & ~KVM_VM_S390_UCONTROL)
3120         goto out_err;
3121     if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
3122         goto out_err;
3123 #else
3124     if (type)
3125         goto out_err;
3126 #endif
3127 
3128     rc = s390_enable_sie();
3129     if (rc)
3130         goto out_err;
3131 
3132     rc = -ENOMEM;
3133 
3134     if (!sclp.has_64bscao)
3135         alloc_flags |= GFP_DMA;
3136     rwlock_init(&kvm->arch.sca_lock);
3137     /* start with basic SCA */
3138     kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
3139     if (!kvm->arch.sca)
3140         goto out_err;
3141     mutex_lock(&kvm_lock);
3142     sca_offset += 16;
3143     if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
3144         sca_offset = 0;
3145     kvm->arch.sca = (struct bsca_block *)
3146             ((char *) kvm->arch.sca + sca_offset);
3147     mutex_unlock(&kvm_lock);
3148 
3149     sprintf(debug_name, "kvm-%u", current->pid);
3150 
3151     kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
3152     if (!kvm->arch.dbf)
3153         goto out_err;
3154 
3155     BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
3156     kvm->arch.sie_page2 =
3157          (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
3158     if (!kvm->arch.sie_page2)
3159         goto out_err;
3160 
3161     kvm->arch.sie_page2->kvm = kvm;
3162     kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
3163 
3164     for (i = 0; i < kvm_s390_fac_size(); i++) {
3165         kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
3166                           (kvm_s390_fac_base[i] |
3167                            kvm_s390_fac_ext[i]);
3168         kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
3169                           kvm_s390_fac_base[i];
3170     }
3171     kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
3172 
3173     /* we are always in czam mode - even on pre z14 machines */
3174     set_kvm_facility(kvm->arch.model.fac_mask, 138);
3175     set_kvm_facility(kvm->arch.model.fac_list, 138);
3176     /* we emulate STHYI in kvm */
3177     set_kvm_facility(kvm->arch.model.fac_mask, 74);
3178     set_kvm_facility(kvm->arch.model.fac_list, 74);
3179     if (MACHINE_HAS_TLB_GUEST) {
3180         set_kvm_facility(kvm->arch.model.fac_mask, 147);
3181         set_kvm_facility(kvm->arch.model.fac_list, 147);
3182     }
3183 
3184     if (css_general_characteristics.aiv && test_facility(65))
3185         set_kvm_facility(kvm->arch.model.fac_mask, 65);
3186 
3187     kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
3188     kvm->arch.model.ibc = sclp.ibc & 0x0fff;
3189 
3190     kvm_s390_crypto_init(kvm);
3191 
3192     if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
3193         mutex_lock(&kvm->lock);
3194         kvm_s390_pci_init_list(kvm);
3195         kvm_s390_vcpu_pci_enable_interp(kvm);
3196         mutex_unlock(&kvm->lock);
3197     }
3198 
3199     mutex_init(&kvm->arch.float_int.ais_lock);
3200     spin_lock_init(&kvm->arch.float_int.lock);
3201     for (i = 0; i < FIRQ_LIST_COUNT; i++)
3202         INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
3203     init_waitqueue_head(&kvm->arch.ipte_wq);
3204     mutex_init(&kvm->arch.ipte_mutex);
3205 
3206     debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
3207     VM_EVENT(kvm, 3, "vm created with type %lu", type);
3208 
3209     if (type & KVM_VM_S390_UCONTROL) {
3210         kvm->arch.gmap = NULL;
3211         kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
3212     } else {
3213         if (sclp.hamax == U64_MAX)
3214             kvm->arch.mem_limit = TASK_SIZE_MAX;
3215         else
3216             kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
3217                             sclp.hamax + 1);
3218         kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
3219         if (!kvm->arch.gmap)
3220             goto out_err;
3221         kvm->arch.gmap->private = kvm;
3222         kvm->arch.gmap->pfault_enabled = 0;
3223     }
3224 
3225     kvm->arch.use_pfmfi = sclp.has_pfmfi;
3226     kvm->arch.use_skf = sclp.has_skey;
3227     spin_lock_init(&kvm->arch.start_stop_lock);
3228     kvm_s390_vsie_init(kvm);
3229     if (use_gisa)
3230         kvm_s390_gisa_init(kvm);
3231     KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
3232 
3233     return 0;
3234 out_err:
3235     free_page((unsigned long)kvm->arch.sie_page2);
3236     debug_unregister(kvm->arch.dbf);
3237     sca_dispose(kvm);
3238     KVM_EVENT(3, "creation of vm failed: %d", rc);
3239     return rc;
3240 }
3241 
3242 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
3243 {
3244     u16 rc, rrc;
3245 
3246     VCPU_EVENT(vcpu, 3, "%s", "free cpu");
3247     trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
3248     kvm_s390_clear_local_irqs(vcpu);
3249     kvm_clear_async_pf_completion_queue(vcpu);
3250     if (!kvm_is_ucontrol(vcpu->kvm))
3251         sca_del_vcpu(vcpu);
3252     kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3253 
3254     if (kvm_is_ucontrol(vcpu->kvm))
3255         gmap_remove(vcpu->arch.gmap);
3256 
3257     if (vcpu->kvm->arch.use_cmma)
3258         kvm_s390_vcpu_unsetup_cmma(vcpu);
3259     /* We can not hold the vcpu mutex here, we are already dying */
3260     if (kvm_s390_pv_cpu_get_handle(vcpu))
3261         kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
3262     free_page((unsigned long)(vcpu->arch.sie_block));
3263 }
3264 
3265 void kvm_arch_destroy_vm(struct kvm *kvm)
3266 {
3267     u16 rc, rrc;
3268 
3269     kvm_destroy_vcpus(kvm);
3270     sca_dispose(kvm);
3271     kvm_s390_gisa_destroy(kvm);
3272     /*
3273      * We are already at the end of life and kvm->lock is not taken.
3274      * This is ok as the file descriptor is closed by now and nobody
3275      * can mess with the pv state. To avoid lockdep_assert_held from
3276      * complaining we do not use kvm_s390_pv_is_protected.
3277      */
3278     if (kvm_s390_pv_get_handle(kvm))
3279         kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
3280     /*
3281      * Remove the mmu notifier only when the whole KVM VM is torn down,
3282      * and only if one was registered to begin with. If the VM is
3283      * currently not protected, but has been previously been protected,
3284      * then it's possible that the notifier is still registered.
3285      */
3286     if (kvm->arch.pv.mmu_notifier.ops)
3287         mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
3288 
3289     debug_unregister(kvm->arch.dbf);
3290     free_page((unsigned long)kvm->arch.sie_page2);
3291     if (!kvm_is_ucontrol(kvm))
3292         gmap_remove(kvm->arch.gmap);
3293     kvm_s390_destroy_adapters(kvm);
3294     kvm_s390_clear_float_irqs(kvm);
3295     kvm_s390_vsie_destroy(kvm);
3296     KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
3297 }
3298 
3299 /* Section: vcpu related */
3300 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
3301 {
3302     vcpu->arch.gmap = gmap_create(current->mm, -1UL);
3303     if (!vcpu->arch.gmap)
3304         return -ENOMEM;
3305     vcpu->arch.gmap->private = vcpu->kvm;
3306 
3307     return 0;
3308 }
3309 
3310 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
3311 {
3312     if (!kvm_s390_use_sca_entries())
3313         return;
3314     read_lock(&vcpu->kvm->arch.sca_lock);
3315     if (vcpu->kvm->arch.use_esca) {
3316         struct esca_block *sca = vcpu->kvm->arch.sca;
3317 
3318         clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3319         sca->cpu[vcpu->vcpu_id].sda = 0;
3320     } else {
3321         struct bsca_block *sca = vcpu->kvm->arch.sca;
3322 
3323         clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3324         sca->cpu[vcpu->vcpu_id].sda = 0;
3325     }
3326     read_unlock(&vcpu->kvm->arch.sca_lock);
3327 }
3328 
3329 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
3330 {
3331     if (!kvm_s390_use_sca_entries()) {
3332         struct bsca_block *sca = vcpu->kvm->arch.sca;
3333 
3334         /* we still need the basic sca for the ipte control */
3335         vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3336         vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3337         return;
3338     }
3339     read_lock(&vcpu->kvm->arch.sca_lock);
3340     if (vcpu->kvm->arch.use_esca) {
3341         struct esca_block *sca = vcpu->kvm->arch.sca;
3342 
3343         sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3344         vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3345         vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
3346         vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3347         set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
3348     } else {
3349         struct bsca_block *sca = vcpu->kvm->arch.sca;
3350 
3351         sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
3352         vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
3353         vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
3354         set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
3355     }
3356     read_unlock(&vcpu->kvm->arch.sca_lock);
3357 }
3358 
3359 /* Basic SCA to Extended SCA data copy routines */
3360 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
3361 {
3362     d->sda = s->sda;
3363     d->sigp_ctrl.c = s->sigp_ctrl.c;
3364     d->sigp_ctrl.scn = s->sigp_ctrl.scn;
3365 }
3366 
3367 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
3368 {
3369     int i;
3370 
3371     d->ipte_control = s->ipte_control;
3372     d->mcn[0] = s->mcn;
3373     for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
3374         sca_copy_entry(&d->cpu[i], &s->cpu[i]);
3375 }
3376 
3377 static int sca_switch_to_extended(struct kvm *kvm)
3378 {
3379     struct bsca_block *old_sca = kvm->arch.sca;
3380     struct esca_block *new_sca;
3381     struct kvm_vcpu *vcpu;
3382     unsigned long vcpu_idx;
3383     u32 scaol, scaoh;
3384 
3385     if (kvm->arch.use_esca)
3386         return 0;
3387 
3388     new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3389     if (!new_sca)
3390         return -ENOMEM;
3391 
3392     scaoh = (u32)((u64)(new_sca) >> 32);
3393     scaol = (u32)(u64)(new_sca) & ~0x3fU;
3394 
3395     kvm_s390_vcpu_block_all(kvm);
3396     write_lock(&kvm->arch.sca_lock);
3397 
3398     sca_copy_b_to_e(new_sca, old_sca);
3399 
3400     kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3401         vcpu->arch.sie_block->scaoh = scaoh;
3402         vcpu->arch.sie_block->scaol = scaol;
3403         vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3404     }
3405     kvm->arch.sca = new_sca;
3406     kvm->arch.use_esca = 1;
3407 
3408     write_unlock(&kvm->arch.sca_lock);
3409     kvm_s390_vcpu_unblock_all(kvm);
3410 
3411     free_page((unsigned long)old_sca);
3412 
3413     VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3414          old_sca, kvm->arch.sca);
3415     return 0;
3416 }
3417 
3418 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3419 {
3420     int rc;
3421 
3422     if (!kvm_s390_use_sca_entries()) {
3423         if (id < KVM_MAX_VCPUS)
3424             return true;
3425         return false;
3426     }
3427     if (id < KVM_S390_BSCA_CPU_SLOTS)
3428         return true;
3429     if (!sclp.has_esca || !sclp.has_64bscao)
3430         return false;
3431 
3432     rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3433 
3434     return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3435 }
3436 
3437 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3438 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3439 {
3440     WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3441     raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3442     vcpu->arch.cputm_start = get_tod_clock_fast();
3443     raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3444 }
3445 
3446 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3447 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3448 {
3449     WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3450     raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3451     vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3452     vcpu->arch.cputm_start = 0;
3453     raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3454 }
3455 
3456 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3457 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3458 {
3459     WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3460     vcpu->arch.cputm_enabled = true;
3461     __start_cpu_timer_accounting(vcpu);
3462 }
3463 
3464 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3465 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3466 {
3467     WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3468     __stop_cpu_timer_accounting(vcpu);
3469     vcpu->arch.cputm_enabled = false;
3470 }
3471 
3472 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3473 {
3474     preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3475     __enable_cpu_timer_accounting(vcpu);
3476     preempt_enable();
3477 }
3478 
3479 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3480 {
3481     preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3482     __disable_cpu_timer_accounting(vcpu);
3483     preempt_enable();
3484 }
3485 
3486 /* set the cpu timer - may only be called from the VCPU thread itself */
3487 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3488 {
3489     preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3490     raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3491     if (vcpu->arch.cputm_enabled)
3492         vcpu->arch.cputm_start = get_tod_clock_fast();
3493     vcpu->arch.sie_block->cputm = cputm;
3494     raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3495     preempt_enable();
3496 }
3497 
3498 /* update and get the cpu timer - can also be called from other VCPU threads */
3499 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3500 {
3501     unsigned int seq;
3502     __u64 value;
3503 
3504     if (unlikely(!vcpu->arch.cputm_enabled))
3505         return vcpu->arch.sie_block->cputm;
3506 
3507     preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3508     do {
3509         seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3510         /*
3511          * If the writer would ever execute a read in the critical
3512          * section, e.g. in irq context, we have a deadlock.
3513          */
3514         WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3515         value = vcpu->arch.sie_block->cputm;
3516         /* if cputm_start is 0, accounting is being started/stopped */
3517         if (likely(vcpu->arch.cputm_start))
3518             value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3519     } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3520     preempt_enable();
3521     return value;
3522 }
3523 
3524 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3525 {
3526 
3527     gmap_enable(vcpu->arch.enabled_gmap);
3528     kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3529     if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3530         __start_cpu_timer_accounting(vcpu);
3531     vcpu->cpu = cpu;
3532 }
3533 
3534 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3535 {
3536     vcpu->cpu = -1;
3537     if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3538         __stop_cpu_timer_accounting(vcpu);
3539     kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3540     vcpu->arch.enabled_gmap = gmap_get_enabled();
3541     gmap_disable(vcpu->arch.enabled_gmap);
3542 
3543 }
3544 
3545 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3546 {
3547     mutex_lock(&vcpu->kvm->lock);
3548     preempt_disable();
3549     vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3550     vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3551     preempt_enable();
3552     mutex_unlock(&vcpu->kvm->lock);
3553     if (!kvm_is_ucontrol(vcpu->kvm)) {
3554         vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3555         sca_add_vcpu(vcpu);
3556     }
3557     if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3558         vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3559     /* make vcpu_load load the right gmap on the first trigger */
3560     vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3561 }
3562 
3563 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3564 {
3565     if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3566         test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3567         return true;
3568     return false;
3569 }
3570 
3571 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3572 {
3573     /* At least one ECC subfunction must be present */
3574     return kvm_has_pckmo_subfunc(kvm, 32) ||
3575            kvm_has_pckmo_subfunc(kvm, 33) ||
3576            kvm_has_pckmo_subfunc(kvm, 34) ||
3577            kvm_has_pckmo_subfunc(kvm, 40) ||
3578            kvm_has_pckmo_subfunc(kvm, 41);
3579 
3580 }
3581 
3582 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3583 {
3584     /*
3585      * If the AP instructions are not being interpreted and the MSAX3
3586      * facility is not configured for the guest, there is nothing to set up.
3587      */
3588     if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3589         return;
3590 
3591     vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3592     vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3593     vcpu->arch.sie_block->eca &= ~ECA_APIE;
3594     vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3595 
3596     if (vcpu->kvm->arch.crypto.apie)
3597         vcpu->arch.sie_block->eca |= ECA_APIE;
3598 
3599     /* Set up protected key support */
3600     if (vcpu->kvm->arch.crypto.aes_kw) {
3601         vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3602         /* ecc is also wrapped with AES key */
3603         if (kvm_has_pckmo_ecc(vcpu->kvm))
3604             vcpu->arch.sie_block->ecd |= ECD_ECC;
3605     }
3606 
3607     if (vcpu->kvm->arch.crypto.dea_kw)
3608         vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3609 }
3610 
3611 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3612 {
3613     free_page(vcpu->arch.sie_block->cbrlo);
3614     vcpu->arch.sie_block->cbrlo = 0;
3615 }
3616 
3617 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3618 {
3619     vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3620     if (!vcpu->arch.sie_block->cbrlo)
3621         return -ENOMEM;
3622     return 0;
3623 }
3624 
3625 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3626 {
3627     struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3628 
3629     vcpu->arch.sie_block->ibc = model->ibc;
3630     if (test_kvm_facility(vcpu->kvm, 7))
3631         vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3632 }
3633 
3634 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3635 {
3636     int rc = 0;
3637     u16 uvrc, uvrrc;
3638 
3639     atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3640                             CPUSTAT_SM |
3641                             CPUSTAT_STOPPED);
3642 
3643     if (test_kvm_facility(vcpu->kvm, 78))
3644         kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3645     else if (test_kvm_facility(vcpu->kvm, 8))
3646         kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3647 
3648     kvm_s390_vcpu_setup_model(vcpu);
3649 
3650     /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3651     if (MACHINE_HAS_ESOP)
3652         vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3653     if (test_kvm_facility(vcpu->kvm, 9))
3654         vcpu->arch.sie_block->ecb |= ECB_SRSI;
3655     if (test_kvm_facility(vcpu->kvm, 11))
3656         vcpu->arch.sie_block->ecb |= ECB_PTF;
3657     if (test_kvm_facility(vcpu->kvm, 73))
3658         vcpu->arch.sie_block->ecb |= ECB_TE;
3659     if (!kvm_is_ucontrol(vcpu->kvm))
3660         vcpu->arch.sie_block->ecb |= ECB_SPECI;
3661 
3662     if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3663         vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3664     if (test_kvm_facility(vcpu->kvm, 130))
3665         vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3666     vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3667     if (sclp.has_cei)
3668         vcpu->arch.sie_block->eca |= ECA_CEI;
3669     if (sclp.has_ib)
3670         vcpu->arch.sie_block->eca |= ECA_IB;
3671     if (sclp.has_siif)
3672         vcpu->arch.sie_block->eca |= ECA_SII;
3673     if (sclp.has_sigpif)
3674         vcpu->arch.sie_block->eca |= ECA_SIGPI;
3675     if (test_kvm_facility(vcpu->kvm, 129)) {
3676         vcpu->arch.sie_block->eca |= ECA_VX;
3677         vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3678     }
3679     if (test_kvm_facility(vcpu->kvm, 139))
3680         vcpu->arch.sie_block->ecd |= ECD_MEF;
3681     if (test_kvm_facility(vcpu->kvm, 156))
3682         vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3683     if (vcpu->arch.sie_block->gd) {
3684         vcpu->arch.sie_block->eca |= ECA_AIV;
3685         VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3686                vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3687     }
3688     vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3689                     | SDNXC;
3690     vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3691 
3692     if (sclp.has_kss)
3693         kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3694     else
3695         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3696 
3697     if (vcpu->kvm->arch.use_cmma) {
3698         rc = kvm_s390_vcpu_setup_cmma(vcpu);
3699         if (rc)
3700             return rc;
3701     }
3702     hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3703     vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3704 
3705     vcpu->arch.sie_block->hpid = HPID_KVM;
3706 
3707     kvm_s390_vcpu_crypto_setup(vcpu);
3708 
3709     kvm_s390_vcpu_pci_setup(vcpu);
3710 
3711     mutex_lock(&vcpu->kvm->lock);
3712     if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3713         rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3714         if (rc)
3715             kvm_s390_vcpu_unsetup_cmma(vcpu);
3716     }
3717     mutex_unlock(&vcpu->kvm->lock);
3718 
3719     return rc;
3720 }
3721 
3722 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3723 {
3724     if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3725         return -EINVAL;
3726     return 0;
3727 }
3728 
3729 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3730 {
3731     struct sie_page *sie_page;
3732     int rc;
3733 
3734     BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3735     sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3736     if (!sie_page)
3737         return -ENOMEM;
3738 
3739     vcpu->arch.sie_block = &sie_page->sie_block;
3740     vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3741 
3742     /* the real guest size will always be smaller than msl */
3743     vcpu->arch.sie_block->mso = 0;
3744     vcpu->arch.sie_block->msl = sclp.hamax;
3745 
3746     vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3747     spin_lock_init(&vcpu->arch.local_int.lock);
3748     vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3749     seqcount_init(&vcpu->arch.cputm_seqcount);
3750 
3751     vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3752     kvm_clear_async_pf_completion_queue(vcpu);
3753     vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3754                     KVM_SYNC_GPRS |
3755                     KVM_SYNC_ACRS |
3756                     KVM_SYNC_CRS |
3757                     KVM_SYNC_ARCH0 |
3758                     KVM_SYNC_PFAULT |
3759                     KVM_SYNC_DIAG318;
3760     kvm_s390_set_prefix(vcpu, 0);
3761     if (test_kvm_facility(vcpu->kvm, 64))
3762         vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3763     if (test_kvm_facility(vcpu->kvm, 82))
3764         vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3765     if (test_kvm_facility(vcpu->kvm, 133))
3766         vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3767     if (test_kvm_facility(vcpu->kvm, 156))
3768         vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3769     /* fprs can be synchronized via vrs, even if the guest has no vx. With
3770      * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3771      */
3772     if (MACHINE_HAS_VX)
3773         vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3774     else
3775         vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3776 
3777     if (kvm_is_ucontrol(vcpu->kvm)) {
3778         rc = __kvm_ucontrol_vcpu_init(vcpu);
3779         if (rc)
3780             goto out_free_sie_block;
3781     }
3782 
3783     VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3784          vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3785     trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3786 
3787     rc = kvm_s390_vcpu_setup(vcpu);
3788     if (rc)
3789         goto out_ucontrol_uninit;
3790 
3791     kvm_s390_update_topology_change_report(vcpu->kvm, 1);
3792     return 0;
3793 
3794 out_ucontrol_uninit:
3795     if (kvm_is_ucontrol(vcpu->kvm))
3796         gmap_remove(vcpu->arch.gmap);
3797 out_free_sie_block:
3798     free_page((unsigned long)(vcpu->arch.sie_block));
3799     return rc;
3800 }
3801 
3802 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3803 {
3804     clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3805     return kvm_s390_vcpu_has_irq(vcpu, 0);
3806 }
3807 
3808 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3809 {
3810     return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3811 }
3812 
3813 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3814 {
3815     atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3816     exit_sie(vcpu);
3817 }
3818 
3819 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3820 {
3821     atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3822 }
3823 
3824 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3825 {
3826     atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3827     exit_sie(vcpu);
3828 }
3829 
3830 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3831 {
3832     return atomic_read(&vcpu->arch.sie_block->prog20) &
3833            (PROG_BLOCK_SIE | PROG_REQUEST);
3834 }
3835 
3836 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3837 {
3838     atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3839 }
3840 
3841 /*
3842  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3843  * If the CPU is not running (e.g. waiting as idle) the function will
3844  * return immediately. */
3845 void exit_sie(struct kvm_vcpu *vcpu)
3846 {
3847     kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3848     kvm_s390_vsie_kick(vcpu);
3849     while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3850         cpu_relax();
3851 }
3852 
3853 /* Kick a guest cpu out of SIE to process a request synchronously */
3854 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3855 {
3856     __kvm_make_request(req, vcpu);
3857     kvm_s390_vcpu_request(vcpu);
3858 }
3859 
3860 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3861                   unsigned long end)
3862 {
3863     struct kvm *kvm = gmap->private;
3864     struct kvm_vcpu *vcpu;
3865     unsigned long prefix;
3866     unsigned long i;
3867 
3868     if (gmap_is_shadow(gmap))
3869         return;
3870     if (start >= 1UL << 31)
3871         /* We are only interested in prefix pages */
3872         return;
3873     kvm_for_each_vcpu(i, vcpu, kvm) {
3874         /* match against both prefix pages */
3875         prefix = kvm_s390_get_prefix(vcpu);
3876         if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3877             VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3878                    start, end);
3879             kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3880         }
3881     }
3882 }
3883 
3884 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3885 {
3886     /* do not poll with more than halt_poll_max_steal percent of steal time */
3887     if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3888         READ_ONCE(halt_poll_max_steal)) {
3889         vcpu->stat.halt_no_poll_steal++;
3890         return true;
3891     }
3892     return false;
3893 }
3894 
3895 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3896 {
3897     /* kvm common code refers to this, but never calls it */
3898     BUG();
3899     return 0;
3900 }
3901 
3902 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3903                        struct kvm_one_reg *reg)
3904 {
3905     int r = -EINVAL;
3906 
3907     switch (reg->id) {
3908     case KVM_REG_S390_TODPR:
3909         r = put_user(vcpu->arch.sie_block->todpr,
3910                  (u32 __user *)reg->addr);
3911         break;
3912     case KVM_REG_S390_EPOCHDIFF:
3913         r = put_user(vcpu->arch.sie_block->epoch,
3914                  (u64 __user *)reg->addr);
3915         break;
3916     case KVM_REG_S390_CPU_TIMER:
3917         r = put_user(kvm_s390_get_cpu_timer(vcpu),
3918                  (u64 __user *)reg->addr);
3919         break;
3920     case KVM_REG_S390_CLOCK_COMP:
3921         r = put_user(vcpu->arch.sie_block->ckc,
3922                  (u64 __user *)reg->addr);
3923         break;
3924     case KVM_REG_S390_PFTOKEN:
3925         r = put_user(vcpu->arch.pfault_token,
3926                  (u64 __user *)reg->addr);
3927         break;
3928     case KVM_REG_S390_PFCOMPARE:
3929         r = put_user(vcpu->arch.pfault_compare,
3930                  (u64 __user *)reg->addr);
3931         break;
3932     case KVM_REG_S390_PFSELECT:
3933         r = put_user(vcpu->arch.pfault_select,
3934                  (u64 __user *)reg->addr);
3935         break;
3936     case KVM_REG_S390_PP:
3937         r = put_user(vcpu->arch.sie_block->pp,
3938                  (u64 __user *)reg->addr);
3939         break;
3940     case KVM_REG_S390_GBEA:
3941         r = put_user(vcpu->arch.sie_block->gbea,
3942                  (u64 __user *)reg->addr);
3943         break;
3944     default:
3945         break;
3946     }
3947 
3948     return r;
3949 }
3950 
3951 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3952                        struct kvm_one_reg *reg)
3953 {
3954     int r = -EINVAL;
3955     __u64 val;
3956 
3957     switch (reg->id) {
3958     case KVM_REG_S390_TODPR:
3959         r = get_user(vcpu->arch.sie_block->todpr,
3960                  (u32 __user *)reg->addr);
3961         break;
3962     case KVM_REG_S390_EPOCHDIFF:
3963         r = get_user(vcpu->arch.sie_block->epoch,
3964                  (u64 __user *)reg->addr);
3965         break;
3966     case KVM_REG_S390_CPU_TIMER:
3967         r = get_user(val, (u64 __user *)reg->addr);
3968         if (!r)
3969             kvm_s390_set_cpu_timer(vcpu, val);
3970         break;
3971     case KVM_REG_S390_CLOCK_COMP:
3972         r = get_user(vcpu->arch.sie_block->ckc,
3973                  (u64 __user *)reg->addr);
3974         break;
3975     case KVM_REG_S390_PFTOKEN:
3976         r = get_user(vcpu->arch.pfault_token,
3977                  (u64 __user *)reg->addr);
3978         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3979             kvm_clear_async_pf_completion_queue(vcpu);
3980         break;
3981     case KVM_REG_S390_PFCOMPARE:
3982         r = get_user(vcpu->arch.pfault_compare,
3983                  (u64 __user *)reg->addr);
3984         break;
3985     case KVM_REG_S390_PFSELECT:
3986         r = get_user(vcpu->arch.pfault_select,
3987                  (u64 __user *)reg->addr);
3988         break;
3989     case KVM_REG_S390_PP:
3990         r = get_user(vcpu->arch.sie_block->pp,
3991                  (u64 __user *)reg->addr);
3992         break;
3993     case KVM_REG_S390_GBEA:
3994         r = get_user(vcpu->arch.sie_block->gbea,
3995                  (u64 __user *)reg->addr);
3996         break;
3997     default:
3998         break;
3999     }
4000 
4001     return r;
4002 }
4003 
4004 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
4005 {
4006     vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
4007     vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
4008     memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
4009 
4010     kvm_clear_async_pf_completion_queue(vcpu);
4011     if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
4012         kvm_s390_vcpu_stop(vcpu);
4013     kvm_s390_clear_local_irqs(vcpu);
4014 }
4015 
4016 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
4017 {
4018     /* Initial reset is a superset of the normal reset */
4019     kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4020 
4021     /*
4022      * This equals initial cpu reset in pop, but we don't switch to ESA.
4023      * We do not only reset the internal data, but also ...
4024      */
4025     vcpu->arch.sie_block->gpsw.mask = 0;
4026     vcpu->arch.sie_block->gpsw.addr = 0;
4027     kvm_s390_set_prefix(vcpu, 0);
4028     kvm_s390_set_cpu_timer(vcpu, 0);
4029     vcpu->arch.sie_block->ckc = 0;
4030     memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
4031     vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
4032     vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
4033 
4034     /* ... the data in sync regs */
4035     memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
4036     vcpu->run->s.regs.ckc = 0;
4037     vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
4038     vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
4039     vcpu->run->psw_addr = 0;
4040     vcpu->run->psw_mask = 0;
4041     vcpu->run->s.regs.todpr = 0;
4042     vcpu->run->s.regs.cputm = 0;
4043     vcpu->run->s.regs.ckc = 0;
4044     vcpu->run->s.regs.pp = 0;
4045     vcpu->run->s.regs.gbea = 1;
4046     vcpu->run->s.regs.fpc = 0;
4047     /*
4048      * Do not reset these registers in the protected case, as some of
4049      * them are overlayed and they are not accessible in this case
4050      * anyway.
4051      */
4052     if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4053         vcpu->arch.sie_block->gbea = 1;
4054         vcpu->arch.sie_block->pp = 0;
4055         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4056         vcpu->arch.sie_block->todpr = 0;
4057     }
4058 }
4059 
4060 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
4061 {
4062     struct kvm_sync_regs *regs = &vcpu->run->s.regs;
4063 
4064     /* Clear reset is a superset of the initial reset */
4065     kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4066 
4067     memset(&regs->gprs, 0, sizeof(regs->gprs));
4068     memset(&regs->vrs, 0, sizeof(regs->vrs));
4069     memset(&regs->acrs, 0, sizeof(regs->acrs));
4070     memset(&regs->gscb, 0, sizeof(regs->gscb));
4071 
4072     regs->etoken = 0;
4073     regs->etoken_extension = 0;
4074 }
4075 
4076 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4077 {
4078     vcpu_load(vcpu);
4079     memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
4080     vcpu_put(vcpu);
4081     return 0;
4082 }
4083 
4084 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
4085 {
4086     vcpu_load(vcpu);
4087     memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
4088     vcpu_put(vcpu);
4089     return 0;
4090 }
4091 
4092 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4093                   struct kvm_sregs *sregs)
4094 {
4095     vcpu_load(vcpu);
4096 
4097     memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
4098     memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
4099 
4100     vcpu_put(vcpu);
4101     return 0;
4102 }
4103 
4104 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4105                   struct kvm_sregs *sregs)
4106 {
4107     vcpu_load(vcpu);
4108 
4109     memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
4110     memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
4111 
4112     vcpu_put(vcpu);
4113     return 0;
4114 }
4115 
4116 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4117 {
4118     int ret = 0;
4119 
4120     vcpu_load(vcpu);
4121 
4122     if (test_fp_ctl(fpu->fpc)) {
4123         ret = -EINVAL;
4124         goto out;
4125     }
4126     vcpu->run->s.regs.fpc = fpu->fpc;
4127     if (MACHINE_HAS_VX)
4128         convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
4129                  (freg_t *) fpu->fprs);
4130     else
4131         memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
4132 
4133 out:
4134     vcpu_put(vcpu);
4135     return ret;
4136 }
4137 
4138 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
4139 {
4140     vcpu_load(vcpu);
4141 
4142     /* make sure we have the latest values */
4143     save_fpu_regs();
4144     if (MACHINE_HAS_VX)
4145         convert_vx_to_fp((freg_t *) fpu->fprs,
4146                  (__vector128 *) vcpu->run->s.regs.vrs);
4147     else
4148         memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
4149     fpu->fpc = vcpu->run->s.regs.fpc;
4150 
4151     vcpu_put(vcpu);
4152     return 0;
4153 }
4154 
4155 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
4156 {
4157     int rc = 0;
4158 
4159     if (!is_vcpu_stopped(vcpu))
4160         rc = -EBUSY;
4161     else {
4162         vcpu->run->psw_mask = psw.mask;
4163         vcpu->run->psw_addr = psw.addr;
4164     }
4165     return rc;
4166 }
4167 
4168 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4169                   struct kvm_translation *tr)
4170 {
4171     return -EINVAL; /* not implemented yet */
4172 }
4173 
4174 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
4175                   KVM_GUESTDBG_USE_HW_BP | \
4176                   KVM_GUESTDBG_ENABLE)
4177 
4178 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
4179                     struct kvm_guest_debug *dbg)
4180 {
4181     int rc = 0;
4182 
4183     vcpu_load(vcpu);
4184 
4185     vcpu->guest_debug = 0;
4186     kvm_s390_clear_bp_data(vcpu);
4187 
4188     if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
4189         rc = -EINVAL;
4190         goto out;
4191     }
4192     if (!sclp.has_gpere) {
4193         rc = -EINVAL;
4194         goto out;
4195     }
4196 
4197     if (dbg->control & KVM_GUESTDBG_ENABLE) {
4198         vcpu->guest_debug = dbg->control;
4199         /* enforce guest PER */
4200         kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
4201 
4202         if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
4203             rc = kvm_s390_import_bp_data(vcpu, dbg);
4204     } else {
4205         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4206         vcpu->arch.guestdbg.last_bp = 0;
4207     }
4208 
4209     if (rc) {
4210         vcpu->guest_debug = 0;
4211         kvm_s390_clear_bp_data(vcpu);
4212         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
4213     }
4214 
4215 out:
4216     vcpu_put(vcpu);
4217     return rc;
4218 }
4219 
4220 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
4221                     struct kvm_mp_state *mp_state)
4222 {
4223     int ret;
4224 
4225     vcpu_load(vcpu);
4226 
4227     /* CHECK_STOP and LOAD are not supported yet */
4228     ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
4229                       KVM_MP_STATE_OPERATING;
4230 
4231     vcpu_put(vcpu);
4232     return ret;
4233 }
4234 
4235 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
4236                     struct kvm_mp_state *mp_state)
4237 {
4238     int rc = 0;
4239 
4240     vcpu_load(vcpu);
4241 
4242     /* user space knows about this interface - let it control the state */
4243     kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
4244 
4245     switch (mp_state->mp_state) {
4246     case KVM_MP_STATE_STOPPED:
4247         rc = kvm_s390_vcpu_stop(vcpu);
4248         break;
4249     case KVM_MP_STATE_OPERATING:
4250         rc = kvm_s390_vcpu_start(vcpu);
4251         break;
4252     case KVM_MP_STATE_LOAD:
4253         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
4254             rc = -ENXIO;
4255             break;
4256         }
4257         rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
4258         break;
4259     case KVM_MP_STATE_CHECK_STOP:
4260         fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
4261     default:
4262         rc = -ENXIO;
4263     }
4264 
4265     vcpu_put(vcpu);
4266     return rc;
4267 }
4268 
4269 static bool ibs_enabled(struct kvm_vcpu *vcpu)
4270 {
4271     return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
4272 }
4273 
4274 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
4275 {
4276 retry:
4277     kvm_s390_vcpu_request_handled(vcpu);
4278     if (!kvm_request_pending(vcpu))
4279         return 0;
4280     /*
4281      * If the guest prefix changed, re-arm the ipte notifier for the
4282      * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
4283      * This ensures that the ipte instruction for this request has
4284      * already finished. We might race against a second unmapper that
4285      * wants to set the blocking bit. Lets just retry the request loop.
4286      */
4287     if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
4288         int rc;
4289         rc = gmap_mprotect_notify(vcpu->arch.gmap,
4290                       kvm_s390_get_prefix(vcpu),
4291                       PAGE_SIZE * 2, PROT_WRITE);
4292         if (rc) {
4293             kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
4294             return rc;
4295         }
4296         goto retry;
4297     }
4298 
4299     if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
4300         vcpu->arch.sie_block->ihcpu = 0xffff;
4301         goto retry;
4302     }
4303 
4304     if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
4305         if (!ibs_enabled(vcpu)) {
4306             trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
4307             kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
4308         }
4309         goto retry;
4310     }
4311 
4312     if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
4313         if (ibs_enabled(vcpu)) {
4314             trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
4315             kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
4316         }
4317         goto retry;
4318     }
4319 
4320     if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
4321         vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
4322         goto retry;
4323     }
4324 
4325     if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
4326         /*
4327          * Disable CMM virtualization; we will emulate the ESSA
4328          * instruction manually, in order to provide additional
4329          * functionalities needed for live migration.
4330          */
4331         vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
4332         goto retry;
4333     }
4334 
4335     if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
4336         /*
4337          * Re-enable CMM virtualization if CMMA is available and
4338          * CMM has been used.
4339          */
4340         if ((vcpu->kvm->arch.use_cmma) &&
4341             (vcpu->kvm->mm->context.uses_cmm))
4342             vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
4343         goto retry;
4344     }
4345 
4346     /* nothing to do, just clear the request */
4347     kvm_clear_request(KVM_REQ_UNHALT, vcpu);
4348     /* we left the vsie handler, nothing to do, just clear the request */
4349     kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
4350 
4351     return 0;
4352 }
4353 
4354 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4355 {
4356     struct kvm_vcpu *vcpu;
4357     union tod_clock clk;
4358     unsigned long i;
4359 
4360     preempt_disable();
4361 
4362     store_tod_clock_ext(&clk);
4363 
4364     kvm->arch.epoch = gtod->tod - clk.tod;
4365     kvm->arch.epdx = 0;
4366     if (test_kvm_facility(kvm, 139)) {
4367         kvm->arch.epdx = gtod->epoch_idx - clk.ei;
4368         if (kvm->arch.epoch > gtod->tod)
4369             kvm->arch.epdx -= 1;
4370     }
4371 
4372     kvm_s390_vcpu_block_all(kvm);
4373     kvm_for_each_vcpu(i, vcpu, kvm) {
4374         vcpu->arch.sie_block->epoch = kvm->arch.epoch;
4375         vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
4376     }
4377 
4378     kvm_s390_vcpu_unblock_all(kvm);
4379     preempt_enable();
4380 }
4381 
4382 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4383 {
4384     mutex_lock(&kvm->lock);
4385     __kvm_s390_set_tod_clock(kvm, gtod);
4386     mutex_unlock(&kvm->lock);
4387 }
4388 
4389 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4390 {
4391     if (!mutex_trylock(&kvm->lock))
4392         return 0;
4393     __kvm_s390_set_tod_clock(kvm, gtod);
4394     mutex_unlock(&kvm->lock);
4395     return 1;
4396 }
4397 
4398 /**
4399  * kvm_arch_fault_in_page - fault-in guest page if necessary
4400  * @vcpu: The corresponding virtual cpu
4401  * @gpa: Guest physical address
4402  * @writable: Whether the page should be writable or not
4403  *
4404  * Make sure that a guest page has been faulted-in on the host.
4405  *
4406  * Return: Zero on success, negative error code otherwise.
4407  */
4408 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4409 {
4410     return gmap_fault(vcpu->arch.gmap, gpa,
4411               writable ? FAULT_FLAG_WRITE : 0);
4412 }
4413 
4414 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4415                       unsigned long token)
4416 {
4417     struct kvm_s390_interrupt inti;
4418     struct kvm_s390_irq irq;
4419 
4420     if (start_token) {
4421         irq.u.ext.ext_params2 = token;
4422         irq.type = KVM_S390_INT_PFAULT_INIT;
4423         WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4424     } else {
4425         inti.type = KVM_S390_INT_PFAULT_DONE;
4426         inti.parm64 = token;
4427         WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4428     }
4429 }
4430 
4431 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4432                      struct kvm_async_pf *work)
4433 {
4434     trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4435     __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4436 
4437     return true;
4438 }
4439 
4440 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4441                  struct kvm_async_pf *work)
4442 {
4443     trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4444     __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4445 }
4446 
4447 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4448                    struct kvm_async_pf *work)
4449 {
4450     /* s390 will always inject the page directly */
4451 }
4452 
4453 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4454 {
4455     /*
4456      * s390 will always inject the page directly,
4457      * but we still want check_async_completion to cleanup
4458      */
4459     return true;
4460 }
4461 
4462 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4463 {
4464     hva_t hva;
4465     struct kvm_arch_async_pf arch;
4466 
4467     if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4468         return false;
4469     if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4470         vcpu->arch.pfault_compare)
4471         return false;
4472     if (psw_extint_disabled(vcpu))
4473         return false;
4474     if (kvm_s390_vcpu_has_irq(vcpu, 0))
4475         return false;
4476     if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4477         return false;
4478     if (!vcpu->arch.gmap->pfault_enabled)
4479         return false;
4480 
4481     hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4482     hva += current->thread.gmap_addr & ~PAGE_MASK;
4483     if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4484         return false;
4485 
4486     return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4487 }
4488 
4489 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4490 {
4491     int rc, cpuflags;
4492 
4493     /*
4494      * On s390 notifications for arriving pages will be delivered directly
4495      * to the guest but the house keeping for completed pfaults is
4496      * handled outside the worker.
4497      */
4498     kvm_check_async_pf_completion(vcpu);
4499 
4500     vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4501     vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4502 
4503     if (need_resched())
4504         schedule();
4505 
4506     if (!kvm_is_ucontrol(vcpu->kvm)) {
4507         rc = kvm_s390_deliver_pending_interrupts(vcpu);
4508         if (rc)
4509             return rc;
4510     }
4511 
4512     rc = kvm_s390_handle_requests(vcpu);
4513     if (rc)
4514         return rc;
4515 
4516     if (guestdbg_enabled(vcpu)) {
4517         kvm_s390_backup_guest_per_regs(vcpu);
4518         kvm_s390_patch_guest_per_regs(vcpu);
4519     }
4520 
4521     clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4522 
4523     vcpu->arch.sie_block->icptcode = 0;
4524     cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4525     VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4526     trace_kvm_s390_sie_enter(vcpu, cpuflags);
4527 
4528     return 0;
4529 }
4530 
4531 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4532 {
4533     struct kvm_s390_pgm_info pgm_info = {
4534         .code = PGM_ADDRESSING,
4535     };
4536     u8 opcode, ilen;
4537     int rc;
4538 
4539     VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4540     trace_kvm_s390_sie_fault(vcpu);
4541 
4542     /*
4543      * We want to inject an addressing exception, which is defined as a
4544      * suppressing or terminating exception. However, since we came here
4545      * by a DAT access exception, the PSW still points to the faulting
4546      * instruction since DAT exceptions are nullifying. So we've got
4547      * to look up the current opcode to get the length of the instruction
4548      * to be able to forward the PSW.
4549      */
4550     rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4551     ilen = insn_length(opcode);
4552     if (rc < 0) {
4553         return rc;
4554     } else if (rc) {
4555         /* Instruction-Fetching Exceptions - we can't detect the ilen.
4556          * Forward by arbitrary ilc, injection will take care of
4557          * nullification if necessary.
4558          */
4559         pgm_info = vcpu->arch.pgm;
4560         ilen = 4;
4561     }
4562     pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4563     kvm_s390_forward_psw(vcpu, ilen);
4564     return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4565 }
4566 
4567 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4568 {
4569     struct mcck_volatile_info *mcck_info;
4570     struct sie_page *sie_page;
4571 
4572     VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4573            vcpu->arch.sie_block->icptcode);
4574     trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4575 
4576     if (guestdbg_enabled(vcpu))
4577         kvm_s390_restore_guest_per_regs(vcpu);
4578 
4579     vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4580     vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4581 
4582     if (exit_reason == -EINTR) {
4583         VCPU_EVENT(vcpu, 3, "%s", "machine check");
4584         sie_page = container_of(vcpu->arch.sie_block,
4585                     struct sie_page, sie_block);
4586         mcck_info = &sie_page->mcck_info;
4587         kvm_s390_reinject_machine_check(vcpu, mcck_info);
4588         return 0;
4589     }
4590 
4591     if (vcpu->arch.sie_block->icptcode > 0) {
4592         int rc = kvm_handle_sie_intercept(vcpu);
4593 
4594         if (rc != -EOPNOTSUPP)
4595             return rc;
4596         vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4597         vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4598         vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4599         vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4600         return -EREMOTE;
4601     } else if (exit_reason != -EFAULT) {
4602         vcpu->stat.exit_null++;
4603         return 0;
4604     } else if (kvm_is_ucontrol(vcpu->kvm)) {
4605         vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4606         vcpu->run->s390_ucontrol.trans_exc_code =
4607                         current->thread.gmap_addr;
4608         vcpu->run->s390_ucontrol.pgm_code = 0x10;
4609         return -EREMOTE;
4610     } else if (current->thread.gmap_pfault) {
4611         trace_kvm_s390_major_guest_pfault(vcpu);
4612         current->thread.gmap_pfault = 0;
4613         if (kvm_arch_setup_async_pf(vcpu))
4614             return 0;
4615         vcpu->stat.pfault_sync++;
4616         return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4617     }
4618     return vcpu_post_run_fault_in_sie(vcpu);
4619 }
4620 
4621 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4622 static int __vcpu_run(struct kvm_vcpu *vcpu)
4623 {
4624     int rc, exit_reason;
4625     struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4626 
4627     /*
4628      * We try to hold kvm->srcu during most of vcpu_run (except when run-
4629      * ning the guest), so that memslots (and other stuff) are protected
4630      */
4631     kvm_vcpu_srcu_read_lock(vcpu);
4632 
4633     do {
4634         rc = vcpu_pre_run(vcpu);
4635         if (rc)
4636             break;
4637 
4638         kvm_vcpu_srcu_read_unlock(vcpu);
4639         /*
4640          * As PF_VCPU will be used in fault handler, between
4641          * guest_enter and guest_exit should be no uaccess.
4642          */
4643         local_irq_disable();
4644         guest_enter_irqoff();
4645         __disable_cpu_timer_accounting(vcpu);
4646         local_irq_enable();
4647         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4648             memcpy(sie_page->pv_grregs,
4649                    vcpu->run->s.regs.gprs,
4650                    sizeof(sie_page->pv_grregs));
4651         }
4652         if (test_cpu_flag(CIF_FPU))
4653             load_fpu_regs();
4654         exit_reason = sie64a(vcpu->arch.sie_block,
4655                      vcpu->run->s.regs.gprs);
4656         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4657             memcpy(vcpu->run->s.regs.gprs,
4658                    sie_page->pv_grregs,
4659                    sizeof(sie_page->pv_grregs));
4660             /*
4661              * We're not allowed to inject interrupts on intercepts
4662              * that leave the guest state in an "in-between" state
4663              * where the next SIE entry will do a continuation.
4664              * Fence interrupts in our "internal" PSW.
4665              */
4666             if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4667                 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4668                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4669             }
4670         }
4671         local_irq_disable();
4672         __enable_cpu_timer_accounting(vcpu);
4673         guest_exit_irqoff();
4674         local_irq_enable();
4675         kvm_vcpu_srcu_read_lock(vcpu);
4676 
4677         rc = vcpu_post_run(vcpu, exit_reason);
4678     } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4679 
4680     kvm_vcpu_srcu_read_unlock(vcpu);
4681     return rc;
4682 }
4683 
4684 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4685 {
4686     struct kvm_run *kvm_run = vcpu->run;
4687     struct runtime_instr_cb *riccb;
4688     struct gs_cb *gscb;
4689 
4690     riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4691     gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4692     vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4693     vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4694     if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4695         vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4696         vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4697         vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4698     }
4699     if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4700         vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4701         vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4702         vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4703         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4704             kvm_clear_async_pf_completion_queue(vcpu);
4705     }
4706     if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4707         vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4708         vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4709         VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4710     }
4711     /*
4712      * If userspace sets the riccb (e.g. after migration) to a valid state,
4713      * we should enable RI here instead of doing the lazy enablement.
4714      */
4715     if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4716         test_kvm_facility(vcpu->kvm, 64) &&
4717         riccb->v &&
4718         !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4719         VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4720         vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4721     }
4722     /*
4723      * If userspace sets the gscb (e.g. after migration) to non-zero,
4724      * we should enable GS here instead of doing the lazy enablement.
4725      */
4726     if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4727         test_kvm_facility(vcpu->kvm, 133) &&
4728         gscb->gssm &&
4729         !vcpu->arch.gs_enabled) {
4730         VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4731         vcpu->arch.sie_block->ecb |= ECB_GS;
4732         vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4733         vcpu->arch.gs_enabled = 1;
4734     }
4735     if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4736         test_kvm_facility(vcpu->kvm, 82)) {
4737         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4738         vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4739     }
4740     if (MACHINE_HAS_GS) {
4741         preempt_disable();
4742         __ctl_set_bit(2, 4);
4743         if (current->thread.gs_cb) {
4744             vcpu->arch.host_gscb = current->thread.gs_cb;
4745             save_gs_cb(vcpu->arch.host_gscb);
4746         }
4747         if (vcpu->arch.gs_enabled) {
4748             current->thread.gs_cb = (struct gs_cb *)
4749                         &vcpu->run->s.regs.gscb;
4750             restore_gs_cb(current->thread.gs_cb);
4751         }
4752         preempt_enable();
4753     }
4754     /* SIE will load etoken directly from SDNX and therefore kvm_run */
4755 }
4756 
4757 static void sync_regs(struct kvm_vcpu *vcpu)
4758 {
4759     struct kvm_run *kvm_run = vcpu->run;
4760 
4761     if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4762         kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4763     if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4764         memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4765         /* some control register changes require a tlb flush */
4766         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4767     }
4768     if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4769         kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4770         vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4771     }
4772     save_access_regs(vcpu->arch.host_acrs);
4773     restore_access_regs(vcpu->run->s.regs.acrs);
4774     /* save host (userspace) fprs/vrs */
4775     save_fpu_regs();
4776     vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4777     vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4778     if (MACHINE_HAS_VX)
4779         current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4780     else
4781         current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4782     current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4783     if (test_fp_ctl(current->thread.fpu.fpc))
4784         /* User space provided an invalid FPC, let's clear it */
4785         current->thread.fpu.fpc = 0;
4786 
4787     /* Sync fmt2 only data */
4788     if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4789         sync_regs_fmt2(vcpu);
4790     } else {
4791         /*
4792          * In several places we have to modify our internal view to
4793          * not do things that are disallowed by the ultravisor. For
4794          * example we must not inject interrupts after specific exits
4795          * (e.g. 112 prefix page not secure). We do this by turning
4796          * off the machine check, external and I/O interrupt bits
4797          * of our PSW copy. To avoid getting validity intercepts, we
4798          * do only accept the condition code from userspace.
4799          */
4800         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4801         vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4802                            PSW_MASK_CC;
4803     }
4804 
4805     kvm_run->kvm_dirty_regs = 0;
4806 }
4807 
4808 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4809 {
4810     struct kvm_run *kvm_run = vcpu->run;
4811 
4812     kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4813     kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4814     kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4815     kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4816     kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4817     if (MACHINE_HAS_GS) {
4818         preempt_disable();
4819         __ctl_set_bit(2, 4);
4820         if (vcpu->arch.gs_enabled)
4821             save_gs_cb(current->thread.gs_cb);
4822         current->thread.gs_cb = vcpu->arch.host_gscb;
4823         restore_gs_cb(vcpu->arch.host_gscb);
4824         if (!vcpu->arch.host_gscb)
4825             __ctl_clear_bit(2, 4);
4826         vcpu->arch.host_gscb = NULL;
4827         preempt_enable();
4828     }
4829     /* SIE will save etoken directly into SDNX and therefore kvm_run */
4830 }
4831 
4832 static void store_regs(struct kvm_vcpu *vcpu)
4833 {
4834     struct kvm_run *kvm_run = vcpu->run;
4835 
4836     kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4837     kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4838     kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4839     memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4840     kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4841     kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4842     kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4843     kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4844     kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4845     save_access_regs(vcpu->run->s.regs.acrs);
4846     restore_access_regs(vcpu->arch.host_acrs);
4847     /* Save guest register state */
4848     save_fpu_regs();
4849     vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4850     /* Restore will be done lazily at return */
4851     current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4852     current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4853     if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4854         store_regs_fmt2(vcpu);
4855 }
4856 
4857 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4858 {
4859     struct kvm_run *kvm_run = vcpu->run;
4860     int rc;
4861 
4862     /*
4863      * Running a VM while dumping always has the potential to
4864      * produce inconsistent dump data. But for PV vcpus a SIE
4865      * entry while dumping could also lead to a fatal validity
4866      * intercept which we absolutely want to avoid.
4867      */
4868     if (vcpu->kvm->arch.pv.dumping)
4869         return -EINVAL;
4870 
4871     if (kvm_run->immediate_exit)
4872         return -EINTR;
4873 
4874     if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4875         kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4876         return -EINVAL;
4877 
4878     vcpu_load(vcpu);
4879 
4880     if (guestdbg_exit_pending(vcpu)) {
4881         kvm_s390_prepare_debug_exit(vcpu);
4882         rc = 0;
4883         goto out;
4884     }
4885 
4886     kvm_sigset_activate(vcpu);
4887 
4888     /*
4889      * no need to check the return value of vcpu_start as it can only have
4890      * an error for protvirt, but protvirt means user cpu state
4891      */
4892     if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4893         kvm_s390_vcpu_start(vcpu);
4894     } else if (is_vcpu_stopped(vcpu)) {
4895         pr_err_ratelimited("can't run stopped vcpu %d\n",
4896                    vcpu->vcpu_id);
4897         rc = -EINVAL;
4898         goto out;
4899     }
4900 
4901     sync_regs(vcpu);
4902     enable_cpu_timer_accounting(vcpu);
4903 
4904     might_fault();
4905     rc = __vcpu_run(vcpu);
4906 
4907     if (signal_pending(current) && !rc) {
4908         kvm_run->exit_reason = KVM_EXIT_INTR;
4909         rc = -EINTR;
4910     }
4911 
4912     if (guestdbg_exit_pending(vcpu) && !rc)  {
4913         kvm_s390_prepare_debug_exit(vcpu);
4914         rc = 0;
4915     }
4916 
4917     if (rc == -EREMOTE) {
4918         /* userspace support is needed, kvm_run has been prepared */
4919         rc = 0;
4920     }
4921 
4922     disable_cpu_timer_accounting(vcpu);
4923     store_regs(vcpu);
4924 
4925     kvm_sigset_deactivate(vcpu);
4926 
4927     vcpu->stat.exit_userspace++;
4928 out:
4929     vcpu_put(vcpu);
4930     return rc;
4931 }
4932 
4933 /*
4934  * store status at address
4935  * we use have two special cases:
4936  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4937  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4938  */
4939 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4940 {
4941     unsigned char archmode = 1;
4942     freg_t fprs[NUM_FPRS];
4943     unsigned int px;
4944     u64 clkcomp, cputm;
4945     int rc;
4946 
4947     px = kvm_s390_get_prefix(vcpu);
4948     if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4949         if (write_guest_abs(vcpu, 163, &archmode, 1))
4950             return -EFAULT;
4951         gpa = 0;
4952     } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4953         if (write_guest_real(vcpu, 163, &archmode, 1))
4954             return -EFAULT;
4955         gpa = px;
4956     } else
4957         gpa -= __LC_FPREGS_SAVE_AREA;
4958 
4959     /* manually convert vector registers if necessary */
4960     if (MACHINE_HAS_VX) {
4961         convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4962         rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4963                      fprs, 128);
4964     } else {
4965         rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4966                      vcpu->run->s.regs.fprs, 128);
4967     }
4968     rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4969                   vcpu->run->s.regs.gprs, 128);
4970     rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4971                   &vcpu->arch.sie_block->gpsw, 16);
4972     rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4973                   &px, 4);
4974     rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4975                   &vcpu->run->s.regs.fpc, 4);
4976     rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4977                   &vcpu->arch.sie_block->todpr, 4);
4978     cputm = kvm_s390_get_cpu_timer(vcpu);
4979     rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4980                   &cputm, 8);
4981     clkcomp = vcpu->arch.sie_block->ckc >> 8;
4982     rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4983                   &clkcomp, 8);
4984     rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4985                   &vcpu->run->s.regs.acrs, 64);
4986     rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4987                   &vcpu->arch.sie_block->gcr, 128);
4988     return rc ? -EFAULT : 0;
4989 }
4990 
4991 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4992 {
4993     /*
4994      * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4995      * switch in the run ioctl. Let's update our copies before we save
4996      * it into the save area
4997      */
4998     save_fpu_regs();
4999     vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
5000     save_access_regs(vcpu->run->s.regs.acrs);
5001 
5002     return kvm_s390_store_status_unloaded(vcpu, addr);
5003 }
5004 
5005 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5006 {
5007     kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
5008     kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
5009 }
5010 
5011 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
5012 {
5013     unsigned long i;
5014     struct kvm_vcpu *vcpu;
5015 
5016     kvm_for_each_vcpu(i, vcpu, kvm) {
5017         __disable_ibs_on_vcpu(vcpu);
5018     }
5019 }
5020 
5021 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
5022 {
5023     if (!sclp.has_ibs)
5024         return;
5025     kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
5026     kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
5027 }
5028 
5029 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
5030 {
5031     int i, online_vcpus, r = 0, started_vcpus = 0;
5032 
5033     if (!is_vcpu_stopped(vcpu))
5034         return 0;
5035 
5036     trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
5037     /* Only one cpu at a time may enter/leave the STOPPED state. */
5038     spin_lock(&vcpu->kvm->arch.start_stop_lock);
5039     online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5040 
5041     /* Let's tell the UV that we want to change into the operating state */
5042     if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5043         r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
5044         if (r) {
5045             spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5046             return r;
5047         }
5048     }
5049 
5050     for (i = 0; i < online_vcpus; i++) {
5051         if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
5052             started_vcpus++;
5053     }
5054 
5055     if (started_vcpus == 0) {
5056         /* we're the only active VCPU -> speed it up */
5057         __enable_ibs_on_vcpu(vcpu);
5058     } else if (started_vcpus == 1) {
5059         /*
5060          * As we are starting a second VCPU, we have to disable
5061          * the IBS facility on all VCPUs to remove potentially
5062          * outstanding ENABLE requests.
5063          */
5064         __disable_ibs_on_all_vcpus(vcpu->kvm);
5065     }
5066 
5067     kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
5068     /*
5069      * The real PSW might have changed due to a RESTART interpreted by the
5070      * ultravisor. We block all interrupts and let the next sie exit
5071      * refresh our view.
5072      */
5073     if (kvm_s390_pv_cpu_is_protected(vcpu))
5074         vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
5075     /*
5076      * Another VCPU might have used IBS while we were offline.
5077      * Let's play safe and flush the VCPU at startup.
5078      */
5079     kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5080     spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5081     return 0;
5082 }
5083 
5084 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
5085 {
5086     int i, online_vcpus, r = 0, started_vcpus = 0;
5087     struct kvm_vcpu *started_vcpu = NULL;
5088 
5089     if (is_vcpu_stopped(vcpu))
5090         return 0;
5091 
5092     trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
5093     /* Only one cpu at a time may enter/leave the STOPPED state. */
5094     spin_lock(&vcpu->kvm->arch.start_stop_lock);
5095     online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
5096 
5097     /* Let's tell the UV that we want to change into the stopped state */
5098     if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5099         r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
5100         if (r) {
5101             spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5102             return r;
5103         }
5104     }
5105 
5106     /*
5107      * Set the VCPU to STOPPED and THEN clear the interrupt flag,
5108      * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
5109      * have been fully processed. This will ensure that the VCPU
5110      * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
5111      */
5112     kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
5113     kvm_s390_clear_stop_irq(vcpu);
5114 
5115     __disable_ibs_on_vcpu(vcpu);
5116 
5117     for (i = 0; i < online_vcpus; i++) {
5118         struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
5119 
5120         if (!is_vcpu_stopped(tmp)) {
5121             started_vcpus++;
5122             started_vcpu = tmp;
5123         }
5124     }
5125 
5126     if (started_vcpus == 1) {
5127         /*
5128          * As we only have one VCPU left, we want to enable the
5129          * IBS facility for that VCPU to speed it up.
5130          */
5131         __enable_ibs_on_vcpu(started_vcpu);
5132     }
5133 
5134     spin_unlock(&vcpu->kvm->arch.start_stop_lock);
5135     return 0;
5136 }
5137 
5138 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
5139                      struct kvm_enable_cap *cap)
5140 {
5141     int r;
5142 
5143     if (cap->flags)
5144         return -EINVAL;
5145 
5146     switch (cap->cap) {
5147     case KVM_CAP_S390_CSS_SUPPORT:
5148         if (!vcpu->kvm->arch.css_support) {
5149             vcpu->kvm->arch.css_support = 1;
5150             VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
5151             trace_kvm_s390_enable_css(vcpu->kvm);
5152         }
5153         r = 0;
5154         break;
5155     default:
5156         r = -EINVAL;
5157         break;
5158     }
5159     return r;
5160 }
5161 
5162 static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
5163                   struct kvm_s390_mem_op *mop)
5164 {
5165     void __user *uaddr = (void __user *)mop->buf;
5166     int r = 0;
5167 
5168     if (mop->flags || !mop->size)
5169         return -EINVAL;
5170     if (mop->size + mop->sida_offset < mop->size)
5171         return -EINVAL;
5172     if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
5173         return -E2BIG;
5174     if (!kvm_s390_pv_cpu_is_protected(vcpu))
5175         return -EINVAL;
5176 
5177     switch (mop->op) {
5178     case KVM_S390_MEMOP_SIDA_READ:
5179         if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
5180                  mop->sida_offset), mop->size))
5181             r = -EFAULT;
5182 
5183         break;
5184     case KVM_S390_MEMOP_SIDA_WRITE:
5185         if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
5186                    mop->sida_offset), uaddr, mop->size))
5187             r = -EFAULT;
5188         break;
5189     }
5190     return r;
5191 }
5192 
5193 static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
5194                  struct kvm_s390_mem_op *mop)
5195 {
5196     void __user *uaddr = (void __user *)mop->buf;
5197     void *tmpbuf = NULL;
5198     int r = 0;
5199     const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
5200                     | KVM_S390_MEMOP_F_CHECK_ONLY
5201                     | KVM_S390_MEMOP_F_SKEY_PROTECTION;
5202 
5203     if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
5204         return -EINVAL;
5205     if (mop->size > MEM_OP_MAX_SIZE)
5206         return -E2BIG;
5207     if (kvm_s390_pv_cpu_is_protected(vcpu))
5208         return -EINVAL;
5209     if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
5210         if (access_key_invalid(mop->key))
5211             return -EINVAL;
5212     } else {
5213         mop->key = 0;
5214     }
5215     if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
5216         tmpbuf = vmalloc(mop->size);
5217         if (!tmpbuf)
5218             return -ENOMEM;
5219     }
5220 
5221     switch (mop->op) {
5222     case KVM_S390_MEMOP_LOGICAL_READ:
5223         if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5224             r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5225                         GACC_FETCH, mop->key);
5226             break;
5227         }
5228         r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5229                     mop->size, mop->key);
5230         if (r == 0) {
5231             if (copy_to_user(uaddr, tmpbuf, mop->size))
5232                 r = -EFAULT;
5233         }
5234         break;
5235     case KVM_S390_MEMOP_LOGICAL_WRITE:
5236         if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
5237             r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
5238                         GACC_STORE, mop->key);
5239             break;
5240         }
5241         if (copy_from_user(tmpbuf, uaddr, mop->size)) {
5242             r = -EFAULT;
5243             break;
5244         }
5245         r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
5246                      mop->size, mop->key);
5247         break;
5248     }
5249 
5250     if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
5251         kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
5252 
5253     vfree(tmpbuf);
5254     return r;
5255 }
5256 
5257 static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
5258                      struct kvm_s390_mem_op *mop)
5259 {
5260     int r, srcu_idx;
5261 
5262     srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5263 
5264     switch (mop->op) {
5265     case KVM_S390_MEMOP_LOGICAL_READ:
5266     case KVM_S390_MEMOP_LOGICAL_WRITE:
5267         r = kvm_s390_vcpu_mem_op(vcpu, mop);
5268         break;
5269     case KVM_S390_MEMOP_SIDA_READ:
5270     case KVM_S390_MEMOP_SIDA_WRITE:
5271         /* we are locked against sida going away by the vcpu->mutex */
5272         r = kvm_s390_vcpu_sida_op(vcpu, mop);
5273         break;
5274     default:
5275         r = -EINVAL;
5276     }
5277 
5278     srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
5279     return r;
5280 }
5281 
5282 long kvm_arch_vcpu_async_ioctl(struct file *filp,
5283                    unsigned int ioctl, unsigned long arg)
5284 {
5285     struct kvm_vcpu *vcpu = filp->private_data;
5286     void __user *argp = (void __user *)arg;
5287 
5288     switch (ioctl) {
5289     case KVM_S390_IRQ: {
5290         struct kvm_s390_irq s390irq;
5291 
5292         if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
5293             return -EFAULT;
5294         return kvm_s390_inject_vcpu(vcpu, &s390irq);
5295     }
5296     case KVM_S390_INTERRUPT: {
5297         struct kvm_s390_interrupt s390int;
5298         struct kvm_s390_irq s390irq = {};
5299 
5300         if (copy_from_user(&s390int, argp, sizeof(s390int)))
5301             return -EFAULT;
5302         if (s390int_to_s390irq(&s390int, &s390irq))
5303             return -EINVAL;
5304         return kvm_s390_inject_vcpu(vcpu, &s390irq);
5305     }
5306     }
5307     return -ENOIOCTLCMD;
5308 }
5309 
5310 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
5311                     struct kvm_pv_cmd *cmd)
5312 {
5313     struct kvm_s390_pv_dmp dmp;
5314     void *data;
5315     int ret;
5316 
5317     /* Dump initialization is a prerequisite */
5318     if (!vcpu->kvm->arch.pv.dumping)
5319         return -EINVAL;
5320 
5321     if (copy_from_user(&dmp, (__u8 __user *)cmd->data, sizeof(dmp)))
5322         return -EFAULT;
5323 
5324     /* We only handle this subcmd right now */
5325     if (dmp.subcmd != KVM_PV_DUMP_CPU)
5326         return -EINVAL;
5327 
5328     /* CPU dump length is the same as create cpu storage donation. */
5329     if (dmp.buff_len != uv_info.guest_cpu_stor_len)
5330         return -EINVAL;
5331 
5332     data = kvzalloc(uv_info.guest_cpu_stor_len, GFP_KERNEL);
5333     if (!data)
5334         return -ENOMEM;
5335 
5336     ret = kvm_s390_pv_dump_cpu(vcpu, data, &cmd->rc, &cmd->rrc);
5337 
5338     VCPU_EVENT(vcpu, 3, "PROTVIRT DUMP CPU %d rc %x rrc %x",
5339            vcpu->vcpu_id, cmd->rc, cmd->rrc);
5340 
5341     if (ret)
5342         ret = -EINVAL;
5343 
5344     /* On success copy over the dump data */
5345     if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
5346         ret = -EFAULT;
5347 
5348     kvfree(data);
5349     return ret;
5350 }
5351 
5352 long kvm_arch_vcpu_ioctl(struct file *filp,
5353              unsigned int ioctl, unsigned long arg)
5354 {
5355     struct kvm_vcpu *vcpu = filp->private_data;
5356     void __user *argp = (void __user *)arg;
5357     int idx;
5358     long r;
5359     u16 rc, rrc;
5360 
5361     vcpu_load(vcpu);
5362 
5363     switch (ioctl) {
5364     case KVM_S390_STORE_STATUS:
5365         idx = srcu_read_lock(&vcpu->kvm->srcu);
5366         r = kvm_s390_store_status_unloaded(vcpu, arg);
5367         srcu_read_unlock(&vcpu->kvm->srcu, idx);
5368         break;
5369     case KVM_S390_SET_INITIAL_PSW: {
5370         psw_t psw;
5371 
5372         r = -EFAULT;
5373         if (copy_from_user(&psw, argp, sizeof(psw)))
5374             break;
5375         r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
5376         break;
5377     }
5378     case KVM_S390_CLEAR_RESET:
5379         r = 0;
5380         kvm_arch_vcpu_ioctl_clear_reset(vcpu);
5381         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5382             r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5383                       UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
5384             VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
5385                    rc, rrc);
5386         }
5387         break;
5388     case KVM_S390_INITIAL_RESET:
5389         r = 0;
5390         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
5391         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5392             r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5393                       UVC_CMD_CPU_RESET_INITIAL,
5394                       &rc, &rrc);
5395             VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
5396                    rc, rrc);
5397         }
5398         break;
5399     case KVM_S390_NORMAL_RESET:
5400         r = 0;
5401         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
5402         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
5403             r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
5404                       UVC_CMD_CPU_RESET, &rc, &rrc);
5405             VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
5406                    rc, rrc);
5407         }
5408         break;
5409     case KVM_SET_ONE_REG:
5410     case KVM_GET_ONE_REG: {
5411         struct kvm_one_reg reg;
5412         r = -EINVAL;
5413         if (kvm_s390_pv_cpu_is_protected(vcpu))
5414             break;
5415         r = -EFAULT;
5416         if (copy_from_user(&reg, argp, sizeof(reg)))
5417             break;
5418         if (ioctl == KVM_SET_ONE_REG)
5419             r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
5420         else
5421             r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
5422         break;
5423     }
5424 #ifdef CONFIG_KVM_S390_UCONTROL
5425     case KVM_S390_UCAS_MAP: {
5426         struct kvm_s390_ucas_mapping ucasmap;
5427 
5428         if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5429             r = -EFAULT;
5430             break;
5431         }
5432 
5433         if (!kvm_is_ucontrol(vcpu->kvm)) {
5434             r = -EINVAL;
5435             break;
5436         }
5437 
5438         r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
5439                      ucasmap.vcpu_addr, ucasmap.length);
5440         break;
5441     }
5442     case KVM_S390_UCAS_UNMAP: {
5443         struct kvm_s390_ucas_mapping ucasmap;
5444 
5445         if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
5446             r = -EFAULT;
5447             break;
5448         }
5449 
5450         if (!kvm_is_ucontrol(vcpu->kvm)) {
5451             r = -EINVAL;
5452             break;
5453         }
5454 
5455         r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
5456             ucasmap.length);
5457         break;
5458     }
5459 #endif
5460     case KVM_S390_VCPU_FAULT: {
5461         r = gmap_fault(vcpu->arch.gmap, arg, 0);
5462         break;
5463     }
5464     case KVM_ENABLE_CAP:
5465     {
5466         struct kvm_enable_cap cap;
5467         r = -EFAULT;
5468         if (copy_from_user(&cap, argp, sizeof(cap)))
5469             break;
5470         r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
5471         break;
5472     }
5473     case KVM_S390_MEM_OP: {
5474         struct kvm_s390_mem_op mem_op;
5475 
5476         if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
5477             r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
5478         else
5479             r = -EFAULT;
5480         break;
5481     }
5482     case KVM_S390_SET_IRQ_STATE: {
5483         struct kvm_s390_irq_state irq_state;
5484 
5485         r = -EFAULT;
5486         if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5487             break;
5488         if (irq_state.len > VCPU_IRQS_MAX_BUF ||
5489             irq_state.len == 0 ||
5490             irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
5491             r = -EINVAL;
5492             break;
5493         }
5494         /* do not use irq_state.flags, it will break old QEMUs */
5495         r = kvm_s390_set_irq_state(vcpu,
5496                        (void __user *) irq_state.buf,
5497                        irq_state.len);
5498         break;
5499     }
5500     case KVM_S390_GET_IRQ_STATE: {
5501         struct kvm_s390_irq_state irq_state;
5502 
5503         r = -EFAULT;
5504         if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
5505             break;
5506         if (irq_state.len == 0) {
5507             r = -EINVAL;
5508             break;
5509         }
5510         /* do not use irq_state.flags, it will break old QEMUs */
5511         r = kvm_s390_get_irq_state(vcpu,
5512                        (__u8 __user *)  irq_state.buf,
5513                        irq_state.len);
5514         break;
5515     }
5516     case KVM_S390_PV_CPU_COMMAND: {
5517         struct kvm_pv_cmd cmd;
5518 
5519         r = -EINVAL;
5520         if (!is_prot_virt_host())
5521             break;
5522 
5523         r = -EFAULT;
5524         if (copy_from_user(&cmd, argp, sizeof(cmd)))
5525             break;
5526 
5527         r = -EINVAL;
5528         if (cmd.flags)
5529             break;
5530 
5531         /* We only handle this cmd right now */
5532         if (cmd.cmd != KVM_PV_DUMP)
5533             break;
5534 
5535         r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
5536 
5537         /* Always copy over UV rc / rrc data */
5538         if (copy_to_user((__u8 __user *)argp, &cmd.rc,
5539                  sizeof(cmd.rc) + sizeof(cmd.rrc)))
5540             r = -EFAULT;
5541         break;
5542     }
5543     default:
5544         r = -ENOTTY;
5545     }
5546 
5547     vcpu_put(vcpu);
5548     return r;
5549 }
5550 
5551 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5552 {
5553 #ifdef CONFIG_KVM_S390_UCONTROL
5554     if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5555          && (kvm_is_ucontrol(vcpu->kvm))) {
5556         vmf->page = virt_to_page(vcpu->arch.sie_block);
5557         get_page(vmf->page);
5558         return 0;
5559     }
5560 #endif
5561     return VM_FAULT_SIGBUS;
5562 }
5563 
5564 /* Section: memory related */
5565 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5566                    const struct kvm_memory_slot *old,
5567                    struct kvm_memory_slot *new,
5568                    enum kvm_mr_change change)
5569 {
5570     gpa_t size;
5571 
5572     /* When we are protected, we should not change the memory slots */
5573     if (kvm_s390_pv_get_handle(kvm))
5574         return -EINVAL;
5575 
5576     if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5577         return 0;
5578 
5579     /* A few sanity checks. We can have memory slots which have to be
5580        located/ended at a segment boundary (1MB). The memory in userland is
5581        ok to be fragmented into various different vmas. It is okay to mmap()
5582        and munmap() stuff in this slot after doing this call at any time */
5583 
5584     if (new->userspace_addr & 0xffffful)
5585         return -EINVAL;
5586 
5587     size = new->npages * PAGE_SIZE;
5588     if (size & 0xffffful)
5589         return -EINVAL;
5590 
5591     if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5592         return -EINVAL;
5593 
5594     return 0;
5595 }
5596 
5597 void kvm_arch_commit_memory_region(struct kvm *kvm,
5598                 struct kvm_memory_slot *old,
5599                 const struct kvm_memory_slot *new,
5600                 enum kvm_mr_change change)
5601 {
5602     int rc = 0;
5603 
5604     switch (change) {
5605     case KVM_MR_DELETE:
5606         rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5607                     old->npages * PAGE_SIZE);
5608         break;
5609     case KVM_MR_MOVE:
5610         rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5611                     old->npages * PAGE_SIZE);
5612         if (rc)
5613             break;
5614         fallthrough;
5615     case KVM_MR_CREATE:
5616         rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5617                       new->base_gfn * PAGE_SIZE,
5618                       new->npages * PAGE_SIZE);
5619         break;
5620     case KVM_MR_FLAGS_ONLY:
5621         break;
5622     default:
5623         WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5624     }
5625     if (rc)
5626         pr_warn("failed to commit memory region\n");
5627     return;
5628 }
5629 
5630 static inline unsigned long nonhyp_mask(int i)
5631 {
5632     unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5633 
5634     return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5635 }
5636 
5637 static int __init kvm_s390_init(void)
5638 {
5639     int i;
5640 
5641     if (!sclp.has_sief2) {
5642         pr_info("SIE is not available\n");
5643         return -ENODEV;
5644     }
5645 
5646     if (nested && hpage) {
5647         pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5648         return -EINVAL;
5649     }
5650 
5651     for (i = 0; i < 16; i++)
5652         kvm_s390_fac_base[i] |=
5653             stfle_fac_list[i] & nonhyp_mask(i);
5654 
5655     return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5656 }
5657 
5658 static void __exit kvm_s390_exit(void)
5659 {
5660     kvm_exit();
5661 }
5662 
5663 module_init(kvm_s390_init);
5664 module_exit(kvm_s390_exit);
5665 
5666 /*
5667  * Enable autoloading of the kvm module.
5668  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5669  * since x86 takes a different approach.
5670  */
5671 #include <linux/miscdevice.h>
5672 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5673 MODULE_ALIAS("devname:kvm");