// SPDX-License-Identifier: GPL-2.0
/*
 * Hosting Protected Virtual Machines
 *
 * Copyright IBM Corp. 2019, 2020
 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
 */
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
#include <asm/uv.h>
#include <asm/mman.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
#include "kvm-s390.h"

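/*
 * Reset the VM-level protected virtualization bookkeeping. Only called
 * once the handle and the donated base/variable storage are no longer
 * in use, so dropping the references here is safe.
 */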
static void kvm_s390_clear_pv_state(struct kvm *kvm)
{
    kvm->arch.pv.handle = 0;
    kvm->arch.pv.guest_len = 0;
    kvm->arch.pv.stor_base = 0;
    kvm->arch.pv.stor_var = NULL;
}

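/*
 * kvm_s390_pv_destroy_cpu - remove a protected VCPU from Ultravisor control
 * @vcpu: the VCPU to destroy
 * @rc:   where the UV return code is written
 * @rrc:  where the UV return reason code is written
 *
 * Issues the Destroy Secure CPU UV call. Only on success is the donated
 * CPU storage returned to the kernel; on failure it is intentionally
 * leaked, since the Ultravisor may still treat it as secure. The SIDA
 * page is freed and the SIE block is reset to non-protected mode either
 * way.
 *
 * Return: 0 on success, -EIO if the UV call failed.
 */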
int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
    int cc;

    if (!kvm_s390_pv_cpu_get_handle(vcpu))
        return 0;

    cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);

    KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
             vcpu->vcpu_id, *rc, *rrc);
    WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);

    /* Intended memory leak for something that should never happen. */
    if (!cc)
        free_pages(vcpu->arch.pv.stor_base,
               get_order(uv_info.guest_cpu_stor_len));

    free_page(sida_origin(vcpu->arch.sie_block));
    vcpu->arch.sie_block->pv_handle_cpu = 0;
    vcpu->arch.sie_block->pv_handle_config = 0;
    memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
    vcpu->arch.sie_block->sdf = 0;
    /*
     * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
     * Use the reset value of gbea to avoid leaking the kernel pointer of
     * the just freed sida.
     */
    vcpu->arch.sie_block->gbea = 1;
    kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

    return cc ? -EIO : 0;
}

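/*
 * kvm_s390_pv_create_cpu - make a VCPU known to the Ultravisor
 * @vcpu: the VCPU to convert
 * @rc:   where the UV return code is written
 * @rrc:  where the UV return reason code is written
 *
 * Donates the CPU storage and a zeroed SIDA page, then issues the
 * Create Secure CPU UV call. On success sdf is set to 2, which marks
 * the SIE block as belonging to a protected guest. On failure the
 * partially created CPU is torn down again via
 * kvm_s390_pv_destroy_cpu().
 *
 * Return: 0 on success, -EINVAL if the VCPU is already protected,
 * -ENOMEM or -EIO otherwise.
 */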
int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
{
    struct uv_cb_csc uvcb = {
        .header.cmd = UVC_CMD_CREATE_SEC_CPU,
        .header.len = sizeof(uvcb),
    };
    int cc;

    if (kvm_s390_pv_cpu_get_handle(vcpu))
        return -EINVAL;

    vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
                           get_order(uv_info.guest_cpu_stor_len));
    if (!vcpu->arch.pv.stor_base)
        return -ENOMEM;

    /* Input */
    uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
    uvcb.num = vcpu->arch.sie_block->icpua;
    uvcb.state_origin = (u64)vcpu->arch.sie_block;
    uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;

    /* Alloc Secure Instruction Data Area Designation */
    vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
    if (!vcpu->arch.sie_block->sidad) {
        free_pages(vcpu->arch.pv.stor_base,
               get_order(uv_info.guest_cpu_stor_len));
        return -ENOMEM;
    }

    cc = uv_call(0, (u64)&uvcb);
    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;
    KVM_UV_EVENT(vcpu->kvm, 3,
             "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
             vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
             uvcb.header.rrc);

    if (cc) {
        u16 dummy;

        kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
        return -EIO;
    }

    /* Output */
    vcpu->arch.pv.handle = uvcb.cpu_handle;
    vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
    vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
    vcpu->arch.sie_block->sdf = 2;
    kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
    return 0;
}

/* only free resources when the destroy was successful */
static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
{
    vfree(kvm->arch.pv.stor_var);
    free_pages(kvm->arch.pv.stor_base,
           get_order(uv_info.guest_base_stor_len));
    kvm_s390_clear_pv_state(kvm);
}

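/*
 * kvm_s390_pv_alloc_vm - donate base and variable storage for this VM
 *
 * The base storage has the fixed size reported by the Ultravisor in
 * uv_info.guest_base_stor_len. The variable storage scales with guest
 * memory: guest_virt_var_stor_len bytes per 1 MB (HPAGE_SIZE) of guest
 * storage, rounded up to a full page, plus the fixed
 * guest_virt_base_stor_len.
 */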
static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
{
    unsigned long base = uv_info.guest_base_stor_len;
    unsigned long virt = uv_info.guest_virt_var_stor_len;
    unsigned long npages = 0, vlen = 0;

    kvm->arch.pv.stor_var = NULL;
    kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
    if (!kvm->arch.pv.stor_base)
        return -ENOMEM;

    /*
     * Calculate current guest storage for allocation of the
     * variable storage, which is based on the length in MB.
     *
     * Slots are sorted by GFN
     */
    mutex_lock(&kvm->slots_lock);
    npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
    mutex_unlock(&kvm->slots_lock);

    kvm->arch.pv.guest_len = npages * PAGE_SIZE;

    /* Allocate variable storage */
    vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
    vlen += uv_info.guest_virt_base_stor_len;
    kvm->arch.pv.stor_var = vzalloc(vlen);
    if (!kvm->arch.pv.stor_var)
        goto out_err;
    return 0;

out_err:
    kvm_s390_pv_dealloc_vm(kvm);
    return -ENOMEM;
}

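/*
 * kvm_s390_pv_deinit_vm - destroy the secure configuration of this VM
 * @kvm: the VM whose secure configuration is to be destroyed
 * @rc:  where the UV return code is written
 * @rrc: where the UV return reason code is written
 *
 * This should not fail, but if it does, the donated memory must not be
 * freed, since the Ultravisor may still consider it secure. In that
 * case the topmost page table is replaced by s390_replace_asce() so
 * that the old one can be leaked safely.
 *
 * Return: 0 on success, -EIO if the UV call failed.
 */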
int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
    int cc;

    cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
               UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
    WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
    /*
     * if the mm still has a mapping, make all its pages accessible
     * before destroying the guest
     */
    if (mmget_not_zero(kvm->mm)) {
        s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
        mmput(kvm->mm);
    }

    if (!cc) {
        atomic_dec(&kvm->mm->context.protected_count);
        kvm_s390_pv_dealloc_vm(kvm);
    } else {
        /* Intended memory leak on "impossible" error */
        s390_replace_asce(kvm->arch.gmap);
    }
    KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
    WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);

    return cc ? -EIO : 0;
}

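/*
 * The .release callback runs when the last user of the mm goes away,
 * i.e. when the address space is being torn down. Converting all VCPUs
 * back from protected mode at that point ensures the Ultravisor has
 * given up the donated CPU storage before the mm teardown proceeds.
 */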
static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
                         struct mm_struct *mm)
{
    struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
    u16 dummy;

    /*
     * No locking is needed since this is the last thread of the last user of this
     * struct mm.
     * When the struct kvm gets deinitialized, this notifier is also
     * unregistered. This means that if this notifier runs, then the
     * struct kvm is still valid.
     */
    kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
}

static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
    .release = kvm_s390_pv_mmu_notifier_release,
};

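/*
 * kvm_s390_pv_init_vm - create a secure configuration for this VM
 * @kvm: the VM to convert
 * @rc:  where the UV return code is written
 * @rrc: where the UV return reason code is written
 *
 * Allocates and donates the base and variable storage, then issues the
 * Create Secure Configuration UV call. If that call fails, the donated
 * memory is reclaimed, either directly or, if the Ultravisor already
 * took ownership (UVC_RC_NEED_DESTROY), through a full deinit.
 *
 * Return: 0 on success, -EIO if the UV call failed, or the error code
 * of the storage allocation.
 */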
int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
{
    struct uv_cb_cgc uvcb = {
        .header.cmd = UVC_CMD_CREATE_SEC_CONF,
        .header.len = sizeof(uvcb)
    };
    int cc, ret;
    u16 dummy;

    ret = kvm_s390_pv_alloc_vm(kvm);
    if (ret)
        return ret;

    /* Inputs */
    uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
    uvcb.guest_stor_len = kvm->arch.pv.guest_len;
    uvcb.guest_asce = kvm->arch.gmap->asce;
    uvcb.guest_sca = (unsigned long)kvm->arch.sca;
    uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
    uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;

    cc = uv_call_sched(0, (u64)&uvcb);
    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;
    KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
             uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);

    /* Outputs */
    kvm->arch.pv.handle = uvcb.guest_handle;

    atomic_inc(&kvm->mm->context.protected_count);
    if (cc) {
        if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
            kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
        } else {
            atomic_dec(&kvm->mm->context.protected_count);
            kvm_s390_pv_dealloc_vm(kvm);
        }
        return -EIO;
    }
    kvm->arch.gmap->guest_handle = uvcb.guest_handle;
    /* Add the notifier only once. No races because we hold kvm->lock */
    if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
        kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
        mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
    }
    return 0;
}

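/*
 * kvm_s390_pv_set_sec_parms - hand the SE header over to the Ultravisor
 * @kvm:    the VM whose secure configuration is being set up
 * @hdr:    kernel copy of the secure execution header of the boot image
 * @length: length of that header
 * @rc:     where the UV return code is written
 * @rrc:    where the UV return reason code is written
 *
 * The Ultravisor needs the header before the image can be unpacked.
 *
 * Return: 0 on success, -EINVAL if the UV call failed.
 */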
int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
                  u16 *rrc)
{
    struct uv_cb_ssc uvcb = {
        .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
        .header.len = sizeof(uvcb),
        .sec_header_origin = (u64)hdr,
        .sec_header_len = length,
        .guest_handle = kvm_s390_pv_get_handle(kvm),
    };
    int cc = uv_call(0, (u64)&uvcb);

    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;
    KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
             *rc, *rrc);
    return cc ? -EINVAL : 0;
}

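/*
 * unpack_one - make one guest page secure and unpack it from the image
 *
 * The second tweak component is the page offset into the image, so
 * every page is decrypted with a distinct tweak. -EAGAIN from
 * gmap_make_secure() is not logged, since the caller simply retries.
 */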
static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
              u64 offset, u16 *rc, u16 *rrc)
{
    struct uv_cb_unp uvcb = {
        .header.cmd = UVC_CMD_UNPACK_IMG,
        .header.len = sizeof(uvcb),
        .guest_handle = kvm_s390_pv_get_handle(kvm),
        .gaddr = addr,
        .tweak[0] = tweak,
        .tweak[1] = offset,
    };
    int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);

    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;

    if (ret && ret != -EAGAIN)
        KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
                 uvcb.gaddr, *rc, *rrc);
    return ret;
}

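/*
 * kvm_s390_pv_unpack - unpack the encrypted boot image into guest memory
 * @kvm:   the VM to unpack into
 * @addr:  page-aligned guest address to start at
 * @size:  page-aligned number of bytes to unpack
 * @tweak: tweak prefix for the image decryption
 * @rc:    where the UV return code is written
 * @rrc:   where the UV return reason code is written
 *
 * Walks the range page by page. -EAGAIN from unpack_one() is retried
 * after a reschedule point, unless a fatal signal is pending.
 *
 * Return: 0 on success, -EINVAL on misaligned input, or the error of
 * the failing unpack_one() call.
 */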
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
               unsigned long tweak, u16 *rc, u16 *rrc)
{
    u64 offset = 0;
    int ret = 0;

    if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
        return -EINVAL;

    KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
             addr, size);

    while (offset < size) {
        ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
        if (ret == -EAGAIN) {
            cond_resched();
            if (fatal_signal_pending(current))
                break;
            continue;
        }
        if (ret)
            break;
        addr += PAGE_SIZE;
        offset += PAGE_SIZE;
    }
    if (!ret)
        KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
    return ret;
}

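/*
 * kvm_s390_pv_set_cpu_state - request a VCPU state change from the UV
 * @vcpu:  the protected VCPU
 * @state: one of the PV_CPU_STATE_* values
 *
 * For protected guests KVM cannot manipulate the CPU state in the SIE
 * block directly, so state changes (e.g. stopped or operating) have to
 * go through the Ultravisor.
 *
 * Return: 0 on success, -EINVAL if the UV call failed.
 */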
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
{
    struct uv_cb_cpu_set_state uvcb = {
        .header.cmd = UVC_CMD_CPU_SET_STATE,
        .header.len = sizeof(uvcb),
        .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
        .state      = state,
    };
    int cc;

    cc = uv_call(0, (u64)&uvcb);
    KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
             vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
    if (cc)
        return -EINVAL;
    return 0;
}

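/*
 * kvm_s390_pv_dump_cpu - get the dump data for one protected VCPU
 * @vcpu: the VCPU to dump
 * @buff: kernel buffer the Ultravisor writes the dump data to; the
 *        caller must size it for the CPU dump length advertised by the
 *        Ultravisor
 * @rc:   where the UV return code is written
 * @rrc:  where the UV return reason code is written
 *
 * Return: the UV call condition code, i.e. 0 on success.
 */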
int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
{
    struct uv_cb_dump_cpu uvcb = {
        .header.cmd = UVC_CMD_DUMP_CPU,
        .header.len = sizeof(uvcb),
        .cpu_handle = vcpu->arch.pv.handle,
        .dump_area_origin = (u64)buff,
    };
    int cc;

    cc = uv_call_sched(0, (u64)&uvcb);
    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;
    return cc;
}

/* Size of the cache for the storage state dump data. 1MB for now */
#define DUMP_BUFF_LEN HPAGE_SIZE

/**
 * kvm_s390_pv_dump_stor_state
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @gaddr: Starting absolute guest address for which the storage state
 *     is requested.
 * @buff_user_len: Length of the buff_user buffer
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Stores buff_user_len bytes of tweak component values to buff_user
 * starting with the 1MB block specified by the absolute guest address
 * (gaddr). The gaddr pointer will be updated with the last address
 * for which data was written when returning to userspace. buff_user
 * might be written to even if an error rc is returned. For instance
 * if we encounter a fault after writing the first page of data.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the cache fails
 *  -EINVAL if gaddr is not aligned to 1MB
 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
                u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
{
    struct uv_cb_dump_stor_state uvcb = {
        .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
        .header.len = sizeof(uvcb),
        .config_handle = kvm->arch.pv.handle,
        .gaddr = *gaddr,
        .dump_area_origin = 0,
    };
    const u64 increment_len = uv_info.conf_dump_storage_state_len;
    size_t buff_kvm_size;
    size_t size_done = 0;
    u8 *buff_kvm = NULL;
    int cc, ret;

    ret = -EINVAL;
    /* UV call processes 1MB guest storage chunks at a time */
    if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
        goto out;

    /*
     * We provide the storage state for 1MB chunks of guest
     * storage. The buffer will need to be aligned to
     * conf_dump_storage_state_len so we don't end on a partial
     * chunk.
     */
    if (!buff_user_len ||
        !IS_ALIGNED(buff_user_len, increment_len))
        goto out;

    /*
     * Allocate a buffer from which we will later copy to the user
     * process. We don't want userspace to dictate our buffer size
     * so we limit it to DUMP_BUFF_LEN.
     */
    ret = -ENOMEM;
    buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
    buff_kvm = vzalloc(buff_kvm_size);
    if (!buff_kvm)
        goto out;

    ret = 0;
    uvcb.dump_area_origin = (u64)buff_kvm;
    /* We will loop until the user buffer is filled or an error occurs */
    do {
        /* Get 1MB worth of guest storage state data */
        cc = uv_call_sched(0, (u64)&uvcb);

        /* All or nothing */
        if (cc) {
            ret = -EINVAL;
            break;
        }

        size_done += increment_len;
        uvcb.dump_area_origin += increment_len;
        buff_user_len -= increment_len;
        uvcb.gaddr += HPAGE_SIZE;

        /* KVM Buffer full, time to copy to the process */
        if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
            if (copy_to_user(buff_user, buff_kvm, size_done)) {
                ret = -EFAULT;
                break;
            }

            buff_user += size_done;
            size_done = 0;
            uvcb.dump_area_origin = (u64)buff_kvm;
        }
    } while (buff_user_len);

    /* Report back where we ended dumping */
    *gaddr = uvcb.gaddr;

    /* Let's only log errors, we don't want to spam */
out:
    if (ret)
        KVM_UV_EVENT(kvm, 3,
                 "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
                 uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
    *rc = uvcb.header.rc;
    *rrc = uvcb.header.rrc;
    vfree(buff_kvm);

    return ret;
}

/**
 * kvm_s390_pv_dump_complete
 *
 * @kvm: pointer to the guest's KVM struct
 * @buff_user: Userspace pointer where we will write the results to
 * @rc: Pointer to where the uvcb return code is stored
 * @rrc: Pointer to where the uvcb return reason code is stored
 *
 * Completes the dumping operation and writes the completion data to
 * user space.
 *
 * Context: kvm->lock needs to be held
 *
 * Return:
 *  0 on success
 *  -ENOMEM if allocating the completion buffer fails
 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
 *  -EFAULT if copying the result to buff_user failed
 */
int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
                  u16 *rc, u16 *rrc)
{
    struct uv_cb_dump_complete complete = {
        .header.len = sizeof(complete),
        .header.cmd = UVC_CMD_DUMP_COMPLETE,
        .config_handle = kvm_s390_pv_get_handle(kvm),
    };
    u64 *compl_data;
    int ret;

    /* Allocate dump area */
    compl_data = vzalloc(uv_info.conf_dump_finalize_len);
    if (!compl_data)
        return -ENOMEM;
    complete.dump_area_origin = (u64)compl_data;

    ret = uv_call_sched(0, (u64)&complete);
    *rc = complete.header.rc;
    *rrc = complete.header.rrc;
    KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
             complete.header.rc, complete.header.rrc);

    if (!ret) {
        /*
         * kvm_s390_pv_dealloc_vm() will also (mem)set
         * this to false on a reboot or other destroy
         * operation for this vm.
         */
        kvm->arch.pv.dumping = false;
        kvm_s390_vcpu_unblock_all(kvm);
        ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
        if (ret)
            ret = -EFAULT;
    }
    vfree(compl_data);
    /* If the UVC returned an error, translate it to -EINVAL */
    if (ret > 0)
        ret = -EINVAL;
    return ret;
}