0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /******************************************************************************
0003  * privcmd.c
0004  *
0005  * Interface to privileged domain-0 commands.
0006  *
0007  * Copyright (c) 2002-2004, K A Fraser, B Dragovic
0008  */
0009 
0010 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
0011 
0012 #include <linux/kernel.h>
0013 #include <linux/module.h>
0014 #include <linux/sched.h>
0015 #include <linux/slab.h>
0016 #include <linux/string.h>
0017 #include <linux/errno.h>
0018 #include <linux/mm.h>
0019 #include <linux/mman.h>
0020 #include <linux/uaccess.h>
0021 #include <linux/swap.h>
0022 #include <linux/highmem.h>
0023 #include <linux/pagemap.h>
0024 #include <linux/seq_file.h>
0025 #include <linux/miscdevice.h>
0026 #include <linux/moduleparam.h>
0027 
0028 #include <asm/xen/hypervisor.h>
0029 #include <asm/xen/hypercall.h>
0030 
0031 #include <xen/xen.h>
0032 #include <xen/privcmd.h>
0033 #include <xen/interface/xen.h>
0034 #include <xen/interface/memory.h>
0035 #include <xen/interface/hvm/dm_op.h>
0036 #include <xen/features.h>
0037 #include <xen/page.h>
0038 #include <xen/xen-ops.h>
0039 #include <xen/balloon.h>
0040 
0041 #include "privcmd.h"
0042 
0043 MODULE_LICENSE("GPL");
0044 
0045 #define PRIV_VMA_LOCKED ((void *)1)
0046 
0047 static unsigned int privcmd_dm_op_max_num = 16;
0048 module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644);
0049 MODULE_PARM_DESC(dm_op_max_nr_bufs,
0050          "Maximum number of buffers per dm_op hypercall");
0051 
0052 static unsigned int privcmd_dm_op_buf_max_size = 4096;
0053 module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint,
0054            0644);
0055 MODULE_PARM_DESC(dm_op_buf_max_size,
0056          "Maximum size of a dm_op hypercall buffer");
0057 
0058 struct privcmd_data {
0059     domid_t domid;
0060 };
0061 
0062 static int privcmd_vma_range_is_mapped(
0063                struct vm_area_struct *vma,
0064                unsigned long addr,
0065                unsigned long nr_pages);
0066 
0067 static long privcmd_ioctl_hypercall(struct file *file, void __user *udata)
0068 {
0069     struct privcmd_data *data = file->private_data;
0070     struct privcmd_hypercall hypercall;
0071     long ret;
0072 
0073     /* Disallow arbitrary hypercalls if restricted */
0074     if (data->domid != DOMID_INVALID)
0075         return -EPERM;
0076 
0077     if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
0078         return -EFAULT;
0079 
0080     xen_preemptible_hcall_begin();
0081     ret = privcmd_call(hypercall.op,
0082                hypercall.arg[0], hypercall.arg[1],
0083                hypercall.arg[2], hypercall.arg[3],
0084                hypercall.arg[4]);
0085     xen_preemptible_hcall_end();
0086 
0087     return ret;
0088 }
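/*
 * Illustrative sketch (editorial, not part of this driver): a user-space
 * caller might drive IOCTL_PRIVCMD_HYPERCALL roughly as follows, assuming
 * the UAPI definitions from include/uapi/xen/privcmd.h and the Xen public
 * headers for the hypercall numbers:
 *
 *     int fd = open("/dev/xen/privcmd", O_RDWR);
 *     struct privcmd_hypercall call = {
 *         .op  = __HYPERVISOR_xen_version,
 *         .arg = { XENVER_version, 0, 0, 0, 0 },
 *     };
 *     long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * Note that this ioctl fails with -EPERM once the file descriptor has been
 * restricted to a specific domain via IOCTL_PRIVCMD_RESTRICT (see
 * privcmd_ioctl_restrict below).
 */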
0089 
0090 static void free_page_list(struct list_head *pages)
0091 {
0092     struct page *p, *n;
0093 
0094     list_for_each_entry_safe(p, n, pages, lru)
0095         __free_page(p);
0096 
0097     INIT_LIST_HEAD(pages);
0098 }
0099 
0100 /*
0101  * Given an array of items in userspace, return a list of pages
0102  * containing the data.  If copying fails, either because of memory
0103  * allocation failure or a problem reading user memory, return an
0104  * error code; it's up to the caller to dispose of any partial list.
0105  */
0106 static int gather_array(struct list_head *pagelist,
0107             unsigned nelem, size_t size,
0108             const void __user *data)
0109 {
0110     unsigned pageidx;
0111     void *pagedata;
0112     int ret;
0113 
0114     if (size > PAGE_SIZE)
0115         return 0;
0116 
0117     pageidx = PAGE_SIZE;
0118     pagedata = NULL;    /* quiet, gcc */
0119     while (nelem--) {
0120         if (pageidx > PAGE_SIZE-size) {
0121             struct page *page = alloc_page(GFP_KERNEL);
0122 
0123             ret = -ENOMEM;
0124             if (page == NULL)
0125                 goto fail;
0126 
0127             pagedata = page_address(page);
0128 
0129             list_add_tail(&page->lru, pagelist);
0130             pageidx = 0;
0131         }
0132 
0133         ret = -EFAULT;
0134         if (copy_from_user(pagedata + pageidx, data, size))
0135             goto fail;
0136 
0137         data += size;
0138         pageidx += size;
0139     }
0140 
0141     ret = 0;
0142 
0143 fail:
0144     return ret;
0145 }
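/*
 * Worked example (editorial): gather_array() never lets an element straddle
 * a page boundary -- the "pageidx > PAGE_SIZE - size" test allocates a fresh
 * page instead.  Assuming PAGE_SIZE == 4096 and
 * sizeof(struct privcmd_mmap_entry) == 24, each page holds 4096 / 24 == 170
 * entries and the trailing 16 bytes stay unused, so traverse_pages() below
 * can walk the list using the same arithmetic.
 */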
0146 
0147 /*
0148  * Call function "fn" on each element of the array fragmented
0149  * over a list of pages.
0150  */
0151 static int traverse_pages(unsigned nelem, size_t size,
0152               struct list_head *pos,
0153               int (*fn)(void *data, void *state),
0154               void *state)
0155 {
0156     void *pagedata;
0157     unsigned pageidx;
0158     int ret = 0;
0159 
0160     BUG_ON(size > PAGE_SIZE);
0161 
0162     pageidx = PAGE_SIZE;
0163     pagedata = NULL;    /* hush, gcc */
0164 
0165     while (nelem--) {
0166         if (pageidx > PAGE_SIZE-size) {
0167             struct page *page;
0168             pos = pos->next;
0169             page = list_entry(pos, struct page, lru);
0170             pagedata = page_address(page);
0171             pageidx = 0;
0172         }
0173 
0174         ret = (*fn)(pagedata + pageidx, state);
0175         if (ret)
0176             break;
0177         pageidx += size;
0178     }
0179 
0180     return ret;
0181 }
0182 
0183 /*
0184  * Similar to traverse_pages, but use each page as a "block" of
0185  * data to be processed as one unit.
0186  */
0187 static int traverse_pages_block(unsigned nelem, size_t size,
0188                 struct list_head *pos,
0189                 int (*fn)(void *data, int nr, void *state),
0190                 void *state)
0191 {
0192     void *pagedata;
0193     int ret = 0;
0194 
0195     BUG_ON(size > PAGE_SIZE);
0196 
0197     while (nelem) {
0198         int nr = (PAGE_SIZE/size);
0199         struct page *page;
0200         if (nr > nelem)
0201             nr = nelem;
0202         pos = pos->next;
0203         page = list_entry(pos, struct page, lru);
0204         pagedata = page_address(page);
0205         ret = (*fn)(pagedata, nr, state);
0206         if (ret)
0207             break;
0208         nelem -= nr;
0209     }
0210 
0211     return ret;
0212 }
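/*
 * Editorial note: unlike traverse_pages(), which hands "fn" one element at a
 * time, traverse_pages_block() hands it a whole page worth of elements.  For
 * the mmap-batch path below (size == sizeof(xen_pfn_t), i.e. 8 bytes on
 * 64-bit, and assuming PAGE_SIZE == 4096) that is up to 4096 / 8 == 512 gfns
 * per callback, which is what lets mmap_batch_fn() issue one
 * xen_remap_domain_gfn_array() call per page of input.
 */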
0213 
0214 struct mmap_gfn_state {
0215     unsigned long va;
0216     struct vm_area_struct *vma;
0217     domid_t domain;
0218 };
0219 
0220 static int mmap_gfn_range(void *data, void *state)
0221 {
0222     struct privcmd_mmap_entry *msg = data;
0223     struct mmap_gfn_state *st = state;
0224     struct vm_area_struct *vma = st->vma;
0225     int rc;
0226 
0227     /* Do not allow range to wrap the address space. */
0228     if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
0229         ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
0230         return -EINVAL;
0231 
0232     /* Range chunks must be contiguous in va space. */
0233     if ((msg->va != st->va) ||
0234         ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
0235         return -EINVAL;
0236 
0237     rc = xen_remap_domain_gfn_range(vma,
0238                     msg->va & PAGE_MASK,
0239                     msg->mfn, msg->npages,
0240                     vma->vm_page_prot,
0241                     st->domain, NULL);
0242     if (rc < 0)
0243         return rc;
0244 
0245     st->va += msg->npages << PAGE_SHIFT;
0246 
0247     return 0;
0248 }
0249 
0250 static long privcmd_ioctl_mmap(struct file *file, void __user *udata)
0251 {
0252     struct privcmd_data *data = file->private_data;
0253     struct privcmd_mmap mmapcmd;
0254     struct mm_struct *mm = current->mm;
0255     struct vm_area_struct *vma;
0256     int rc;
0257     LIST_HEAD(pagelist);
0258     struct mmap_gfn_state state;
0259 
0260     /* We only support privcmd_ioctl_mmap_batch for auto-translated guests. */
0261     if (xen_feature(XENFEAT_auto_translated_physmap))
0262         return -ENOSYS;
0263 
0264     if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
0265         return -EFAULT;
0266 
0267     /* If restriction is in place, check the domid matches */
0268     if (data->domid != DOMID_INVALID && data->domid != mmapcmd.dom)
0269         return -EPERM;
0270 
0271     rc = gather_array(&pagelist,
0272               mmapcmd.num, sizeof(struct privcmd_mmap_entry),
0273               mmapcmd.entry);
0274 
0275     if (rc || list_empty(&pagelist))
0276         goto out;
0277 
0278     mmap_write_lock(mm);
0279 
0280     {
0281         struct page *page = list_first_entry(&pagelist,
0282                              struct page, lru);
0283         struct privcmd_mmap_entry *msg = page_address(page);
0284 
0285         vma = find_vma(mm, msg->va);
0286         rc = -EINVAL;
0287 
0288         if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
0289             goto out_up;
0290         vma->vm_private_data = PRIV_VMA_LOCKED;
0291     }
0292 
0293     state.va = vma->vm_start;
0294     state.vma = vma;
0295     state.domain = mmapcmd.dom;
0296 
0297     rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
0298                 &pagelist,
0299                 mmap_gfn_range, &state);
0300 
0301 
0302 out_up:
0303     mmap_write_unlock(mm);
0304 
0305 out:
0306     free_page_list(&pagelist);
0307 
0308     return rc;
0309 }
0310 
0311 struct mmap_batch_state {
0312     domid_t domain;
0313     unsigned long va;
0314     struct vm_area_struct *vma;
0315     int index;
0316     /* A tristate:
0317      *      0 for no errors
0318      *      1 if at least one error has happened (and no
0319      *          -ENOENT errors have happened)
0320      *      -ENOENT if at least 1 -ENOENT has happened.
0321      */
0322     int global_error;
0323     int version;
0324 
0325     /* User-space gfn array to store errors in the second pass for V1. */
0326     xen_pfn_t __user *user_gfn;
0327     /* User-space int array to store errors in the second pass for V2. */
0328     int __user *user_err;
0329 };
0330 
0331 /* Auto-translated dom0 note: if the domU being created is PV, the gfn is an
0332  * mfn (a machine/bus address); if it is auto-translated, the gfn is a pfn (input to HAP).
0333  */
0334 static int mmap_batch_fn(void *data, int nr, void *state)
0335 {
0336     xen_pfn_t *gfnp = data;
0337     struct mmap_batch_state *st = state;
0338     struct vm_area_struct *vma = st->vma;
0339     struct page **pages = vma->vm_private_data;
0340     struct page **cur_pages = NULL;
0341     int ret;
0342 
0343     if (xen_feature(XENFEAT_auto_translated_physmap))
0344         cur_pages = &pages[st->index];
0345 
0346     BUG_ON(nr < 0);
0347     ret = xen_remap_domain_gfn_array(st->vma, st->va & PAGE_MASK, gfnp, nr,
0348                      (int *)gfnp, st->vma->vm_page_prot,
0349                      st->domain, cur_pages);
0350 
0351     /* Adjust the global_error? */
0352     if (ret != nr) {
0353         if (ret == -ENOENT)
0354             st->global_error = -ENOENT;
0355         else {
0356             /* Record that at least one error has happened. */
0357             if (st->global_error == 0)
0358                 st->global_error = 1;
0359         }
0360     }
0361     st->va += XEN_PAGE_SIZE * nr;
0362     st->index += nr / XEN_PFN_PER_PAGE;
0363 
0364     return 0;
0365 }
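/*
 * Editorial note: xen_remap_domain_gfn_array() writes per-frame status codes
 * back into the same buffer it reads the gfns from (the "(int *)gfnp" cast
 * above), so after this first traversal the gathered pages hold error codes
 * rather than frame numbers.  Those are what mmap_return_errors() consumes in
 * the second pass, which is also why mmap_batch_fn() always returns 0 and
 * only records the overall outcome in st->global_error.
 */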
0366 
0367 static int mmap_return_error(int err, struct mmap_batch_state *st)
0368 {
0369     int ret;
0370 
0371     if (st->version == 1) {
0372         if (err) {
0373             xen_pfn_t gfn;
0374 
0375             ret = get_user(gfn, st->user_gfn);
0376             if (ret < 0)
0377                 return ret;
0378             /*
0379              * V1 encodes the error codes in the top nibble of
0380              * the 32-bit gfn (with its known limitations
0381              * vis-a-vis 64-bit callers).
0382              */
0383             gfn |= (err == -ENOENT) ?
0384                 PRIVCMD_MMAPBATCH_PAGED_ERROR :
0385                 PRIVCMD_MMAPBATCH_MFN_ERROR;
0386             return __put_user(gfn, st->user_gfn++);
0387         } else
0388             st->user_gfn++;
0389     } else { /* st->version == 2 */
0390         if (err)
0391             return __put_user(err, st->user_err++);
0392         else
0393             st->user_err++;
0394     }
0395 
0396     return 0;
0397 }
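/*
 * Worked example (editorial, assuming the values in
 * include/uapi/xen/privcmd.h): for a V1 caller, a frame that failed because
 * it was paged out is returned as
 *
 *     gfn | PRIVCMD_MMAPBATCH_PAGED_ERROR    (top bit, 0x80000000U)
 *
 * while any other failure is returned as
 *
 *     gfn | PRIVCMD_MMAPBATCH_MFN_ERROR      (top nibble, 0xf0000000U)
 *
 * so a 64-bit V1 caller whose gfns use those top bits cannot distinguish
 * errors reliably -- the "known limitations" mentioned above.  V2 callers
 * get the plain error value in the separate err array instead.
 */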
0398 
0399 static int mmap_return_errors(void *data, int nr, void *state)
0400 {
0401     struct mmap_batch_state *st = state;
0402     int *errs = data;
0403     int i;
0404     int ret;
0405 
0406     for (i = 0; i < nr; i++) {
0407         ret = mmap_return_error(errs[i], st);
0408         if (ret < 0)
0409             return ret;
0410     }
0411     return 0;
0412 }
0413 
0414 /* Allocate pfns that are then mapped with gfns from foreign domid. Update
0415  * the vma with the page info to use later.
0416  * Returns: 0 if success, otherwise -errno
0417  */
0418 static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
0419 {
0420     int rc;
0421     struct page **pages;
0422 
0423     pages = kvcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
0424     if (pages == NULL)
0425         return -ENOMEM;
0426 
0427     rc = xen_alloc_unpopulated_pages(numpgs, pages);
0428     if (rc != 0) {
0429         pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
0430             numpgs, rc);
0431         kvfree(pages);
0432         return -ENOMEM;
0433     }
0434     BUG_ON(vma->vm_private_data != NULL);
0435     vma->vm_private_data = pages;
0436 
0437     return 0;
0438 }
0439 
0440 static const struct vm_operations_struct privcmd_vm_ops;
0441 
0442 static long privcmd_ioctl_mmap_batch(
0443     struct file *file, void __user *udata, int version)
0444 {
0445     struct privcmd_data *data = file->private_data;
0446     int ret;
0447     struct privcmd_mmapbatch_v2 m;
0448     struct mm_struct *mm = current->mm;
0449     struct vm_area_struct *vma;
0450     unsigned long nr_pages;
0451     LIST_HEAD(pagelist);
0452     struct mmap_batch_state state;
0453 
0454     switch (version) {
0455     case 1:
0456         if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
0457             return -EFAULT;
0458         /* Returns per-frame error in m.arr. */
0459         m.err = NULL;
0460         if (!access_ok(m.arr, m.num * sizeof(*m.arr)))
0461             return -EFAULT;
0462         break;
0463     case 2:
0464         if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
0465             return -EFAULT;
0466         /* Returns per-frame error code in m.err. */
0467         if (!access_ok(m.err, m.num * (sizeof(*m.err))))
0468             return -EFAULT;
0469         break;
0470     default:
0471         return -EINVAL;
0472     }
0473 
0474     /* If restriction is in place, check the domid matches */
0475     if (data->domid != DOMID_INVALID && data->domid != m.dom)
0476         return -EPERM;
0477 
0478     nr_pages = DIV_ROUND_UP(m.num, XEN_PFN_PER_PAGE);
0479     if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
0480         return -EINVAL;
0481 
0482     ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
0483 
0484     if (ret)
0485         goto out;
0486     if (list_empty(&pagelist)) {
0487         ret = -EINVAL;
0488         goto out;
0489     }
0490 
0491     if (version == 2) {
0492         /* Zero error array now to only copy back actual errors. */
0493         if (clear_user(m.err, sizeof(int) * m.num)) {
0494             ret = -EFAULT;
0495             goto out;
0496         }
0497     }
0498 
0499     mmap_write_lock(mm);
0500 
0501     vma = find_vma(mm, m.addr);
0502     if (!vma ||
0503         vma->vm_ops != &privcmd_vm_ops) {
0504         ret = -EINVAL;
0505         goto out_unlock;
0506     }
0507 
0508     /*
0509      * Caller must either:
0510      *
0511      * Map the whole VMA range, which will also allocate all the
0512      * pages required for the auto_translated_physmap case.
0513      *
0514      * Or
0515      *
0516      * Map unmapped holes left from a previous map attempt (e.g.,
0517      * because those foreign frames were previously paged out).
0518      */
0519     if (vma->vm_private_data == NULL) {
0520         if (m.addr != vma->vm_start ||
0521             m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
0522             ret = -EINVAL;
0523             goto out_unlock;
0524         }
0525         if (xen_feature(XENFEAT_auto_translated_physmap)) {
0526             ret = alloc_empty_pages(vma, nr_pages);
0527             if (ret < 0)
0528                 goto out_unlock;
0529         } else
0530             vma->vm_private_data = PRIV_VMA_LOCKED;
0531     } else {
0532         if (m.addr < vma->vm_start ||
0533             m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
0534             ret = -EINVAL;
0535             goto out_unlock;
0536         }
0537         if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
0538             ret = -EINVAL;
0539             goto out_unlock;
0540         }
0541     }
0542 
0543     state.domain        = m.dom;
0544     state.vma           = vma;
0545     state.va            = m.addr;
0546     state.index         = 0;
0547     state.global_error  = 0;
0548     state.version       = version;
0549 
0550     BUILD_BUG_ON(((PAGE_SIZE / sizeof(xen_pfn_t)) % XEN_PFN_PER_PAGE) != 0);
0551     /* mmap_batch_fn guarantees ret == 0 */
0552     BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
0553                     &pagelist, mmap_batch_fn, &state));
0554 
0555     mmap_write_unlock(mm);
0556 
0557     if (state.global_error) {
0558         /* Write back errors in second pass. */
0559         state.user_gfn = (xen_pfn_t *)m.arr;
0560         state.user_err = m.err;
0561         ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
0562                        &pagelist, mmap_return_errors, &state);
0563     } else
0564         ret = 0;
0565 
0566     /* If we have not had any EFAULT-like global errors then set the global
0567      * error to -ENOENT if necessary. */
0568     if ((ret == 0) && (state.global_error == -ENOENT))
0569         ret = -ENOENT;
0570 
0571 out:
0572     free_page_list(&pagelist);
0573     return ret;
0574 
0575 out_unlock:
0576     mmap_write_unlock(mm);
0577     goto out;
0578 }
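/*
 * Illustrative sketch (editorial, not part of this driver): a V2 batch
 * mapping from user space typically looks roughly like this, assuming the
 * UAPI types from include/uapi/xen/privcmd.h and 4 KiB pages:
 *
 *     void *addr = mmap(NULL, nr * 4096, PROT_READ | PROT_WRITE,
 *                       MAP_SHARED, fd, 0);     // fd is /dev/xen/privcmd
 *     struct privcmd_mmapbatch_v2 batch = {
 *         .num  = nr,
 *         .dom  = domid,
 *         .addr = (uintptr_t)addr,
 *         .arr  = gfns,                         // nr guest frame numbers
 *         .err  = errs,                         // nr ints, one per frame
 *     };
 *     rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch);
 *
 * An ENOENT result with individual errs[i] == -ENOENT means those frames
 * were paged out; the caller may retry just those holes against the same
 * VMA, which is the vma->vm_private_data != NULL branch above.
 */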
0579 
0580 static int lock_pages(
0581     struct privcmd_dm_op_buf kbufs[], unsigned int num,
0582     struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
0583 {
0584     unsigned int i, off = 0;
0585 
0586     for (i = 0; i < num; ) {
0587         unsigned int requested;
0588         int page_count;
0589 
0590         requested = DIV_ROUND_UP(
0591             offset_in_page(kbufs[i].uptr) + kbufs[i].size,
0592             PAGE_SIZE) - off;
0593         if (requested > nr_pages)
0594             return -ENOSPC;
0595 
0596         page_count = pin_user_pages_fast(
0597             (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
0598             requested, FOLL_WRITE, pages);
0599         if (page_count <= 0)
0600             return page_count ? : -EFAULT;
0601 
0602         *pinned += page_count;
0603         nr_pages -= page_count;
0604         pages += page_count;
0605 
0606         off = (requested == page_count) ? 0 : off + page_count;
0607         i += !off;
0608     }
0609 
0610     return 0;
0611 }
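/*
 * Editorial note on the loop above: pin_user_pages_fast() may pin fewer
 * pages than requested.  In that case "off" remembers how many pages of
 * kbufs[i] are already pinned and the next iteration continues that same
 * buffer from the partial offset; only once the buffer is fully pinned
 * ("off" back to 0, so "i += !off" advances) does the loop move on to
 * kbufs[i + 1].
 */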
0612 
0613 static void unlock_pages(struct page *pages[], unsigned int nr_pages)
0614 {
0615     unpin_user_pages_dirty_lock(pages, nr_pages, true);
0616 }
0617 
0618 static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
0619 {
0620     struct privcmd_data *data = file->private_data;
0621     struct privcmd_dm_op kdata;
0622     struct privcmd_dm_op_buf *kbufs;
0623     unsigned int nr_pages = 0;
0624     struct page **pages = NULL;
0625     struct xen_dm_op_buf *xbufs = NULL;
0626     unsigned int i;
0627     long rc;
0628     unsigned int pinned = 0;
0629 
0630     if (copy_from_user(&kdata, udata, sizeof(kdata)))
0631         return -EFAULT;
0632 
0633     /* If restriction is in place, check the domid matches */
0634     if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
0635         return -EPERM;
0636 
0637     if (kdata.num == 0)
0638         return 0;
0639 
0640     if (kdata.num > privcmd_dm_op_max_num)
0641         return -E2BIG;
0642 
0643     kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL);
0644     if (!kbufs)
0645         return -ENOMEM;
0646 
0647     if (copy_from_user(kbufs, kdata.ubufs,
0648                sizeof(*kbufs) * kdata.num)) {
0649         rc = -EFAULT;
0650         goto out;
0651     }
0652 
0653     for (i = 0; i < kdata.num; i++) {
0654         if (kbufs[i].size > privcmd_dm_op_buf_max_size) {
0655             rc = -E2BIG;
0656             goto out;
0657         }
0658 
0659         if (!access_ok(kbufs[i].uptr,
0660                    kbufs[i].size)) {
0661             rc = -EFAULT;
0662             goto out;
0663         }
0664 
0665         nr_pages += DIV_ROUND_UP(
0666             offset_in_page(kbufs[i].uptr) + kbufs[i].size,
0667             PAGE_SIZE);
0668     }
0669 
0670     pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
0671     if (!pages) {
0672         rc = -ENOMEM;
0673         goto out;
0674     }
0675 
0676     xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL);
0677     if (!xbufs) {
0678         rc = -ENOMEM;
0679         goto out;
0680     }
0681 
0682     rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
0683     if (rc < 0)
0684         goto out;
0685 
0686     for (i = 0; i < kdata.num; i++) {
0687         set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
0688         xbufs[i].size = kbufs[i].size;
0689     }
0690 
0691     xen_preemptible_hcall_begin();
0692     rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs);
0693     xen_preemptible_hcall_end();
0694 
0695 out:
0696     unlock_pages(pages, pinned);
0697     kfree(xbufs);
0698     kfree(pages);
0699     kfree(kbufs);
0700 
0701     return rc;
0702 }
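/*
 * Illustrative sketch (editorial, not part of this driver): a device model
 * such as QEMU would issue a dm_op along these lines, assuming the UAPI
 * types from include/uapi/xen/privcmd.h and a caller-built xen_dm_op
 * structure in "op":
 *
 *     struct privcmd_dm_op_buf buf = { .uptr = &op, .size = sizeof(op) };
 *     struct privcmd_dm_op dm = { .dom = domid, .num = 1, .ubufs = &buf };
 *     rc = ioctl(fd, IOCTL_PRIVCMD_DM_OP, &dm);
 *
 * The module parameters dm_op_max_nr_bufs and dm_op_buf_max_size above
 * bound "num" and each buffer's "size" respectively.
 */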
0703 
0704 static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
0705 {
0706     struct privcmd_data *data = file->private_data;
0707     domid_t dom;
0708 
0709     if (copy_from_user(&dom, udata, sizeof(dom)))
0710         return -EFAULT;
0711 
0712     /* Set restriction to the specified domain, or check it matches */
0713     if (data->domid == DOMID_INVALID)
0714         data->domid = dom;
0715     else if (data->domid != dom)
0716         return -EINVAL;
0717 
0718     return 0;
0719 }
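/*
 * Illustrative sketch (editorial): restriction is one-way for the lifetime
 * of the file descriptor, e.g.
 *
 *     domid_t domid = 5;    // hypothetical target domain
 *     rc = ioctl(fd, IOCTL_PRIVCMD_RESTRICT, &domid);
 *
 * after which IOCTL_PRIVCMD_HYPERCALL is refused outright and the mmap,
 * mmap_batch, dm_op and mmap_resource paths reject any other domid with
 * -EPERM.
 */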
0720 
0721 static long privcmd_ioctl_mmap_resource(struct file *file,
0722                 struct privcmd_mmap_resource __user *udata)
0723 {
0724     struct privcmd_data *data = file->private_data;
0725     struct mm_struct *mm = current->mm;
0726     struct vm_area_struct *vma;
0727     struct privcmd_mmap_resource kdata;
0728     xen_pfn_t *pfns = NULL;
0729     struct xen_mem_acquire_resource xdata = { };
0730     int rc;
0731 
0732     if (copy_from_user(&kdata, udata, sizeof(kdata)))
0733         return -EFAULT;
0734 
0735     /* If restriction is in place, check the domid matches */
0736     if (data->domid != DOMID_INVALID && data->domid != kdata.dom)
0737         return -EPERM;
0738 
0739     /* Both fields must be set or unset */
0740     if (!!kdata.addr != !!kdata.num)
0741         return -EINVAL;
0742 
0743     xdata.domid = kdata.dom;
0744     xdata.type = kdata.type;
0745     xdata.id = kdata.id;
0746 
0747     if (!kdata.addr && !kdata.num) {
0748         /* Query the size of the resource. */
0749         rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
0750         if (rc)
0751             return rc;
0752         return __put_user(xdata.nr_frames, &udata->num);
0753     }
0754 
0755     mmap_write_lock(mm);
0756 
0757     vma = find_vma(mm, kdata.addr);
0758     if (!vma || vma->vm_ops != &privcmd_vm_ops) {
0759         rc = -EINVAL;
0760         goto out;
0761     }
0762 
0763     pfns = kcalloc(kdata.num, sizeof(*pfns), GFP_KERNEL);
0764     if (!pfns) {
0765         rc = -ENOMEM;
0766         goto out;
0767     }
0768 
0769     if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
0770         xen_feature(XENFEAT_auto_translated_physmap)) {
0771         unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
0772         struct page **pages;
0773         unsigned int i;
0774 
0775         rc = alloc_empty_pages(vma, nr);
0776         if (rc < 0)
0777             goto out;
0778 
0779         pages = vma->vm_private_data;
0780         for (i = 0; i < kdata.num; i++) {
0781             xen_pfn_t pfn =
0782                 page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
0783 
0784             pfns[i] = pfn + (i % XEN_PFN_PER_PAGE);
0785         }
0786     } else
0787         vma->vm_private_data = PRIV_VMA_LOCKED;
0788 
0789     xdata.frame = kdata.idx;
0790     xdata.nr_frames = kdata.num;
0791     set_xen_guest_handle(xdata.frame_list, pfns);
0792 
0793     xen_preemptible_hcall_begin();
0794     rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata);
0795     xen_preemptible_hcall_end();
0796 
0797     if (rc)
0798         goto out;
0799 
0800     if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
0801         xen_feature(XENFEAT_auto_translated_physmap)) {
0802         rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT);
0803     } else {
0804         unsigned int domid =
0805             (xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
0806             DOMID_SELF : kdata.dom;
0807         int num, *errs = (int *)pfns;
0808 
0809         BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns));
0810         num = xen_remap_domain_mfn_array(vma,
0811                          kdata.addr & PAGE_MASK,
0812                          pfns, kdata.num, errs,
0813                          vma->vm_page_prot,
0814                          domid);
0815         if (num < 0)
0816             rc = num;
0817         else if (num != kdata.num) {
0818             unsigned int i;
0819 
0820             for (i = 0; i < num; i++) {
0821                 rc = errs[i];
0822                 if (rc < 0)
0823                     break;
0824             }
0825         } else
0826             rc = 0;
0827     }
0828 
0829 out:
0830     mmap_write_unlock(mm);
0831     kfree(pfns);
0832 
0833     return rc;
0834 }
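/*
 * Illustrative sketch (editorial, not part of this driver): acquiring a
 * resource is a two-step protocol, assuming the UAPI types from
 * include/uapi/xen/privcmd.h and 4 KiB pages:
 *
 *     struct privcmd_mmap_resource res = {
 *         .dom  = domid,
 *         .type = XENMEM_resource_grant_table,   // for example
 *         .id   = 0,                             // resource-specific id
 *     };
 *     // 1) addr == 0 && num == 0: query the size; num is written back
 *     ioctl(fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &res);
 *     // 2) mmap that many pages on fd, then map the resource into them
 *     res.addr = (uintptr_t)mmap(NULL, res.num * 4096,
 *                                PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *     ioctl(fd, IOCTL_PRIVCMD_MMAP_RESOURCE, &res);
 */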
0835 
0836 static long privcmd_ioctl(struct file *file,
0837               unsigned int cmd, unsigned long data)
0838 {
0839     int ret = -ENOTTY;
0840     void __user *udata = (void __user *) data;
0841 
0842     switch (cmd) {
0843     case IOCTL_PRIVCMD_HYPERCALL:
0844         ret = privcmd_ioctl_hypercall(file, udata);
0845         break;
0846 
0847     case IOCTL_PRIVCMD_MMAP:
0848         ret = privcmd_ioctl_mmap(file, udata);
0849         break;
0850 
0851     case IOCTL_PRIVCMD_MMAPBATCH:
0852         ret = privcmd_ioctl_mmap_batch(file, udata, 1);
0853         break;
0854 
0855     case IOCTL_PRIVCMD_MMAPBATCH_V2:
0856         ret = privcmd_ioctl_mmap_batch(file, udata, 2);
0857         break;
0858 
0859     case IOCTL_PRIVCMD_DM_OP:
0860         ret = privcmd_ioctl_dm_op(file, udata);
0861         break;
0862 
0863     case IOCTL_PRIVCMD_RESTRICT:
0864         ret = privcmd_ioctl_restrict(file, udata);
0865         break;
0866 
0867     case IOCTL_PRIVCMD_MMAP_RESOURCE:
0868         ret = privcmd_ioctl_mmap_resource(file, udata);
0869         break;
0870 
0871     default:
0872         break;
0873     }
0874 
0875     return ret;
0876 }
0877 
0878 static int privcmd_open(struct inode *ino, struct file *file)
0879 {
0880     struct privcmd_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
0881 
0882     if (!data)
0883         return -ENOMEM;
0884 
0885     /* DOMID_INVALID implies no restriction */
0886     data->domid = DOMID_INVALID;
0887 
0888     file->private_data = data;
0889     return 0;
0890 }
0891 
0892 static int privcmd_release(struct inode *ino, struct file *file)
0893 {
0894     struct privcmd_data *data = file->private_data;
0895 
0896     kfree(data);
0897     return 0;
0898 }
0899 
0900 static void privcmd_close(struct vm_area_struct *vma)
0901 {
0902     struct page **pages = vma->vm_private_data;
0903     int numpgs = vma_pages(vma);
0904     int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT;
0905     int rc;
0906 
0907     if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
0908         return;
0909 
0910     rc = xen_unmap_domain_gfn_range(vma, numgfns, pages);
0911     if (rc == 0)
0912         xen_free_unpopulated_pages(numpgs, pages);
0913     else
0914         pr_crit("unable to unmap MFN range: leaking %d pages. rc=%d\n",
0915             numpgs, rc);
0916     kvfree(pages);
0917 }
0918 
0919 static vm_fault_t privcmd_fault(struct vm_fault *vmf)
0920 {
0921     printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
0922            vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
0923            vmf->pgoff, (void *)vmf->address);
0924 
0925     return VM_FAULT_SIGBUS;
0926 }
0927 
0928 static const struct vm_operations_struct privcmd_vm_ops = {
0929     .close = privcmd_close,
0930     .fault = privcmd_fault
0931 };
0932 
0933 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
0934 {
0935     /* DONTCOPY is essential for Xen because copy_page_range doesn't know
0936      * how to recreate these mappings */
0937     vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
0938              VM_DONTEXPAND | VM_DONTDUMP;
0939     vma->vm_ops = &privcmd_vm_ops;
0940     vma->vm_private_data = NULL;
0941 
0942     return 0;
0943 }
0944 
0945 /*
0946  * For MMAPBATCH*. This allows asserting the singleshot mapping
0947  * on a per pfn/pte basis. Mapping calls that fail with ENOENT
0948  * can then be retried until success.
0949  */
0950 static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
0951 {
0952     return pte_none(*pte) ? 0 : -EBUSY;
0953 }
0954 
0955 static int privcmd_vma_range_is_mapped(
0956                struct vm_area_struct *vma,
0957                unsigned long addr,
0958                unsigned long nr_pages)
0959 {
0960     return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
0961                    is_mapped_fn, NULL) != 0;
0962 }
0963 
0964 const struct file_operations xen_privcmd_fops = {
0965     .owner = THIS_MODULE,
0966     .unlocked_ioctl = privcmd_ioctl,
0967     .open = privcmd_open,
0968     .release = privcmd_release,
0969     .mmap = privcmd_mmap,
0970 };
0971 EXPORT_SYMBOL_GPL(xen_privcmd_fops);
0972 
0973 static struct miscdevice privcmd_dev = {
0974     .minor = MISC_DYNAMIC_MINOR,
0975     .name = "xen/privcmd",
0976     .fops = &xen_privcmd_fops,
0977 };
0978 
0979 static int __init privcmd_init(void)
0980 {
0981     int err;
0982 
0983     if (!xen_domain())
0984         return -ENODEV;
0985 
0986     err = misc_register(&privcmd_dev);
0987     if (err != 0) {
0988         pr_err("Could not register Xen privcmd device\n");
0989         return err;
0990     }
0991 
0992     err = misc_register(&xen_privcmdbuf_dev);
0993     if (err != 0) {
0994         pr_err("Could not register Xen hypercall-buf device\n");
0995         misc_deregister(&privcmd_dev);
0996         return err;
0997     }
0998 
0999     return 0;
1000 }
1001 
1002 static void __exit privcmd_exit(void)
1003 {
1004     misc_deregister(&privcmd_dev);
1005     misc_deregister(&xen_privcmdbuf_dev);
1006 }
1007 
1008 module_init(privcmd_init);
1009 module_exit(privcmd_exit);