Back to home page

OSCL-LXR

 
 

    


0001 /******************************************************************************
0002  * gntalloc.c
0003  *
0004  * Device for creating grant references (in user-space) that may be shared
0005  * with other domains.
0006  *
0007  * This program is distributed in the hope that it will be useful,
0008  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0009  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0010  * GNU General Public License for more details.
0011  *
0012  * You should have received a copy of the GNU General Public License
0013  * along with this program; if not, write to the Free Software
0014  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
0015  */
0016 
0017 /*
0018  * This driver exists to allow userspace programs in Linux to allocate kernel
0019  * memory that will later be shared with another domain.  Without this device,
0020  * Linux userspace programs cannot create grant references.
0021  *
0022  * How this stuff works:
0023  *   X -> granting a page to Y
0024  *   Y -> mapping the grant from X
0025  *
0026  *   1. X uses the gntalloc device to allocate a page of kernel memory, P.
0027  *   2. X creates an entry in the grant table that says domid(Y) can access P.
0028  *      This is done without a hypercall unless the grant table needs expansion.
0029  *   3. X gives the grant reference identifier, GREF, to Y.
0030  *   4. Y maps the page, either directly into kernel memory for use in a backend
0031  *      driver, or via the gntdev device to map into the address space of an
0032  *      application running in Y. This is the first point at which Xen does any
0033  *      tracking of the page.
0034  *   5. A program in X mmap()s a segment of the gntalloc device that corresponds
0035  *      to the shared page, and can now communicate with Y over the shared page.
0036  *
0037  *
0038  * NOTE TO USERSPACE LIBRARIES:
0039  *   The grant allocation and mmap()ing are, naturally, two separate operations.
0040  *   You set up the sharing by calling the create ioctl() and then the mmap().
0041  *   Teardown requires munmap() and either close() or ioctl().
0042  *
0043  * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
0044  * reference, this device can be used to consume kernel memory by leaving grant
0045  * references mapped by another domain when an application exits. Therefore,
0046  * there is a global limit on the number of pages that can be allocated. When
0047  * all references to the page are unmapped, it will be freed during the next
0048  * grant operation.
0049  */
0050 
0051 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
0052 
0053 #include <linux/atomic.h>
0054 #include <linux/module.h>
0055 #include <linux/miscdevice.h>
0056 #include <linux/kernel.h>
0057 #include <linux/init.h>
0058 #include <linux/slab.h>
0059 #include <linux/fs.h>
0060 #include <linux/device.h>
0061 #include <linux/mm.h>
0062 #include <linux/uaccess.h>
0063 #include <linux/types.h>
0064 #include <linux/list.h>
0065 #include <linux/highmem.h>
0066 
0067 #include <xen/xen.h>
0068 #include <xen/page.h>
0069 #include <xen/grant_table.h>
0070 #include <xen/gntalloc.h>
0071 #include <xen/events.h>
0072 
/*
 * Upper bound that gntalloc_ioctl_alloc() compares gref_size against before
 * accepting a new allocation. Exposed as a module parameter with mode 0644.
 */
static int limit = 1024;
module_param(limit, int, 0644);
MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
        "the gntalloc device");

/* Global list of grant references, linked through gntalloc_gref.next_gref. */
static LIST_HEAD(gref_list);
/* Taken around updates to gref_list, the per-file lists, use counts and gref_size. */
static DEFINE_MUTEX(gref_mutex);
/* Number of grants currently charged against limit. */
static int gref_size;
0081 
/*
 * Unmap notification settings attached to a grant reference. They are filled
 * in by gntalloc_ioctl_unmap_notify() and acted upon in __del_gref().
 */
struct notify_info {
    uint16_t pgoff:12;    /* Bits 0-11: Offset of the byte to clear */
    uint16_t flags:2;     /* Bits 12-13: Unmap notification flags */
    int event;            /* Port (event channel) to notify */
};
0087 
/*
 * Metadata on a grant reference.
 *
 * A gref is created by add_grefs() with users == 1 (the owning file's
 * reference); gntalloc_mmap() takes one more reference per mapped page.
 * __del_gref() tears the object down.
 */
struct gntalloc_gref {
    struct list_head next_gref;  /* list entry gref_list */
    struct list_head next_file;  /* list entry file->list, if open */
    struct page *page;       /* The shared page */
    uint64_t file_index;         /* File offset for mmap() */
    unsigned int users;          /* Use count - when zero, waiting on Xen */
    grant_ref_t gref_id;         /* The grant reference number */
    struct notify_info notify;   /* Unmap notification */
};
0098 
/* Per-open-file state, stored in file->private_data by gntalloc_open(). */
struct gntalloc_file_private_data {
    struct list_head list;   /* grefs of this file, linked via next_file */
    uint64_t index;          /* file offset assigned to the next allocation */
};
0103 
/* Per-mapping state, stored in vma->vm_private_data by gntalloc_mmap(). */
struct gntalloc_vma_private_data {
    struct gntalloc_gref *gref;  /* first gref covered by the mapping */
    int users;                   /* bumped in vma_open, dropped in vma_close */
    int count;                   /* number of pages in the mapping */
};
0109 
0110 static void __del_gref(struct gntalloc_gref *gref);
0111 
0112 static void do_cleanup(void)
0113 {
0114     struct gntalloc_gref *gref, *n;
0115     list_for_each_entry_safe(gref, n, &gref_list, next_gref) {
0116         if (!gref->users)
0117             __del_gref(gref);
0118     }
0119 }
0120 
0121 static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
0122     uint32_t *gref_ids, struct gntalloc_file_private_data *priv)
0123 {
0124     int i, rc, readonly;
0125     LIST_HEAD(queue_gref);
0126     LIST_HEAD(queue_file);
0127     struct gntalloc_gref *gref, *next;
0128 
0129     readonly = !(op->flags & GNTALLOC_FLAG_WRITABLE);
0130     for (i = 0; i < op->count; i++) {
0131         gref = kzalloc(sizeof(*gref), GFP_KERNEL);
0132         if (!gref) {
0133             rc = -ENOMEM;
0134             goto undo;
0135         }
0136         list_add_tail(&gref->next_gref, &queue_gref);
0137         list_add_tail(&gref->next_file, &queue_file);
0138         gref->users = 1;
0139         gref->file_index = op->index + i * PAGE_SIZE;
0140         gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
0141         if (!gref->page) {
0142             rc = -ENOMEM;
0143             goto undo;
0144         }
0145 
0146         /* Grant foreign access to the page. */
0147         rc = gnttab_grant_foreign_access(op->domid,
0148                          xen_page_to_gfn(gref->page),
0149                          readonly);
0150         if (rc < 0)
0151             goto undo;
0152         gref_ids[i] = gref->gref_id = rc;
0153     }
0154 
0155     /* Add to gref lists. */
0156     mutex_lock(&gref_mutex);
0157     list_splice_tail(&queue_gref, &gref_list);
0158     list_splice_tail(&queue_file, &priv->list);
0159     mutex_unlock(&gref_mutex);
0160 
0161     return 0;
0162 
0163 undo:
0164     mutex_lock(&gref_mutex);
0165     gref_size -= (op->count - i);
0166 
0167     list_for_each_entry_safe(gref, next, &queue_file, next_file) {
0168         list_del(&gref->next_file);
0169         __del_gref(gref);
0170     }
0171 
0172     mutex_unlock(&gref_mutex);
0173     return rc;
0174 }
0175 
0176 static void __del_gref(struct gntalloc_gref *gref)
0177 {
0178     if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
0179         uint8_t *tmp = kmap_local_page(gref->page);
0180         tmp[gref->notify.pgoff] = 0;
0181         kunmap_local(tmp);
0182     }
0183     if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
0184         notify_remote_via_evtchn(gref->notify.event);
0185         evtchn_put(gref->notify.event);
0186     }
0187 
0188     gref->notify.flags = 0;
0189 
0190     if (gref->gref_id) {
0191         if (gref->page)
0192             gnttab_end_foreign_access(gref->gref_id, gref->page);
0193         else
0194             gnttab_free_grant_reference(gref->gref_id);
0195     }
0196 
0197     gref_size--;
0198     list_del(&gref->next_gref);
0199 
0200     kfree(gref);
0201 }
0202 
0203 /* finds contiguous grant references in a file, returns the first */
0204 static struct gntalloc_gref *find_grefs(struct gntalloc_file_private_data *priv,
0205         uint64_t index, uint32_t count)
0206 {
0207     struct gntalloc_gref *rv = NULL, *gref;
0208     list_for_each_entry(gref, &priv->list, next_file) {
0209         if (gref->file_index == index && !rv)
0210             rv = gref;
0211         if (rv) {
0212             if (gref->file_index != index)
0213                 return NULL;
0214             index += PAGE_SIZE;
0215             count--;
0216             if (count == 0)
0217                 return rv;
0218         }
0219     }
0220     return NULL;
0221 }
0222 
0223 /*
0224  * -------------------------------------
0225  *  File operations.
0226  * -------------------------------------
0227  */
0228 static int gntalloc_open(struct inode *inode, struct file *filp)
0229 {
0230     struct gntalloc_file_private_data *priv;
0231 
0232     priv = kzalloc(sizeof(*priv), GFP_KERNEL);
0233     if (!priv)
0234         goto out_nomem;
0235     INIT_LIST_HEAD(&priv->list);
0236 
0237     filp->private_data = priv;
0238 
0239     pr_debug("%s: priv %p\n", __func__, priv);
0240 
0241     return 0;
0242 
0243 out_nomem:
0244     return -ENOMEM;
0245 }
0246 
0247 static int gntalloc_release(struct inode *inode, struct file *filp)
0248 {
0249     struct gntalloc_file_private_data *priv = filp->private_data;
0250     struct gntalloc_gref *gref;
0251 
0252     pr_debug("%s: priv %p\n", __func__, priv);
0253 
0254     mutex_lock(&gref_mutex);
0255     while (!list_empty(&priv->list)) {
0256         gref = list_entry(priv->list.next,
0257             struct gntalloc_gref, next_file);
0258         list_del(&gref->next_file);
0259         gref->users--;
0260         if (gref->users == 0)
0261             __del_gref(gref);
0262     }
0263     kfree(priv);
0264     mutex_unlock(&gref_mutex);
0265 
0266     return 0;
0267 }
0268 
0269 static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
0270         struct ioctl_gntalloc_alloc_gref __user *arg)
0271 {
0272     int rc = 0;
0273     struct ioctl_gntalloc_alloc_gref op;
0274     uint32_t *gref_ids;
0275 
0276     pr_debug("%s: priv %p\n", __func__, priv);
0277 
0278     if (copy_from_user(&op, arg, sizeof(op))) {
0279         rc = -EFAULT;
0280         goto out;
0281     }
0282 
0283     gref_ids = kcalloc(op.count, sizeof(gref_ids[0]), GFP_KERNEL);
0284     if (!gref_ids) {
0285         rc = -ENOMEM;
0286         goto out;
0287     }
0288 
0289     mutex_lock(&gref_mutex);
0290     /* Clean up pages that were at zero (local) users but were still mapped
0291      * by remote domains. Since those pages count towards the limit that we
0292      * are about to enforce, removing them here is a good idea.
0293      */
0294     do_cleanup();
0295     if (gref_size + op.count > limit) {
0296         mutex_unlock(&gref_mutex);
0297         rc = -ENOSPC;
0298         goto out_free;
0299     }
0300     gref_size += op.count;
0301     op.index = priv->index;
0302     priv->index += op.count * PAGE_SIZE;
0303     mutex_unlock(&gref_mutex);
0304 
0305     rc = add_grefs(&op, gref_ids, priv);
0306     if (rc < 0)
0307         goto out_free;
0308 
0309     /* Once we finish add_grefs, it is unsafe to touch the new reference,
0310      * since it is possible for a concurrent ioctl to remove it (by guessing
0311      * its index). If the userspace application doesn't provide valid memory
0312      * to write the IDs to, then it will need to close the file in order to
0313      * release - which it will do by segfaulting when it tries to access the
0314      * IDs to close them.
0315      */
0316     if (copy_to_user(arg, &op, sizeof(op))) {
0317         rc = -EFAULT;
0318         goto out_free;
0319     }
0320     if (copy_to_user(arg->gref_ids, gref_ids,
0321             sizeof(gref_ids[0]) * op.count)) {
0322         rc = -EFAULT;
0323         goto out_free;
0324     }
0325 
0326 out_free:
0327     kfree(gref_ids);
0328 out:
0329     return rc;
0330 }
0331 
0332 static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
0333         void __user *arg)
0334 {
0335     int i, rc = 0;
0336     struct ioctl_gntalloc_dealloc_gref op;
0337     struct gntalloc_gref *gref, *n;
0338 
0339     pr_debug("%s: priv %p\n", __func__, priv);
0340 
0341     if (copy_from_user(&op, arg, sizeof(op))) {
0342         rc = -EFAULT;
0343         goto dealloc_grant_out;
0344     }
0345 
0346     mutex_lock(&gref_mutex);
0347     gref = find_grefs(priv, op.index, op.count);
0348     if (gref) {
0349         /* Remove from the file list only, and decrease reference count.
0350          * The later call to do_cleanup() will remove from gref_list and
0351          * free the memory if the pages aren't mapped anywhere.
0352          */
0353         for (i = 0; i < op.count; i++) {
0354             n = list_entry(gref->next_file.next,
0355                 struct gntalloc_gref, next_file);
0356             list_del(&gref->next_file);
0357             gref->users--;
0358             gref = n;
0359         }
0360     } else {
0361         rc = -EINVAL;
0362     }
0363 
0364     do_cleanup();
0365 
0366     mutex_unlock(&gref_mutex);
0367 dealloc_grant_out:
0368     return rc;
0369 }
0370 
0371 static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
0372         void __user *arg)
0373 {
0374     struct ioctl_gntalloc_unmap_notify op;
0375     struct gntalloc_gref *gref;
0376     uint64_t index;
0377     int pgoff;
0378     int rc;
0379 
0380     if (copy_from_user(&op, arg, sizeof(op)))
0381         return -EFAULT;
0382 
0383     index = op.index & ~(PAGE_SIZE - 1);
0384     pgoff = op.index & (PAGE_SIZE - 1);
0385 
0386     mutex_lock(&gref_mutex);
0387 
0388     gref = find_grefs(priv, index, 1);
0389     if (!gref) {
0390         rc = -ENOENT;
0391         goto unlock_out;
0392     }
0393 
0394     if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) {
0395         rc = -EINVAL;
0396         goto unlock_out;
0397     }
0398 
0399     /* We need to grab a reference to the event channel we are going to use
0400      * to send the notify before releasing the reference we may already have
0401      * (if someone has called this ioctl twice). This is required so that
0402      * it is possible to change the clear_byte part of the notification
0403      * without disturbing the event channel part, which may now be the last
0404      * reference to that event channel.
0405      */
0406     if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
0407         if (evtchn_get(op.event_channel_port)) {
0408             rc = -EINVAL;
0409             goto unlock_out;
0410         }
0411     }
0412 
0413     if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
0414         evtchn_put(gref->notify.event);
0415 
0416     gref->notify.flags = op.action;
0417     gref->notify.pgoff = pgoff;
0418     gref->notify.event = op.event_channel_port;
0419     rc = 0;
0420 
0421  unlock_out:
0422     mutex_unlock(&gref_mutex);
0423     return rc;
0424 }
0425 
0426 static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
0427         unsigned long arg)
0428 {
0429     struct gntalloc_file_private_data *priv = filp->private_data;
0430 
0431     switch (cmd) {
0432     case IOCTL_GNTALLOC_ALLOC_GREF:
0433         return gntalloc_ioctl_alloc(priv, (void __user *)arg);
0434 
0435     case IOCTL_GNTALLOC_DEALLOC_GREF:
0436         return gntalloc_ioctl_dealloc(priv, (void __user *)arg);
0437 
0438     case IOCTL_GNTALLOC_SET_UNMAP_NOTIFY:
0439         return gntalloc_ioctl_unmap_notify(priv, (void __user *)arg);
0440 
0441     default:
0442         return -ENOIOCTLCMD;
0443     }
0444 
0445     return 0;
0446 }
0447 
0448 static void gntalloc_vma_open(struct vm_area_struct *vma)
0449 {
0450     struct gntalloc_vma_private_data *priv = vma->vm_private_data;
0451 
0452     if (!priv)
0453         return;
0454 
0455     mutex_lock(&gref_mutex);
0456     priv->users++;
0457     mutex_unlock(&gref_mutex);
0458 }
0459 
0460 static void gntalloc_vma_close(struct vm_area_struct *vma)
0461 {
0462     struct gntalloc_vma_private_data *priv = vma->vm_private_data;
0463     struct gntalloc_gref *gref, *next;
0464     int i;
0465 
0466     if (!priv)
0467         return;
0468 
0469     mutex_lock(&gref_mutex);
0470     priv->users--;
0471     if (priv->users == 0) {
0472         gref = priv->gref;
0473         for (i = 0; i < priv->count; i++) {
0474             gref->users--;
0475             next = list_entry(gref->next_gref.next,
0476                       struct gntalloc_gref, next_gref);
0477             if (gref->users == 0)
0478                 __del_gref(gref);
0479             gref = next;
0480         }
0481         kfree(priv);
0482     }
0483     mutex_unlock(&gref_mutex);
0484 }
0485 
/* VMA callbacks installed on every mapping created by gntalloc_mmap(). */
static const struct vm_operations_struct gntalloc_vmops = {
    .open = gntalloc_vma_open,
    .close = gntalloc_vma_close,
};
0490 
0491 static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
0492 {
0493     struct gntalloc_file_private_data *priv = filp->private_data;
0494     struct gntalloc_vma_private_data *vm_priv;
0495     struct gntalloc_gref *gref;
0496     int count = vma_pages(vma);
0497     int rv, i;
0498 
0499     if (!(vma->vm_flags & VM_SHARED)) {
0500         pr_err("%s: Mapping must be shared\n", __func__);
0501         return -EINVAL;
0502     }
0503 
0504     vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL);
0505     if (!vm_priv)
0506         return -ENOMEM;
0507 
0508     mutex_lock(&gref_mutex);
0509 
0510     pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
0511                priv, vm_priv, vma->vm_pgoff, count);
0512 
0513     gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
0514     if (gref == NULL) {
0515         rv = -ENOENT;
0516         pr_debug("%s: Could not find grant reference",
0517                 __func__);
0518         kfree(vm_priv);
0519         goto out_unlock;
0520     }
0521 
0522     vm_priv->gref = gref;
0523     vm_priv->users = 1;
0524     vm_priv->count = count;
0525 
0526     vma->vm_private_data = vm_priv;
0527 
0528     vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
0529 
0530     vma->vm_ops = &gntalloc_vmops;
0531 
0532     for (i = 0; i < count; i++) {
0533         gref->users++;
0534         rv = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
0535                 gref->page);
0536         if (rv)
0537             goto out_unlock;
0538 
0539         gref = list_entry(gref->next_file.next,
0540                 struct gntalloc_gref, next_file);
0541     }
0542     rv = 0;
0543 
0544 out_unlock:
0545     mutex_unlock(&gref_mutex);
0546     return rv;
0547 }
0548 
/* File operations of the gntalloc device node. */
static const struct file_operations gntalloc_fops = {
    .owner = THIS_MODULE,
    .open = gntalloc_open,
    .release = gntalloc_release,
    .unlocked_ioctl = gntalloc_ioctl,
    .mmap = gntalloc_mmap
};
0556 
0557 /*
0558  * -------------------------------------
0559  * Module creation/destruction.
0560  * -------------------------------------
0561  */
/* Misc device descriptor; the minor number is assigned dynamically. */
static struct miscdevice gntalloc_miscdev = {
    .minor  = MISC_DYNAMIC_MINOR,
    .name   = "xen/gntalloc",
    .fops   = &gntalloc_fops,
};
0567 
0568 static int __init gntalloc_init(void)
0569 {
0570     int err;
0571 
0572     if (!xen_domain())
0573         return -ENODEV;
0574 
0575     err = misc_register(&gntalloc_miscdev);
0576     if (err != 0) {
0577         pr_err("Could not register misc gntalloc device\n");
0578         return err;
0579     }
0580 
0581     pr_debug("Created grant allocation device at %d,%d\n",
0582             MISC_MAJOR, gntalloc_miscdev.minor);
0583 
0584     return 0;
0585 }
0586 
/* Module exit: unregister the misc device registered by gntalloc_init(). */
static void __exit gntalloc_exit(void)
{
    misc_deregister(&gntalloc_miscdev);
}
0591 
/* Module entry/exit hooks and metadata. */
module_init(gntalloc_init);
module_exit(gntalloc_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Carter Weatherly <carter.weatherly@jhuapl.edu>, "
        "Daniel De Graaf <dgdegra@tycho.nsa.gov>");
MODULE_DESCRIPTION("User-space grant reference allocator driver");