/******************************************************************************
 * evtchn.c
 *
 * Driver for receiving and demuxing event-channel signals.
 *
 * Copyright (c) 2004-2005, K A Fraser
 * Multi-process extensions Copyright (c) 2009, Stefano Stabellini
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/major.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/poll.h>
#include <linux/irq.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <xen/xen-ops.h>
#include <asm/xen/hypervisor.h>

struct per_user_data {
	struct mutex bind_mutex; /* serialize bind/unbind operations */
	struct rb_root evtchns;
	unsigned int nr_evtchns;

	/* Notification ring, accessed via /dev/xen/evtchn. */
	unsigned int ring_size;
	evtchn_port_t *ring;
	unsigned int ring_cons, ring_prod, ring_overflow;
	struct mutex ring_cons_mutex; /* protect against concurrent readers */
	spinlock_t ring_prod_lock; /* protect against concurrent interrupts */

	/* Processes wait on this queue when ring is empty. */
	wait_queue_head_t evtchn_wait;
	struct fasync_struct *evtchn_async_queue;
	const char *name;

	domid_t restrict_domid;
};

#define UNRESTRICTED_DOMID ((domid_t)-1)

struct user_evtchn {
	struct rb_node node;
	struct per_user_data *user;
	evtchn_port_t port;
	bool enabled;
};

static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}

static unsigned int evtchn_ring_offset(struct per_user_data *u,
				       unsigned int idx)
{
	return idx & (u->ring_size - 1);
}

static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
					unsigned int idx)
{
	return u->ring + evtchn_ring_offset(u, idx);
}

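/*
 * Insert a bound port into the per-user rb-tree, keyed on port number.
 * Returns -EEXIST if this user has already bound the port.
 */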
static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;

	u->nr_evtchns++;

	while (*new) {
		struct user_evtchn *this;

		this = rb_entry(*new, struct user_evtchn, node);

		parent = *new;
		if (this->port < evtchn->port)
			new = &((*new)->rb_right);
		else if (this->port > evtchn->port)
			new = &((*new)->rb_left);
		else
			return -EEXIST;
	}

	/* Add new node and rebalance tree. */
	rb_link_node(&evtchn->node, parent, new);
	rb_insert_color(&evtchn->node, &u->evtchns);

	return 0;
}

static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	u->nr_evtchns--;
	rb_erase(&evtchn->node, &u->evtchns);
	kfree(evtchn);
}

static struct user_evtchn *find_evtchn(struct per_user_data *u,
				       evtchn_port_t port)
{
	struct rb_node *node = u->evtchns.rb_node;

	while (node) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);

		if (evtchn->port < port)
			node = node->rb_right;
		else if (evtchn->port > port)
			node = node->rb_left;
		else
			return evtchn;
	}
	return NULL;
}

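/*
 * Interrupt handler for a bound port: queue the port on the notification
 * ring and wake any readers.  The port then stays disabled (no further
 * events are delivered) until userspace re-enables it by writing the
 * port number back to the device.
 */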
static irqreturn_t evtchn_interrupt(int irq, void *data)
{
	struct user_evtchn *evtchn = data;
	struct per_user_data *u = evtchn->user;
	unsigned int prod, cons;

	WARN(!evtchn->enabled,
	     "Interrupt for port %u, but apparently not enabled; per-user %p\n",
	     evtchn->port, u);

	evtchn->enabled = false;

	spin_lock(&u->ring_prod_lock);

	prod = READ_ONCE(u->ring_prod);
	cons = READ_ONCE(u->ring_cons);

	if ((prod - cons) < u->ring_size) {
		*evtchn_ring_entry(u, prod) = evtchn->port;
		smp_wmb(); /* Ensure ring contents visible before prod update. */
		WRITE_ONCE(u->ring_prod, prod + 1);
		if (cons == prod) {
			wake_up_interruptible(&u->evtchn_wait);
			kill_fasync(&u->evtchn_async_queue,
				    SIGIO, POLL_IN);
		}
	} else
		u->ring_overflow = 1;

	spin_unlock(&u->ring_prod_lock);

	return IRQ_HANDLED;
}

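/*
 * Read pending port numbers from the notification ring.  Only whole
 * evtchn_port_t entries are returned; the requested byte count is
 * rounded down accordingly and capped at one page.
 */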
static ssize_t evtchn_read(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	int rc;
	unsigned int c, p, bytes1 = 0, bytes2 = 0;
	struct per_user_data *u = file->private_data;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	if (count == 0)
		return 0;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	for (;;) {
		mutex_lock(&u->ring_cons_mutex);

		rc = -EFBIG;
		if (u->ring_overflow)
			goto unlock_out;

		c = READ_ONCE(u->ring_cons);
		p = READ_ONCE(u->ring_prod);
		if (c != p)
			break;

		mutex_unlock(&u->ring_cons_mutex);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;

		rc = wait_event_interruptible(u->evtchn_wait,
			READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
		if (rc)
			return rc;
	}

	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
	if (((c ^ p) & u->ring_size) != 0) {
		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
			sizeof(evtchn_port_t);
		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
	} else {
		bytes1 = (p - c) * sizeof(evtchn_port_t);
		bytes2 = 0;
	}

	/* Truncate chunks according to caller's maximum byte count. */
	if (bytes1 > count) {
		bytes1 = count;
		bytes2 = 0;
	} else if ((bytes1 + bytes2) > count) {
		bytes2 = count - bytes1;
	}

	rc = -EFAULT;
	smp_rmb(); /* Ensure that we see the port before we copy it. */
	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
	    ((bytes2 != 0) &&
	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
		goto unlock_out;

	WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
	rc = bytes1 + bytes2;

 unlock_out:
	mutex_unlock(&u->ring_cons_mutex);
	return rc;
}

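/*
 * Writing port numbers back to the device re-enables delivery on those
 * event channels: the inverse of evtchn_read().
 */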
static ssize_t evtchn_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *ppos)
{
	int rc, i;
	evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
	struct per_user_data *u = file->private_data;

	if (kbuf == NULL)
		return -ENOMEM;

	/* Whole number of ports. */
	count &= ~(sizeof(evtchn_port_t)-1);

	rc = 0;
	if (count == 0)
		goto out;

	if (count > PAGE_SIZE)
		count = PAGE_SIZE;

	rc = -EFAULT;
	if (copy_from_user(kbuf, buf, count) != 0)
		goto out;

	mutex_lock(&u->bind_mutex);

	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
		evtchn_port_t port = kbuf[i];
		struct user_evtchn *evtchn;

		evtchn = find_evtchn(u, port);
		if (evtchn && !evtchn->enabled) {
			evtchn->enabled = true;
			xen_irq_lateeoi(irq_from_evtchn(port), 0);
		}
	}

	mutex_unlock(&u->bind_mutex);

	rc = count;

 out:
	free_page((unsigned long)kbuf);
	return rc;
}

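/*
 * Grow the notification ring so that there is a free slot for every
 * bound event channel.  The size is kept at a power of two so that ring
 * indices can be masked (see evtchn_ring_offset()) instead of taken
 * modulo the size.
 */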
static int evtchn_resize_ring(struct per_user_data *u)
{
	unsigned int new_size;
	evtchn_port_t *new_ring, *old_ring;

	/*
	 * Ensure the ring is large enough to capture all possible
	 * events. i.e. one free slot for each bound event.
	 */
	if (u->nr_evtchns <= u->ring_size)
		return 0;

	if (u->ring_size == 0)
		new_size = 64;
	else
		new_size = 2 * u->ring_size;

	new_ring = kvmalloc_array(new_size, sizeof(*new_ring), GFP_KERNEL);
	if (!new_ring)
		return -ENOMEM;

	old_ring = u->ring;

	/*
	 * Access to the ring contents is serialized by either the
	 * prod /or/ cons lock so take both when resizing.
	 */
	mutex_lock(&u->ring_cons_mutex);
	spin_lock_irq(&u->ring_prod_lock);

	/*
	 * Copy the old ring contents to the new ring.
	 *
	 * To take care of wrapping, a full ring, and the new index
	 * pointing into the second half, simply copy the old contents
	 * twice.
	 *
	 * +---------+    +------------------+
	 * |34567  12| -> |34567  1234567  12|
	 * +-----p-c-+    +-------c------p---+
	 */
	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
	memcpy(new_ring + u->ring_size, old_ring,
	       u->ring_size * sizeof(*u->ring));

	u->ring = new_ring;
	u->ring_size = new_size;

	spin_unlock_irq(&u->ring_prod_lock);
	mutex_unlock(&u->ring_cons_mutex);

	evtchn_free_ring(old_ring);

	return 0;
}

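/*
 * Bind an already-allocated Xen event channel to this file handle and
 * route its interrupts through evtchn_interrupt().
 */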
static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
{
	struct user_evtchn *evtchn;
	struct evtchn_close close;
	int rc = 0;

	/*
	 * Ports are never reused, so every caller should pass in a
	 * unique port.
	 *
	 * (Locking not necessary because we haven't registered the
	 * interrupt handler yet, and our caller has already
	 * serialized bind operations.)
	 */
	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
	if (!evtchn)
		return -ENOMEM;

	evtchn->user = u;
	evtchn->port = port;
	evtchn->enabled = true; /* start enabled for backwards compatibility */

	rc = add_evtchn(u, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_resize_ring(u);
	if (rc < 0)
		goto err;

	rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
					       u->name, evtchn);
	if (rc < 0)
		goto err;

	rc = evtchn_make_refcounted(port);
	return rc;

err:
	/* bind failed, should close the port now */
	close.port = port;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();
	del_evtchn(u, evtchn); /* frees evtchn */
	return rc;
}

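/*
 * Unbind helper.  Callers must disable the underlying irq first so that
 * evtchn_interrupt() cannot run while the rb-tree entry is removed.
 */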
static void evtchn_unbind_from_user(struct per_user_data *u,
				    struct user_evtchn *evtchn)
{
	int irq = irq_from_evtchn(evtchn->port);

	BUG_ON(irq < 0);

	unbind_from_irqhandler(irq, evtchn);

	del_evtchn(u, evtchn);
}

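/*
 * ioctl interface: bind and unbind ports, send notifications, reset the
 * notification ring, and optionally restrict the file handle to a
 * single remote domain.
 */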
static long evtchn_ioctl(struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	int rc;
	struct per_user_data *u = file->private_data;
	void __user *uarg = (void __user *) arg;

	/* Prevent bind from racing with unbind */
	mutex_lock(&u->bind_mutex);

	switch (cmd) {
	case IOCTL_EVTCHN_BIND_VIRQ: {
		struct ioctl_evtchn_bind_virq bind;
		struct evtchn_bind_virq bind_virq;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		bind_virq.virq = bind.virq;
		bind_virq.vcpu = xen_vcpu_nr(0);
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_virq.port);
		if (rc == 0)
			rc = bind_virq.port;
		break;
	}

	case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
		struct ioctl_evtchn_bind_interdomain bind;
		struct evtchn_bind_interdomain bind_interdomain;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID &&
		    u->restrict_domid != bind.remote_domain)
			break;

		bind_interdomain.remote_dom = bind.remote_domain;
		bind_interdomain.remote_port = bind.remote_port;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
						 &bind_interdomain);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
		if (rc == 0)
			rc = bind_interdomain.local_port;
		break;
	}

	case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
		struct ioctl_evtchn_bind_unbound_port bind;
		struct evtchn_alloc_unbound alloc_unbound;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&bind, uarg, sizeof(bind)))
			break;

		alloc_unbound.dom = DOMID_SELF;
		alloc_unbound.remote_dom = bind.remote_domain;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
						 &alloc_unbound);
		if (rc != 0)
			break;

		rc = evtchn_bind_to_user(u, alloc_unbound.port);
		if (rc == 0)
			rc = alloc_unbound.port;
		break;
	}

	case IOCTL_EVTCHN_UNBIND: {
		struct ioctl_evtchn_unbind unbind;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
			break;

		rc = -EINVAL;
		if (unbind.port >= xen_evtchn_nr_channels())
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, unbind.port);
		if (!evtchn)
			break;

		disable_irq(irq_from_evtchn(unbind.port));
		evtchn_unbind_from_user(u, evtchn);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_NOTIFY: {
		struct ioctl_evtchn_notify notify;
		struct user_evtchn *evtchn;

		rc = -EFAULT;
		if (copy_from_user(&notify, uarg, sizeof(notify)))
			break;

		rc = -ENOTCONN;
		evtchn = find_evtchn(u, notify.port);
		if (evtchn) {
			notify_remote_via_evtchn(notify.port);
			rc = 0;
		}
		break;
	}

	case IOCTL_EVTCHN_RESET: {
		/* Initialise the ring to empty. Clear errors. */
		mutex_lock(&u->ring_cons_mutex);
		spin_lock_irq(&u->ring_prod_lock);
		WRITE_ONCE(u->ring_cons, 0);
		WRITE_ONCE(u->ring_prod, 0);
		u->ring_overflow = 0;
		spin_unlock_irq(&u->ring_prod_lock);
		mutex_unlock(&u->ring_cons_mutex);
		rc = 0;
		break;
	}

	case IOCTL_EVTCHN_RESTRICT_DOMID: {
		struct ioctl_evtchn_restrict_domid ierd;

		rc = -EACCES;
		if (u->restrict_domid != UNRESTRICTED_DOMID)
			break;

		rc = -EFAULT;
		if (copy_from_user(&ierd, uarg, sizeof(ierd)))
			break;

		rc = -EINVAL;
		if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED)
			break;

		u->restrict_domid = ierd.domid;
		rc = 0;

		break;
	}

	default:
		rc = -ENOSYS;
		break;
	}
	mutex_unlock(&u->bind_mutex);

	return rc;
}

static __poll_t evtchn_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
	struct per_user_data *u = file->private_data;

	poll_wait(file, &u->evtchn_wait, wait);
	if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (u->ring_overflow)
		mask = EPOLLERR;
	return mask;
}

static int evtchn_fasync(int fd, struct file *filp, int on)
{
	struct per_user_data *u = filp->private_data;
	return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
}

static int evtchn_open(struct inode *inode, struct file *filp)
{
	struct per_user_data *u;

	u = kzalloc(sizeof(*u), GFP_KERNEL);
	if (u == NULL)
		return -ENOMEM;

	u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
	if (u->name == NULL) {
		kfree(u);
		return -ENOMEM;
	}

	init_waitqueue_head(&u->evtchn_wait);

	mutex_init(&u->bind_mutex);
	mutex_init(&u->ring_cons_mutex);
	spin_lock_init(&u->ring_prod_lock);

	u->restrict_domid = UNRESTRICTED_DOMID;

	filp->private_data = u;

	return stream_open(inode, filp);
}

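/*
 * Release: unbind every port still held by this file handle, then free
 * the notification ring and per-user state.
 */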
static int evtchn_release(struct inode *inode, struct file *filp)
{
	struct per_user_data *u = filp->private_data;
	struct rb_node *node;

	while ((node = u->evtchns.rb_node)) {
		struct user_evtchn *evtchn;

		evtchn = rb_entry(node, struct user_evtchn, node);
		disable_irq(irq_from_evtchn(evtchn->port));
		evtchn_unbind_from_user(u, evtchn);
	}

	evtchn_free_ring(u->ring);
	kfree(u->name);
	kfree(u);

	return 0;
}

static const struct file_operations evtchn_fops = {
	.owner   = THIS_MODULE,
	.read    = evtchn_read,
	.write   = evtchn_write,
	.unlocked_ioctl = evtchn_ioctl,
	.poll    = evtchn_poll,
	.fasync  = evtchn_fasync,
	.open    = evtchn_open,
	.release = evtchn_release,
	.llseek  = no_llseek,
};

static struct miscdevice evtchn_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "xen/evtchn",
	.fops  = &evtchn_fops,
};

static int __init evtchn_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	/* Create '/dev/xen/evtchn'. */
	err = misc_register(&evtchn_miscdev);
	if (err != 0) {
		pr_err("Could not register /dev/xen/evtchn\n");
		return err;
	}

	pr_info("Event-channel device installed\n");

	return 0;
}

static void __exit evtchn_cleanup(void)
{
	misc_deregister(&evtchn_miscdev);
}

module_init(evtchn_init);
module_exit(evtchn_cleanup);

MODULE_LICENSE("GPL");