#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/rdma_netlink.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
	IB_UVERBS_MAJOR           = 231,
	IB_UVERBS_BASE_MINOR      = 192,
	IB_UVERBS_MAX_DEVICES     = RDMA_MAX_PORTS,
	IB_UVERBS_NUM_FIXED_MINOR = 32,
	IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES -
				      IB_UVERBS_NUM_FIXED_MINOR,
};

#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DEFINE_IDA(uverbs_ida);
static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

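/*
 * Must be called with the ufile->device->disassociate_srcu read lock held;
 * the returned ucontext stays valid only while that lock is held.
 */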
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
	/*
	 * We do not hold the hw_destroy_rwsem for this flow; srcu is relied
	 * on instead. It does not matter if device removal is still in
	 * progress, as long as a valid ucontext is loaded here the result
	 * will be correct.
	 */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!srcu_dereference(ufile->device->ib_dev,
			      &ufile->device->disassociate_srcu))
		return ERR_PTR(-EIO);

	if (!ucontext)
		return ERR_PTR(-EINVAL);

	return ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);

int uverbs_dealloc_mw(struct ib_mw *mw)
{
	struct ib_pd *pd = mw->pd;
	int ret;

	ret = mw->device->ops.dealloc_mw(mw);
	if (ret)
		return ret;

	atomic_dec(&pd->usecnt);
	kfree(mw);
	return ret;
}

static void ib_uverbs_release_dev(struct device *device)
{
	struct ib_uverbs_device *dev =
		container_of(device, struct ib_uverbs_device, dev);

	uverbs_destroy_api(dev->uapi);
	cleanup_srcu_struct(&dev->disassociate_srcu);
	mutex_destroy(&dev->lists_mutex);
	mutex_destroy(&dev->xrcd_tree_mutex);
	kfree(dev);
}

void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
			   struct ib_ucq_object *uobj)
{
	struct ib_uverbs_event *evt, *tmp;

	if (ev_file) {
		spin_lock_irq(&ev_file->ev_queue.lock);
		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
			list_del(&evt->list);
			kfree(evt);
		}
		spin_unlock_irq(&ev_file->ev_queue.lock);

		uverbs_uobject_put(&ev_file->uobj);
	}

	ib_uverbs_release_uevent(&uobj->uevent);
}

void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
	struct ib_uverbs_async_event_file *async_file = uobj->event_file;
	struct ib_uverbs_event *evt, *tmp;

	if (!async_file)
		return;

	spin_lock_irq(&async_file->ev_queue.lock);
	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
		list_del(&evt->list);
		kfree(evt);
	}
	spin_unlock_irq(&async_file->ev_queue.lock);
	uverbs_uobject_put(&async_file->uobj);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
			     struct ib_uqp_object *uobj)
{
	struct ib_uverbs_mcast_entry *mcast, *tmp;

	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
		list_del(&mcast->list);
		kfree(mcast);
	}
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
	complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
	struct ib_uverbs_file *file =
		container_of(ref, struct ib_uverbs_file, ref);
	struct ib_device *ib_dev;
	int srcu_key;

	release_ufile_idr_uobject(file);

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ib_dev = srcu_dereference(file->device->ib_dev,
				  &file->device->disassociate_srcu);
	if (ib_dev && !ib_dev->ops.disassociate_ucontext)
		module_put(ib_dev->ops.owner);
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

	if (refcount_dec_and_test(&file->device->refcount))
		ib_uverbs_comp_dev(file->device);

	if (file->default_async_file)
		uverbs_uobject_put(&file->default_async_file->uobj);
	put_device(&file->device->dev);

	if (file->disassociate_page)
		__free_pages(file->disassociate_page, 0);
	mutex_destroy(&file->umap_lock);
	mutex_destroy(&file->ucontext_lock);
	kfree(file);
}

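/*
 * Copy a single event record of size eventsz from ev_queue to userspace,
 * blocking until one arrives unless the file was opened with O_NONBLOCK.
 */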
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
				    struct file *filp, char __user *buf,
				    size_t count, loff_t *pos,
				    size_t eventsz)
{
	struct ib_uverbs_event *event;
	int ret = 0;

	spin_lock_irq(&ev_queue->lock);

	while (list_empty(&ev_queue->event_list)) {
		spin_unlock_irq(&ev_queue->lock);

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(ev_queue->poll_wait,
					     (!list_empty(&ev_queue->event_list) ||
					      ev_queue->is_closed)))
			return -ERESTARTSYS;

		spin_lock_irq(&ev_queue->lock);

		/* If device was disassociated and no event exists set an error */
		if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
			spin_unlock_irq(&ev_queue->lock);
			return -EIO;
		}
	}

	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

	if (eventsz > count) {
		ret = -EINVAL;
		event = NULL;
	} else {
		list_del(ev_queue->event_list.next);
		if (event->counter) {
			++(*event->counter);
			list_del(&event->obj_list);
		}
	}

	spin_unlock_irq(&ev_queue->lock);

	if (event) {
		if (copy_to_user(buf, event, eventsz))
			ret = -EFAULT;
		else
			ret = eventsz;
	}

	kfree(event);

	return ret;
}

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
					  size_t count, loff_t *pos)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
				    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
					 size_t count, loff_t *pos)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
				    pos,
				    sizeof(struct ib_uverbs_comp_event_desc));
}

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
				     struct file *filp,
				     struct poll_table_struct *wait)
{
	__poll_t pollflags = 0;

	poll_wait(filp, &ev_queue->poll_wait, wait);

	spin_lock_irq(&ev_queue->lock);
	if (!list_empty(&ev_queue->event_list))
		pollflags = EPOLLIN | EPOLLRDNORM;
	else if (ev_queue->is_closed)
		pollflags = EPOLLERR;
	spin_unlock_irq(&ev_queue->lock);

	return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
					   struct poll_table_struct *wait)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
					  struct poll_table_struct *wait)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_async_event_file *file = filp->private_data;

	return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
	struct ib_uverbs_completion_event_file *comp_ev_file =
		filp->private_data;

	return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

const struct file_operations uverbs_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_comp_event_read,
	.poll	 = ib_uverbs_comp_event_poll,
	.release = uverbs_uobject_fd_release,
	.fasync	 = ib_uverbs_comp_event_fasync,
	.llseek	 = no_llseek,
};

const struct file_operations uverbs_async_event_fops = {
	.owner	 = THIS_MODULE,
	.read	 = ib_uverbs_async_event_read,
	.poll	 = ib_uverbs_async_event_poll,
	.release = uverbs_async_event_release,
	.fasync	 = ib_uverbs_async_event_fasync,
	.llseek	 = no_llseek,
};

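/*
 * CQ completion callback: queue a completion event on the CQ's completion
 * channel and wake up poll(), read() and SIGIO waiters.
 */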
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_uverbs_event_queue *ev_queue = cq_context;
	struct ib_ucq_object *uobj;
	struct ib_uverbs_event *entry;
	unsigned long flags;

	if (!ev_queue)
		return;

	spin_lock_irqsave(&ev_queue->lock, flags);
	if (ev_queue->is_closed) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&ev_queue->lock, flags);
		return;
	}

	uobj = cq->uobject;

	entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
	entry->counter = &uobj->comp_events_reported;

	list_add_tail(&entry->list, &ev_queue->event_list);
	list_add_tail(&entry->obj_list, &uobj->comp_list);
	spin_unlock_irqrestore(&ev_queue->lock, flags);

	wake_up_interruptible(&ev_queue->poll_wait);
	kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

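/*
 * Queue an asynchronous event on async_file and wake up readers. When
 * obj_list and counter are non-NULL, the event is also linked to its
 * uobject so that delivery can be accounted when the object is destroyed.
 */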
void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
			     __u64 element, __u64 event,
			     struct list_head *obj_list, u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	if (!async_file)
		return;

	spin_lock_irqsave(&async_file->ev_queue.lock, flags);
	if (async_file->ev_queue.is_closed) {
		spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
		return;
	}

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
		return;
	}

	entry->desc.async.element = element;
	entry->desc.async.event_type = event;
	entry->desc.async.reserved = 0;
	entry->counter = counter;

	list_add_tail(&entry->list, &async_file->ev_queue.event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);

	wake_up_interruptible(&async_file->ev_queue.poll_wait);
	kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

static void uverbs_uobj_event(struct ib_uevent_object *eobj,
			      struct ib_event *event)
{
	ib_uverbs_async_handler(eobj->event_file,
				eobj->uobject.user_handle, event->event,
				&eobj->event_list, &eobj->events_reported);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
	uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
	/* for XRC target qp's, check that qp is live */
	if (!event->element.qp->uobject)
		return;

	uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
	uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
	uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}

static void ib_uverbs_event_handler(struct ib_event_handler *handler,
				    struct ib_event *event)
{
	ib_uverbs_async_handler(
		container_of(handler, struct ib_uverbs_async_event_file,
			     event_handler),
		event->element.port_num, event->event, NULL, NULL);
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
	spin_lock_init(&ev_queue->lock);
	INIT_LIST_HEAD(&ev_queue->event_list);
	init_waitqueue_head(&ev_queue->poll_wait);
	ev_queue->is_closed = 0;
	ev_queue->async_queue = NULL;
}

void ib_uverbs_init_async_event_file(
	struct ib_uverbs_async_event_file *async_file)
{
	struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
	struct ib_device *ib_dev = async_file->uobj.context->device;

	ib_uverbs_init_event_queue(&async_file->ev_queue);

	/* The first async_event_file becomes the default one for the file. */
	mutex_lock(&uverbs_file->ucontext_lock);
	if (!uverbs_file->default_async_file) {
		/* Pairs with the put in ib_uverbs_release_file. */
		uverbs_uobject_get(&async_file->uobj);
		smp_store_release(&uverbs_file->default_async_file,
				  async_file);
	}
	mutex_unlock(&uverbs_file->ucontext_lock);

	INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
			      ib_uverbs_event_handler);
	ib_register_event_handler(&async_file->event_handler);
}

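/*
 * Sanity check a command header against the sizes the method expects.
 * Extended commands measure in_words/out_words in units of 8 bytes,
 * legacy commands in units of 4.
 */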
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
			  struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
			  const struct uverbs_api_write_method *method_elm)
{
	if (method_elm->is_ex) {
		count -= sizeof(*hdr) + sizeof(*ex_hdr);

		if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
			return -EINVAL;

		if (hdr->in_words * 8 < method_elm->req_size)
			return -ENOSPC;

		if (ex_hdr->cmd_hdr_reserved)
			return -EINVAL;

		if (ex_hdr->response) {
			if (!hdr->out_words && !ex_hdr->provider_out_words)
				return -EINVAL;

			if (hdr->out_words * 8 < method_elm->resp_size)
				return -ENOSPC;

			if (!access_ok(u64_to_user_ptr(ex_hdr->response),
				       (hdr->out_words + ex_hdr->provider_out_words) * 8))
				return -EFAULT;
		} else {
			if (hdr->out_words || ex_hdr->provider_out_words)
				return -EINVAL;
		}

		return 0;
	}

	/* not extended command */
	if (hdr->in_words * 4 != count)
		return -EINVAL;

	if (count < method_elm->req_size + sizeof(hdr)) {
		/*
		 * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
		 * with a 16 byte write instead of 24. Old kernels didn't
		 * check the size so they allowed this. Now that the size is
		 * checked provide a compatibility work around to not break
		 * those userspaces.
		 */
		if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
		    count == 16) {
			hdr->in_words = 6;
			return 0;
		}
		return -ENOSPC;
	}
	if (hdr->out_words * 4 < method_elm->resp_size)
		return -ENOSPC;

	return 0;
}

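/* Parse and dispatch a legacy write() based command to its uapi handler. */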
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			       size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	const struct uverbs_api_write_method *method_elm;
	struct uverbs_api *uapi = file->device->uapi;
	struct ib_uverbs_ex_cmd_hdr ex_hdr;
	struct ib_uverbs_cmd_hdr hdr;
	struct uverbs_attr_bundle bundle;
	int srcu_key;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (count < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	method_elm = uapi_get_method(uapi, hdr.command);
	if (IS_ERR(method_elm))
		return PTR_ERR(method_elm);

	if (method_elm->is_ex) {
		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;
		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;
	}

	ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
	if (ret)
		return ret;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);

	buf += sizeof(hdr);

	memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
	bundle.ufile = file;
	bundle.context = NULL; /* only valid if bundle has uobject */
	bundle.uobject = NULL;
	if (!method_elm->is_ex) {
		size_t in_len = hdr.in_words * 4 - sizeof(hdr);
		size_t out_len = hdr.out_words * 4;
		u64 response = 0;

		if (method_elm->has_udata) {
			bundle.driver_udata.inlen =
				in_len - method_elm->req_size;
			in_len = method_elm->req_size;
			if (bundle.driver_udata.inlen)
				bundle.driver_udata.inbuf = buf + in_len;
			else
				bundle.driver_udata.inbuf = NULL;
		} else {
			memset(&bundle.driver_udata, 0,
			       sizeof(bundle.driver_udata));
		}

		if (method_elm->has_resp) {
			/*
			 * The macros check that if has_resp is set
			 * then the command request structure starts
			 * with a '__aligned u64 response' member.
			 */
			ret = get_user(response, (const u64 __user *)buf);
			if (ret)
				goto out_unlock;

			if (method_elm->has_udata) {
				bundle.driver_udata.outlen =
					out_len - method_elm->resp_size;
				out_len = method_elm->resp_size;
				if (bundle.driver_udata.outlen)
					bundle.driver_udata.outbuf =
						u64_to_user_ptr(response +
								out_len);
				else
					bundle.driver_udata.outbuf = NULL;
			}
		} else {
			bundle.driver_udata.outlen = 0;
			bundle.driver_udata.outbuf = NULL;
		}

		ib_uverbs_init_udata_buf_or_null(
			&bundle.ucore, buf, u64_to_user_ptr(response),
			in_len, out_len);
	} else {
		buf += sizeof(ex_hdr);

		ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
					u64_to_user_ptr(ex_hdr.response),
					hdr.in_words * 8, hdr.out_words * 8);

		ib_uverbs_init_udata_buf_or_null(
			&bundle.driver_udata, buf + bundle.ucore.inlen,
			u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
			ex_hdr.provider_in_words * 8,
			ex_hdr.provider_out_words * 8);
	}

	ret = method_elm->handler(&bundle);
	if (bundle.uobject)
		uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
				       !ret, &bundle);
out_unlock:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return (ret) ? : count;
}

static const struct vm_operations_struct rdma_umap_ops;

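/*
 * Forward mmap to the driver while holding the disassociation SRCU so that
 * a concurrent hot unplug cannot race with establishing the mapping.
 */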
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_ucontext *ucontext;
	int ret = 0;
	int srcu_key;

	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
	ucontext = ib_uverbs_get_ucontext_file(file);
	if (IS_ERR(ucontext)) {
		ret = PTR_ERR(ucontext);
		goto out;
	}
	vma->vm_ops = &rdma_umap_ops;
	ret = ucontext->device->ops.mmap(ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}
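/*
 * Each time IO memory is mapped into user space an rdma_umap_priv tracks
 * the VMA. If the device is hot-unplugged all such VMAs are 'zapped' so
 * that later touches fault into rdma_umap_fault() instead of stale MMIO.
 */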
static void rdma_umap_open(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *opriv = vma->vm_private_data;
	struct rdma_umap_priv *priv;

	if (!opriv)
		return;

	/* We are racing with disassociation */
	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
		goto out_zap;
	/*
	 * Disassociation already completed, the VMA should already be zapped.
	 */
	if (!ufile->ucontext)
		goto out_unlock;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		goto out_unlock;
	rdma_umap_priv_init(priv, vma, opriv->entry);

	up_read(&ufile->hw_destroy_rwsem);
	return;

out_unlock:
	up_read(&ufile->hw_destroy_rwsem);
out_zap:
	/*
	 * We can't allow the VMA to be created with the actual IO pages, that
	 * would break our API contract, and it can't be filled during error
	 * handling in the core code.
	 */
	vma->vm_private_data = NULL;
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void rdma_umap_close(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vma->vm_private_data;

	if (!priv)
		return;

	/*
	 * The vma holds a reference on the struct file that created it, which
	 * in turn means the ib_uverbs_file is guaranteed to exist here.
	 * Unlink the tracking entry under umap_lock so disassociation cannot
	 * race with this teardown.
	 */
	mutex_lock(&ufile->umap_lock);
	if (priv->entry)
		rdma_user_mmap_entry_put(priv->entry);

	list_del(&priv->list);
	mutex_unlock(&ufile->umap_lock);
	kfree(priv);
}
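/*
 * Once zap_vma_ptes() has been called, touches to the VMA fault in here.
 * Reads resolve to the zero page; writes land on a per-file throw-away
 * page so a disassociated process keeps running instead of crashing.
 */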
static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
{
	struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
	struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
	vm_fault_t ret = 0;

	if (!priv)
		return VM_FAULT_SIGBUS;

	/* Read only pages can just use the system zero page. */
	if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
		vmf->page = ZERO_PAGE(vmf->address);
		get_page(vmf->page);
		return 0;
	}

	mutex_lock(&ufile->umap_lock);
	if (!ufile->disassociate_page)
		ufile->disassociate_page =
			alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);

	if (ufile->disassociate_page) {
		/*
		 * All writable mappings share one throw-away page; anything
		 * written there after disassociation is simply discarded.
		 */
		vmf->page = ufile->disassociate_page;
		get_page(vmf->page);
	} else {
		ret = VM_FAULT_SIGBUS;
	}
	mutex_unlock(&ufile->umap_lock);

	return ret;
}

static const struct vm_operations_struct rdma_umap_ops = {
	.open = rdma_umap_open,
	.close = rdma_umap_close,
	.fault = rdma_umap_fault,
};

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
	struct rdma_umap_priv *priv, *next_priv;

	lockdep_assert_held(&ufile->hw_destroy_rwsem);

	while (1) {
		struct mm_struct *mm = NULL;

		/* Get an arbitrary mm pointer that hasn't been cleaned yet */
		mutex_lock(&ufile->umap_lock);
		while (!list_empty(&ufile->umaps)) {
			int ret;

			priv = list_first_entry(&ufile->umaps,
						struct rdma_umap_priv, list);
			mm = priv->vma->vm_mm;
			ret = mmget_not_zero(mm);
			if (!ret) {
				list_del_init(&priv->list);
				if (priv->entry) {
					rdma_user_mmap_entry_put(priv->entry);
					priv->entry = NULL;
				}
				mm = NULL;
				continue;
			}
			break;
		}
		mutex_unlock(&ufile->umap_lock);
		if (!mm)
			return;

		/*
		 * The umap_lock is nested under mmap_lock since it is used
		 * within the vma_ops callbacks, so we have to clean the list
		 * one mm at a time to get the lock ordering right. Typically
		 * this will only be one mm, so no big deal.
		 */
		mmap_read_lock(mm);
		mutex_lock(&ufile->umap_lock);
		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
					  list) {
			struct vm_area_struct *vma = priv->vma;

			if (vma->vm_mm != mm)
				continue;
			list_del_init(&priv->list);

			zap_vma_ptes(vma, vma->vm_start,
				     vma->vm_end - vma->vm_start);

			if (priv->entry) {
				rdma_user_mmap_entry_put(priv->entry);
				priv->entry = NULL;
			}
		}
		mutex_unlock(&ufile->umap_lock);
		mmap_read_unlock(mm);
		mmput(mm);
	}
}
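/*
 * Opening the cdev takes a reference on the ib_uverbs_device and, for
 * drivers that cannot disassociate their ucontexts, a module reference on
 * the low level driver so it cannot be unloaded while the file is open.
 */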
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_device *dev;
	struct ib_uverbs_file *file;
	struct ib_device *ib_dev;
	int ret;
	int module_dependent;
	int srcu_key;

	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
	if (!refcount_inc_not_zero(&dev->refcount))
		return -ENXIO;

	get_device(&dev->dev);
	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	mutex_lock(&dev->lists_mutex);
	ib_dev = srcu_dereference(dev->ib_dev,
				  &dev->disassociate_srcu);
	if (!ib_dev) {
		ret = -EIO;
		goto err;
	}

	if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
		ret = -EPERM;
		goto err;
	}

	/*
	 * In case IB device supports disassociate ucontext, there is no hard
	 * dependency between uverbs device and its low level device.
	 */
	module_dependent = !(ib_dev->ops.disassociate_ucontext);

	if (module_dependent) {
		if (!try_module_get(ib_dev->ops.owner)) {
			ret = -ENODEV;
			goto err;
		}
	}

	file = kzalloc(sizeof(*file), GFP_KERNEL);
	if (!file) {
		ret = -ENOMEM;
		if (module_dependent)
			goto err_module;

		goto err;
	}

	file->device = dev;
	kref_init(&file->ref);
	mutex_init(&file->ucontext_lock);

	spin_lock_init(&file->uobjects_lock);
	INIT_LIST_HEAD(&file->uobjects);
	init_rwsem(&file->hw_destroy_rwsem);
	mutex_init(&file->umap_lock);
	INIT_LIST_HEAD(&file->umaps);

	filp->private_data = file;
	list_add_tail(&file->list, &dev->uverbs_file_list);
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	setup_ufile_idr_uobject(file);

	return stream_open(inode, filp);

err_module:
	module_put(ib_dev->ops.owner);

err:
	mutex_unlock(&dev->lists_mutex);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
	if (refcount_dec_and_test(&dev->refcount))
		ib_uverbs_comp_dev(dev);

	put_device(&dev->dev);
	return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
	struct ib_uverbs_file *file = filp->private_data;

	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);

	mutex_lock(&file->device->lists_mutex);
	list_del_init(&file->list);
	mutex_unlock(&file->device->lists_mutex);

	kref_put(&file->ref, ib_uverbs_release_file);

	return 0;
}

static const struct file_operations uverbs_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
	.owner	 = THIS_MODULE,
	.write	 = ib_uverbs_write,
	.mmap	 = ib_uverbs_mmap,
	.open	 = ib_uverbs_open,
	.release = ib_uverbs_close,
	.llseek	 = no_llseek,
	.unlocked_ioctl = ib_uverbs_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
};

static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
				 struct ib_client_nl_info *res)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int ret;

	if (res->port != -1)
		return -EINVAL;

	res->abi = ibdev->ops.uverbs_abi_ver;
	res->cdev = &uverbs_dev->dev;

	/*
	 * Report the driver_id netlink attribute so userspace can select a
	 * driver-specific provider, unless the driver has explicitly opted
	 * out of such binding.
	 */
	if (!ibdev->ops.uverbs_no_driver_id_binding) {
		ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
				  ibdev->ops.driver_id);
		if (ret)
			return ret;
	}
	return 0;
}

static struct ib_client uverbs_client = {
	.name   = "uverbs",
	.no_kverbs_req = true,
	.add    = ib_uverbs_add_one,
	.remove = ib_uverbs_remove_one,
	.get_nl_info = ib_uverbs_get_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("uverbs");

static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ib_uverbs_device *dev =
		container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(ibdev);

static ssize_t abi_version_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct ib_uverbs_device *dev =
		container_of(device, struct ib_uverbs_device, dev);
	int ret = -ENODEV;
	int srcu_key;
	struct ib_device *ib_dev;

	srcu_key = srcu_read_lock(&dev->disassociate_srcu);
	ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
	if (ib_dev)
		ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

	return ret;
}
static DEVICE_ATTR_RO(abi_version);

static struct attribute *ib_dev_attrs[] = {
	&dev_attr_abi_version.attr,
	&dev_attr_ibdev.attr,
	NULL,
};

static const struct attribute_group dev_attr_group = {
	.attrs = ib_dev_attrs,
};

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
			 __stringify(IB_USER_VERBS_ABI_VERSION));

static int ib_uverbs_create_uapi(struct ib_device *device,
				 struct ib_uverbs_device *uverbs_dev)
{
	struct uverbs_api *uapi;

	uapi = uverbs_alloc_api(device);
	if (IS_ERR(uapi))
		return PTR_ERR(uapi);

	uverbs_dev->uapi = uapi;
	return 0;
}

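/*
 * Client 'add' callback: allocate an ib_uverbs_device for the new
 * ib_device, pick a minor number and register the uverbsN char device.
 */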
static int ib_uverbs_add_one(struct ib_device *device)
{
	int devnum;
	dev_t base;
	struct ib_uverbs_device *uverbs_dev;
	int ret;

	if (!device->ops.alloc_ucontext)
		return -EOPNOTSUPP;

	uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
	if (!uverbs_dev)
		return -ENOMEM;

	ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
	if (ret) {
		kfree(uverbs_dev);
		return -ENOMEM;
	}

	device_initialize(&uverbs_dev->dev);
	uverbs_dev->dev.class = uverbs_class;
	uverbs_dev->dev.parent = device->dev.parent;
	uverbs_dev->dev.release = ib_uverbs_release_dev;
	uverbs_dev->groups[0] = &dev_attr_group;
	uverbs_dev->dev.groups = uverbs_dev->groups;
	refcount_set(&uverbs_dev->refcount, 1);
	init_completion(&uverbs_dev->comp);
	uverbs_dev->xrcd_tree = RB_ROOT;
	mutex_init(&uverbs_dev->xrcd_tree_mutex);
	mutex_init(&uverbs_dev->lists_mutex);
	INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
	rcu_assign_pointer(uverbs_dev->ib_dev, device);
	uverbs_dev->num_comp_vectors = device->num_comp_vectors;

	devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
			       GFP_KERNEL);
	if (devnum < 0) {
		ret = -ENOMEM;
		goto err;
	}
	uverbs_dev->devnum = devnum;
	if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
		base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
	else
		base = IB_UVERBS_BASE_DEV + devnum;

	ret = ib_uverbs_create_uapi(device, uverbs_dev);
	if (ret)
		goto err_uapi;

	uverbs_dev->dev.devt = base;
	dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);

	cdev_init(&uverbs_dev->cdev,
		  device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
	uverbs_dev->cdev.owner = THIS_MODULE;

	ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
	if (ret)
		goto err_uapi;

	ib_set_client_data(device, &uverbs_client, uverbs_dev);
	return 0;

err_uapi:
	ida_free(&uverbs_ida, devnum);
err:
	if (refcount_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	wait_for_completion(&uverbs_dev->comp);
	put_device(&uverbs_dev->dev);
	return ret;
}

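/*
 * Force every open file on this device to drop its HW resources. Userspace
 * sees -EIO on further access; the files stay open until closed normally.
 */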
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
					struct ib_device *ib_dev)
{
	struct ib_uverbs_file *file;

	/* Pending running commands to terminate */
	uverbs_disassociate_api_pre(uverbs_dev);

	mutex_lock(&uverbs_dev->lists_mutex);
	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
		file = list_first_entry(&uverbs_dev->uverbs_file_list,
					struct ib_uverbs_file, list);
		list_del_init(&file->list);
		kref_get(&file->ref);

		/*
		 * We must release the mutex before going ahead and calling
		 * uverbs_destroy_ufile_hw(), as it might end up indirectly
		 * calling uverbs_close(), for example due to freeing the
		 * resources (e.g mmput).
		 */
		mutex_unlock(&uverbs_dev->lists_mutex);

		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
		kref_put(&file->ref, ib_uverbs_release_file);

		mutex_lock(&uverbs_dev->lists_mutex);
	}
	mutex_unlock(&uverbs_dev->lists_mutex);

	uverbs_disassociate_api(uverbs_dev->uapi);
}

static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
	struct ib_uverbs_device *uverbs_dev = client_data;
	int wait_clients = 1;

	cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
	ida_free(&uverbs_ida, uverbs_dev->devnum);

	if (device->ops.disassociate_ucontext) {
		/*
		 * We disassociate HW resources and immediately return.
		 * Userspace will see a EIO errno for all future access.
		 * Upon returning, ib_device may be freed internally and is not
		 * valid any more.
		 * uverbs_device is still available until all clients close
		 * their files, then the uverbs device ref count will be zero
		 * and its resources will be freed.
		 * Note: At this point no more files can be opened since the
		 * cdev was deleted, however active clients can still issue
		 * commands and close their open files.
		 */
		ib_uverbs_free_hw_resources(uverbs_dev, device);
		wait_clients = 0;
	}

	if (refcount_dec_and_test(&uverbs_dev->refcount))
		ib_uverbs_comp_dev(uverbs_dev);
	if (wait_clients)
		wait_for_completion(&uverbs_dev->comp);

	put_device(&uverbs_dev->dev);
}

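/* Expose nodes as /dev/infiniband/<name> with world read/write access. */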
static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;
	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

static int __init ib_uverbs_init(void)
{
	int ret;

	ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
				     IB_UVERBS_NUM_FIXED_MINOR,
				     "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register device number\n");
		goto out;
	}

	ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
				  IB_UVERBS_NUM_DYNAMIC_MINOR,
				  "infiniband_verbs");
	if (ret) {
		pr_err("user_verbs: couldn't register dynamic device number\n");
		goto out_alloc;
	}

	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
	if (IS_ERR(uverbs_class)) {
		ret = PTR_ERR(uverbs_class);
		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
		goto out_chrdev;
	}

	uverbs_class->devnode = uverbs_devnode;

	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
	if (ret) {
		pr_err("user_verbs: couldn't create abi_version attribute\n");
		goto out_class;
	}

	ret = ib_register_client(&uverbs_client);
	if (ret) {
		pr_err("user_verbs: couldn't register client\n");
		goto out_class;
	}

	return 0;

out_class:
	class_destroy(uverbs_class);

out_chrdev:
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);

out:
	return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
	ib_unregister_client(&uverbs_client);
	class_destroy(uverbs_class);
	unregister_chrdev_region(IB_UVERBS_BASE_DEV,
				 IB_UVERBS_NUM_FIXED_MINOR);
	unregister_chrdev_region(dynamic_uverbs_dev,
				 IB_UVERBS_NUM_DYNAMIC_MINOR);
	mmu_notifier_synchronize();
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);