/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>

#include <linux/uaccess.h>

#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/rdma_netlink.h>

#include "uverbs.h"
#include "core_priv.h"
#include "rdma_core.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
MODULE_LICENSE("Dual BSD/GPL");

enum {
    IB_UVERBS_MAJOR       = 231,
    IB_UVERBS_BASE_MINOR  = 192,
    IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
    IB_UVERBS_NUM_FIXED_MINOR = 32,
    IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};

#define IB_UVERBS_BASE_DEV  MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)

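/*
 * Minor-number layout implied by the constants above: the first
 * IB_UVERBS_NUM_FIXED_MINOR (32) devices use the statically registered
 * region, so devnum 0 maps to char device 231:192 and devnum 31 to
 * 231:223; any further devices draw from the dynamically allocated
 * dynamic_uverbs_dev region. See the devnum handling in
 * ib_uverbs_add_one() below.
 */
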
static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;

static DEFINE_IDA(uverbs_ida);
static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);

/*
 * Must be called with the ufile->device->disassociate_srcu held, and the lock
 * must be held until use of the ucontext is finished.
 */
struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
    /*
     * We do not hold the hw_destroy_rwsem lock for this flow; instead
     * srcu is used. It does not matter if someone races this with
     * get_context, we get either NULL or a valid ucontext.
     */
    struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

    if (!srcu_dereference(ufile->device->ib_dev,
                  &ufile->device->disassociate_srcu))
        return ERR_PTR(-EIO);

    if (!ucontext)
        return ERR_PTR(-EINVAL);

    return ucontext;
}
EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);

int uverbs_dealloc_mw(struct ib_mw *mw)
{
    struct ib_pd *pd = mw->pd;
    int ret;

    ret = mw->device->ops.dealloc_mw(mw);
    if (ret)
        return ret;

    atomic_dec(&pd->usecnt);
    kfree(mw);
    return ret;
}

static void ib_uverbs_release_dev(struct device *device)
{
    struct ib_uverbs_device *dev =
            container_of(device, struct ib_uverbs_device, dev);

    uverbs_destroy_api(dev->uapi);
    cleanup_srcu_struct(&dev->disassociate_srcu);
    mutex_destroy(&dev->lists_mutex);
    mutex_destroy(&dev->xrcd_tree_mutex);
    kfree(dev);
}

void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
               struct ib_ucq_object *uobj)
{
    struct ib_uverbs_event *evt, *tmp;

    if (ev_file) {
        spin_lock_irq(&ev_file->ev_queue.lock);
        list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
            list_del(&evt->list);
            kfree(evt);
        }
        spin_unlock_irq(&ev_file->ev_queue.lock);

        uverbs_uobject_put(&ev_file->uobj);
    }

    ib_uverbs_release_uevent(&uobj->uevent);
}

void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
    struct ib_uverbs_async_event_file *async_file = uobj->event_file;
    struct ib_uverbs_event *evt, *tmp;

    if (!async_file)
        return;

    spin_lock_irq(&async_file->ev_queue.lock);
    list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
        list_del(&evt->list);
        kfree(evt);
    }
    spin_unlock_irq(&async_file->ev_queue.lock);
    uverbs_uobject_put(&async_file->uobj);
}

void ib_uverbs_detach_umcast(struct ib_qp *qp,
                 struct ib_uqp_object *uobj)
{
    struct ib_uverbs_mcast_entry *mcast, *tmp;

    list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
        ib_detach_mcast(qp, &mcast->gid, mcast->lid);
        list_del(&mcast->list);
        kfree(mcast);
    }
}

static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
    complete(&dev->comp);
}

void ib_uverbs_release_file(struct kref *ref)
{
    struct ib_uverbs_file *file =
        container_of(ref, struct ib_uverbs_file, ref);
    struct ib_device *ib_dev;
    int srcu_key;

    release_ufile_idr_uobject(file);

    srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
    ib_dev = srcu_dereference(file->device->ib_dev,
                  &file->device->disassociate_srcu);
    if (ib_dev && !ib_dev->ops.disassociate_ucontext)
        module_put(ib_dev->ops.owner);
    srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);

    if (refcount_dec_and_test(&file->device->refcount))
        ib_uverbs_comp_dev(file->device);

    if (file->default_async_file)
        uverbs_uobject_put(&file->default_async_file->uobj);
    put_device(&file->device->dev);

    if (file->disassociate_page)
        __free_pages(file->disassociate_page, 0);
    mutex_destroy(&file->umap_lock);
    mutex_destroy(&file->ucontext_lock);
    kfree(file);
}

static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
                    struct file *filp, char __user *buf,
                    size_t count, loff_t *pos,
                    size_t eventsz)
{
    struct ib_uverbs_event *event;
    int ret = 0;

    spin_lock_irq(&ev_queue->lock);

    while (list_empty(&ev_queue->event_list)) {
        spin_unlock_irq(&ev_queue->lock);

        if (filp->f_flags & O_NONBLOCK)
            return -EAGAIN;

        if (wait_event_interruptible(ev_queue->poll_wait,
                         (!list_empty(&ev_queue->event_list) ||
                          ev_queue->is_closed)))
            return -ERESTARTSYS;

        spin_lock_irq(&ev_queue->lock);

        /* If the device was disassociated and no event exists, set an error */
        if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
            spin_unlock_irq(&ev_queue->lock);
            return -EIO;
        }
    }

    event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

    if (eventsz > count) {
        ret   = -EINVAL;
        event = NULL;
    } else {
        list_del(ev_queue->event_list.next);
        if (event->counter) {
            ++(*event->counter);
            list_del(&event->obj_list);
        }
    }

    spin_unlock_irq(&ev_queue->lock);

    if (event) {
        if (copy_to_user(buf, event, eventsz))
            ret = -EFAULT;
        else
            ret = eventsz;
    }

    kfree(event);

    return ret;
}
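
/*
 * Sketch of the consuming side (hypothetical userspace code, assuming only
 * the uapi structs from <rdma/ib_user_verbs.h>): each read() on a
 * completion channel fd returns exactly one fixed-size descriptor, e.g.
 *
 *     struct ib_uverbs_comp_event_desc desc;
 *     ssize_t n = read(channel_fd, &desc, sizeof(desc));
 *     if (n == sizeof(desc))
 *         handle_cq(desc.cq_handle);
 *
 * A buffer smaller than the descriptor gets -EINVAL from the check above,
 * and a blocking read returns -EIO once the device has been disassociated.
 */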

static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
                      size_t count, loff_t *pos)
{
    struct ib_uverbs_async_event_file *file = filp->private_data;

    return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
                    sizeof(struct ib_uverbs_async_event_desc));
}

static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
                     size_t count, loff_t *pos)
{
    struct ib_uverbs_completion_event_file *comp_ev_file =
        filp->private_data;

    return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
                    pos,
                    sizeof(struct ib_uverbs_comp_event_desc));
}

static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
                     struct file *filp,
                     struct poll_table_struct *wait)
{
    __poll_t pollflags = 0;

    poll_wait(filp, &ev_queue->poll_wait, wait);

    spin_lock_irq(&ev_queue->lock);
    if (!list_empty(&ev_queue->event_list))
        pollflags = EPOLLIN | EPOLLRDNORM;
    else if (ev_queue->is_closed)
        pollflags = EPOLLERR;
    spin_unlock_irq(&ev_queue->lock);

    return pollflags;
}

static __poll_t ib_uverbs_async_event_poll(struct file *filp,
                           struct poll_table_struct *wait)
{
    struct ib_uverbs_async_event_file *file = filp->private_data;

    return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}

static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
                          struct poll_table_struct *wait)
{
    struct ib_uverbs_completion_event_file *comp_ev_file =
        filp->private_data;

    return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
}

static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
    struct ib_uverbs_async_event_file *file = filp->private_data;

    return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}

static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
{
    struct ib_uverbs_completion_event_file *comp_ev_file =
        filp->private_data;

    return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}

const struct file_operations uverbs_event_fops = {
    .owner   = THIS_MODULE,
    .read    = ib_uverbs_comp_event_read,
    .poll    = ib_uverbs_comp_event_poll,
    .release = uverbs_uobject_fd_release,
    .fasync  = ib_uverbs_comp_event_fasync,
    .llseek  = no_llseek,
};

const struct file_operations uverbs_async_event_fops = {
    .owner   = THIS_MODULE,
    .read    = ib_uverbs_async_event_read,
    .poll    = ib_uverbs_async_event_poll,
    .release = uverbs_async_event_release,
    .fasync  = ib_uverbs_async_event_fasync,
    .llseek  = no_llseek,
};

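/*
 * This is installed as cq->comp_handler and may be invoked from the
 * driver's completion-interrupt path, hence the GFP_ATOMIC allocation
 * and irqsave locking below. If the allocation fails, the event (and
 * its wakeup) is dropped; the CQE itself still sits in the CQ.
 */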
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
    struct ib_uverbs_event_queue   *ev_queue = cq_context;
    struct ib_ucq_object           *uobj;
    struct ib_uverbs_event         *entry;
    unsigned long           flags;

    if (!ev_queue)
        return;

    spin_lock_irqsave(&ev_queue->lock, flags);
    if (ev_queue->is_closed) {
        spin_unlock_irqrestore(&ev_queue->lock, flags);
        return;
    }

    entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
    if (!entry) {
        spin_unlock_irqrestore(&ev_queue->lock, flags);
        return;
    }

    uobj = cq->uobject;

    entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
    entry->counter         = &uobj->comp_events_reported;

    list_add_tail(&entry->list, &ev_queue->event_list);
    list_add_tail(&entry->obj_list, &uobj->comp_list);
    spin_unlock_irqrestore(&ev_queue->lock, flags);

    wake_up_interruptible(&ev_queue->poll_wait);
    kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}

void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
                 __u64 element, __u64 event,
                 struct list_head *obj_list, u32 *counter)
{
    struct ib_uverbs_event *entry;
    unsigned long flags;

    if (!async_file)
        return;

    spin_lock_irqsave(&async_file->ev_queue.lock, flags);
    if (async_file->ev_queue.is_closed) {
        spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
        return;
    }

    entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
    if (!entry) {
        spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
        return;
    }

    entry->desc.async.element = element;
    entry->desc.async.event_type = event;
    entry->desc.async.reserved = 0;
    entry->counter = counter;

    list_add_tail(&entry->list, &async_file->ev_queue.event_list);
    if (obj_list)
        list_add_tail(&entry->obj_list, obj_list);
    spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);

    wake_up_interruptible(&async_file->ev_queue.poll_wait);
    kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}

static void uverbs_uobj_event(struct ib_uevent_object *eobj,
                  struct ib_event *event)
{
    ib_uverbs_async_handler(eobj->event_file,
                eobj->uobject.user_handle, event->event,
                &eobj->event_list, &eobj->events_reported);
}

void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
    uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}

void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
    /* For XRC target QPs, check that the qp is live */
    if (!event->element.qp->uobject)
        return;

    uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}

void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
    uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}

void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
    uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}

static void ib_uverbs_event_handler(struct ib_event_handler *handler,
                    struct ib_event *event)
{
    ib_uverbs_async_handler(
        container_of(handler, struct ib_uverbs_async_event_file,
                 event_handler),
        event->element.port_num, event->event, NULL, NULL);
}

void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
    spin_lock_init(&ev_queue->lock);
    INIT_LIST_HEAD(&ev_queue->event_list);
    init_waitqueue_head(&ev_queue->poll_wait);
    ev_queue->is_closed   = 0;
    ev_queue->async_queue = NULL;
}

void ib_uverbs_init_async_event_file(
    struct ib_uverbs_async_event_file *async_file)
{
    struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
    struct ib_device *ib_dev = async_file->uobj.context->device;

    ib_uverbs_init_event_queue(&async_file->ev_queue);

    /* The first async_event_file becomes the default one for the file. */
    mutex_lock(&uverbs_file->ucontext_lock);
    if (!uverbs_file->default_async_file) {
        /* Pairs with the put in ib_uverbs_release_file */
        uverbs_uobject_get(&async_file->uobj);
        smp_store_release(&uverbs_file->default_async_file, async_file);
    }
    mutex_unlock(&uverbs_file->ucontext_lock);

    INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
                  ib_uverbs_event_handler);
    ib_register_event_handler(&async_file->event_handler);
}

static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
              struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
              const struct uverbs_api_write_method *method_elm)
{
    if (method_elm->is_ex) {
        count -= sizeof(*hdr) + sizeof(*ex_hdr);

        if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
            return -EINVAL;

        if (hdr->in_words * 8 < method_elm->req_size)
            return -ENOSPC;

        if (ex_hdr->cmd_hdr_reserved)
            return -EINVAL;

        if (ex_hdr->response) {
            if (!hdr->out_words && !ex_hdr->provider_out_words)
                return -EINVAL;

            if (hdr->out_words * 8 < method_elm->resp_size)
                return -ENOSPC;

            if (!access_ok(u64_to_user_ptr(ex_hdr->response),
                       (hdr->out_words + ex_hdr->provider_out_words) * 8))
                return -EFAULT;
        } else {
            if (hdr->out_words || ex_hdr->provider_out_words)
                return -EINVAL;
        }

        return 0;
    }

    /* not extended command */
    if (hdr->in_words * 4 != count)
        return -EINVAL;

    if (count < method_elm->req_size + sizeof(hdr)) {
        /*
         * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
         * with a 16 byte write instead of 24. Old kernels didn't
         * check the size so they allowed this. Now that the size is
         * checked provide a compatibility work around to not break
         * those userspaces.
         */
        if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
            count == 16) {
            hdr->in_words = 6;
            return 0;
        }
        return -ENOSPC;
    }
    if (hdr->out_words * 4 < method_elm->resp_size)
        return -ENOSPC;

    return 0;
}
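
/*
 * Size accounting in verify_hdr(), by way of a worked example: legacy
 * commands count 4-byte words and include the 8-byte ib_uverbs_cmd_hdr,
 * so a 24-byte DESTROY_CQ request has in_words == 6 (which is exactly
 * what the compatibility hack above restores). Extended commands count
 * 8-byte words and exclude both headers, so there
 * (in_words + provider_in_words) * 8 must equal what remains of count
 * after both headers are subtracted.
 */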

static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                 size_t count, loff_t *pos)
{
    struct ib_uverbs_file *file = filp->private_data;
    const struct uverbs_api_write_method *method_elm;
    struct uverbs_api *uapi = file->device->uapi;
    struct ib_uverbs_ex_cmd_hdr ex_hdr;
    struct ib_uverbs_cmd_hdr hdr;
    struct uverbs_attr_bundle bundle;
    int srcu_key;
    ssize_t ret;

    if (!ib_safe_file_access(filp)) {
        pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
                task_tgid_vnr(current), current->comm);
        return -EACCES;
    }

    if (count < sizeof(hdr))
        return -EINVAL;

    if (copy_from_user(&hdr, buf, sizeof(hdr)))
        return -EFAULT;

    method_elm = uapi_get_method(uapi, hdr.command);
    if (IS_ERR(method_elm))
        return PTR_ERR(method_elm);

    if (method_elm->is_ex) {
        if (count < (sizeof(hdr) + sizeof(ex_hdr)))
            return -EINVAL;
        if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
            return -EFAULT;
    }

    ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
    if (ret)
        return ret;

    srcu_key = srcu_read_lock(&file->device->disassociate_srcu);

    buf += sizeof(hdr);

    memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
    bundle.ufile = file;
    bundle.context = NULL; /* only valid if bundle has uobject */
    bundle.uobject = NULL;
    if (!method_elm->is_ex) {
        size_t in_len = hdr.in_words * 4 - sizeof(hdr);
        size_t out_len = hdr.out_words * 4;
        u64 response = 0;

        if (method_elm->has_udata) {
            bundle.driver_udata.inlen =
                in_len - method_elm->req_size;
            in_len = method_elm->req_size;
            if (bundle.driver_udata.inlen)
                bundle.driver_udata.inbuf = buf + in_len;
            else
                bundle.driver_udata.inbuf = NULL;
        } else {
            memset(&bundle.driver_udata, 0,
                   sizeof(bundle.driver_udata));
        }

        if (method_elm->has_resp) {
            /*
             * The macros check that if has_resp is set
             * then the command request structure starts
             * with a '__aligned u64 response' member.
             */
            ret = get_user(response, (const u64 __user *)buf);
            if (ret)
                goto out_unlock;

            if (method_elm->has_udata) {
                bundle.driver_udata.outlen =
                    out_len - method_elm->resp_size;
                out_len = method_elm->resp_size;
                if (bundle.driver_udata.outlen)
                    bundle.driver_udata.outbuf =
                        u64_to_user_ptr(response +
                                out_len);
                else
                    bundle.driver_udata.outbuf = NULL;
            }
        } else {
            bundle.driver_udata.outlen = 0;
            bundle.driver_udata.outbuf = NULL;
        }

        ib_uverbs_init_udata_buf_or_null(
            &bundle.ucore, buf, u64_to_user_ptr(response),
            in_len, out_len);
    } else {
        buf += sizeof(ex_hdr);

        ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
                    u64_to_user_ptr(ex_hdr.response),
                    hdr.in_words * 8, hdr.out_words * 8);

        ib_uverbs_init_udata_buf_or_null(
            &bundle.driver_udata, buf + bundle.ucore.inlen,
            u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
            ex_hdr.provider_in_words * 8,
            ex_hdr.provider_out_words * 8);

    }

    ret = method_elm->handler(&bundle);
    if (bundle.uobject)
        uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
                       !ret, &bundle);
out_unlock:
    srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
    return (ret) ? : count;
}
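
/*
 * A minimal sketch of the userspace half of this write() ABI
 * (hypothetical code, using only definitions from the uapi header; in
 * practice libibverbs generates these writes): a legacy command is one
 * ib_uverbs_cmd_hdr followed immediately by the command payload:
 *
 *     struct ib_uverbs_get_context_resp resp;
 *     struct {
 *         struct ib_uverbs_cmd_hdr hdr;
 *         struct ib_uverbs_get_context cmd;
 *     } req = {
 *         .hdr.command   = IB_USER_VERBS_CMD_GET_CONTEXT,
 *         .hdr.in_words  = sizeof(req) / 4,
 *         .hdr.out_words = sizeof(resp) / 4,
 *         .cmd.response  = (uintptr_t)&resp,
 *     };
 *     write(uverbs_fd, &req, sizeof(req));
 */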

static const struct vm_operations_struct rdma_umap_ops;

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
    struct ib_uverbs_file *file = filp->private_data;
    struct ib_ucontext *ucontext;
    int ret = 0;
    int srcu_key;

    srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
    ucontext = ib_uverbs_get_ucontext_file(file);
    if (IS_ERR(ucontext)) {
        ret = PTR_ERR(ucontext);
        goto out;
    }
    vma->vm_ops = &rdma_umap_ops;
    ret = ucontext->device->ops.mmap(ucontext, vma);
out:
    srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
    return ret;
}

/*
 * The VMA has been dup'd; initialize the vm_private_data with a new
 * tracking struct.
 */
static void rdma_umap_open(struct vm_area_struct *vma)
{
    struct ib_uverbs_file *ufile = vma->vm_file->private_data;
    struct rdma_umap_priv *opriv = vma->vm_private_data;
    struct rdma_umap_priv *priv;

    if (!opriv)
        return;

    /* We are racing with disassociation */
    if (!down_read_trylock(&ufile->hw_destroy_rwsem))
        goto out_zap;
    /*
     * Disassociation already completed, the VMA should already be zapped.
     */
    if (!ufile->ucontext)
        goto out_unlock;

    priv = kzalloc(sizeof(*priv), GFP_KERNEL);
    if (!priv)
        goto out_unlock;
    rdma_umap_priv_init(priv, vma, opriv->entry);

    up_read(&ufile->hw_destroy_rwsem);
    return;

out_unlock:
    up_read(&ufile->hw_destroy_rwsem);
out_zap:
    /*
     * We can't allow the VMA to be created with the actual IO pages, that
     * would break our API contract, and it can't be stopped at this
     * point, so zap it.
     */
    vma->vm_private_data = NULL;
    zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}

static void rdma_umap_close(struct vm_area_struct *vma)
{
    struct ib_uverbs_file *ufile = vma->vm_file->private_data;
    struct rdma_umap_priv *priv = vma->vm_private_data;

    if (!priv)
        return;

    /*
     * The vma holds a reference on the struct file that created it, which
     * in turn means that the ib_uverbs_file is guaranteed to exist at
     * this point.
     */
    mutex_lock(&ufile->umap_lock);
    if (priv->entry)
        rdma_user_mmap_entry_put(priv->entry);

    list_del(&priv->list);
    mutex_unlock(&ufile->umap_lock);
    kfree(priv);
}

/*
 * Once zap_vma_ptes() has been called, touches to the VMA will come here
 * and we return a dummy writable zero page for all the pfns.
 */
static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
{
    struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
    struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
    vm_fault_t ret = 0;

    if (!priv)
        return VM_FAULT_SIGBUS;

    /* Read only pages can just use the system zero page. */
    if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
        vmf->page = ZERO_PAGE(vmf->address);
        get_page(vmf->page);
        return 0;
    }

    mutex_lock(&ufile->umap_lock);
    if (!ufile->disassociate_page)
        ufile->disassociate_page =
            alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);

    if (ufile->disassociate_page) {
        /*
         * This VMA is forced to always be shared so this doesn't have
         * to worry about COW.
         */
        vmf->page = ufile->disassociate_page;
        get_page(vmf->page);
    } else {
        ret = VM_FAULT_SIGBUS;
    }
    mutex_unlock(&ufile->umap_lock);

    return ret;
}

static const struct vm_operations_struct rdma_umap_ops = {
    .open = rdma_umap_open,
    .close = rdma_umap_close,
    .fault = rdma_umap_fault,
};
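
/*
 * Taken together, the rdma_umap_* hooks give driver mmaps a safe
 * lifecycle across device removal: every VMA is tracked in
 * ufile->umaps, open() duplicates the tracking struct when a VMA is
 * copied or split, and once uverbs_user_mmap_disassociate() (below)
 * zaps the PTEs, any later touch faults in a harmless zero or dummy
 * page instead of reaching hardware that may no longer exist.
 */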

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
    struct rdma_umap_priv *priv, *next_priv;

    lockdep_assert_held(&ufile->hw_destroy_rwsem);

    while (1) {
        struct mm_struct *mm = NULL;

        /* Get an arbitrary mm pointer that hasn't been cleaned yet */
        mutex_lock(&ufile->umap_lock);
        while (!list_empty(&ufile->umaps)) {
            int ret;

            priv = list_first_entry(&ufile->umaps,
                        struct rdma_umap_priv, list);
            mm = priv->vma->vm_mm;
            ret = mmget_not_zero(mm);
            if (!ret) {
                list_del_init(&priv->list);
                if (priv->entry) {
                    rdma_user_mmap_entry_put(priv->entry);
                    priv->entry = NULL;
                }
                mm = NULL;
                continue;
            }
            break;
        }
        mutex_unlock(&ufile->umap_lock);
        if (!mm)
            return;

        /*
         * The umap_lock is nested under mmap_lock since it is used within
         * the vma_ops callbacks, so we have to clean the list one mm
         * at a time to get the lock ordering right. Typically there
         * will only be one mm, so no big deal.
         */
        mmap_read_lock(mm);
        mutex_lock(&ufile->umap_lock);
        list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
                      list) {
            struct vm_area_struct *vma = priv->vma;

            if (vma->vm_mm != mm)
                continue;
            list_del_init(&priv->list);

            zap_vma_ptes(vma, vma->vm_start,
                     vma->vm_end - vma->vm_start);

            if (priv->entry) {
                rdma_user_mmap_entry_put(priv->entry);
                priv->entry = NULL;
            }
        }
        mutex_unlock(&ufile->umap_lock);
        mmap_read_unlock(mm);
        mmput(mm);
    }
}

/*
 * ib_uverbs_open() does not need the BKL:
 *
 *  - the ib_uverbs_device structures are properly reference counted and
 *    everything else is purely local to the file being created, so
 *    races against other open calls are not a problem;
 *  - there is no ioctl method to race against;
 *  - the open method will either immediately fail with -ENXIO, or all
 *    required initialization will be done.
 */
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
    struct ib_uverbs_device *dev;
    struct ib_uverbs_file *file;
    struct ib_device *ib_dev;
    int ret;
    int module_dependent;
    int srcu_key;

    dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
    if (!refcount_inc_not_zero(&dev->refcount))
        return -ENXIO;

    get_device(&dev->dev);
    srcu_key = srcu_read_lock(&dev->disassociate_srcu);
    mutex_lock(&dev->lists_mutex);
    ib_dev = srcu_dereference(dev->ib_dev,
                  &dev->disassociate_srcu);
    if (!ib_dev) {
        ret = -EIO;
        goto err;
    }

    if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
        ret = -EPERM;
        goto err;
    }

    /* If the IB device supports disassociating the ucontext, there is no
     * hard dependency between the uverbs device and its low-level device.
     */
    module_dependent = !(ib_dev->ops.disassociate_ucontext);

    if (module_dependent) {
        if (!try_module_get(ib_dev->ops.owner)) {
            ret = -ENODEV;
            goto err;
        }
    }

    file = kzalloc(sizeof(*file), GFP_KERNEL);
    if (!file) {
        ret = -ENOMEM;
        if (module_dependent)
            goto err_module;

        goto err;
    }

    file->device     = dev;
    kref_init(&file->ref);
    mutex_init(&file->ucontext_lock);

    spin_lock_init(&file->uobjects_lock);
    INIT_LIST_HEAD(&file->uobjects);
    init_rwsem(&file->hw_destroy_rwsem);
    mutex_init(&file->umap_lock);
    INIT_LIST_HEAD(&file->umaps);

    filp->private_data = file;
    list_add_tail(&file->list, &dev->uverbs_file_list);
    mutex_unlock(&dev->lists_mutex);
    srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

    setup_ufile_idr_uobject(file);

    return stream_open(inode, filp);

err_module:
    module_put(ib_dev->ops.owner);

err:
    mutex_unlock(&dev->lists_mutex);
    srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
    if (refcount_dec_and_test(&dev->refcount))
        ib_uverbs_comp_dev(dev);

    put_device(&dev->dev);
    return ret;
}

static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
    struct ib_uverbs_file *file = filp->private_data;

    uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);

    mutex_lock(&file->device->lists_mutex);
    list_del_init(&file->list);
    mutex_unlock(&file->device->lists_mutex);

    kref_put(&file->ref, ib_uverbs_release_file);

    return 0;
}

static const struct file_operations uverbs_fops = {
    .owner   = THIS_MODULE,
    .write   = ib_uverbs_write,
    .open    = ib_uverbs_open,
    .release = ib_uverbs_close,
    .llseek  = no_llseek,
    .unlocked_ioctl = ib_uverbs_ioctl,
    .compat_ioctl = compat_ptr_ioctl,
};

static const struct file_operations uverbs_mmap_fops = {
    .owner   = THIS_MODULE,
    .write   = ib_uverbs_write,
    .mmap    = ib_uverbs_mmap,
    .open    = ib_uverbs_open,
    .release = ib_uverbs_close,
    .llseek  = no_llseek,
    .unlocked_ioctl = ib_uverbs_ioctl,
    .compat_ioctl = compat_ptr_ioctl,
};

static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
                 struct ib_client_nl_info *res)
{
    struct ib_uverbs_device *uverbs_dev = client_data;
    int ret;

    if (res->port != -1)
        return -EINVAL;

    res->abi = ibdev->ops.uverbs_abi_ver;
    res->cdev = &uverbs_dev->dev;

    /*
     * To support DRIVER_ID binding in userspace some of the drivers need
     * upgrading to expose their PCI dependent revision information
     * through get_context instead of relying on modalias matching. When
     * the drivers are fixed they can drop this flag.
     */
    if (!ibdev->ops.uverbs_no_driver_id_binding) {
        ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
                  ibdev->ops.driver_id);
        if (ret)
            return ret;
    }
    return 0;
}

static struct ib_client uverbs_client = {
    .name   = "uverbs",
    .no_kverbs_req = true,
    .add    = ib_uverbs_add_one,
    .remove = ib_uverbs_remove_one,
    .get_nl_info = ib_uverbs_get_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("uverbs");
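
/*
 * Registering this client (done in ib_uverbs_init() below) should make
 * the RDMA core invoke ib_uverbs_add_one() for every existing and
 * future ib_device, so each device that supports alloc_ucontext gets a
 * matching /dev/infiniband/uverbsN char device; ib_uverbs_remove_one()
 * tears it down again.
 */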

static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
              char *buf)
{
    struct ib_uverbs_device *dev =
            container_of(device, struct ib_uverbs_device, dev);
    int ret = -ENODEV;
    int srcu_key;
    struct ib_device *ib_dev;

    srcu_key = srcu_read_lock(&dev->disassociate_srcu);
    ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
    if (ib_dev)
        ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
    srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

    return ret;
}
static DEVICE_ATTR_RO(ibdev);

static ssize_t abi_version_show(struct device *device,
                struct device_attribute *attr, char *buf)
{
    struct ib_uverbs_device *dev =
            container_of(device, struct ib_uverbs_device, dev);
    int ret = -ENODEV;
    int srcu_key;
    struct ib_device *ib_dev;

    srcu_key = srcu_read_lock(&dev->disassociate_srcu);
    ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
    if (ib_dev)
        ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
    srcu_read_unlock(&dev->disassociate_srcu, srcu_key);

    return ret;
}
static DEVICE_ATTR_RO(abi_version);

static struct attribute *ib_dev_attrs[] = {
    &dev_attr_abi_version.attr,
    &dev_attr_ibdev.attr,
    NULL,
};

static const struct attribute_group dev_attr_group = {
    .attrs = ib_dev_attrs,
};

static CLASS_ATTR_STRING(abi_version, S_IRUGO,
             __stringify(IB_USER_VERBS_ABI_VERSION));

static int ib_uverbs_create_uapi(struct ib_device *device,
                 struct ib_uverbs_device *uverbs_dev)
{
    struct uverbs_api *uapi;

    uapi = uverbs_alloc_api(device);
    if (IS_ERR(uapi))
        return PTR_ERR(uapi);

    uverbs_dev->uapi = uapi;
    return 0;
}

static int ib_uverbs_add_one(struct ib_device *device)
{
    int devnum;
    dev_t base;
    struct ib_uverbs_device *uverbs_dev;
    int ret;

    if (!device->ops.alloc_ucontext)
        return -EOPNOTSUPP;

    uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
    if (!uverbs_dev)
        return -ENOMEM;

    ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
    if (ret) {
        kfree(uverbs_dev);
        return -ENOMEM;
    }

    device_initialize(&uverbs_dev->dev);
    uverbs_dev->dev.class = uverbs_class;
    uverbs_dev->dev.parent = device->dev.parent;
    uverbs_dev->dev.release = ib_uverbs_release_dev;
    uverbs_dev->groups[0] = &dev_attr_group;
    uverbs_dev->dev.groups = uverbs_dev->groups;
    refcount_set(&uverbs_dev->refcount, 1);
    init_completion(&uverbs_dev->comp);
    uverbs_dev->xrcd_tree = RB_ROOT;
    mutex_init(&uverbs_dev->xrcd_tree_mutex);
    mutex_init(&uverbs_dev->lists_mutex);
    INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
    rcu_assign_pointer(uverbs_dev->ib_dev, device);
    uverbs_dev->num_comp_vectors = device->num_comp_vectors;

    devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
                   GFP_KERNEL);
    if (devnum < 0) {
        ret = -ENOMEM;
        goto err;
    }
    uverbs_dev->devnum = devnum;
    if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
        base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
    else
        base = IB_UVERBS_BASE_DEV + devnum;

    ret = ib_uverbs_create_uapi(device, uverbs_dev);
    if (ret)
        goto err_uapi;

    uverbs_dev->dev.devt = base;
    dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);

    cdev_init(&uverbs_dev->cdev,
          device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
    uverbs_dev->cdev.owner = THIS_MODULE;

    ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
    if (ret)
        goto err_uapi;

    ib_set_client_data(device, &uverbs_client, uverbs_dev);
    return 0;

err_uapi:
    ida_free(&uverbs_ida, devnum);
err:
    if (refcount_dec_and_test(&uverbs_dev->refcount))
        ib_uverbs_comp_dev(uverbs_dev);
    wait_for_completion(&uverbs_dev->comp);
    put_device(&uverbs_dev->dev);
    return ret;
}

static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
                    struct ib_device *ib_dev)
{
    struct ib_uverbs_file *file;

    /* Wait for pending running commands to terminate */
    uverbs_disassociate_api_pre(uverbs_dev);

    mutex_lock(&uverbs_dev->lists_mutex);
    while (!list_empty(&uverbs_dev->uverbs_file_list)) {
        file = list_first_entry(&uverbs_dev->uverbs_file_list,
                    struct ib_uverbs_file, list);
        list_del_init(&file->list);
        kref_get(&file->ref);

        /* We must release the mutex before going ahead and calling
         * uverbs_cleanup_ufile, as it might end up indirectly calling
         * uverbs_close, for example due to freeing the resources (e.g.
         * mmput).
         */
        mutex_unlock(&uverbs_dev->lists_mutex);

        uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
        kref_put(&file->ref, ib_uverbs_release_file);

        mutex_lock(&uverbs_dev->lists_mutex);
    }
    mutex_unlock(&uverbs_dev->lists_mutex);

    uverbs_disassociate_api(uverbs_dev->uapi);
}

static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
{
    struct ib_uverbs_device *uverbs_dev = client_data;
    int wait_clients = 1;

    cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
    ida_free(&uverbs_ida, uverbs_dev->devnum);

    if (device->ops.disassociate_ucontext) {
        /* We disassociate HW resources and immediately return.
         * Userspace will see an EIO errno for all future access.
         * Upon returning, ib_device may be freed internally and is not
         * valid any more.
         * uverbs_device is still available until all clients close
         * their files, then the uverbs device ref count will be zero
         * and its resources will be freed.
         * Note: At this point no more files can be opened since the
         * cdev was deleted, however active clients can still issue
         * commands and close their open files.
         */
        ib_uverbs_free_hw_resources(uverbs_dev, device);
        wait_clients = 0;
    }

    if (refcount_dec_and_test(&uverbs_dev->refcount))
        ib_uverbs_comp_dev(uverbs_dev);
    if (wait_clients)
        wait_for_completion(&uverbs_dev->comp);

    put_device(&uverbs_dev->dev);
}

static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
    if (mode)
        *mode = 0666;
    return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
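
/*
 * With this devnode callback, udev typically creates the nodes as
 * /dev/infiniband/uverbsN with mode 0666, i.e. any user may open a
 * verbs device; further checks (such as the network-namespace test in
 * ib_uverbs_open()) still apply at open and command time.
 */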

static int __init ib_uverbs_init(void)
{
    int ret;

    ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
                     IB_UVERBS_NUM_FIXED_MINOR,
                     "infiniband_verbs");
    if (ret) {
        pr_err("user_verbs: couldn't register device number\n");
        goto out;
    }

    ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
                  IB_UVERBS_NUM_DYNAMIC_MINOR,
                  "infiniband_verbs");
    if (ret) {
        pr_err("couldn't register dynamic device number\n");
        goto out_alloc;
    }

    uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
    if (IS_ERR(uverbs_class)) {
        ret = PTR_ERR(uverbs_class);
        pr_err("user_verbs: couldn't create class infiniband_verbs\n");
        goto out_chrdev;
    }

    uverbs_class->devnode = uverbs_devnode;

    ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
    if (ret) {
        pr_err("user_verbs: couldn't create abi_version attribute\n");
        goto out_class;
    }

    ret = ib_register_client(&uverbs_client);
    if (ret) {
        pr_err("user_verbs: couldn't register client\n");
        goto out_class;
    }

    return 0;

out_class:
    class_destroy(uverbs_class);

out_chrdev:
    unregister_chrdev_region(dynamic_uverbs_dev,
                 IB_UVERBS_NUM_DYNAMIC_MINOR);

out_alloc:
    unregister_chrdev_region(IB_UVERBS_BASE_DEV,
                 IB_UVERBS_NUM_FIXED_MINOR);

out:
    return ret;
}

static void __exit ib_uverbs_cleanup(void)
{
    ib_unregister_client(&uverbs_client);
    class_destroy(uverbs_class);
    unregister_chrdev_region(IB_UVERBS_BASE_DEV,
                 IB_UVERBS_NUM_FIXED_MINOR);
    unregister_chrdev_region(dynamic_uverbs_dev,
                 IB_UVERBS_NUM_DYNAMIC_MINOR);
    mmu_notifier_synchronize();
}

module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);