/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/export.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include <rdma/mthca-abi.h>
#include "mthca_memfree.h"

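/*
 * Report HCA attributes.  Identity fields (vendor, part and hardware
 * revision, system image GUID) come from a NodeInfo MAD issued through
 * firmware; the remaining limits were cached in mthca_dev at probe time.
 */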
static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
                  struct ib_udata *uhw)
{
    struct ib_smp *in_mad  = NULL;
    struct ib_smp *out_mad = NULL;
    int err = -ENOMEM;
    struct mthca_dev *mdev = to_mdev(ibdev);

    if (uhw->inlen || uhw->outlen)
        return -EINVAL;

    in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    if (!in_mad || !out_mad)
        goto out;

    memset(props, 0, sizeof *props);

    props->fw_ver              = mdev->fw_ver;

    ib_init_query_mad(in_mad);
    in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

    err = mthca_MAD_IFC(mdev, 1, 1,
                1, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    props->device_cap_flags    = mdev->device_cap_flags;
    props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
        0xffffff;
    props->vendor_part_id      = be16_to_cpup((__be16 *) (out_mad->data + 30));
    props->hw_ver              = be32_to_cpup((__be32 *) (out_mad->data + 32));
    memcpy(&props->sys_image_guid, out_mad->data +  4, 8);

    props->max_mr_size         = ~0ull;
    props->page_size_cap       = mdev->limits.page_size_cap;
    props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
    props->max_qp_wr           = mdev->limits.max_wqes;
    props->max_send_sge        = mdev->limits.max_sg;
    props->max_recv_sge        = mdev->limits.max_sg;
    props->max_sge_rd          = mdev->limits.max_sg;
    props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
    props->max_cqe             = mdev->limits.max_cqes;
    props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
    props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
    props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
    props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
    props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
    props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
    props->max_srq_wr          = mdev->limits.max_srq_wqes;
    props->max_srq_sge         = mdev->limits.max_srq_sge;
    props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
    props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
                    IB_ATOMIC_HCA : IB_ATOMIC_NONE;
    props->max_pkeys           = mdev->limits.pkey_table_len;
    props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
    props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
    props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
                       props->max_mcast_grp;

    err = 0;
 out:
    kfree(in_mad);
    kfree(out_mad);
    return err;
}

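/*
 * Report per-port attributes by parsing a PortInfo MAD.  The numeric
 * offsets below index fields of the IBA PortInfo attribute layout.
 */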
static int mthca_query_port(struct ib_device *ibdev,
                u32 port, struct ib_port_attr *props)
{
    struct ib_smp *in_mad  = NULL;
    struct ib_smp *out_mad = NULL;
    int err = -ENOMEM;

    in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    if (!in_mad || !out_mad)
        goto out;

    /* props being zeroed by the caller, avoid zeroing it here */

    ib_init_query_mad(in_mad);
    in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
    in_mad->attr_mod = cpu_to_be32(port);

    err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                port, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    props->lid               = be16_to_cpup((__be16 *) (out_mad->data + 16));
    props->lmc               = out_mad->data[34] & 0x7;
    props->sm_lid            = be16_to_cpup((__be16 *) (out_mad->data + 18));
    props->sm_sl             = out_mad->data[36] & 0xf;
    props->state             = out_mad->data[32] & 0xf;
    props->phys_state        = out_mad->data[33] >> 4;
    props->port_cap_flags    = be32_to_cpup((__be32 *) (out_mad->data + 20));
    props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
    props->max_msg_sz        = 0x80000000;
    props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
    props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
    props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
    props->active_width      = out_mad->data[31] & 0xf;
    props->active_speed      = out_mad->data[35] >> 4;
    props->max_mtu           = out_mad->data[41] & 0xf;
    props->active_mtu        = out_mad->data[36] >> 4;
    props->subnet_timeout    = out_mad->data[51] & 0x1f;
    props->max_vl_num        = out_mad->data[37] >> 4;
    props->init_type_reply   = out_mad->data[41] >> 4;

 out:
    kfree(in_mad);
    kfree(out_mad);
    return err;
}

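/* Only node description changes are supported. */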
static int mthca_modify_device(struct ib_device *ibdev,
                   int mask,
                   struct ib_device_modify *props)
{
    if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
        return -EOPNOTSUPP;

    if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
        if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
            return -ERESTARTSYS;
        memcpy(ibdev->node_desc, props->node_desc,
               IB_DEVICE_NODE_DESC_MAX);
        mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
    }

    return 0;
}

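/*
 * Update the port capability mask: merge the requested set/clear bits
 * into the current mask and push the result to firmware with SET_IB.
 */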
static int mthca_modify_port(struct ib_device *ibdev,
                 u32 port, int port_modify_mask,
                 struct ib_port_modify *props)
{
    struct mthca_set_ib_param set_ib;
    struct ib_port_attr attr;
    int err;

    if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
        return -ERESTARTSYS;

    err = ib_query_port(ibdev, port, &attr);
    if (err)
        goto out;

    set_ib.set_si_guid     = 0;
    set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);

    set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
        ~props->clr_port_cap_mask;

    err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port);
out:
    mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
    return err;
}

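/*
 * Fetch one P_Key.  Each PkeyTable MAD returns a block of 32 entries,
 * so attr_mod selects the block and the remainder indexes within it.
 */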
static int mthca_query_pkey(struct ib_device *ibdev,
                u32 port, u16 index, u16 *pkey)
{
    struct ib_smp *in_mad  = NULL;
    struct ib_smp *out_mad = NULL;
    int err = -ENOMEM;

    in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    if (!in_mad || !out_mad)
        goto out;

    ib_init_query_mad(in_mad);
    in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
    in_mad->attr_mod = cpu_to_be32(index / 32);

    err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                port, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);

 out:
    kfree(in_mad);
    kfree(out_mad);
    return err;
}

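/*
 * Build a GID from two MADs: the subnet prefix comes from PortInfo and
 * the per-index GUID from a GuidInfo block of 8 entries.
 */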
static int mthca_query_gid(struct ib_device *ibdev, u32 port,
               int index, union ib_gid *gid)
{
    struct ib_smp *in_mad  = NULL;
    struct ib_smp *out_mad = NULL;
    int err = -ENOMEM;

    in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    if (!in_mad || !out_mad)
        goto out;

    ib_init_query_mad(in_mad);
    in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
    in_mad->attr_mod = cpu_to_be32(port);

    err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                port, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    memcpy(gid->raw, out_mad->data + 8, 8);

    ib_init_query_mad(in_mad);
    in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
    in_mad->attr_mod = cpu_to_be32(index / 8);

    err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
                port, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

 out:
    kfree(in_mad);
    kfree(out_mad);
    return err;
}

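/*
 * Set up a userspace context: allocate a UAR for doorbells and, on
 * mem-free HCAs, a user doorbell table, then report the QP table and
 * UAR context sizes back through the udata response.
 */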
static int mthca_alloc_ucontext(struct ib_ucontext *uctx,
                struct ib_udata *udata)
{
    struct ib_device *ibdev = uctx->device;
    struct mthca_alloc_ucontext_resp uresp = {};
    struct mthca_ucontext *context = to_mucontext(uctx);
    int                              err;

    if (!(to_mdev(ibdev)->active))
        return -EAGAIN;

    uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
    if (mthca_is_memfree(to_mdev(ibdev)))
        uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
    else
        uresp.uarc_size = 0;

    err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
    if (err)
        return err;

    context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
    if (IS_ERR(context->db_tab)) {
        err = PTR_ERR(context->db_tab);
        mthca_uar_free(to_mdev(ibdev), &context->uar);
        return err;
    }

    if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
        mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
        mthca_uar_free(to_mdev(ibdev), &context->uar);
        return -EFAULT;
    }

    context->reg_mr_warned = 0;

    return 0;
}

static void mthca_dealloc_ucontext(struct ib_ucontext *context)
{
    mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
                  to_mucontext(context)->db_tab);
    mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
}

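/*
 * Map the context's UAR page into userspace so doorbells can be rung
 * without a kernel transition.  Exactly one page may be mapped, and it
 * must be uncached since it is MMIO.
 */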
static int mthca_mmap_uar(struct ib_ucontext *context,
              struct vm_area_struct *vma)
{
    if (vma->vm_end - vma->vm_start != PAGE_SIZE)
        return -EINVAL;

    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

    if (io_remap_pfn_range(vma, vma->vm_start,
                   to_mucontext(context)->uar.pfn,
                   PAGE_SIZE, vma->vm_page_prot))
        return -EAGAIN;

    return 0;
}

static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
    struct ib_device *ibdev = ibpd->device;
    struct mthca_pd *pd = to_mpd(ibpd);
    int err;

    err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd);
    if (err)
        return err;

    if (udata) {
        if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
            mthca_pd_free(to_mdev(ibdev), pd);
            return -EFAULT;
        }
    }

    return 0;
}

static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
    mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
    return 0;
}

static int mthca_ah_create(struct ib_ah *ibah,
               struct rdma_ah_init_attr *init_attr,
               struct ib_udata *udata)
{
    struct mthca_ah *ah = to_mah(ibah);

    return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd),
                   init_attr->ah_attr, ah);
}

static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
    mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
    return 0;
}

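/*
 * Create a shared receive queue.  For userspace SRQs the consumer
 * supplies its buffer lkey and doorbell location via udata; the
 * doorbell page is mapped before the SRQ itself is allocated.
 */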
static int mthca_create_srq(struct ib_srq *ibsrq,
                struct ib_srq_init_attr *init_attr,
                struct ib_udata *udata)
{
    struct mthca_create_srq ucmd;
    struct mthca_ucontext *context = rdma_udata_to_drv_context(
        udata, struct mthca_ucontext, ibucontext);
    struct mthca_srq *srq = to_msrq(ibsrq);
    int err;

    if (init_attr->srq_type != IB_SRQT_BASIC)
        return -EOPNOTSUPP;

    if (udata) {
        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
            return -EFAULT;

        err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar,
                    context->db_tab, ucmd.db_index,
                    ucmd.db_page);

        if (err)
            return err;

        srq->mr.ibmr.lkey = ucmd.lkey;
        srq->db_index     = ucmd.db_index;
    }

    err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd),
                  &init_attr->attr, srq, udata);

    if (err && udata)
        mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
                    context->db_tab, ucmd.db_index);

    if (err)
        return err;

    if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
        mthca_free_srq(to_mdev(ibsrq->device), srq);
        return -EFAULT;
    }

    return 0;
}

static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
    if (udata) {
        struct mthca_ucontext *context =
            rdma_udata_to_drv_context(
                udata,
                struct mthca_ucontext,
                ibucontext);

        mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
                    context->db_tab, to_msrq(srq)->db_index);
    }

    mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
    return 0;
}

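/*
 * Create a QP.  Regular QP types map the user's send and receive
 * doorbells first; special QPs (SMI/GSI) get a mthca_sqp and a fixed
 * QP number (0 or 1).  Raw QP types are not supported.
 */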
static int mthca_create_qp(struct ib_qp *ibqp,
               struct ib_qp_init_attr *init_attr,
               struct ib_udata *udata)
{
    struct mthca_ucontext *context = rdma_udata_to_drv_context(
        udata, struct mthca_ucontext, ibucontext);
    struct mthca_create_qp ucmd;
    struct mthca_qp *qp = to_mqp(ibqp);
    struct mthca_dev *dev = to_mdev(ibqp->device);
    int err;

    if (init_attr->create_flags)
        return -EOPNOTSUPP;

    switch (init_attr->qp_type) {
    case IB_QPT_RC:
    case IB_QPT_UC:
    case IB_QPT_UD:
    {
        if (udata) {
            if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
                return -EFAULT;

            err = mthca_map_user_db(dev, &context->uar,
                        context->db_tab,
                        ucmd.sq_db_index,
                        ucmd.sq_db_page);
            if (err)
                return err;

            err = mthca_map_user_db(dev, &context->uar,
                        context->db_tab,
                        ucmd.rq_db_index,
                        ucmd.rq_db_page);
            if (err) {
                mthca_unmap_user_db(dev, &context->uar,
                            context->db_tab,
                            ucmd.sq_db_index);
                return err;
            }

            qp->mr.ibmr.lkey = ucmd.lkey;
            qp->sq.db_index  = ucmd.sq_db_index;
            qp->rq.db_index  = ucmd.rq_db_index;
        }

        err = mthca_alloc_qp(dev, to_mpd(ibqp->pd),
                     to_mcq(init_attr->send_cq),
                     to_mcq(init_attr->recv_cq),
                     init_attr->qp_type, init_attr->sq_sig_type,
                     &init_attr->cap, qp, udata);

        if (err && udata) {
            mthca_unmap_user_db(dev, &context->uar, context->db_tab,
                        ucmd.sq_db_index);
            mthca_unmap_user_db(dev, &context->uar, context->db_tab,
                        ucmd.rq_db_index);
        }

        qp->ibqp.qp_num = qp->qpn;
        break;
    }
    case IB_QPT_SMI:
    case IB_QPT_GSI:
    {
        qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
        if (!qp->sqp)
            return -ENOMEM;

        qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;

        err = mthca_alloc_sqp(dev, to_mpd(ibqp->pd),
                      to_mcq(init_attr->send_cq),
                      to_mcq(init_attr->recv_cq),
                      init_attr->sq_sig_type, &init_attr->cap,
                      qp->ibqp.qp_num, init_attr->port_num, qp,
                      udata);
        break;
    }
    default:
        /* Don't support raw QPs */
        return -EOPNOTSUPP;
    }

    if (err) {
        kfree(qp->sqp);
        return err;
    }

    init_attr->cap.max_send_wr     = qp->sq.max;
    init_attr->cap.max_recv_wr     = qp->rq.max;
    init_attr->cap.max_send_sge    = qp->sq.max_gs;
    init_attr->cap.max_recv_sge    = qp->rq.max_gs;
    init_attr->cap.max_inline_data = qp->max_inline_data;

    return 0;
}

static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
    if (udata) {
        struct mthca_ucontext *context =
            rdma_udata_to_drv_context(
                udata,
                struct mthca_ucontext,
                ibucontext);

        mthca_unmap_user_db(to_mdev(qp->device),
                    &context->uar,
                    context->db_tab,
                    to_mqp(qp)->sq.db_index);
        mthca_unmap_user_db(to_mdev(qp->device),
                    &context->uar,
                    context->db_tab,
                    to_mqp(qp)->rq.db_index);
    }
    mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
    kfree(to_mqp(qp)->sqp);
    return 0;
}

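/*
 * Create a completion queue.  Userspace CQs pass in the buffer lkey
 * plus two doorbells (consumer index and arm); the entry count is
 * rounded up to a power of two, since the hardware works on
 * power-of-two CQ sizes.
 */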
static int mthca_create_cq(struct ib_cq *ibcq,
               const struct ib_cq_init_attr *attr,
               struct ib_udata *udata)
{
    struct ib_device *ibdev = ibcq->device;
    int entries = attr->cqe;
    struct mthca_create_cq ucmd;
    struct mthca_cq *cq;
    int nent;
    int err;
    struct mthca_ucontext *context = rdma_udata_to_drv_context(
        udata, struct mthca_ucontext, ibucontext);

    if (attr->flags)
        return -EOPNOTSUPP;

    if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
        return -EINVAL;

    if (udata) {
        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
            return -EFAULT;

        err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
                    context->db_tab, ucmd.set_db_index,
                    ucmd.set_db_page);
        if (err)
            return err;

        err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
                    context->db_tab, ucmd.arm_db_index,
                    ucmd.arm_db_page);
        if (err)
            goto err_unmap_set;
    }

    cq = to_mcq(ibcq);

    if (udata) {
        cq->buf.mr.ibmr.lkey = ucmd.lkey;
        cq->set_ci_db_index  = ucmd.set_db_index;
        cq->arm_db_index     = ucmd.arm_db_index;
    }

    for (nent = 1; nent <= entries; nent <<= 1)
        ; /* nothing */

    err = mthca_init_cq(to_mdev(ibdev), nent, context,
                udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
                cq);
    if (err)
        goto err_unmap_arm;

    if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
        mthca_free_cq(to_mdev(ibdev), cq);
        err = -EFAULT;
        goto err_unmap_arm;
    }

    cq->resize_buf = NULL;

    return 0;

err_unmap_arm:
    if (udata)
        mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
                    context->db_tab, ucmd.arm_db_index);

err_unmap_set:
    if (udata)
        mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
                    context->db_tab, ucmd.set_db_index);

    return err;
}

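/*
 * Allocate the temporary buffer used while resizing a kernel CQ.  The
 * state field tells the completion path whether the new buffer is
 * ready to receive copied CQEs.
 */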
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
                  int entries)
{
    int ret;

    spin_lock_irq(&cq->lock);
    if (cq->resize_buf) {
        ret = -EBUSY;
        goto unlock;
    }

    cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
    if (!cq->resize_buf) {
        ret = -ENOMEM;
        goto unlock;
    }

    cq->resize_buf->state = CQ_RESIZE_ALLOC;

    ret = 0;

unlock:
    spin_unlock_irq(&cq->lock);

    if (ret)
        return ret;

    ret = mthca_alloc_cq_buf(dev, &cq->resize_buf->buf, entries);
    if (ret) {
        spin_lock_irq(&cq->lock);
        kfree(cq->resize_buf);
        cq->resize_buf = NULL;
        spin_unlock_irq(&cq->lock);
        return ret;
    }

    cq->resize_buf->cqe = entries - 1;

    spin_lock_irq(&cq->lock);
    cq->resize_buf->state = CQ_RESIZE_READY;
    spin_unlock_irq(&cq->lock);

    return 0;
}

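/*
 * Resize a CQ.  Kernel CQs get a driver-allocated resize buffer and
 * the CQEs are copied over once firmware acknowledges RESIZE_CQ;
 * userspace CQs just pass the lkey of a buffer the consumer allocated.
 */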
static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
    struct mthca_dev *dev = to_mdev(ibcq->device);
    struct mthca_cq *cq = to_mcq(ibcq);
    struct mthca_resize_cq ucmd;
    u32 lkey;
    int ret;

    if (entries < 1 || entries > dev->limits.max_cqes)
        return -EINVAL;

    mutex_lock(&cq->mutex);

    entries = roundup_pow_of_two(entries + 1);
    if (entries == ibcq->cqe + 1) {
        ret = 0;
        goto out;
    }

    if (cq->is_kernel) {
        ret = mthca_alloc_resize_buf(dev, cq, entries);
        if (ret)
            goto out;
        lkey = cq->resize_buf->buf.mr.ibmr.lkey;
    } else {
        if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
            ret = -EFAULT;
            goto out;
        }
        lkey = ucmd.lkey;
    }

    ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries));

    if (ret) {
        if (cq->resize_buf) {
            mthca_free_cq_buf(dev, &cq->resize_buf->buf,
                      cq->resize_buf->cqe);
            kfree(cq->resize_buf);
            spin_lock_irq(&cq->lock);
            cq->resize_buf = NULL;
            spin_unlock_irq(&cq->lock);
        }
        goto out;
    }

    if (cq->is_kernel) {
        struct mthca_cq_buf tbuf;
        int tcqe;

        spin_lock_irq(&cq->lock);
        if (cq->resize_buf->state == CQ_RESIZE_READY) {
            mthca_cq_resize_copy_cqes(cq);
            tbuf         = cq->buf;
            tcqe         = cq->ibcq.cqe;
            cq->buf      = cq->resize_buf->buf;
            cq->ibcq.cqe = cq->resize_buf->cqe;
        } else {
            tbuf = cq->resize_buf->buf;
            tcqe = cq->resize_buf->cqe;
        }

        kfree(cq->resize_buf);
        cq->resize_buf = NULL;
        spin_unlock_irq(&cq->lock);

        mthca_free_cq_buf(dev, &tbuf, tcqe);
    } else
        ibcq->cqe = entries - 1;

out:
    mutex_unlock(&cq->mutex);

    return ret;
}

static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
    if (udata) {
        struct mthca_ucontext *context =
            rdma_udata_to_drv_context(
                udata,
                struct mthca_ucontext,
                ibucontext);

        mthca_unmap_user_db(to_mdev(cq->device),
                    &context->uar,
                    context->db_tab,
                    to_mcq(cq)->arm_db_index);
        mthca_unmap_user_db(to_mdev(cq->device),
                    &context->uar,
                    context->db_tab,
                    to_mcq(cq)->set_ci_db_index);
    }
    mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
    return 0;
}

static inline u32 convert_access(int acc)
{
    return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
           (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
           (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
           (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
           MTHCA_MPT_FLAG_LOCAL_READ;
}

static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
{
    struct mthca_mr *mr;
    int err;

    mr = kmalloc(sizeof *mr, GFP_KERNEL);
    if (!mr)
        return ERR_PTR(-ENOMEM);

    err = mthca_mr_alloc_notrans(to_mdev(pd->device),
                     to_mpd(pd)->pd_num,
                     convert_access(acc), mr);

    if (err) {
        kfree(mr);
        return ERR_PTR(err);
    }

    mr->umem = NULL;

    return &mr->ibmr;
}

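/*
 * Register a userspace memory region: pin the pages with ib_umem_get,
 * write their DMA addresses into an MTT in chunks sized to what
 * mthca_write_mtt can take, then allocate the MPT entry.
 */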
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                       u64 virt, int acc, struct ib_udata *udata)
{
    struct mthca_dev *dev = to_mdev(pd->device);
    struct ib_block_iter biter;
    struct mthca_ucontext *context = rdma_udata_to_drv_context(
        udata, struct mthca_ucontext, ibucontext);
    struct mthca_mr *mr;
    struct mthca_reg_mr ucmd;
    u64 *pages;
    int n, i;
    int err = 0;
    int write_mtt_size;

    if (udata->inlen < sizeof ucmd) {
        if (!context->reg_mr_warned) {
            mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
                   current->comm);
            mthca_warn(dev, "  Update libmthca to fix this.\n");
        }
        ++context->reg_mr_warned;
        ucmd.mr_attrs = 0;
    } else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
        return ERR_PTR(-EFAULT);

    mr = kmalloc(sizeof *mr, GFP_KERNEL);
    if (!mr)
        return ERR_PTR(-ENOMEM);

    mr->umem = ib_umem_get(pd->device, start, length, acc);
    if (IS_ERR(mr->umem)) {
        err = PTR_ERR(mr->umem);
        goto err;
    }

    n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);

    mr->mtt = mthca_alloc_mtt(dev, n);
    if (IS_ERR(mr->mtt)) {
        err = PTR_ERR(mr->mtt);
        goto err_umem;
    }

    pages = (u64 *) __get_free_page(GFP_KERNEL);
    if (!pages) {
        err = -ENOMEM;
        goto err_mtt;
    }

    i = n = 0;

    write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));

    rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) {
        pages[i++] = rdma_block_iter_dma_address(&biter);

        /*
         * Be friendly to write_mtt and pass it chunks
         * of appropriate size.
         */
        if (i == write_mtt_size) {
            err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
            if (err)
                goto mtt_done;
            n += i;
            i = 0;
        }
    }

    if (i)
        err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
mtt_done:
    free_page((unsigned long) pages);
    if (err)
        goto err_mtt;

    err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, PAGE_SHIFT, virt, length,
                 convert_access(acc), mr);

    if (err)
        goto err_mtt;

    return &mr->ibmr;

err_mtt:
    mthca_free_mtt(dev, mr->mtt);

err_umem:
    ib_umem_release(mr->umem);

err:
    kfree(mr);
    return ERR_PTR(err);
}

static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata)
{
    struct mthca_mr *mmr = to_mmr(mr);

    mthca_free_mr(to_mdev(mr->device), mmr);
    ib_umem_release(mmr->umem);
    kfree(mmr);

    return 0;
}

static ssize_t hw_rev_show(struct device *device,
               struct device_attribute *attr, char *buf)
{
    struct mthca_dev *dev =
        rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);

    return sysfs_emit(buf, "%x\n", dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);

static const char *hca_type_string(int hca_type)
{
    switch (hca_type) {
    case PCI_DEVICE_ID_MELLANOX_TAVOR:
        return "MT23108";
    case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
        return "MT25208 (MT23108 compat mode)";
    case PCI_DEVICE_ID_MELLANOX_ARBEL:
        return "MT25208";
    case PCI_DEVICE_ID_MELLANOX_SINAI:
    case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
        return "MT25204";
    }

    return "unknown";
}

static ssize_t hca_type_show(struct device *device,
                 struct device_attribute *attr, char *buf)
{
    struct mthca_dev *dev =
        rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);

    return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device));
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t board_id_show(struct device *device,
                 struct device_attribute *attr, char *buf)
{
    struct mthca_dev *dev =
        rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);

    return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *mthca_dev_attributes[] = {
    &dev_attr_hw_rev.attr,
    &dev_attr_hca_type.attr,
    &dev_attr_board_id.attr,
    NULL
};

static const struct attribute_group mthca_attr_group = {
    .attrs = mthca_dev_attributes,
};

static int mthca_init_node_data(struct mthca_dev *dev)
{
    struct ib_smp *in_mad  = NULL;
    struct ib_smp *out_mad = NULL;
    int err = -ENOMEM;

    in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
    out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
    if (!in_mad || !out_mad)
        goto out;

    ib_init_query_mad(in_mad);
    in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;

    err = mthca_MAD_IFC(dev, 1, 1,
                1, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);

    in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

    err = mthca_MAD_IFC(dev, 1, 1,
                1, NULL, NULL, in_mad, out_mad);
    if (err)
        goto out;

    if (mthca_is_memfree(dev))
        dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
    memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);

out:
    kfree(in_mad);
    kfree(out_mad);
    return err;
}

static int mthca_port_immutable(struct ib_device *ibdev, u32 port_num,
                    struct ib_port_immutable *immutable)
{
    struct ib_port_attr attr;
    int err;

    immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;

    err = ib_query_port(ibdev, port_num, &attr);
    if (err)
        return err;

    immutable->pkey_tbl_len = attr.pkey_tbl_len;
    immutable->gid_tbl_len = attr.gid_tbl_len;
    immutable->max_mad_size = IB_MGMT_MAD_SIZE;

    return 0;
}

static void get_dev_fw_str(struct ib_device *device, char *str)
{
    struct mthca_dev *dev =
        container_of(device, struct mthca_dev, ib_dev);
    snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
         (int) (dev->fw_ver >> 32),
         (int) (dev->fw_ver >> 16) & 0xffff,
         (int) dev->fw_ver & 0xffff);
}

static const struct ib_device_ops mthca_dev_ops = {
    .owner = THIS_MODULE,
    .driver_id = RDMA_DRIVER_MTHCA,
    .uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION,
    .uverbs_no_driver_id_binding = 1,

    .alloc_pd = mthca_alloc_pd,
    .alloc_ucontext = mthca_alloc_ucontext,
    .attach_mcast = mthca_multicast_attach,
    .create_ah = mthca_ah_create,
    .create_cq = mthca_create_cq,
    .create_qp = mthca_create_qp,
    .dealloc_pd = mthca_dealloc_pd,
    .dealloc_ucontext = mthca_dealloc_ucontext,
    .dereg_mr = mthca_dereg_mr,
    .destroy_ah = mthca_ah_destroy,
    .destroy_cq = mthca_destroy_cq,
    .destroy_qp = mthca_destroy_qp,
    .detach_mcast = mthca_multicast_detach,
    .device_group = &mthca_attr_group,
    .get_dev_fw_str = get_dev_fw_str,
    .get_dma_mr = mthca_get_dma_mr,
    .get_port_immutable = mthca_port_immutable,
    .mmap = mthca_mmap_uar,
    .modify_device = mthca_modify_device,
    .modify_port = mthca_modify_port,
    .modify_qp = mthca_modify_qp,
    .poll_cq = mthca_poll_cq,
    .process_mad = mthca_process_mad,
    .query_ah = mthca_ah_query,
    .query_device = mthca_query_device,
    .query_gid = mthca_query_gid,
    .query_pkey = mthca_query_pkey,
    .query_port = mthca_query_port,
    .query_qp = mthca_query_qp,
    .reg_user_mr = mthca_reg_user_mr,
    .resize_cq = mthca_resize_cq,

    INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah),
    INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq),
    INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
    INIT_RDMA_OBJ_SIZE(ib_qp, mthca_qp, ibqp),
    INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext),
};

static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
    .create_srq = mthca_create_srq,
    .destroy_srq = mthca_destroy_srq,
    .modify_srq = mthca_modify_srq,
    .post_srq_recv = mthca_arbel_post_srq_recv,
    .query_srq = mthca_query_srq,

    INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq),
};

static const struct ib_device_ops mthca_dev_tavor_srq_ops = {
    .create_srq = mthca_create_srq,
    .destroy_srq = mthca_destroy_srq,
    .modify_srq = mthca_modify_srq,
    .post_srq_recv = mthca_tavor_post_srq_recv,
    .query_srq = mthca_query_srq,

    INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq),
};

static const struct ib_device_ops mthca_dev_arbel_ops = {
    .post_recv = mthca_arbel_post_receive,
    .post_send = mthca_arbel_post_send,
    .req_notify_cq = mthca_arbel_arm_cq,
};

static const struct ib_device_ops mthca_dev_tavor_ops = {
    .post_recv = mthca_tavor_post_receive,
    .post_send = mthca_tavor_post_send,
    .req_notify_cq = mthca_tavor_arm_cq,
};

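/*
 * Register the HCA with the RDMA core, choosing Arbel (mem-free) or
 * Tavor data-path ops and the matching SRQ ops, then start the
 * catastrophic-error poll timer.
 */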
int mthca_register_device(struct mthca_dev *dev)
{
    int ret;

    ret = mthca_init_node_data(dev);
    if (ret)
        return ret;

    dev->ib_dev.node_type            = RDMA_NODE_IB_CA;
    dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
    dev->ib_dev.num_comp_vectors     = 1;
    dev->ib_dev.dev.parent           = &dev->pdev->dev;

    if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
        if (mthca_is_memfree(dev))
            ib_set_device_ops(&dev->ib_dev,
                      &mthca_dev_arbel_srq_ops);
        else
            ib_set_device_ops(&dev->ib_dev,
                      &mthca_dev_tavor_srq_ops);
    }

    ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops);

    if (mthca_is_memfree(dev))
        ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops);
    else
        ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops);

    mutex_init(&dev->cap_mask_mutex);

    ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev);
    if (ret)
        return ret;

    mthca_start_catas_poll(dev);

    return 0;
}

void mthca_unregister_device(struct mthca_dev *dev)
{
    mthca_stop_catas_poll(dev);
    ib_unregister_device(&dev->ib_dev);
}