0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
0004  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
0005  */
0006 
0007 #include <linux/dma-mapping.h>
0008 #include <net/addrconf.h>
0009 #include <rdma/uverbs_ioctl.h>
0010 
0011 #include "rxe.h"
0012 #include "rxe_queue.h"
0013 #include "rxe_hw_counters.h"
0014 
0015 static int rxe_query_device(struct ib_device *dev,
0016                 struct ib_device_attr *attr,
0017                 struct ib_udata *uhw)
0018 {
0019     struct rxe_dev *rxe = to_rdev(dev);
0020 
0021     if (uhw->inlen || uhw->outlen)
0022         return -EINVAL;
0023 
0024     *attr = rxe->attr;
0025     return 0;
0026 }
0027 
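/*
 * Report port attributes from the cached copy in rxe->port, then refresh
 * the active speed/width from the underlying Ethernet device and derive
 * the physical state from the IB port state and the netdev IFF_UP flag,
 * all under usdev_lock.
 */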
0028 static int rxe_query_port(struct ib_device *dev,
0029               u32 port_num, struct ib_port_attr *attr)
0030 {
0031     struct rxe_dev *rxe = to_rdev(dev);
0032     int rc;
0033 
0034     /* *attr is zeroed by the caller; avoid zeroing it here */
0035     *attr = rxe->port.attr;
0036 
0037     mutex_lock(&rxe->usdev_lock);
0038     rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
0039                   &attr->active_width);
0040 
0041     if (attr->state == IB_PORT_ACTIVE)
0042         attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
0043     else if (dev_get_flags(rxe->ndev) & IFF_UP)
0044         attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
0045     else
0046         attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
0047 
0048     mutex_unlock(&rxe->usdev_lock);
0049 
0050     return rc;
0051 }
0052 
0053 static int rxe_query_pkey(struct ib_device *device,
0054               u32 port_num, u16 index, u16 *pkey)
0055 {
0056     if (index > 0)
0057         return -EINVAL;
0058 
0059     *pkey = IB_DEFAULT_PKEY_FULL;
0060     return 0;
0061 }
0062 
0063 static int rxe_modify_device(struct ib_device *dev,
0064                  int mask, struct ib_device_modify *attr)
0065 {
0066     struct rxe_dev *rxe = to_rdev(dev);
0067 
0068     if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
0069              IB_DEVICE_MODIFY_NODE_DESC))
0070         return -EOPNOTSUPP;
0071 
0072     if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
0073         rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);
0074 
0075     if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
0076         memcpy(rxe->ib_dev.node_desc,
0077                attr->node_desc, sizeof(rxe->ib_dev.node_desc));
0078     }
0079 
0080     return 0;
0081 }
0082 
0083 static int rxe_modify_port(struct ib_device *dev,
0084                u32 port_num, int mask, struct ib_port_modify *attr)
0085 {
0086     struct rxe_dev *rxe = to_rdev(dev);
0087     struct rxe_port *port;
0088 
0089     port = &rxe->port;
0090 
0091     port->attr.port_cap_flags |= attr->set_port_cap_mask;
0092     port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;
0093 
0094     if (mask & IB_PORT_RESET_QKEY_CNTR)
0095         port->attr.qkey_viol_cntr = 0;
0096 
0097     return 0;
0098 }
0099 
0100 static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
0101                            u32 port_num)
0102 {
0103     return IB_LINK_LAYER_ETHERNET;
0104 }
0105 
0106 static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
0107 {
0108     struct rxe_dev *rxe = to_rdev(ibuc->device);
0109     struct rxe_ucontext *uc = to_ruc(ibuc);
0110 
0111     return rxe_add_to_pool(&rxe->uc_pool, uc);
0112 }
0113 
0114 static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
0115 {
0116     struct rxe_ucontext *uc = to_ruc(ibuc);
0117 
0118     rxe_cleanup(uc);
0119 }
0120 
0121 static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
0122                   struct ib_port_immutable *immutable)
0123 {
0124     int err;
0125     struct ib_port_attr attr;
0126 
0127     immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
0128 
0129     err = ib_query_port(dev, port_num, &attr);
0130     if (err)
0131         return err;
0132 
0133     immutable->pkey_tbl_len = attr.pkey_tbl_len;
0134     immutable->gid_tbl_len = attr.gid_tbl_len;
0135     immutable->max_mad_size = IB_MGMT_MAD_SIZE;
0136 
0137     return 0;
0138 }
0139 
0140 static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0141 {
0142     struct rxe_dev *rxe = to_rdev(ibpd->device);
0143     struct rxe_pd *pd = to_rpd(ibpd);
0144 
0145     return rxe_add_to_pool(&rxe->pd_pool, pd);
0146 }
0147 
0148 static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0149 {
0150     struct rxe_pd *pd = to_rpd(ibpd);
0151 
0152     rxe_cleanup(pd);
0153     return 0;
0154 }
0155 
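/*
 * Create an address handle.  A "new" user provider is detected by a
 * response buffer large enough for rxe_create_ah_resp; in that case the
 * pool index is reported back as ah_num, while old user providers get
 * ah_num = 0.
 */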
0156 static int rxe_create_ah(struct ib_ah *ibah,
0157              struct rdma_ah_init_attr *init_attr,
0158              struct ib_udata *udata)
0159 
0160 {
0161     struct rxe_dev *rxe = to_rdev(ibah->device);
0162     struct rxe_ah *ah = to_rah(ibah);
0163     struct rxe_create_ah_resp __user *uresp = NULL;
0164     int err;
0165 
0166     if (udata) {
0167         /* test if new user provider */
0168         if (udata->outlen >= sizeof(*uresp))
0169             uresp = udata->outbuf;
0170         ah->is_user = true;
0171     } else {
0172         ah->is_user = false;
0173     }
0174 
0175     err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
0176     if (err)
0177         return err;
0178 
0179     err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
0180             init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
0181     if (err)
0182         return err;
0183 
0184     /* the allocated pool index is always > 0 and is used as the AH number */
0185     ah->ah_num = ah->elem.index;
0186 
0187     if (uresp) {
0188         /* only if new user provider */
0189         err = copy_to_user(&uresp->ah_num, &ah->ah_num,
0190                      sizeof(uresp->ah_num));
0191         if (err) {
0192             rxe_cleanup(ah);
0193             return -EFAULT;
0194         }
0195     } else if (ah->is_user) {
0196         /* only if old user provider */
0197         ah->ah_num = 0;
0198     }
0199 
0200     rxe_init_av(init_attr->ah_attr, &ah->av);
0201     rxe_finalize(ah);
0202 
0203     return 0;
0204 }
0205 
0206 static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
0207 {
0208     int err;
0209     struct rxe_dev *rxe = to_rdev(ibah->device);
0210     struct rxe_ah *ah = to_rah(ibah);
0211 
0212     err = rxe_av_chk_attr(rxe, attr);
0213     if (err)
0214         return err;
0215 
0216     rxe_init_av(attr, &ah->av);
0217     return 0;
0218 }
0219 
0220 static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
0221 {
0222     struct rxe_ah *ah = to_rah(ibah);
0223 
0224     memset(attr, 0, sizeof(*attr));
0225     attr->type = ibah->type;
0226     rxe_av_to_attr(&ah->av, attr);
0227     return 0;
0228 }
0229 
0230 static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
0231 {
0232     struct rxe_ah *ah = to_rah(ibah);
0233 
0234     rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);
0235 
0236     return 0;
0237 }
0238 
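/*
 * Build one receive WQE at the producer slot of the receive queue.
 * Returns -ENOMEM if the queue is full and -EINVAL if the work request
 * carries more SGEs than the queue supports.
 */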
0239 static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
0240 {
0241     int err;
0242     int i;
0243     u32 length;
0244     struct rxe_recv_wqe *recv_wqe;
0245     int num_sge = ibwr->num_sge;
0246     int full;
0247 
0248     full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
0249     if (unlikely(full)) {
0250         err = -ENOMEM;
0251         goto err1;
0252     }
0253 
0254     if (unlikely(num_sge > rq->max_sge)) {
0255         err = -EINVAL;
0256         goto err1;
0257     }
0258 
0259     length = 0;
0260     for (i = 0; i < num_sge; i++)
0261         length += ibwr->sg_list[i].length;
0262 
0263     recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
0264     recv_wqe->wr_id = ibwr->wr_id;
0265     recv_wqe->num_sge = num_sge;
0266 
0267     memcpy(recv_wqe->dma.sge, ibwr->sg_list,
0268            num_sge * sizeof(struct ib_sge));
0269 
0270     recv_wqe->dma.length        = length;
0271     recv_wqe->dma.resid     = length;
0272     recv_wqe->dma.num_sge       = num_sge;
0273     recv_wqe->dma.cur_sge       = 0;
0274     recv_wqe->dma.sge_offset    = 0;
0275 
0276     queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
0277 
0278     return 0;
0279 
0280 err1:
0281     return err;
0282 }
0283 
0284 static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
0285               struct ib_udata *udata)
0286 {
0287     int err;
0288     struct rxe_dev *rxe = to_rdev(ibsrq->device);
0289     struct rxe_pd *pd = to_rpd(ibsrq->pd);
0290     struct rxe_srq *srq = to_rsrq(ibsrq);
0291     struct rxe_create_srq_resp __user *uresp = NULL;
0292 
0293     if (udata) {
0294         if (udata->outlen < sizeof(*uresp))
0295             return -EINVAL;
0296         uresp = udata->outbuf;
0297     }
0298 
0299     if (init->srq_type != IB_SRQT_BASIC)
0300         return -EOPNOTSUPP;
0301 
0302     err = rxe_srq_chk_init(rxe, init);
0303     if (err)
0304         return err;
0305 
0306     err = rxe_add_to_pool(&rxe->srq_pool, srq);
0307     if (err)
0308         return err;
0309 
0310     rxe_get(pd);
0311     srq->pd = pd;
0312 
0313     err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
0314     if (err)
0315         goto err_cleanup;
0316 
0317     return 0;
0318 
0319 err_cleanup:
0320     rxe_cleanup(srq);
0321 
0322     return err;
0323 }
0324 
0325 static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
0326               enum ib_srq_attr_mask mask,
0327               struct ib_udata *udata)
0328 {
0329     int err;
0330     struct rxe_srq *srq = to_rsrq(ibsrq);
0331     struct rxe_dev *rxe = to_rdev(ibsrq->device);
0332     struct rxe_modify_srq_cmd ucmd = {};
0333 
0334     if (udata) {
0335         if (udata->inlen < sizeof(ucmd))
0336             return -EINVAL;
0337 
0338         err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
0339         if (err)
0340             return err;
0341     }
0342 
0343     err = rxe_srq_chk_attr(rxe, srq, attr, mask);
0344     if (err)
0345         return err;
0346 
0347     err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
0348     if (err)
0349         return err;
0350     return 0;
0351 }
0352 
0353 static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
0354 {
0355     struct rxe_srq *srq = to_rsrq(ibsrq);
0356 
0357     if (srq->error)
0358         return -EINVAL;
0359 
0360     attr->max_wr = srq->rq.queue->buf->index_mask;
0361     attr->max_sge = srq->rq.max_sge;
0362     attr->srq_limit = srq->limit;
0363     return 0;
0364 }
0365 
0366 static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
0367 {
0368     struct rxe_srq *srq = to_rsrq(ibsrq);
0369 
0370     rxe_cleanup(srq);
0371     return 0;
0372 }
0373 
0374 static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
0375                  const struct ib_recv_wr **bad_wr)
0376 {
0377     int err = 0;
0378     struct rxe_srq *srq = to_rsrq(ibsrq);
0379     unsigned long flags;
0380 
0381     spin_lock_irqsave(&srq->rq.producer_lock, flags);
0382 
0383     while (wr) {
0384         err = post_one_recv(&srq->rq, wr);
0385         if (unlikely(err))
0386             break;
0387         wr = wr->next;
0388     }
0389 
0390     spin_unlock_irqrestore(&srq->rq.producer_lock, flags);
0391 
0392     if (err)
0393         *bad_wr = wr;
0394 
0395     return err;
0396 }
0397 
0398 static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
0399              struct ib_udata *udata)
0400 {
0401     int err;
0402     struct rxe_dev *rxe = to_rdev(ibqp->device);
0403     struct rxe_pd *pd = to_rpd(ibqp->pd);
0404     struct rxe_qp *qp = to_rqp(ibqp);
0405     struct rxe_create_qp_resp __user *uresp = NULL;
0406 
0407     if (udata) {
0408         if (udata->outlen < sizeof(*uresp))
0409             return -EINVAL;
0410         uresp = udata->outbuf;
0411     }
0412 
0413     if (init->create_flags)
0414         return -EOPNOTSUPP;
0415 
0416     err = rxe_qp_chk_init(rxe, init);
0417     if (err)
0418         return err;
0419 
0420     if (udata) {
0421         if (udata->inlen)
0422             return -EINVAL;
0423 
0424         qp->is_user = true;
0425     } else {
0426         qp->is_user = false;
0427     }
0428 
0429     err = rxe_add_to_pool(&rxe->qp_pool, qp);
0430     if (err)
0431         return err;
0432 
0433     err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
0434     if (err)
0435         goto qp_init;
0436 
0437     rxe_finalize(qp);
0438     return 0;
0439 
0440 qp_init:
0441     rxe_cleanup(qp);
0442     return err;
0443 }
0444 
0445 static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
0446              int mask, struct ib_udata *udata)
0447 {
0448     int err;
0449     struct rxe_dev *rxe = to_rdev(ibqp->device);
0450     struct rxe_qp *qp = to_rqp(ibqp);
0451 
0452     if (mask & ~IB_QP_ATTR_STANDARD_BITS)
0453         return -EOPNOTSUPP;
0454 
0455     err = rxe_qp_chk_attr(rxe, qp, attr, mask);
0456     if (err)
0457         goto err1;
0458 
0459     err = rxe_qp_from_attr(qp, attr, mask, udata);
0460     if (err)
0461         goto err1;
0462 
0463     if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
0464         qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
0465                           qp->ibqp.qp_num,
0466                           qp->attr.dest_qp_num);
0467 
0468     return 0;
0469 
0470 err1:
0471     return err;
0472 }
0473 
0474 static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
0475             int mask, struct ib_qp_init_attr *init)
0476 {
0477     struct rxe_qp *qp = to_rqp(ibqp);
0478 
0479     rxe_qp_to_init(qp, init);
0480     rxe_qp_to_attr(qp, attr, mask);
0481 
0482     return 0;
0483 }
0484 
0485 static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
0486 {
0487     struct rxe_qp *qp = to_rqp(ibqp);
0488     int ret;
0489 
0490     ret = rxe_qp_chk_destroy(qp);
0491     if (ret)
0492         return ret;
0493 
0494     rxe_cleanup(qp);
0495     return 0;
0496 }
0497 
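/*
 * Sanity-check a send work request: SGE count against sq->max_sge,
 * atomic operands (at least 8 bytes at an 8-byte aligned remote address)
 * and inline payload length against sq->max_inline.
 */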
0498 static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
0499                 unsigned int mask, unsigned int length)
0500 {
0501     int num_sge = ibwr->num_sge;
0502     struct rxe_sq *sq = &qp->sq;
0503 
0504     if (unlikely(num_sge > sq->max_sge))
0505         goto err1;
0506 
0507     if (unlikely(mask & WR_ATOMIC_MASK)) {
0508         if (length < 8)
0509             goto err1;
0510 
0511         if (atomic_wr(ibwr)->remote_addr & 0x7)
0512             goto err1;
0513     }
0514 
0515     if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
0516              (length > sq->max_inline)))
0517         goto err1;
0518 
0519     return 0;
0520 
0521 err1:
0522     return -EINVAL;
0523 }
0524 
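/*
 * Translate an ib_send_wr into the driver's rxe_send_wr representation.
 * UD/GSI QPs carry the destination QPN, Q_Key and AH number; other QP
 * types copy the opcode-specific RDMA, atomic, invalidate or fast-reg
 * fields.
 */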
0525 static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
0526              const struct ib_send_wr *ibwr)
0527 {
0528     wr->wr_id = ibwr->wr_id;
0529     wr->num_sge = ibwr->num_sge;
0530     wr->opcode = ibwr->opcode;
0531     wr->send_flags = ibwr->send_flags;
0532 
0533     if (qp_type(qp) == IB_QPT_UD ||
0534         qp_type(qp) == IB_QPT_GSI) {
0535         struct ib_ah *ibah = ud_wr(ibwr)->ah;
0536 
0537         wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
0538         wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
0539         wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
0540         if (qp_type(qp) == IB_QPT_GSI)
0541             wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
0542         if (wr->opcode == IB_WR_SEND_WITH_IMM)
0543             wr->ex.imm_data = ibwr->ex.imm_data;
0544     } else {
0545         switch (wr->opcode) {
0546         case IB_WR_RDMA_WRITE_WITH_IMM:
0547             wr->ex.imm_data = ibwr->ex.imm_data;
0548             fallthrough;
0549         case IB_WR_RDMA_READ:
0550         case IB_WR_RDMA_WRITE:
0551             wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
0552             wr->wr.rdma.rkey    = rdma_wr(ibwr)->rkey;
0553             break;
0554         case IB_WR_SEND_WITH_IMM:
0555             wr->ex.imm_data = ibwr->ex.imm_data;
0556             break;
0557         case IB_WR_SEND_WITH_INV:
0558             wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
0559             break;
0560         case IB_WR_ATOMIC_CMP_AND_SWP:
0561         case IB_WR_ATOMIC_FETCH_AND_ADD:
0562             wr->wr.atomic.remote_addr =
0563                 atomic_wr(ibwr)->remote_addr;
0564             wr->wr.atomic.compare_add =
0565                 atomic_wr(ibwr)->compare_add;
0566             wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
0567             wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
0568             break;
0569         case IB_WR_LOCAL_INV:
0570             wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
0571             break;
0572         case IB_WR_REG_MR:
0573             wr->wr.reg.mr = reg_wr(ibwr)->mr;
0574             wr->wr.reg.key = reg_wr(ibwr)->key;
0575             wr->wr.reg.access = reg_wr(ibwr)->access;
0576             break;
0577         default:
0578             break;
0579         }
0580     }
0581 }
0582 
0583 static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
0584                     const struct ib_send_wr *ibwr)
0585 {
0586     struct ib_sge *sge = ibwr->sg_list;
0587     u8 *p = wqe->dma.inline_data;
0588     int i;
0589 
0590     for (i = 0; i < ibwr->num_sge; i++, sge++) {
0591         memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
0592         p += sge->length;
0593     }
0594 }
0595 
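/*
 * Fill in a send WQE.  Local operations carry no payload, so only the
 * mask and state are set; otherwise the SGE list (or the inline data)
 * is copied and the DMA state is initialized.
 */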
0596 static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
0597              unsigned int mask, unsigned int length,
0598              struct rxe_send_wqe *wqe)
0599 {
0600     int num_sge = ibwr->num_sge;
0601 
0602     init_send_wr(qp, &wqe->wr, ibwr);
0603 
0604     /* local operation */
0605     if (unlikely(mask & WR_LOCAL_OP_MASK)) {
0606         wqe->mask = mask;
0607         wqe->state = wqe_state_posted;
0608         return;
0609     }
0610 
0611     if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
0612         copy_inline_data_to_wqe(wqe, ibwr);
0613     else
0614         memcpy(wqe->dma.sge, ibwr->sg_list,
0615                num_sge * sizeof(struct ib_sge));
0616 
0617     wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
0618         mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
0619     wqe->mask       = mask;
0620     wqe->dma.length     = length;
0621     wqe->dma.resid      = length;
0622     wqe->dma.num_sge    = num_sge;
0623     wqe->dma.cur_sge    = 0;
0624     wqe->dma.sge_offset = 0;
0625     wqe->state      = wqe_state_posted;
0626     wqe->ssn        = atomic_add_return(1, &qp->ssn);
0627 }
0628 
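/*
 * Validate a send work request and, under the send queue lock, write it
 * into the producer slot of the send queue.  Returns -ENOMEM when the
 * queue is full.
 */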
0629 static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
0630              unsigned int mask, u32 length)
0631 {
0632     int err;
0633     struct rxe_sq *sq = &qp->sq;
0634     struct rxe_send_wqe *send_wqe;
0635     unsigned long flags;
0636     int full;
0637 
0638     err = validate_send_wr(qp, ibwr, mask, length);
0639     if (err)
0640         return err;
0641 
0642     spin_lock_irqsave(&qp->sq.sq_lock, flags);
0643 
0644     full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);
0645 
0646     if (unlikely(full)) {
0647         spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
0648         return -ENOMEM;
0649     }
0650 
0651     send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER);
0652     init_send_wqe(qp, ibwr, mask, length, send_wqe);
0653 
0654     queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);
0655 
0656     spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
0657 
0658     return 0;
0659 }
0660 
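/*
 * Post a chain of send work requests from kernel context.  On the first
 * failure *bad_wr points at the offending request.  The requester task
 * is kicked afterwards; if the QP is in the error state the completer
 * task is scheduled as well.
 */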
0661 static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
0662                 const struct ib_send_wr **bad_wr)
0663 {
0664     int err = 0;
0665     unsigned int mask;
0666     unsigned int length = 0;
0667     int i;
0668     struct ib_send_wr *next;
0669 
0670     while (wr) {
0671         mask = wr_opcode_mask(wr->opcode, qp);
0672         if (unlikely(!mask)) {
0673             err = -EINVAL;
0674             *bad_wr = wr;
0675             break;
0676         }
0677 
0678         if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
0679                  !(mask & WR_INLINE_MASK))) {
0680             err = -EINVAL;
0681             *bad_wr = wr;
0682             break;
0683         }
0684 
0685         next = wr->next;
0686 
0687         length = 0;
0688         for (i = 0; i < wr->num_sge; i++)
0689             length += wr->sg_list[i].length;
0690 
0691         err = post_one_send(qp, wr, mask, length);
0692 
0693         if (err) {
0694             *bad_wr = wr;
0695             break;
0696         }
0697         wr = next;
0698     }
0699 
0700     rxe_run_task(&qp->req.task, 1);
0701     if (unlikely(qp->req.state == QP_STATE_ERROR))
0702         rxe_run_task(&qp->comp.task, 1);
0703 
0704     return err;
0705 }
0706 
0707 static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
0708              const struct ib_send_wr **bad_wr)
0709 {
0710     struct rxe_qp *qp = to_rqp(ibqp);
0711 
0712     if (unlikely(!qp->valid)) {
0713         *bad_wr = wr;
0714         return -EINVAL;
0715     }
0716 
0717     if (unlikely(qp->req.state < QP_STATE_READY)) {
0718         *bad_wr = wr;
0719         return -EINVAL;
0720     }
0721 
0722     if (qp->is_user) {
0723         /* Utilize process context to do protocol processing */
0724         rxe_run_task(&qp->req.task, 0);
0725         return 0;
0726     }
0727     return rxe_post_send_kernel(qp, wr, bad_wr);
0728 }
0729 
0730 static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
0731              const struct ib_recv_wr **bad_wr)
0732 {
0733     int err = 0;
0734     struct rxe_qp *qp = to_rqp(ibqp);
0735     struct rxe_rq *rq = &qp->rq;
0736     unsigned long flags;
0737 
0738     if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
0739         *bad_wr = wr;
0740         err = -EINVAL;
0741         goto err1;
0742     }
0743 
0744     if (unlikely(qp->srq)) {
0745         *bad_wr = wr;
0746         err = -EINVAL;
0747         goto err1;
0748     }
0749 
0750     spin_lock_irqsave(&rq->producer_lock, flags);
0751 
0752     while (wr) {
0753         err = post_one_recv(rq, wr);
0754         if (unlikely(err)) {
0755             *bad_wr = wr;
0756             break;
0757         }
0758         wr = wr->next;
0759     }
0760 
0761     spin_unlock_irqrestore(&rq->producer_lock, flags);
0762 
0763     if (qp->resp.state == QP_STATE_ERROR)
0764         rxe_run_task(&qp->resp.task, 1);
0765 
0766 err1:
0767     return err;
0768 }
0769 
0770 static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
0771              struct ib_udata *udata)
0772 {
0773     int err;
0774     struct ib_device *dev = ibcq->device;
0775     struct rxe_dev *rxe = to_rdev(dev);
0776     struct rxe_cq *cq = to_rcq(ibcq);
0777     struct rxe_create_cq_resp __user *uresp = NULL;
0778 
0779     if (udata) {
0780         if (udata->outlen < sizeof(*uresp))
0781             return -EINVAL;
0782         uresp = udata->outbuf;
0783     }
0784 
0785     if (attr->flags)
0786         return -EOPNOTSUPP;
0787 
0788     err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
0789     if (err)
0790         return err;
0791 
0792     err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
0793                    uresp);
0794     if (err)
0795         return err;
0796 
0797     return rxe_add_to_pool(&rxe->cq_pool, cq);
0798 }
0799 
0800 static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
0801 {
0802     struct rxe_cq *cq = to_rcq(ibcq);
0803 
0804     /* See IBA C11-17: The CI shall return an error if this Verb is
0805      * invoked while a Work Queue is still associated with the CQ.
0806      */
0807     if (atomic_read(&cq->num_wq))
0808         return -EINVAL;
0809 
0810     rxe_cq_disable(cq);
0811 
0812     rxe_cleanup(cq);
0813     return 0;
0814 }
0815 
0816 static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
0817 {
0818     int err;
0819     struct rxe_cq *cq = to_rcq(ibcq);
0820     struct rxe_dev *rxe = to_rdev(ibcq->device);
0821     struct rxe_resize_cq_resp __user *uresp = NULL;
0822 
0823     if (udata) {
0824         if (udata->outlen < sizeof(*uresp))
0825             return -EINVAL;
0826         uresp = udata->outbuf;
0827     }
0828 
0829     err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
0830     if (err)
0831         goto err1;
0832 
0833     err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
0834     if (err)
0835         goto err1;
0836 
0837     return 0;
0838 
0839 err1:
0840     return err;
0841 }
0842 
0843 static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
0844 {
0845     int i;
0846     struct rxe_cq *cq = to_rcq(ibcq);
0847     struct rxe_cqe *cqe;
0848     unsigned long flags;
0849 
0850     spin_lock_irqsave(&cq->cq_lock, flags);
0851     for (i = 0; i < num_entries; i++) {
0852         cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
0853         if (!cqe)
0854             break;
0855 
0856         memcpy(wc++, &cqe->ibwc, sizeof(*wc));
0857         queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
0858     }
0859     spin_unlock_irqrestore(&cq->cq_lock, flags);
0860 
0861     return i;
0862 }
0863 
0864 static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
0865 {
0866     struct rxe_cq *cq = to_rcq(ibcq);
0867     int count;
0868 
0869     count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER);
0870 
0871     return (count > wc_cnt) ? wc_cnt : count;
0872 }
0873 
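/*
 * Arm the CQ.  A pending IB_CQ_NEXT_COMP request is never downgraded to
 * solicited-only.  When IB_CQ_REPORT_MISSED_EVENTS is set, return 1 if
 * completions are already queued.
 */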
0874 static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
0875 {
0876     struct rxe_cq *cq = to_rcq(ibcq);
0877     int ret = 0;
0878     int empty;
0879     unsigned long irq_flags;
0880 
0881     spin_lock_irqsave(&cq->cq_lock, irq_flags);
0882     if (cq->notify != IB_CQ_NEXT_COMP)
0883         cq->notify = flags & IB_CQ_SOLICITED_MASK;
0884 
0885     empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER);
0886 
0887     if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
0888         ret = 1;
0889 
0890     spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
0891 
0892     return ret;
0893 }
0894 
0895 static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
0896 {
0897     struct rxe_dev *rxe = to_rdev(ibpd->device);
0898     struct rxe_pd *pd = to_rpd(ibpd);
0899     struct rxe_mr *mr;
0900 
0901     mr = rxe_alloc(&rxe->mr_pool);
0902     if (!mr)
0903         return ERR_PTR(-ENOMEM);
0904 
0905     rxe_get(pd);
0906     rxe_mr_init_dma(pd, access, mr);
0907     rxe_finalize(mr);
0908 
0909     return &mr->ibmr;
0910 }
0911 
0912 static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
0913                      u64 start,
0914                      u64 length,
0915                      u64 iova,
0916                      int access, struct ib_udata *udata)
0917 {
0918     int err;
0919     struct rxe_dev *rxe = to_rdev(ibpd->device);
0920     struct rxe_pd *pd = to_rpd(ibpd);
0921     struct rxe_mr *mr;
0922 
0923     mr = rxe_alloc(&rxe->mr_pool);
0924     if (!mr) {
0925         err = -ENOMEM;
0926         goto err2;
0927     }
0928 
0929 
0930     rxe_get(pd);
0931 
0932     err = rxe_mr_init_user(pd, start, length, iova, access, mr);
0933     if (err)
0934         goto err3;
0935 
0936     rxe_finalize(mr);
0937 
0938     return &mr->ibmr;
0939 
0940 err3:
0941     rxe_put(pd);
0942     rxe_cleanup(mr);
0943 err2:
0944     return ERR_PTR(err);
0945 }
0946 
0947 static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
0948                   u32 max_num_sg)
0949 {
0950     struct rxe_dev *rxe = to_rdev(ibpd->device);
0951     struct rxe_pd *pd = to_rpd(ibpd);
0952     struct rxe_mr *mr;
0953     int err;
0954 
0955     if (mr_type != IB_MR_TYPE_MEM_REG)
0956         return ERR_PTR(-EINVAL);
0957 
0958     mr = rxe_alloc(&rxe->mr_pool);
0959     if (!mr) {
0960         err = -ENOMEM;
0961         goto err1;
0962     }
0963 
0964     rxe_get(pd);
0965 
0966     err = rxe_mr_init_fast(pd, max_num_sg, mr);
0967     if (err)
0968         goto err2;
0969 
0970     rxe_finalize(mr);
0971 
0972     return &mr->ibmr;
0973 
0974 err2:
0975     rxe_put(pd);
0976     rxe_cleanup(mr);
0977 err1:
0978     return ERR_PTR(err);
0979 }
0980 
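/*
 * Page callback for ib_sg_to_pages(): record one page-sized buffer in
 * the MR's two-level map table (RXE_BUF_PER_MAP entries per map).
 */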
0981 static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
0982 {
0983     struct rxe_mr *mr = to_rmr(ibmr);
0984     struct rxe_map *map;
0985     struct rxe_phys_buf *buf;
0986 
0987     if (unlikely(mr->nbuf == mr->num_buf))
0988         return -ENOMEM;
0989 
0990     map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
0991     buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];
0992 
0993     buf->addr = addr;
0994     buf->size = ibmr->page_size;
0995     mr->nbuf++;
0996 
0997     return 0;
0998 }
0999 
1000 static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1001              int sg_nents, unsigned int *sg_offset)
1002 {
1003     struct rxe_mr *mr = to_rmr(ibmr);
1004     int n;
1005 
1006     mr->nbuf = 0;
1007 
1008     n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);
1009 
1010     mr->va = ibmr->iova;
1011     mr->iova = ibmr->iova;
1012     mr->length = ibmr->length;
1013     mr->page_shift = ilog2(ibmr->page_size);
1014     mr->page_mask = ibmr->page_size - 1;
1015     mr->offset = mr->iova & mr->page_mask;
1016 
1017     return n;
1018 }
1019 
1020 static ssize_t parent_show(struct device *device,
1021                struct device_attribute *attr, char *buf)
1022 {
1023     struct rxe_dev *rxe =
1024         rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
1025 
1026     return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
1027 }
1028 
1029 static DEVICE_ATTR_RO(parent);
1030 
1031 static struct attribute *rxe_dev_attributes[] = {
1032     &dev_attr_parent.attr,
1033     NULL
1034 };
1035 
1036 static const struct attribute_group rxe_attr_group = {
1037     .attrs = rxe_dev_attributes,
1038 };
1039 
1040 static int rxe_enable_driver(struct ib_device *ib_dev)
1041 {
1042     struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
1043 
1044     rxe_set_port_state(rxe);
1045     dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
1046     return 0;
1047 }
1048 
1049 static const struct ib_device_ops rxe_dev_ops = {
1050     .owner = THIS_MODULE,
1051     .driver_id = RDMA_DRIVER_RXE,
1052     .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,
1053 
1054     .alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
1055     .alloc_mr = rxe_alloc_mr,
1056     .alloc_mw = rxe_alloc_mw,
1057     .alloc_pd = rxe_alloc_pd,
1058     .alloc_ucontext = rxe_alloc_ucontext,
1059     .attach_mcast = rxe_attach_mcast,
1060     .create_ah = rxe_create_ah,
1061     .create_cq = rxe_create_cq,
1062     .create_qp = rxe_create_qp,
1063     .create_srq = rxe_create_srq,
1064     .create_user_ah = rxe_create_ah,
1065     .dealloc_driver = rxe_dealloc,
1066     .dealloc_mw = rxe_dealloc_mw,
1067     .dealloc_pd = rxe_dealloc_pd,
1068     .dealloc_ucontext = rxe_dealloc_ucontext,
1069     .dereg_mr = rxe_dereg_mr,
1070     .destroy_ah = rxe_destroy_ah,
1071     .destroy_cq = rxe_destroy_cq,
1072     .destroy_qp = rxe_destroy_qp,
1073     .destroy_srq = rxe_destroy_srq,
1074     .detach_mcast = rxe_detach_mcast,
1075     .device_group = &rxe_attr_group,
1076     .enable_driver = rxe_enable_driver,
1077     .get_dma_mr = rxe_get_dma_mr,
1078     .get_hw_stats = rxe_ib_get_hw_stats,
1079     .get_link_layer = rxe_get_link_layer,
1080     .get_port_immutable = rxe_port_immutable,
1081     .map_mr_sg = rxe_map_mr_sg,
1082     .mmap = rxe_mmap,
1083     .modify_ah = rxe_modify_ah,
1084     .modify_device = rxe_modify_device,
1085     .modify_port = rxe_modify_port,
1086     .modify_qp = rxe_modify_qp,
1087     .modify_srq = rxe_modify_srq,
1088     .peek_cq = rxe_peek_cq,
1089     .poll_cq = rxe_poll_cq,
1090     .post_recv = rxe_post_recv,
1091     .post_send = rxe_post_send,
1092     .post_srq_recv = rxe_post_srq_recv,
1093     .query_ah = rxe_query_ah,
1094     .query_device = rxe_query_device,
1095     .query_pkey = rxe_query_pkey,
1096     .query_port = rxe_query_port,
1097     .query_qp = rxe_query_qp,
1098     .query_srq = rxe_query_srq,
1099     .reg_user_mr = rxe_reg_user_mr,
1100     .req_notify_cq = rxe_req_notify_cq,
1101     .resize_cq = rxe_resize_cq,
1102 
1103     INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
1104     INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
1105     INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
1106     INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
1107     INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
1108     INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
1109     INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
1110 };
1111 
1112 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
1113 {
1114     int err;
1115     struct ib_device *dev = &rxe->ib_dev;
1116 
1117     strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1118 
1119     dev->node_type = RDMA_NODE_IB_CA;
1120     dev->phys_port_cnt = 1;
1121     dev->num_comp_vectors = num_possible_cpus();
1122     dev->local_dma_lkey = 0;
1123     addrconf_addr_eui48((unsigned char *)&dev->node_guid,
1124                 rxe->ndev->dev_addr);
1125 
1126     dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
1127                 BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
1128 
1129     ib_set_device_ops(dev, &rxe_dev_ops);
1130     err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
1131     if (err)
1132         return err;
1133 
1134     err = rxe_icrc_init(rxe);
1135     if (err)
1136         return err;
1137 
1138     err = ib_register_device(dev, ibdev_name, NULL);
1139     if (err)
1140         pr_warn("%s failed with error %d\n", __func__, err);
1141 
1142     /*
1143      * Note that rxe may be invalid at this point if another thread
1144      * unregistered it.
1145      */
1146     return err;
1147 }