// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>

#include "rxe.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

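/* Return the device attributes cached at driver init time. No
 * driver-specific input or output data is accepted here.
 */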
static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

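/* Return the cached port attributes, refreshing the active speed and
 * width from the underlying Ethernet device and deriving the physical
 * state from the netdev flags.
 */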
static int rxe_query_port(struct ib_device *dev,
			  u32 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	int rc;

	/* *attr being zeroed by the caller, avoid zeroing it here */
	*attr = rxe->port.attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);

	if (attr->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else if (dev_get_flags(rxe->ndev) & IFF_UP)
		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	mutex_unlock(&rxe->usdev_lock);

	return rc;
}

static int rxe_query_pkey(struct ib_device *device,
			  u32 port_num, u16 index, u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = IB_DEFAULT_PKEY_FULL;
	return 0;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC))
		return -EOPNOTSUPP;

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u32 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u32 port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}

static int rxe_alloc_ucontext(struct ib_ucontext *ibuc, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibuc->device);
	struct rxe_ucontext *uc = to_ruc(ibuc);

	return rxe_add_to_pool(&rxe->uc_pool, uc);
}

static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_cleanup(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u32 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);

	return rxe_add_to_pool(&rxe->pd_pool, pd);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_cleanup(pd);
	return 0;
}

static int rxe_create_ah(struct ib_ah *ibah,
			 struct rdma_ah_init_attr *init_attr,
			 struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);
	struct rxe_create_ah_resp __user *uresp = NULL;
	int err;

	if (udata) {
		/* test if new user provider */
		if (udata->outlen >= sizeof(*uresp))
			uresp = udata->outbuf;
		ah->is_user = true;
	} else {
		ah->is_user = false;
	}

	err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
	if (err)
		return err;

	err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
				 init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
	if (err)
		return err;

	/* create index > 0 */
	ah->ah_num = ah->elem.index;

	if (uresp) {
		/* only if new user provider */
		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
				   sizeof(uresp->ah_num));
		if (err) {
			rxe_cleanup(ah);
			return -EFAULT;
		}
	} else if (ah->is_user) {
		/* only if old user provider */
		ah->ah_num = 0;
	}

	rxe_init_av(init_attr->ah_attr, &ah->av);
	rxe_finalize(ah);

	return 0;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	rxe_init_av(attr, &ah->av);
	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_cleanup_ah(ah, flags & RDMA_DESTROY_AH_SLEEPABLE);

	return 0;
}

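/* Build a receive WQE from ibwr and post it to the receive queue.
 * The caller must hold the queue's producer lock.
 */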
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;
	int full;

	full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
	if (unlikely(full)) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);

	return 0;

err1:
	return err;
}

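/* Create a shared receive queue. Only the basic SRQ type is supported;
 * the SRQ holds a reference on its PD until it is destroyed.
 */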
static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_pd *pd = to_rpd(ibsrq->pd);
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	if (init->srq_type != IB_SRQT_BASIC)
		return -EOPNOTSUPP;

	err = rxe_srq_chk_init(rxe, init);
	if (err)
		return err;

	err = rxe_add_to_pool(&rxe->srq_pool, srq);
	if (err)
		return err;

	rxe_get(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
	if (err)
		goto err_cleanup;

	return 0;

err_cleanup:
	rxe_cleanup(srq);

	return err;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		return err;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
	if (err)
		return err;
	return 0;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	rxe_cleanup(srq);
	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	unsigned long flags;

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

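/* Create a queue pair. The QP is added to the pool first and torn back
 * down with rxe_cleanup() if initialization from the init attributes fails.
 */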
static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init,
			 struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_pd *pd = to_rpd(ibqp->pd);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	if (init->create_flags)
		return -EOPNOTSUPP;

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		return err;

	if (udata) {
		if (udata->inlen)
			return -EINVAL;

		qp->is_user = true;
	} else {
		qp->is_user = false;
	}

	err = rxe_add_to_pool(&rxe->qp_pool, qp);
	if (err)
		return err;

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata);
	if (err)
		goto qp_init;

	rxe_finalize(qp);
	return 0;

qp_init:
	rxe_cleanup(qp);
	return err;
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	if (mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
		qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
						  qp->ibqp.qp_num,
						  qp->attr.dest_qp_num);

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct rxe_qp *qp = to_rqp(ibqp);
	int ret;

	ret = rxe_qp_chk_destroy(qp);
	if (ret)
		return ret;

	rxe_cleanup(qp);
	return 0;
}

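/* Check a send work request against the send queue limits: number of
 * SGEs, inline data length, and the size/alignment rules for atomics.
 */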
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_GSI) {
		struct ib_ah *ibah = ud_wr(ibwr)->ah;

		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		wr->wr.ud.ah_num = to_rah(ibah)->ah_num;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			fallthrough;
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

static void copy_inline_data_to_wqe(struct rxe_send_wqe *wqe,
				    const struct ib_send_wr *ibwr)
{
	struct ib_sge *sge = ibwr->sg_list;
	u8 *p = wqe->dma.inline_data;
	int i;

	for (i = 0; i < ibwr->num_sge; i++, sge++) {
		memcpy(p, (void *)(uintptr_t)sge->addr, sge->length);
		p += sge->length;
	}
}

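/* Fill in a send WQE from the ib_send_wr. Local operations are marked
 * posted immediately; inline data is copied into the WQE, otherwise the
 * SGE list is copied for later DMA processing.
 */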
static void init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			  unsigned int mask, unsigned int length,
			  struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;

	init_send_wr(qp, &wqe->wr, ibwr);

	/* local operation */
	if (unlikely(mask & WR_LOCAL_OP_MASK)) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return;
	}

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE))
		copy_inline_data_to_wqe(wqe, ibwr);
	else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);
}

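/* Validate one send work request and, if the send queue is not full,
 * build a WQE for it under the send queue lock.
 */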
static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;
	int full;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER);

	if (unlikely(full)) {
		spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
		return -ENOMEM;
	}

	send_wqe = queue_producer_addr(sq->queue, QUEUE_TYPE_TO_DRIVER);
	init_send_wqe(qp, ibwr, mask, length, send_wqe);

	queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER);

	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;
}

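/* Post a chain of send work requests from a kernel ULP, stopping at the
 * first bad WR, then kick the requester task (and the completer task if
 * the QP is in the error state).
 */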
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;
	struct ib_send_wr *next;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		next = wr->next;

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);
		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}

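/* Create a completion queue. The CQE count and completion vector are
 * validated before the queue buffer is allocated.
 */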
static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
			 struct ib_udata *udata)
{
	int err;
	struct ib_device *dev = ibcq->device;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return -EOPNOTSUPP;

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		return err;

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
			       uresp);
	if (err)
		return err;

	return rxe_add_to_pool(&rxe->cq_pool, cq);
}

static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	/* See IBA C11-17: The CI shall return an error if this Verb is
	 * invoked while a Work Queue is still associated with the CQ.
	 */
	if (atomic_read(&cq->num_wq))
		return -EINVAL;

	rxe_cq_disable(cq);

	rxe_cleanup(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count;

	count = queue_count(cq->queue, QUEUE_TYPE_FROM_DRIVER);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int ret = 0;
	int empty;
	unsigned long irq_flags;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	empty = queue_empty(cq->queue, QUEUE_TYPE_FROM_DRIVER);

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	rxe_get(pd);
	rxe_mr_init_dma(pd, access, mr);
	rxe_finalize(mr);

	return &mr->ibmr;
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_get(pd);

	err = rxe_mr_init_user(pd, start, length, iova, access, mr);
	if (err)
		goto err3;

	rxe_finalize(mr);

	return &mr->ibmr;

err3:
	rxe_put(pd);
	rxe_cleanup(mr);
err2:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mr *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_get(pd);

	err = rxe_mr_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	rxe_finalize(mr);

	return &mr->ibmr;

err2:
	rxe_put(pd);
	rxe_cleanup(mr);
err1:
	return ERR_PTR(err);
}

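/* Callback used by ib_sg_to_pages() to record one page address in the
 * next free slot of the MR's buffer map.
 */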
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

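/* Load the pages of a scatterlist into a fast-registration MR and cache
 * the page size/offset information used for address translation.
 */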
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe =
		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

	return sysfs_emit(buf, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
	&dev_attr_parent.attr,
	NULL
};

static const struct attribute_group rxe_attr_group = {
	.attrs = rxe_dev_attributes,
};

static int rxe_enable_driver(struct ib_device *ib_dev)
{
	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

	rxe_set_port_state(rxe);
	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
	return 0;
}

static const struct ib_device_ops rxe_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_RXE,
	.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

	.alloc_hw_port_stats = rxe_ib_alloc_hw_port_stats,
	.alloc_mr = rxe_alloc_mr,
	.alloc_mw = rxe_alloc_mw,
	.alloc_pd = rxe_alloc_pd,
	.alloc_ucontext = rxe_alloc_ucontext,
	.attach_mcast = rxe_attach_mcast,
	.create_ah = rxe_create_ah,
	.create_cq = rxe_create_cq,
	.create_qp = rxe_create_qp,
	.create_srq = rxe_create_srq,
	.create_user_ah = rxe_create_ah,
	.dealloc_driver = rxe_dealloc,
	.dealloc_mw = rxe_dealloc_mw,
	.dealloc_pd = rxe_dealloc_pd,
	.dealloc_ucontext = rxe_dealloc_ucontext,
	.dereg_mr = rxe_dereg_mr,
	.destroy_ah = rxe_destroy_ah,
	.destroy_cq = rxe_destroy_cq,
	.destroy_qp = rxe_destroy_qp,
	.destroy_srq = rxe_destroy_srq,
	.detach_mcast = rxe_detach_mcast,
	.device_group = &rxe_attr_group,
	.enable_driver = rxe_enable_driver,
	.get_dma_mr = rxe_get_dma_mr,
	.get_hw_stats = rxe_ib_get_hw_stats,
	.get_link_layer = rxe_get_link_layer,
	.get_port_immutable = rxe_port_immutable,
	.map_mr_sg = rxe_map_mr_sg,
	.mmap = rxe_mmap,
	.modify_ah = rxe_modify_ah,
	.modify_device = rxe_modify_device,
	.modify_port = rxe_modify_port,
	.modify_qp = rxe_modify_qp,
	.modify_srq = rxe_modify_srq,
	.peek_cq = rxe_peek_cq,
	.poll_cq = rxe_poll_cq,
	.post_recv = rxe_post_recv,
	.post_send = rxe_post_send,
	.post_srq_recv = rxe_post_srq_recv,
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
	.reg_user_mr = rxe_reg_user_mr,
	.req_notify_cq = rxe_req_notify_cq,
	.resize_cq = rxe_resize_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, rxe_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
	INIT_RDMA_OBJ_SIZE(ib_mw, rxe_mw, ibmw),
};

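/* Register the rxe device with the RDMA core: fill in the node GUID from
 * the netdev MAC address, attach the verbs ops, and register the device
 * under the requested name.
 */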
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
	int err;
	struct ib_device *dev = &rxe->ib_dev;

	strscpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);

	dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
				BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);

	ib_set_device_ops(dev, &rxe_dev_ops);
	err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
	if (err)
		return err;

	err = rxe_icrc_init(rxe);
	if (err)
		return err;

	err = ib_register_device(dev, ibdev_name, NULL);
	if (err)
		pr_warn("%s failed with error %d\n", __func__, err);

	/*
	 * Note that rxe may be invalid at this point if another thread
	 * unregistered it.
	 */
	return err;
}