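/*
 * VMware Paravirtual RDMA (vmw_pvrdma) driver: main module.
 * Handles PCI probe/remove, the device shared region, interrupt
 * handling, and registration of the IB device that is paired with
 * a vmxnet3 net device.
 */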
#include <linux/errno.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <net/addrconf.h>

#include "pvrdma.h"

#define DRV_NAME	"vmw_pvrdma"
#define DRV_VERSION	"1.0.1.0-k"

static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);

static ssize_t hca_type_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
}
static DEVICE_ATTR_RO(hca_type);

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_REV_ID);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t board_id_show(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", PVRDMA_BOARD_ID);
}
static DEVICE_ATTR_RO(board_id);

static struct attribute *pvrdma_class_attributes[] = {
	&dev_attr_hw_rev.attr,
	&dev_attr_hca_type.attr,
	&dev_attr_board_id.attr,
	NULL,
};

static const struct attribute_group pvrdma_attr_group = {
	.attrs = pvrdma_class_attributes,
};

static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
{
	struct pvrdma_dev *dev =
		container_of(device, struct pvrdma_dev, ib_dev);
	snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d\n",
		 (int) (dev->dsr->caps.fw_ver >> 32),
		 (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
		 (int) dev->dsr->caps.fw_ver & 0xffff);
}

static int pvrdma_init_device(struct pvrdma_dev *dev)
{
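	/* Initialize command channel locks and per-resource counters. */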
	spin_lock_init(&dev->cmd_lock);
	sema_init(&dev->cmd_sema, 1);
	atomic_set(&dev->num_qps, 0);
	atomic_set(&dev->num_srqs, 0);
	atomic_set(&dev->num_cqs, 0);
	atomic_set(&dev->num_pds, 0);
	atomic_set(&dev->num_ahs, 0);

	return 0;
}

static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
				 struct ib_port_immutable *immutable)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct ib_port_attr attr;
	int err;

	if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V1)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE;
	else if (dev->dsr->caps.gid_types == PVRDMA_GID_TYPE_FLAG_ROCE_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

static const struct ib_device_ops pvrdma_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_VMW_PVRDMA,
	.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION,

	.add_gid = pvrdma_add_gid,
	.alloc_mr = pvrdma_alloc_mr,
	.alloc_pd = pvrdma_alloc_pd,
	.alloc_ucontext = pvrdma_alloc_ucontext,
	.create_ah = pvrdma_create_ah,
	.create_cq = pvrdma_create_cq,
	.create_qp = pvrdma_create_qp,
	.dealloc_pd = pvrdma_dealloc_pd,
	.dealloc_ucontext = pvrdma_dealloc_ucontext,
	.del_gid = pvrdma_del_gid,
	.dereg_mr = pvrdma_dereg_mr,
	.destroy_ah = pvrdma_destroy_ah,
	.destroy_cq = pvrdma_destroy_cq,
	.destroy_qp = pvrdma_destroy_qp,
	.device_group = &pvrdma_attr_group,
	.get_dev_fw_str = pvrdma_get_fw_ver_str,
	.get_dma_mr = pvrdma_get_dma_mr,
	.get_link_layer = pvrdma_port_link_layer,
	.get_port_immutable = pvrdma_port_immutable,
	.map_mr_sg = pvrdma_map_mr_sg,
	.mmap = pvrdma_mmap,
	.modify_port = pvrdma_modify_port,
	.modify_qp = pvrdma_modify_qp,
	.poll_cq = pvrdma_poll_cq,
	.post_recv = pvrdma_post_recv,
	.post_send = pvrdma_post_send,
	.query_device = pvrdma_query_device,
	.query_gid = pvrdma_query_gid,
	.query_pkey = pvrdma_query_pkey,
	.query_port = pvrdma_query_port,
	.query_qp = pvrdma_query_qp,
	.reg_user_mr = pvrdma_reg_user_mr,
	.req_notify_cq = pvrdma_req_notify_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, pvrdma_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
};

static const struct ib_device_ops pvrdma_dev_srq_ops = {
	.create_srq = pvrdma_create_srq,
	.destroy_srq = pvrdma_destroy_srq,
	.modify_srq = pvrdma_modify_srq,
	.query_srq = pvrdma_query_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, pvrdma_srq, ibsrq),
};

static int pvrdma_register_device(struct pvrdma_dev *dev)
{
	int ret = -1;

	dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
	dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
	dev->flags = 0;
	dev->ib_dev.num_comp_vectors = 1;
	dev->ib_dev.dev.parent = &dev->pdev->dev;

	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;

	ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);

	mutex_init(&dev->port_mutex);
	spin_lock_init(&dev->desc_lock);

	dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
			      GFP_KERNEL);
	if (!dev->cq_tbl)
		return ret;
	spin_lock_init(&dev->cq_tbl_lock);

	dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
			      GFP_KERNEL);
	if (!dev->qp_tbl)
		goto err_cq_free;
	spin_lock_init(&dev->qp_tbl_lock);

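	/*
	 * SRQ support is optional; only install the SRQ verbs and allocate
	 * the SRQ table if the device reports max_srq > 0.
	 */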
	if (dev->dsr->caps.max_srq) {
		ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);

		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
				       sizeof(struct pvrdma_srq *),
				       GFP_KERNEL);
		if (!dev->srq_tbl)
			goto err_qp_free;
	}
	ret = ib_device_set_netdev(&dev->ib_dev, dev->netdev, 1);
	if (ret)
		goto err_srq_free;
	spin_lock_init(&dev->srq_tbl_lock);

	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
	if (ret)
		goto err_srq_free;

	dev->ib_active = true;

	return 0;

err_srq_free:
	kfree(dev->srq_tbl);
err_qp_free:
	kfree(dev->qp_tbl);
err_cq_free:
	kfree(dev->cq_tbl);

	return ret;
}

static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
{
	u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
	struct pvrdma_dev *dev = dev_id;

	dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");

	if (!dev->pdev->msix_enabled) {
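		/* Legacy/INTx interrupt: read the cause register to check it is ours. */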
		icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
		if (icr == 0)
			return IRQ_NONE;
	}

	if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
		complete(&dev->cmd_done);

	return IRQ_HANDLED;
}

static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
{
	struct pvrdma_qp *qp;
	unsigned long flags;

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
	if (qp)
		refcount_inc(&qp->refcnt);
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (qp && qp->ibqp.event_handler) {
		struct ib_qp *ibqp = &qp->ibqp;
		struct ib_event e;

		e.device = ibqp->device;
		e.element.qp = ibqp;
		e.event = type;
		ibqp->event_handler(&e, ibqp->qp_context);
	}
	if (qp) {
		if (refcount_dec_and_test(&qp->refcnt))
			complete(&qp->free);
	}
}

static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
{
	struct pvrdma_cq *cq;
	unsigned long flags;

	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
	if (cq)
		refcount_inc(&cq->refcnt);
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (cq && cq->ibcq.event_handler) {
		struct ib_cq *ibcq = &cq->ibcq;
		struct ib_event e;

		e.device = ibcq->device;
		e.element.cq = ibcq;
		e.event = type;
		ibcq->event_handler(&e, ibcq->cq_context);
	}
	if (cq) {
		if (refcount_dec_and_test(&cq->refcnt))
			complete(&cq->free);
	}
}

static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
{
	struct pvrdma_srq *srq;
	unsigned long flags;

	spin_lock_irqsave(&dev->srq_tbl_lock, flags);
	if (dev->srq_tbl)
		srq = dev->srq_tbl[srqn % dev->dsr->caps.max_srq];
	else
		srq = NULL;
	if (srq)
		refcount_inc(&srq->refcnt);
	spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);

	if (srq && srq->ibsrq.event_handler) {
		struct ib_srq *ibsrq = &srq->ibsrq;
		struct ib_event e;

		e.device = ibsrq->device;
		e.element.srq = ibsrq;
		e.event = type;
		ibsrq->event_handler(&e, ibsrq->srq_context);
	}
	if (srq) {
		if (refcount_dec_and_test(&srq->refcnt))
			complete(&srq->free);
	}
}

static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
				  enum ib_event_type event)
{
	struct ib_event ib_event;

	memset(&ib_event, 0, sizeof(ib_event));
	ib_event.device = &dev->ib_dev;
	ib_event.element.port_num = port;
	ib_event.event = event;
	ib_dispatch_event(&ib_event);
}

static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
	if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
		dev_warn(&dev->pdev->dev, "event on invalid port %d\n", port);
		return;
	}

	pvrdma_dispatch_event(dev, port, type);
}

static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
{
	return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
					&dev->async_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_eqe) * i);
}

static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->async_ring_state->rx;
	int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
			 PAGE_SIZE / sizeof(struct pvrdma_eqe);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");

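	/*
	 * Don't dispatch async events until the IB device has been
	 * registered; dev->ib_active is only set once registration is done.
	 */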
	if (!dev->ib_active)
		return IRQ_HANDLED;

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_eqe *eqe;

		eqe = get_eqe(dev, head);

		switch (eqe->type) {
		case PVRDMA_EVENT_QP_FATAL:
		case PVRDMA_EVENT_QP_REQ_ERR:
		case PVRDMA_EVENT_QP_ACCESS_ERR:
		case PVRDMA_EVENT_COMM_EST:
		case PVRDMA_EVENT_SQ_DRAINED:
		case PVRDMA_EVENT_PATH_MIG:
		case PVRDMA_EVENT_PATH_MIG_ERR:
		case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
			pvrdma_qp_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_CQ_ERR:
			pvrdma_cq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_SRQ_ERR:
		case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
			pvrdma_srq_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_PORT_ACTIVE:
		case PVRDMA_EVENT_PORT_ERR:
		case PVRDMA_EVENT_LID_CHANGE:
		case PVRDMA_EVENT_PKEY_CHANGE:
		case PVRDMA_EVENT_SM_CHANGE:
		case PVRDMA_EVENT_CLIENT_REREGISTER:
		case PVRDMA_EVENT_GID_CHANGE:
			pvrdma_dev_event(dev, eqe->info, eqe->type);
			break;

		case PVRDMA_EVENT_DEVICE_FATAL:
			pvrdma_dev_event(dev, 1, eqe->type);
			break;

		default:
			break;
		}

		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
					   unsigned int i)
{
	return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
					&dev->cq_pdir,
					PAGE_SIZE +
					sizeof(struct pvrdma_cqne) * i);
}

static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
{
	struct pvrdma_dev *dev = dev_id;
	struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
	int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
			 sizeof(struct pvrdma_cqne);
	unsigned int head;

	dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");

	while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
		struct pvrdma_cqne *cqne;
		struct pvrdma_cq *cq;

		cqne = get_cqne(dev, head);
		spin_lock(&dev->cq_tbl_lock);
		cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
		if (cq)
			refcount_inc(&cq->refcnt);
		spin_unlock(&dev->cq_tbl_lock);

		if (cq && cq->ibcq.comp_handler)
			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		if (cq) {
			if (refcount_dec_and_test(&cq->refcnt))
				complete(&cq->free);
		}
		pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
	}

	return IRQ_HANDLED;
}

static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
	int i;

	dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
	for (i = 0; i < dev->nr_vectors; i++)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
}

static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "enable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
}

static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
{
	dev_dbg(&dev->pdev->dev, "disable interrupts\n");
	pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}

static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int ret = 0, i;

	ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		ret = pci_alloc_irq_vectors(pdev, 1, 1,
					    PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (ret < 0)
			return ret;
	}
	dev->nr_vectors = ret;

	ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
			  pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
	if (ret) {
		dev_err(&dev->pdev->dev,
			"failed to request interrupt 0\n");
		goto out_free_vectors;
	}

	for (i = 1; i < dev->nr_vectors; i++) {
		ret = request_irq(pci_irq_vector(dev->pdev, i),
				  i == 1 ? pvrdma_intr1_handler :
					   pvrdma_intrx_handler,
				  0, DRV_NAME, dev);
		if (ret) {
			dev_err(&dev->pdev->dev,
				"failed to request interrupt %d\n", i);
			goto free_irqs;
		}
	}

	return 0;

free_irqs:
	while (--i >= 0)
		free_irq(pci_irq_vector(dev->pdev, i), dev);
out_free_vectors:
	pci_free_irq_vectors(pdev);
	return ret;
}

static void pvrdma_free_slots(struct pvrdma_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	if (dev->resp_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
				  dev->dsr->resp_slot_dma);
	if (dev->cmd_slot)
		dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
				  dev->dsr->cmd_slot_dma);
}

static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
				   const union ib_gid *gid,
				   u8 gid_type,
				   int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_bind, 0, sizeof(*cmd_bind));
	cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
	memcpy(cmd_bind->new_gid, gid->raw, 16);
	cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
	cmd_bind->vlan = 0xfff;
	cmd_bind->index = index;
	cmd_bind->gid_type = gid_type;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create binding, error: %d\n", ret);
		return -EFAULT;
	}
	memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
	return 0;
}

static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	return pvrdma_add_gid_at_index(dev, &attr->gid,
				       ib_gid_type_to_pvrdma(attr->gid_type),
				       attr->index);
}

static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
{
	int ret;
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;

	if (!dev->sgid_tbl) {
		dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
		return -EINVAL;
	}

	memset(cmd_dest, 0, sizeof(*cmd_dest));
	cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
	memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
	cmd_dest->index = index;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not destroy binding, error: %d\n", ret);
		return ret;
	}
	memset(&dev->sgid_tbl[index], 0, 16);
	return 0;
}

static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct pvrdma_dev *dev = to_vdev(attr->device);

	dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
		attr->index, dev->netdev->name);

	return pvrdma_del_gid_at_index(dev, attr->index);
}

static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
					  struct net_device *ndev,
					  unsigned long event)
{
	struct pci_dev *pdev_net;
	unsigned int slot;

	switch (event) {
	case NETDEV_REBOOT:
	case NETDEV_DOWN:
		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
		break;
	case NETDEV_UP:
		pvrdma_write_reg(dev, PVRDMA_REG_CTL,
				 PVRDMA_DEVICE_CTL_UNQUIESCE);

		mb();

		if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
			dev_err(&dev->pdev->dev,
				"failed to activate device during link up\n");
		else
			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
		break;
	case NETDEV_UNREGISTER:
		ib_device_set_netdev(&dev->ib_dev, NULL, 1);
		dev_put(dev->netdev);
		dev->netdev = NULL;
		break;
	case NETDEV_REGISTER:
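		/*
		 * Pair up only with the net device sitting in function 0 of
		 * the same PCI slot as this pvrdma device.
		 */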
		slot = PCI_SLOT(dev->pdev->devfn);
		pdev_net = pci_get_slot(dev->pdev->bus,
					PCI_DEVFN(slot, 0));
		if ((dev->netdev == NULL) &&
		    (pci_get_drvdata(pdev_net) == ndev)) {
			ib_device_set_netdev(&dev->ib_dev, ndev, 1);
			dev->netdev = ndev;
			dev_hold(ndev);
		}
		pci_dev_put(pdev_net);
		break;

	default:
		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
			event, dev_name(&dev->ib_dev.dev));
		break;
	}
}

static void pvrdma_netdevice_event_work(struct work_struct *work)
{
	struct pvrdma_netdevice_work *netdev_work;
	struct pvrdma_dev *dev;

	netdev_work = container_of(work, struct pvrdma_netdevice_work, work);

	mutex_lock(&pvrdma_device_list_lock);
	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
		if ((netdev_work->event == NETDEV_REGISTER) ||
		    (dev->netdev == netdev_work->event_netdev)) {
			pvrdma_netdevice_event_handle(dev,
						      netdev_work->event_netdev,
						      netdev_work->event);
			break;
		}
	}
	mutex_unlock(&pvrdma_device_list_lock);

	kfree(netdev_work);
}

static int pvrdma_netdevice_event(struct notifier_block *this,
				  unsigned long event, void *ptr)
{
	struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
	struct pvrdma_netdevice_work *netdev_work;

	netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
	if (!netdev_work)
		return NOTIFY_BAD;

	INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
	netdev_work->event_netdev = event_netdev;
	netdev_work->event = event;
	queue_work(event_wq, &netdev_work->work);

	return NOTIFY_DONE;
}

static int pvrdma_pci_probe(struct pci_dev *pdev,
			    const struct pci_device_id *id)
{
	struct pci_dev *pdev_net;
	struct pvrdma_dev *dev;
	int ret;
	unsigned long start;
	unsigned long len;
	dma_addr_t slot_dma = 0;

	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));

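	/* Allocate a zeroed IB device container for the pvrdma device. */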
	dev = ib_alloc_device(pvrdma_dev, ib_dev);
	if (!dev) {
		dev_err(&pdev->dev, "failed to allocate IB device\n");
		return -ENOMEM;
	}

	mutex_lock(&pvrdma_device_list_lock);
	list_add(&dev->device_link, &pvrdma_device_list);
	mutex_unlock(&pvrdma_device_list_lock);

	ret = pvrdma_init_device(dev);
	if (ret)
		goto err_free_device;

	dev->pdev = pdev;
	pci_set_drvdata(pdev, dev);

	ret = pci_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "cannot enable PCI device\n");
		goto err_free_device;
	}

	dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
		pci_resource_flags(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 0));
	dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
		pci_resource_flags(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
		(unsigned long long)pci_resource_len(pdev, 1));
	dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
		(unsigned long long)pci_resource_start(pdev, 1));

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
	    !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
		dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
		ret = -ENOMEM;
		goto err_disable_pdev;
	}

	ret = pci_request_regions(pdev, DRV_NAME);
	if (ret) {
		dev_err(&pdev->dev, "cannot request PCI resources\n");
		goto err_disable_pdev;
	}

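	/* Enable 64-bit DMA for both streaming and coherent mappings. */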
	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		dev_err(&pdev->dev, "dma_set_mask failed\n");
		goto err_free_resource;
	}
	dma_set_max_seg_size(&pdev->dev, UINT_MAX);
	pci_set_master(pdev);

	start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
	dev->regs = ioremap(start, len);
	if (!dev->regs) {
		dev_err(&pdev->dev, "register mapping failed\n");
		ret = -ENOMEM;
		goto err_free_resource;
	}

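	/* Map the driver's own UAR page. */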
	dev->driver_uar.index = 0;
	dev->driver_uar.pfn =
		pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
		PAGE_SHIFT;
	dev->driver_uar.map =
		ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!dev->driver_uar.map) {
		dev_err(&pdev->dev, "failed to remap UAR pages\n");
		ret = -ENOMEM;
		goto err_unmap_regs;
	}

	dev->dsr_version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
	dev_info(&pdev->dev, "device version %d, driver version %d\n",
		 dev->dsr_version, PVRDMA_VERSION);

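	/*
	 * Allocate the device shared region (DSR) that carries setup state
	 * (rings, command/response slots, driver info) to the device.
	 */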
	dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
				      &dev->dsrbase, GFP_KERNEL);
	if (!dev->dsr) {
		dev_err(&pdev->dev, "failed to allocate shared region\n");
		ret = -ENOMEM;
		goto err_uar_unmap;
	}

	dev->dsr->driver_version = PVRDMA_VERSION;
	dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
		PVRDMA_GOS_BITS_32 :
		PVRDMA_GOS_BITS_64;
	dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
	dev->dsr->gos_info.gos_ver = 1;

	if (dev->dsr_version < PVRDMA_PPN64_VERSION)
		dev->dsr->uar_pfn = dev->driver_uar.pfn;
	else
		dev->dsr->uar_pfn64 = dev->driver_uar.pfn;

	/* Command slot. */
	dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					   &slot_dma, GFP_KERNEL);
	if (!dev->cmd_slot) {
		ret = -ENOMEM;
		goto err_free_dsr;
	}

	dev->dsr->cmd_slot_dma = (u64)slot_dma;

	/* Response slot. */
	dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
					    &slot_dma, GFP_KERNEL);
	if (!dev->resp_slot) {
		ret = -ENOMEM;
		goto err_free_slots;
	}

	dev->dsr->resp_slot_dma = (u64)slot_dma;

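	/*
	 * Async event ring: page 0 of the page directory holds the ring
	 * state, the remaining pages hold the event queue entries.
	 */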
	dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
				   dev->dsr->async_ring_pages.num_pages, true);
	if (ret)
		goto err_free_slots;
	dev->async_ring_state = dev->async_pdir.pages[0];
	dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;

	/* CQ notification ring: laid out the same way as the async ring. */
	dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES;
	ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
				   dev->dsr->cq_ring_pages.num_pages, true);
	if (ret)
		goto err_free_async_ring;
	dev->cq_ring_state = dev->cq_pdir.pages[0];
	dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;

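	/*
	 * Hand the DSR physical address to the device. Write the low bits
	 * first and the high bits last; once both writes complete, the
	 * device picks up the shared region.
	 */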
	pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
	pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
			 (u32)((u64)(dev->dsrbase) >> 32));

	/* Make sure the writes complete before reading device state. */
	mb();

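	/* Bail out unless the device offers RoCE v1 or v2 support. */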
	if (!PVRDMA_SUPPORTED(dev)) {
		dev_err(&pdev->dev, "driver needs RoCE v1 or v2 support\n");
		ret = -EFAULT;
		goto err_free_cq_ring;
	}

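	/* Pair with the vmxnet3 net device in function 0 of the same PCI slot. */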
	pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
	if (!pdev_net) {
		dev_err(&pdev->dev, "failed to find paired net device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
	    pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
		dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
		pci_dev_put(pdev_net);
		ret = -ENODEV;
		goto err_free_cq_ring;
	}

	dev->netdev = pci_get_drvdata(pdev_net);
	pci_dev_put(pdev_net);
	if (!dev->netdev) {
		dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
		ret = -ENODEV;
		goto err_free_cq_ring;
	}
	dev_hold(dev->netdev);

	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);

	/* Interrupt setup. */
	ret = pvrdma_alloc_intrs(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate interrupts\n");
		ret = -ENOMEM;
		goto err_free_cq_ring;
	}

	/* Allocate the UAR table. */
	ret = pvrdma_uar_table_init(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to allocate UAR table\n");
		ret = -ENOMEM;
		goto err_free_intrs;
	}

	/* Allocate the GID table. */
	dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
				sizeof(union ib_gid), GFP_KERNEL);
	if (!dev->sgid_tbl) {
		ret = -ENOMEM;
		goto err_free_uar_table;
	}
	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);

	pvrdma_enable_intrs(dev);

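	/* Activate the device and check that it came up cleanly. */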
	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);

	/* Make sure the write completes before reading the error register. */
	mb();

	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
	if (ret != 0) {
		dev_err(&pdev->dev, "failed to activate device\n");
		ret = -EFAULT;
		goto err_disable_intr;
	}

	/* Register the device with the IB core. */
	ret = pvrdma_register_device(dev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register IB device\n");
		goto err_disable_intr;
	}

	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
	ret = register_netdevice_notifier(&dev->nb_netdev);
	if (ret) {
		dev_err(&pdev->dev, "failed to register netdevice events\n");
		goto err_unreg_ibdev;
	}

	dev_info(&pdev->dev, "attached to device\n");
	return 0;

err_unreg_ibdev:
	ib_unregister_device(&dev->ib_dev);
err_disable_intr:
	pvrdma_disable_intrs(dev);
	kfree(dev->sgid_tbl);
err_free_uar_table:
	pvrdma_uar_table_cleanup(dev);
err_free_intrs:
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);
err_free_cq_ring:
	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
err_free_slots:
	pvrdma_free_slots(dev);
err_free_dsr:
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);
err_uar_unmap:
	iounmap(dev->driver_uar.map);
err_unmap_regs:
	iounmap(dev->regs);
err_free_resource:
	pci_release_regions(pdev);
err_disable_pdev:
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
err_free_device:
	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);
	ib_dealloc_device(&dev->ib_dev);
	return ret;
}

static void pvrdma_pci_remove(struct pci_dev *pdev)
{
	struct pvrdma_dev *dev = pci_get_drvdata(pdev);

	if (!dev)
		return;

	dev_info(&pdev->dev, "detaching from device\n");

	unregister_netdevice_notifier(&dev->nb_netdev);
	dev->nb_netdev.notifier_call = NULL;

	flush_workqueue(event_wq);

	if (dev->netdev) {
		dev_put(dev->netdev);
		dev->netdev = NULL;
	}

	ib_unregister_device(&dev->ib_dev);

	mutex_lock(&pvrdma_device_list_lock);
	list_del(&dev->device_link);
	mutex_unlock(&pvrdma_device_list_lock);

	pvrdma_disable_intrs(dev);
	pvrdma_free_irq(dev);
	pci_free_irq_vectors(pdev);

	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
	pvrdma_free_slots(dev);
	dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
			  dev->dsrbase);

	iounmap(dev->regs);
	kfree(dev->sgid_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->srq_tbl);
	kfree(dev->qp_tbl);
	pvrdma_uar_table_cleanup(dev);
	iounmap(dev->driver_uar.map);

	ib_dealloc_device(&dev->ib_dev);

	pci_release_regions(pdev);
	pci_disable_device(pdev);
	pci_set_drvdata(pdev, NULL);
}

static const struct pci_device_id pvrdma_pci_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
	{ 0 },
};

MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);

static struct pci_driver pvrdma_driver = {
	.name = DRV_NAME,
	.id_table = pvrdma_pci_table,
	.probe = pvrdma_pci_probe,
	.remove = pvrdma_pci_remove,
};

static int __init pvrdma_init(void)
{
	int err;

	event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
	if (!event_wq)
		return -ENOMEM;

	err = pci_register_driver(&pvrdma_driver);
	if (err)
		destroy_workqueue(event_wq);

	return err;
}

static void __exit pvrdma_cleanup(void)
{
	pci_unregister_driver(&pvrdma_driver);

	destroy_workqueue(event_wq);
}

module_init(pvrdma_init);
module_exit(pvrdma_cleanup);

MODULE_AUTHOR("VMware, Inc");
MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
MODULE_LICENSE("Dual BSD/GPL");