Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
0002 
0003 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
0004 /*          Kai Shen <kaishen@linux.alibaba.com> */
0005 /* Copyright (c) 2020-2022, Alibaba Group. */
0006 
0007 #include <linux/errno.h>
0008 #include <linux/init.h>
0009 #include <linux/kernel.h>
0010 #include <linux/list.h>
0011 #include <linux/module.h>
0012 #include <linux/netdevice.h>
0013 #include <linux/pci.h>
0014 #include <net/addrconf.h>
0015 #include <rdma/erdma-abi.h>
0016 #include <rdma/ib_verbs.h>
0017 #include <rdma/ib_user_verbs.h>
0018 
0019 #include "erdma.h"
0020 #include "erdma_cm.h"
0021 #include "erdma_hw.h"
0022 #include "erdma_verbs.h"
0023 
0024 MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
0025 MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
0026 MODULE_LICENSE("Dual BSD/GPL");
0027 
0028 static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
0029                   void *arg)
0030 {
0031     struct net_device *netdev = netdev_notifier_info_to_dev(arg);
0032     struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
0033 
0034     if (dev->netdev == NULL || dev->netdev != netdev)
0035         goto done;
0036 
0037     switch (event) {
0038     case NETDEV_UP:
0039         dev->state = IB_PORT_ACTIVE;
0040         erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
0041         break;
0042     case NETDEV_DOWN:
0043         dev->state = IB_PORT_DOWN;
0044         erdma_port_event(dev, IB_EVENT_PORT_ERR);
0045         break;
0046     case NETDEV_REGISTER:
0047     case NETDEV_UNREGISTER:
0048     case NETDEV_CHANGEADDR:
0049     case NETDEV_CHANGEMTU:
0050     case NETDEV_GOING_DOWN:
0051     case NETDEV_CHANGE:
0052     default:
0053         break;
0054     }
0055 
0056 done:
0057     return NOTIFY_OK;
0058 }
0059 
0060 static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
0061 {
0062     struct net_device *netdev;
0063     int ret = -ENODEV;
0064 
0065     /* Already binded to a net_device, so we skip. */
0066     if (dev->netdev)
0067         return 0;
0068 
0069     rtnl_lock();
0070     for_each_netdev(&init_net, netdev) {
0071         /*
0072          * In erdma, the paired netdev and ibdev should have the same
0073          * MAC address. erdma can get the value from its PCIe bar
0074          * registers. Since erdma can not get the paired netdev
0075          * reference directly, we do a traverse here to get the paired
0076          * netdev.
0077          */
0078         if (ether_addr_equal_unaligned(netdev->perm_addr,
0079                            dev->attrs.peer_addr)) {
0080             ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
0081             if (ret) {
0082                 rtnl_unlock();
0083                 ibdev_warn(&dev->ibdev,
0084                        "failed (%d) to link netdev", ret);
0085                 return ret;
0086             }
0087 
0088             dev->netdev = netdev;
0089             break;
0090         }
0091     }
0092 
0093     rtnl_unlock();
0094 
0095     return ret;
0096 }
0097 
0098 static int erdma_device_register(struct erdma_dev *dev)
0099 {
0100     struct ib_device *ibdev = &dev->ibdev;
0101     int ret;
0102 
0103     ret = erdma_enum_and_get_netdev(dev);
0104     if (ret)
0105         return ret;
0106 
0107     addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
0108 
0109     ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
0110     if (ret) {
0111         dev_err(&dev->pdev->dev,
0112             "ib_register_device failed: ret = %d\n", ret);
0113         return ret;
0114     }
0115 
0116     dev->netdev_nb.notifier_call = erdma_netdev_event;
0117     ret = register_netdevice_notifier(&dev->netdev_nb);
0118     if (ret) {
0119         ibdev_err(&dev->ibdev, "failed to register notifier.\n");
0120         ib_unregister_device(ibdev);
0121     }
0122 
0123     return ret;
0124 }
0125 
0126 static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
0127 {
0128     struct erdma_dev *dev = data;
0129 
0130     erdma_cmdq_completion_handler(&dev->cmdq);
0131     erdma_aeq_event_handler(dev);
0132 
0133     return IRQ_HANDLED;
0134 }
0135 
0136 static void erdma_dwqe_resource_init(struct erdma_dev *dev)
0137 {
0138     int total_pages, type0, type1;
0139 
0140     dev->attrs.grp_num = erdma_reg_read32(dev, ERDMA_REGS_GRP_NUM_REG);
0141 
0142     if (dev->attrs.grp_num < 4)
0143         dev->attrs.disable_dwqe = true;
0144     else
0145         dev->attrs.disable_dwqe = false;
0146 
0147     /* One page contains 4 goups. */
0148     total_pages = dev->attrs.grp_num * 4;
0149 
0150     if (dev->attrs.grp_num >= ERDMA_DWQE_MAX_GRP_CNT) {
0151         dev->attrs.grp_num = ERDMA_DWQE_MAX_GRP_CNT;
0152         type0 = ERDMA_DWQE_TYPE0_CNT;
0153         type1 = ERDMA_DWQE_TYPE1_CNT / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
0154     } else {
0155         type1 = total_pages / 3;
0156         type0 = total_pages - type1 - 1;
0157     }
0158 
0159     dev->attrs.dwqe_pages = type0;
0160     dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
0161 }
0162 
0163 static int erdma_request_vectors(struct erdma_dev *dev)
0164 {
0165     int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
0166     int ret;
0167 
0168     ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
0169     if (ret < 0) {
0170         dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
0171             ret);
0172         return ret;
0173     }
0174     dev->attrs.irq_num = ret;
0175 
0176     return 0;
0177 }
0178 
0179 static int erdma_comm_irq_init(struct erdma_dev *dev)
0180 {
0181     snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
0182          pci_name(dev->pdev));
0183     dev->comm_irq.msix_vector =
0184         pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
0185 
0186     cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
0187             &dev->comm_irq.affinity_hint_mask);
0188     irq_set_affinity_hint(dev->comm_irq.msix_vector,
0189                   &dev->comm_irq.affinity_hint_mask);
0190 
0191     return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
0192                dev->comm_irq.name, dev);
0193 }
0194 
0195 static void erdma_comm_irq_uninit(struct erdma_dev *dev)
0196 {
0197     irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
0198     free_irq(dev->comm_irq.msix_vector, dev);
0199 }
0200 
0201 static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
0202 {
0203     int ret;
0204 
0205     erdma_dwqe_resource_init(dev);
0206 
0207     ret = dma_set_mask_and_coherent(&pdev->dev,
0208                     DMA_BIT_MASK(ERDMA_PCI_WIDTH));
0209     if (ret)
0210         return ret;
0211 
0212     dma_set_max_seg_size(&pdev->dev, UINT_MAX);
0213 
0214     return 0;
0215 }
0216 
0217 static void erdma_device_uninit(struct erdma_dev *dev)
0218 {
0219     u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
0220 
0221     erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
0222 }
0223 
0224 static const struct pci_device_id erdma_pci_tbl[] = {
0225     { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
0226     {}
0227 };
0228 
0229 static int erdma_probe_dev(struct pci_dev *pdev)
0230 {
0231     struct erdma_dev *dev;
0232     int bars, err;
0233     u32 version;
0234 
0235     err = pci_enable_device(pdev);
0236     if (err) {
0237         dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
0238         return err;
0239     }
0240 
0241     pci_set_master(pdev);
0242 
0243     dev = ib_alloc_device(erdma_dev, ibdev);
0244     if (!dev) {
0245         dev_err(&pdev->dev, "ib_alloc_device failed\n");
0246         err = -ENOMEM;
0247         goto err_disable_device;
0248     }
0249 
0250     pci_set_drvdata(pdev, dev);
0251     dev->pdev = pdev;
0252     dev->attrs.numa_node = dev_to_node(&pdev->dev);
0253 
0254     bars = pci_select_bars(pdev, IORESOURCE_MEM);
0255     err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
0256     if (bars != ERDMA_BAR_MASK || err) {
0257         err = err ? err : -EINVAL;
0258         goto err_ib_device_release;
0259     }
0260 
0261     dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
0262     dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
0263 
0264     dev->func_bar =
0265         devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
0266     if (!dev->func_bar) {
0267         dev_err(&pdev->dev, "devm_ioremap failed.\n");
0268         err = -EFAULT;
0269         goto err_release_bars;
0270     }
0271 
0272     version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
0273     if (version == 0) {
0274         /* we knows that it is a non-functional function. */
0275         err = -ENODEV;
0276         goto err_iounmap_func_bar;
0277     }
0278 
0279     err = erdma_device_init(dev, pdev);
0280     if (err)
0281         goto err_iounmap_func_bar;
0282 
0283     err = erdma_request_vectors(dev);
0284     if (err)
0285         goto err_iounmap_func_bar;
0286 
0287     err = erdma_comm_irq_init(dev);
0288     if (err)
0289         goto err_free_vectors;
0290 
0291     err = erdma_aeq_init(dev);
0292     if (err)
0293         goto err_uninit_comm_irq;
0294 
0295     err = erdma_cmdq_init(dev);
0296     if (err)
0297         goto err_uninit_aeq;
0298 
0299     err = erdma_ceqs_init(dev);
0300     if (err)
0301         goto err_uninit_cmdq;
0302 
0303     erdma_finish_cmdq_init(dev);
0304 
0305     return 0;
0306 
0307 err_uninit_cmdq:
0308     erdma_device_uninit(dev);
0309     erdma_cmdq_destroy(dev);
0310 
0311 err_uninit_aeq:
0312     erdma_aeq_destroy(dev);
0313 
0314 err_uninit_comm_irq:
0315     erdma_comm_irq_uninit(dev);
0316 
0317 err_free_vectors:
0318     pci_free_irq_vectors(dev->pdev);
0319 
0320 err_iounmap_func_bar:
0321     devm_iounmap(&pdev->dev, dev->func_bar);
0322 
0323 err_release_bars:
0324     pci_release_selected_regions(pdev, bars);
0325 
0326 err_ib_device_release:
0327     ib_dealloc_device(&dev->ibdev);
0328 
0329 err_disable_device:
0330     pci_disable_device(pdev);
0331 
0332     return err;
0333 }
0334 
0335 static void erdma_remove_dev(struct pci_dev *pdev)
0336 {
0337     struct erdma_dev *dev = pci_get_drvdata(pdev);
0338 
0339     erdma_ceqs_uninit(dev);
0340 
0341     erdma_device_uninit(dev);
0342 
0343     erdma_cmdq_destroy(dev);
0344     erdma_aeq_destroy(dev);
0345     erdma_comm_irq_uninit(dev);
0346     pci_free_irq_vectors(dev->pdev);
0347 
0348     devm_iounmap(&pdev->dev, dev->func_bar);
0349     pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
0350 
0351     ib_dealloc_device(&dev->ibdev);
0352 
0353     pci_disable_device(pdev);
0354 }
0355 
/* Extract capability field @name from the 64-bit capability word @cap. */
#define ERDMA_GET_CAP(name, cap) \
	FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, (cap))
0357 
0358 static int erdma_dev_attrs_init(struct erdma_dev *dev)
0359 {
0360     int err;
0361     u64 req_hdr, cap0, cap1;
0362 
0363     erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
0364                 CMDQ_OPCODE_QUERY_DEVICE);
0365 
0366     err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
0367                   &cap1);
0368     if (err)
0369         return err;
0370 
0371     dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
0372     dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
0373     dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
0374     dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
0375     dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
0376     dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
0377     dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
0378     dev->attrs.max_mr = dev->attrs.max_qp << 1;
0379     dev->attrs.max_cq = dev->attrs.max_qp << 1;
0380 
0381     dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
0382     dev->attrs.max_ord = ERDMA_MAX_ORD;
0383     dev->attrs.max_ird = ERDMA_MAX_IRD;
0384     dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
0385     dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
0386     dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
0387     dev->attrs.max_pd = ERDMA_MAX_PD;
0388 
0389     dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
0390     dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
0391 
0392     erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
0393                 CMDQ_OPCODE_QUERY_FW_INFO);
0394 
0395     err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
0396                   &cap1);
0397     if (!err)
0398         dev->attrs.fw_version =
0399             FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
0400 
0401     return err;
0402 }
0403 
0404 static int erdma_res_cb_init(struct erdma_dev *dev)
0405 {
0406     int i, j;
0407 
0408     for (i = 0; i < ERDMA_RES_CNT; i++) {
0409         dev->res_cb[i].next_alloc_idx = 1;
0410         spin_lock_init(&dev->res_cb[i].lock);
0411         dev->res_cb[i].bitmap =
0412             bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
0413         if (!dev->res_cb[i].bitmap)
0414             goto err;
0415     }
0416 
0417     return 0;
0418 
0419 err:
0420     for (j = 0; j < i; j++)
0421         bitmap_free(dev->res_cb[j].bitmap);
0422 
0423     return -ENOMEM;
0424 }
0425 
0426 static void erdma_res_cb_free(struct erdma_dev *dev)
0427 {
0428     int i;
0429 
0430     for (i = 0; i < ERDMA_RES_CNT; i++)
0431         bitmap_free(dev->res_cb[i].bitmap);
0432 }
0433 
0434 static const struct ib_device_ops erdma_device_ops = {
0435     .owner = THIS_MODULE,
0436     .driver_id = RDMA_DRIVER_ERDMA,
0437     .uverbs_abi_ver = ERDMA_ABI_VERSION,
0438 
0439     .alloc_mr = erdma_ib_alloc_mr,
0440     .alloc_pd = erdma_alloc_pd,
0441     .alloc_ucontext = erdma_alloc_ucontext,
0442     .create_cq = erdma_create_cq,
0443     .create_qp = erdma_create_qp,
0444     .dealloc_pd = erdma_dealloc_pd,
0445     .dealloc_ucontext = erdma_dealloc_ucontext,
0446     .dereg_mr = erdma_dereg_mr,
0447     .destroy_cq = erdma_destroy_cq,
0448     .destroy_qp = erdma_destroy_qp,
0449     .get_dma_mr = erdma_get_dma_mr,
0450     .get_port_immutable = erdma_get_port_immutable,
0451     .iw_accept = erdma_accept,
0452     .iw_add_ref = erdma_qp_get_ref,
0453     .iw_connect = erdma_connect,
0454     .iw_create_listen = erdma_create_listen,
0455     .iw_destroy_listen = erdma_destroy_listen,
0456     .iw_get_qp = erdma_get_ibqp,
0457     .iw_reject = erdma_reject,
0458     .iw_rem_ref = erdma_qp_put_ref,
0459     .map_mr_sg = erdma_map_mr_sg,
0460     .mmap = erdma_mmap,
0461     .mmap_free = erdma_mmap_free,
0462     .modify_qp = erdma_modify_qp,
0463     .post_recv = erdma_post_recv,
0464     .post_send = erdma_post_send,
0465     .poll_cq = erdma_poll_cq,
0466     .query_device = erdma_query_device,
0467     .query_gid = erdma_query_gid,
0468     .query_port = erdma_query_port,
0469     .query_qp = erdma_query_qp,
0470     .req_notify_cq = erdma_req_notify_cq,
0471     .reg_user_mr = erdma_reg_user_mr,
0472 
0473     INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
0474     INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
0475     INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
0476     INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
0477 };
0478 
0479 static int erdma_ib_device_add(struct pci_dev *pdev)
0480 {
0481     struct erdma_dev *dev = pci_get_drvdata(pdev);
0482     struct ib_device *ibdev = &dev->ibdev;
0483     u64 mac;
0484     int ret;
0485 
0486     ret = erdma_dev_attrs_init(dev);
0487     if (ret)
0488         return ret;
0489 
0490     ibdev->node_type = RDMA_NODE_RNIC;
0491     memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
0492 
0493     /*
0494      * Current model (one-to-one device association):
0495      * One ERDMA device per net_device or, equivalently,
0496      * per physical port.
0497      */
0498     ibdev->phys_port_cnt = 1;
0499     ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
0500 
0501     ib_set_device_ops(ibdev, &erdma_device_ops);
0502 
0503     INIT_LIST_HEAD(&dev->cep_list);
0504 
0505     spin_lock_init(&dev->lock);
0506     xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
0507     xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
0508     dev->next_alloc_cqn = 1;
0509     dev->next_alloc_qpn = 1;
0510 
0511     ret = erdma_res_cb_init(dev);
0512     if (ret)
0513         return ret;
0514 
0515     spin_lock_init(&dev->db_bitmap_lock);
0516     bitmap_zero(dev->sdb_page, ERDMA_DWQE_TYPE0_CNT);
0517     bitmap_zero(dev->sdb_entry, ERDMA_DWQE_TYPE1_CNT);
0518 
0519     atomic_set(&dev->num_ctx, 0);
0520 
0521     mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
0522     mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
0523 
0524     u64_to_ether_addr(mac, dev->attrs.peer_addr);
0525 
0526     ret = erdma_device_register(dev);
0527     if (ret)
0528         goto err_out;
0529 
0530     return 0;
0531 
0532 err_out:
0533     xa_destroy(&dev->qp_xa);
0534     xa_destroy(&dev->cq_xa);
0535 
0536     erdma_res_cb_free(dev);
0537 
0538     return ret;
0539 }
0540 
0541 static void erdma_ib_device_remove(struct pci_dev *pdev)
0542 {
0543     struct erdma_dev *dev = pci_get_drvdata(pdev);
0544 
0545     unregister_netdevice_notifier(&dev->netdev_nb);
0546     ib_unregister_device(&dev->ibdev);
0547 
0548     erdma_res_cb_free(dev);
0549     xa_destroy(&dev->qp_xa);
0550     xa_destroy(&dev->cq_xa);
0551 }
0552 
/*
 * PCI probe entry point: bring up the PCI device, then add the IB device.
 * If the IB part fails, the PCI part is torn down again.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int rc;

	rc = erdma_probe_dev(pdev);
	if (rc)
		return rc;

	rc = erdma_ib_device_add(pdev);
	if (rc)
		erdma_remove_dev(pdev);

	return rc;
}
0569 
/* PCI remove entry point: tear down the IB device first, then the PCI device. */
static void erdma_remove(struct pci_dev *pdev)
{
	erdma_ib_device_remove(pdev);
	erdma_remove_dev(pdev);
}
0575 
0576 static struct pci_driver erdma_pci_driver = {
0577     .name = DRV_MODULE_NAME,
0578     .id_table = erdma_pci_tbl,
0579     .probe = erdma_probe,
0580     .remove = erdma_remove
0581 };
0582 
/* Publish the PCI ID table in the module's device table. */
MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
0584 
0585 static __init int erdma_init_module(void)
0586 {
0587     int ret;
0588 
0589     ret = erdma_cm_init();
0590     if (ret)
0591         return ret;
0592 
0593     ret = pci_register_driver(&erdma_pci_driver);
0594     if (ret)
0595         erdma_cm_exit();
0596 
0597     return ret;
0598 }
0599 
0600 static void __exit erdma_exit_module(void)
0601 {
0602     pci_unregister_driver(&erdma_pci_driver);
0603 
0604     erdma_cm_exit();
0605 }
0606 
/* Module entry and exit points. */
module_init(erdma_init_module);
module_exit(erdma_exit_module);