0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
0004 /*          Kai Shen <kaishen@linux.alibaba.com> */
0005 /* Copyright (c) 2020-2022, Alibaba Group. */
0006 
0007 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
0008 /* Copyright (c) 2008-2019, IBM Corporation */
0009 
0010 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
0011 
0012 #include <linux/errno.h>
0013 #include <linux/pci.h>
0014 #include <linux/types.h>
0015 #include <linux/uaccess.h>
0016 #include <linux/vmalloc.h>
0017 #include <net/addrconf.h>
0018 #include <rdma/erdma-abi.h>
0019 #include <rdma/ib_umem.h>
0020 #include <rdma/ib_user_verbs.h>
0021 #include <rdma/ib_verbs.h>
0022 #include <rdma/uverbs_ioctl.h>
0023 
0024 #include "erdma.h"
0025 #include "erdma_cm.h"
0026 #include "erdma_hw.h"
0027 #include "erdma_verbs.h"
0028 
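/*
 * create_qp_cmd() builds a CREATE_QP request and posts it on the device
 * command queue.  Kernel QPs describe each work queue with a single inline
 * MTT entry covering a contiguous DMA buffer, while user QPs forward the
 * MTT layout gathered from the userspace queue buffers in init_user_qp().
 * On success the response cookie is stashed in qp->attrs.cookie.
 */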
0029 static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
0030 {
0031     struct erdma_cmdq_create_qp_req req;
0032     struct erdma_pd *pd = to_epd(qp->ibqp.pd);
0033     struct erdma_uqp *user_qp;
0034     u64 resp0, resp1;
0035     int err;
0036 
0037     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
0038                 CMDQ_OPCODE_CREATE_QP);
0039 
0040     req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
0041                   ilog2(qp->attrs.sq_size)) |
0042            FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
0043     req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
0044                   ilog2(qp->attrs.rq_size)) |
0045            FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
0046 
0047     if (rdma_is_kernel_res(&qp->ibqp.res)) {
0048         u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT;
0049 
0050         req.sq_cqn_mtt_cfg =
0051             FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
0052                    pgsz_range) |
0053             FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
0054         req.rq_cqn_mtt_cfg =
0055             FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
0056                    pgsz_range) |
0057             FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
0058 
0059         req.sq_mtt_cfg =
0060             FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
0061             FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
0062             FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
0063                    ERDMA_MR_INLINE_MTT);
0064         req.rq_mtt_cfg = req.sq_mtt_cfg;
0065 
0066         req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
0067         req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
0068         req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
0069                       (qp->attrs.sq_size << SQEBB_SHIFT);
0070         req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
0071                       (qp->attrs.rq_size << RQE_SHIFT);
0072     } else {
0073         user_qp = &qp->user_qp;
0074         req.sq_cqn_mtt_cfg = FIELD_PREP(
0075             ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
0076             ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT);
0077         req.sq_cqn_mtt_cfg |=
0078             FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
0079 
0080         req.rq_cqn_mtt_cfg = FIELD_PREP(
0081             ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
0082             ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT);
0083         req.rq_cqn_mtt_cfg |=
0084             FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
0085 
0086         req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
0087         req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
0088                          user_qp->sq_mtt.mtt_nents) |
0089                   FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
0090                          user_qp->sq_mtt.mtt_type);
0091 
0092         req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
0093         req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
0094                          user_qp->rq_mtt.mtt_nents) |
0095                   FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
0096                          user_qp->rq_mtt.mtt_type);
0097 
0098         req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
0099         req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
0100 
0101         req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
0102         req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
0103     }
0104 
0105     err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), &resp0,
0106                   &resp1);
0107     if (!err)
0108         qp->attrs.cookie =
0109             FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
0110 
0111     return err;
0112 }
0113 
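/*
 * regmr_cmd() registers an MR with the device.  cfg0 splits the STag into
 * its parts: the low 8 bits of the lkey are the key field and the remaining
 * bits select the MPT index (e.g. lkey 0x00012300 -> MPT index 0x123,
 * key 0x00).  DMA MRs carry no page list; FRMR and indirect-MTT regions
 * pass only the DMA address of the external MTT buffer, and everything
 * else copies its inline MTT entries into req.phy_addr.
 */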
0114 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
0115 {
0116     struct erdma_cmdq_reg_mr_req req;
0117     struct erdma_pd *pd = to_epd(mr->ibmr.pd);
0118     u64 *phy_addr;
0119     int i;
0120 
0121     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
0122 
0123     req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
0124            FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
0125            FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
0126     req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
0127            FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
0128            FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
0129            FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
0130     req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
0131                   ilog2(mr->mem.page_size)) |
0132            FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
0133            FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
0134 
0135     if (mr->type == ERDMA_MR_TYPE_DMA)
0136         goto post_cmd;
0137 
0138     if (mr->type == ERDMA_MR_TYPE_NORMAL) {
0139         req.start_va = mr->mem.va;
0140         req.size = mr->mem.len;
0141     }
0142 
0143     if (mr->type == ERDMA_MR_TYPE_FRMR ||
0144         mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
0145         phy_addr = req.phy_addr;
0146         *phy_addr = mr->mem.mtt_entry[0];
0147     } else {
0148         phy_addr = req.phy_addr;
0149         for (i = 0; i < mr->mem.mtt_nents; i++)
0150             *phy_addr++ = mr->mem.mtt_entry[i];
0151     }
0152 
0153 post_cmd:
0154     return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
0155                    NULL);
0156 }
0157 
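/*
 * create_cq_cmd() is the CQ counterpart of create_qp_cmd(): kernel CQs
 * advertise one inline MTT entry for a contiguous DMA buffer, user CQs
 * forward the pinned queue-buffer MTT plus the mapped doorbell record.
 */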
0158 static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
0159 {
0160     struct erdma_cmdq_create_cq_req req;
0161     u32 page_size;
0162     struct erdma_mem *mtt;
0163 
0164     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
0165                 CMDQ_OPCODE_CREATE_CQ);
0166 
0167     req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
0168            FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
0169     req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
0170 
0171     if (rdma_is_kernel_res(&cq->ibcq.res)) {
0172         page_size = SZ_32M;
0173         req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
0174                        ilog2(page_size) - PAGE_SHIFT);
0175         req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
0176         req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
0177 
0178         req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
0179                 FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
0180                        ERDMA_MR_INLINE_MTT);
0181 
0182         req.first_page_offset = 0;
0183         req.cq_db_info_addr =
0184             cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
0185     } else {
0186         mtt = &cq->user_cq.qbuf_mtt;
0187         req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
0188                        ilog2(mtt->page_size) - PAGE_SHIFT);
0189         if (mtt->mtt_nents == 1) {
0190             req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
0191             req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
0192         } else {
0193             req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
0194             req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
0195         }
0196         req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
0197                        mtt->mtt_nents);
0198         req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
0199                        mtt->mtt_type);
0200 
0201         req.first_page_offset = mtt->page_offset;
0202         req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
0203     }
0204 
0205     return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
0206                    NULL);
0207 }
0208 
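/*
 * Round-robin index allocator over a per-resource bitmap: the search starts
 * at next_alloc_idx and wraps via find_first_zero_bit() when it runs off
 * the end; -ENOSPC is returned once the bitmap is completely set.
 */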
0209 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
0210 {
0211     int idx;
0212     unsigned long flags;
0213 
0214     spin_lock_irqsave(&res_cb->lock, flags);
0215     idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
0216                  res_cb->next_alloc_idx);
0217     if (idx == res_cb->max_cap) {
0218         idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
0219         if (idx == res_cb->max_cap) {
0220             res_cb->next_alloc_idx = 1;
0221             spin_unlock_irqrestore(&res_cb->lock, flags);
0222             return -ENOSPC;
0223         }
0224     }
0225 
0226     set_bit(idx, res_cb->bitmap);
0227     res_cb->next_alloc_idx = idx + 1;
0228     spin_unlock_irqrestore(&res_cb->lock, flags);
0229 
0230     return idx;
0231 }
0232 
0233 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
0234 {
0235     unsigned long flags;
0236     u32 used;
0237 
0238     spin_lock_irqsave(&res_cb->lock, flags);
0239     used = __test_and_clear_bit(idx, res_cb->bitmap);
0240     spin_unlock_irqrestore(&res_cb->lock, flags);
0241     WARN_ON(!used);
0242 }
0243 
0244 static struct rdma_user_mmap_entry *
0245 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
0246                  u32 size, u8 mmap_flag, u64 *mmap_offset)
0247 {
0248     struct erdma_user_mmap_entry *entry =
0249         kzalloc(sizeof(*entry), GFP_KERNEL);
0250     int ret;
0251 
0252     if (!entry)
0253         return NULL;
0254 
0255     entry->address = (u64)address;
0256     entry->mmap_flag = mmap_flag;
0257 
0258     size = PAGE_ALIGN(size);
0259 
0260     ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
0261                       size);
0262     if (ret) {
0263         kfree(entry);
0264         return NULL;
0265     }
0266 
0267     *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
0268 
0269     return &entry->rdma_entry;
0270 }
0271 
0272 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
0273                struct ib_udata *unused)
0274 {
0275     struct erdma_dev *dev = to_edev(ibdev);
0276 
0277     memset(attr, 0, sizeof(*attr));
0278 
0279     attr->max_mr_size = dev->attrs.max_mr_size;
0280     attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
0281     attr->vendor_part_id = dev->pdev->device;
0282     attr->hw_ver = dev->pdev->revision;
0283     attr->max_qp = dev->attrs.max_qp - 1;
0284     attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
0285     attr->max_qp_rd_atom = dev->attrs.max_ord;
0286     attr->max_qp_init_rd_atom = dev->attrs.max_ird;
0287     attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
0288     attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
0289     attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
0290     ibdev->local_dma_lkey = dev->attrs.local_dma_key;
0291     attr->max_send_sge = dev->attrs.max_send_sge;
0292     attr->max_recv_sge = dev->attrs.max_recv_sge;
0293     attr->max_sge_rd = dev->attrs.max_sge_rd;
0294     attr->max_cq = dev->attrs.max_cq - 1;
0295     attr->max_cqe = dev->attrs.max_cqe;
0296     attr->max_mr = dev->attrs.max_mr;
0297     attr->max_pd = dev->attrs.max_pd;
0298     attr->max_mw = dev->attrs.max_mw;
0299     attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
0300     attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
0301     attr->fw_ver = dev->attrs.fw_version;
0302 
0303     if (dev->netdev)
0304         addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
0305                     dev->netdev->dev_addr);
0306 
0307     return 0;
0308 }
0309 
0310 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
0311             union ib_gid *gid)
0312 {
0313     struct erdma_dev *dev = to_edev(ibdev);
0314 
0315     memset(gid, 0, sizeof(*gid));
0316     ether_addr_copy(gid->raw, dev->attrs.peer_addr);
0317 
0318     return 0;
0319 }
0320 
0321 int erdma_query_port(struct ib_device *ibdev, u32 port,
0322              struct ib_port_attr *attr)
0323 {
0324     struct erdma_dev *dev = to_edev(ibdev);
0325     struct net_device *ndev = dev->netdev;
0326 
0327     memset(attr, 0, sizeof(*attr));
0328 
0329     attr->gid_tbl_len = 1;
0330     attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
0331     attr->max_msg_sz = -1;
0332 
0333     if (!ndev)
0334         goto out;
0335 
0336     ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
0337     attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
0338     attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
0339     if (netif_running(ndev) && netif_carrier_ok(ndev))
0340         dev->state = IB_PORT_ACTIVE;
0341     else
0342         dev->state = IB_PORT_DOWN;
0343     attr->state = dev->state;
0344 
0345 out:
0346     if (dev->state == IB_PORT_ACTIVE)
0347         attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
0348     else
0349         attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
0350 
0351     return 0;
0352 }
0353 
0354 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
0355                  struct ib_port_immutable *port_immutable)
0356 {
0357     port_immutable->gid_tbl_len = 1;
0358     port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
0359 
0360     return 0;
0361 }
0362 
0363 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0364 {
0365     struct erdma_pd *pd = to_epd(ibpd);
0366     struct erdma_dev *dev = to_edev(ibpd->device);
0367     int pdn;
0368 
0369     pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
0370     if (pdn < 0)
0371         return pdn;
0372 
0373     pd->pdn = pdn;
0374 
0375     return 0;
0376 }
0377 
0378 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0379 {
0380     struct erdma_pd *pd = to_epd(ibpd);
0381     struct erdma_dev *dev = to_edev(ibpd->device);
0382 
0383     erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
0384 
0385     return 0;
0386 }
0387 
0388 static int erdma_qp_validate_cap(struct erdma_dev *dev,
0389                  struct ib_qp_init_attr *attrs)
0390 {
0391     if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
0392         (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
0393         (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
0394         (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
0395         (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
0396         !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
0397         return -EINVAL;
0398     }
0399 
0400     return 0;
0401 }
0402 
0403 static int erdma_qp_validate_attr(struct erdma_dev *dev,
0404                   struct ib_qp_init_attr *attrs)
0405 {
0406     if (attrs->qp_type != IB_QPT_RC)
0407         return -EOPNOTSUPP;
0408 
0409     if (attrs->srq)
0410         return -EOPNOTSUPP;
0411 
0412     if (!attrs->send_cq || !attrs->recv_cq)
0413         return -EOPNOTSUPP;
0414 
0415     return 0;
0416 }
0417 
0418 static void free_kernel_qp(struct erdma_qp *qp)
0419 {
0420     struct erdma_dev *dev = qp->dev;
0421 
0422     vfree(qp->kern_qp.swr_tbl);
0423     vfree(qp->kern_qp.rwr_tbl);
0424 
0425     if (qp->kern_qp.sq_buf)
0426         dma_free_coherent(
0427             &dev->pdev->dev,
0428             WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
0429             qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
0430 
0431     if (qp->kern_qp.rq_buf)
0432         dma_free_coherent(
0433             &dev->pdev->dev,
0434             WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
0435             qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
0436 }
0437 
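/*
 * Kernel QP queues are coherent DMA buffers sized to (depth << entry shift)
 * plus ERDMA_EXTRA_BUFFER_SIZE; the bytes past the queue entries serve as
 * the doorbell-info area that create_qp_cmd() reports to the device.
 */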
0438 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
0439               struct ib_qp_init_attr *attrs)
0440 {
0441     struct erdma_kqp *kqp = &qp->kern_qp;
0442     int size;
0443 
0444     if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
0445         kqp->sig_all = 1;
0446 
0447     kqp->sq_pi = 0;
0448     kqp->sq_ci = 0;
0449     kqp->rq_pi = 0;
0450     kqp->rq_ci = 0;
0451     kqp->hw_sq_db =
0452         dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
0453     kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
0454 
0455     kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
0456     kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
0457     if (!kqp->swr_tbl || !kqp->rwr_tbl)
0458         goto err_out;
0459 
0460     size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
0461     kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
0462                      &kqp->sq_buf_dma_addr, GFP_KERNEL);
0463     if (!kqp->sq_buf)
0464         goto err_out;
0465 
0466     size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
0467     kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
0468                      &kqp->rq_buf_dma_addr, GFP_KERNEL);
0469     if (!kqp->rq_buf)
0470         goto err_out;
0471 
0472     kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
0473     kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
0474 
0475     return 0;
0476 
0477 err_out:
0478     free_kernel_qp(qp);
0479     return -ENOMEM;
0480 }
0481 
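/*
 * get_mtt_entries() pins a user buffer and builds its MTT.  Mappings of up
 * to ERDMA_MAX_INLINE_MTT_ENTRIES pages are stored inline in
 * mem->mtt_entry[]; larger mappings (or callers forcing it) get an external
 * MTT buffer whose DMA-mapped address is placed in mtt_entry[0] instead.
 */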
0482 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
0483                u64 start, u64 len, int access, u64 virt,
0484                unsigned long req_page_size, u8 force_indirect_mtt)
0485 {
0486     struct ib_block_iter biter;
0487     uint64_t *phy_addr = NULL;
0488     int ret = 0;
0489 
0490     mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
0491     if (IS_ERR(mem->umem)) {
0492         ret = PTR_ERR(mem->umem);
0493         mem->umem = NULL;
0494         return ret;
0495     }
0496 
0497     mem->va = virt;
0498     mem->len = len;
0499     mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
0500     mem->page_offset = start & (mem->page_size - 1);
0501     mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
0502     mem->page_cnt = mem->mtt_nents;
0503 
0504     if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
0505         force_indirect_mtt) {
0506         mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
0507         mem->mtt_buf =
0508             alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
0509         if (!mem->mtt_buf) {
0510             ret = -ENOMEM;
0511             goto error_ret;
0512         }
0513         phy_addr = mem->mtt_buf;
0514     } else {
0515         mem->mtt_type = ERDMA_MR_INLINE_MTT;
0516         phy_addr = mem->mtt_entry;
0517     }
0518 
0519     rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
0520         *phy_addr = rdma_block_iter_dma_address(&biter);
0521         phy_addr++;
0522     }
0523 
0524     if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
0525         mem->mtt_entry[0] =
0526             dma_map_single(&dev->pdev->dev, mem->mtt_buf,
0527                        MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
0528         if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
0529             free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
0530             mem->mtt_buf = NULL;
0531             ret = -ENOMEM;
0532             goto error_ret;
0533         }
0534     }
0535 
0536     return 0;
0537 
0538 error_ret:
0539     if (mem->umem) {
0540         ib_umem_release(mem->umem);
0541         mem->umem = NULL;
0542     }
0543 
0544     return ret;
0545 }
0546 
0547 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
0548 {
0549     if (mem->mtt_buf) {
0550         dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
0551                  MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
0552         free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
0553     }
0554 
0555     if (mem->umem) {
0556         ib_umem_release(mem->umem);
0557         mem->umem = NULL;
0558     }
0559 }
0560 
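/*
 * User doorbell records live in ordinary process memory.  One page is
 * pinned per distinct page-aligned VA and cached on
 * ctx->dbrecords_page_list with a reference count, so QPs and CQs sharing a
 * doorbell page pin it only once.
 */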
0561 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
0562                     u64 dbrecords_va,
0563                     struct erdma_user_dbrecords_page **dbr_page,
0564                     dma_addr_t *dma_addr)
0565 {
0566     struct erdma_user_dbrecords_page *page = NULL;
0567     int rv = 0;
0568 
0569     mutex_lock(&ctx->dbrecords_page_mutex);
0570 
0571     list_for_each_entry(page, &ctx->dbrecords_page_list, list)
0572         if (page->va == (dbrecords_va & PAGE_MASK))
0573             goto found;
0574 
0575     page = kmalloc(sizeof(*page), GFP_KERNEL);
0576     if (!page) {
0577         rv = -ENOMEM;
0578         goto out;
0579     }
0580 
0581     page->va = (dbrecords_va & PAGE_MASK);
0582     page->refcnt = 0;
0583 
0584     page->umem = ib_umem_get(ctx->ibucontext.device,
0585                  dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
0586     if (IS_ERR(page->umem)) {
0587         rv = PTR_ERR(page->umem);
0588         kfree(page);
0589         goto out;
0590     }
0591 
0592     list_add(&page->list, &ctx->dbrecords_page_list);
0593 
0594 found:
0595     *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
0596             (dbrecords_va & ~PAGE_MASK);
0597     *dbr_page = page;
0598     page->refcnt++;
0599 
0600 out:
0601     mutex_unlock(&ctx->dbrecords_page_mutex);
0602     return rv;
0603 }
0604 
0605 static void
0606 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
0607                struct erdma_user_dbrecords_page **dbr_page)
0608 {
0609     if (!ctx || !(*dbr_page))
0610         return;
0611 
0612     mutex_lock(&ctx->dbrecords_page_mutex);
0613     if (--(*dbr_page)->refcnt == 0) {
0614         list_del(&(*dbr_page)->list);
0615         ib_umem_release((*dbr_page)->umem);
0616         kfree(*dbr_page);
0617     }
0618 
0619     *dbr_page = NULL;
0620     mutex_unlock(&ctx->dbrecords_page_mutex);
0621 }
0622 
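/*
 * init_user_qp() expects one user buffer holding the SQ followed by the RQ
 * at a page-aligned offset, plus a separate doorbell-record VA.  Both
 * queues are pinned independently; the RQ doorbell record sits
 * ERDMA_DB_SIZE bytes after the SQ one.
 */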
0623 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
0624             u64 va, u32 len, u64 db_info_va)
0625 {
0626     dma_addr_t db_info_dma_addr;
0627     u32 rq_offset;
0628     int ret;
0629 
0630     if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) +
0631            qp->attrs.rq_size * RQE_SIZE))
0632         return -EINVAL;
0633 
0634     ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
0635                   qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
0636                   (SZ_1M - SZ_4K), 1);
0637     if (ret)
0638         return ret;
0639 
0640     rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT);
0641     qp->user_qp.rq_offset = rq_offset;
0642 
0643     ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
0644                   qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
0645                   (SZ_1M - SZ_4K), 1);
0646     if (ret)
0647         goto put_sq_mtt;
0648 
0649     ret = erdma_map_user_dbrecords(uctx, db_info_va,
0650                        &qp->user_qp.user_dbr_page,
0651                        &db_info_dma_addr);
0652     if (ret)
0653         goto put_rq_mtt;
0654 
0655     qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
0656     qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
0657 
0658     return 0;
0659 
0660 put_rq_mtt:
0661     put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
0662 
0663 put_sq_mtt:
0664     put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
0665 
0666     return ret;
0667 }
0668 
0669 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
0670 {
0671     put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
0672     put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
0673     erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
0674 }
0675 
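/*
 * QP creation flow: validate caps and attributes, allocate a QPN from
 * dev->qp_xa, round both queue depths up to powers of two (the SQ depth is
 * additionally scaled by ERDMA_MAX_WQEBB_PER_SQE), set up kernel or user
 * queue buffers, then issue CREATE_QP to the device.
 */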
0676 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
0677             struct ib_udata *udata)
0678 {
0679     struct erdma_qp *qp = to_eqp(ibqp);
0680     struct erdma_dev *dev = to_edev(ibqp->device);
0681     struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
0682         udata, struct erdma_ucontext, ibucontext);
0683     struct erdma_ureq_create_qp ureq;
0684     struct erdma_uresp_create_qp uresp;
0685     int ret;
0686 
0687     ret = erdma_qp_validate_cap(dev, attrs);
0688     if (ret)
0689         goto err_out;
0690 
0691     ret = erdma_qp_validate_attr(dev, attrs);
0692     if (ret)
0693         goto err_out;
0694 
0695     qp->scq = to_ecq(attrs->send_cq);
0696     qp->rcq = to_ecq(attrs->recv_cq);
0697     qp->dev = dev;
0698     qp->attrs.cc = dev->attrs.cc;
0699 
0700     init_rwsem(&qp->state_lock);
0701     kref_init(&qp->ref);
0702     init_completion(&qp->safe_free);
0703 
0704     ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
0705                   XA_LIMIT(1, dev->attrs.max_qp - 1),
0706                   &dev->next_alloc_qpn, GFP_KERNEL);
0707     if (ret < 0) {
0708         ret = -ENOMEM;
0709         goto err_out;
0710     }
0711 
0712     qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
0713                            ERDMA_MAX_WQEBB_PER_SQE);
0714     qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
0715 
0716     if (uctx) {
0717         ret = ib_copy_from_udata(&ureq, udata,
0718                      min(sizeof(ureq), udata->inlen));
0719         if (ret)
0720             goto err_out_xa;
0721 
0722         ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
0723                    ureq.db_record_va);
0724         if (ret)
0725             goto err_out_xa;
0726 
0727         memset(&uresp, 0, sizeof(uresp));
0728 
0729         uresp.num_sqe = qp->attrs.sq_size;
0730         uresp.num_rqe = qp->attrs.rq_size;
0731         uresp.qp_id = QP_ID(qp);
0732         uresp.rq_offset = qp->user_qp.rq_offset;
0733 
0734         ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
0735         if (ret)
0736             goto err_out_cmd;
0737     } else {
0738         init_kernel_qp(dev, qp, attrs);
0739     }
0740 
0741     qp->attrs.max_send_sge = attrs->cap.max_send_sge;
0742     qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
0743     qp->attrs.state = ERDMA_QP_STATE_IDLE;
0744 
0745     ret = create_qp_cmd(dev, qp);
0746     if (ret)
0747         goto err_out_cmd;
0748 
0749     spin_lock_init(&qp->lock);
0750 
0751     return 0;
0752 
0753 err_out_cmd:
0754     if (uctx)
0755         free_user_qp(qp, uctx);
0756     else
0757         free_kernel_qp(qp);
0758 err_out_xa:
0759     xa_erase(&dev->qp_xa, QP_ID(qp));
0760 err_out:
0761     return ret;
0762 }
0763 
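/*
 * An STag is an 8-bit key plus an MPT index taken from the STAG_IDX
 * resource bitmap; the key part is kept at zero for now, matching the
 * lkey decomposition used in regmr_cmd() and erdma_dereg_mr().
 */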
0764 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
0765 {
0766     int stag_idx;
0767 
0768     stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
0769     if (stag_idx < 0)
0770         return stag_idx;
0771 
0772     /* For now, we always let key field be zero. */
0773     *stag = (stag_idx << 8);
0774 
0775     return 0;
0776 }
0777 
0778 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
0779 {
0780     struct erdma_dev *dev = to_edev(ibpd->device);
0781     struct erdma_mr *mr;
0782     u32 stag;
0783     int ret;
0784 
0785     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0786     if (!mr)
0787         return ERR_PTR(-ENOMEM);
0788 
0789     ret = erdma_create_stag(dev, &stag);
0790     if (ret)
0791         goto out_free;
0792 
0793     mr->type = ERDMA_MR_TYPE_DMA;
0794 
0795     mr->ibmr.lkey = stag;
0796     mr->ibmr.rkey = stag;
0797     mr->ibmr.pd = ibpd;
0798     mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
0799     ret = regmr_cmd(dev, mr);
0800     if (ret)
0801         goto out_remove_stag;
0802 
0803     return &mr->ibmr;
0804 
0805 out_remove_stag:
0806     erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
0807                mr->ibmr.lkey >> 8);
0808 
0809 out_free:
0810     kfree(mr);
0811 
0812     return ERR_PTR(ret);
0813 }
0814 
0815 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
0816                 u32 max_num_sg)
0817 {
0818     struct erdma_mr *mr;
0819     struct erdma_dev *dev = to_edev(ibpd->device);
0820     int ret;
0821     u32 stag;
0822 
0823     if (mr_type != IB_MR_TYPE_MEM_REG)
0824         return ERR_PTR(-EOPNOTSUPP);
0825 
0826     if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
0827         return ERR_PTR(-EINVAL);
0828 
0829     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0830     if (!mr)
0831         return ERR_PTR(-ENOMEM);
0832 
0833     ret = erdma_create_stag(dev, &stag);
0834     if (ret)
0835         goto out_free;
0836 
0837     mr->type = ERDMA_MR_TYPE_FRMR;
0838 
0839     mr->ibmr.lkey = stag;
0840     mr->ibmr.rkey = stag;
0841     mr->ibmr.pd = ibpd;
0842     /* update it in FRMR. */
0843     mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
0844              ERDMA_MR_ACC_RW;
0845 
0846     mr->mem.page_size = PAGE_SIZE; /* update it later. */
0847     mr->mem.page_cnt = max_num_sg;
0848     mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
0849     mr->mem.mtt_buf =
0850         alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
0851     if (!mr->mem.mtt_buf) {
0852         ret = -ENOMEM;
0853         goto out_remove_stag;
0854     }
0855 
0856     mr->mem.mtt_entry[0] =
0857         dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
0858                    MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
0859     if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
0860         ret = -ENOMEM;
0861         goto out_free_mtt;
0862     }
0863 
0864     ret = regmr_cmd(dev, mr);
0865     if (ret)
0866         goto out_dma_unmap;
0867 
0868     return &mr->ibmr;
0869 
0870 out_dma_unmap:
0871     dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
0872              MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
0873 out_free_mtt:
0874     free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
0875 
0876 out_remove_stag:
0877     erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
0878                mr->ibmr.lkey >> 8);
0879 
0880 out_free:
0881     kfree(mr);
0882 
0883     return ERR_PTR(ret);
0884 }
0885 
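/*
 * Fast-register path: ib_sg_to_pages() feeds page addresses to
 * erdma_set_page(), which writes them straight into the FRMR's preallocated
 * indirect MTT buffer, bounded by the page_cnt chosen at MR allocation.
 */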
0886 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
0887 {
0888     struct erdma_mr *mr = to_emr(ibmr);
0889 
0890     if (mr->mem.mtt_nents >= mr->mem.page_cnt)
0891         return -1;
0892 
0893     *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
0894     mr->mem.mtt_nents++;
0895 
0896     return 0;
0897 }
0898 
0899 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
0900             unsigned int *sg_offset)
0901 {
0902     struct erdma_mr *mr = to_emr(ibmr);
0903     int num;
0904 
0905     mr->mem.mtt_nents = 0;
0906 
0907     num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
0908                  erdma_set_page);
0909 
0910     return num;
0911 }
0912 
0913 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
0914                 u64 virt, int access, struct ib_udata *udata)
0915 {
0916     struct erdma_mr *mr = NULL;
0917     struct erdma_dev *dev = to_edev(ibpd->device);
0918     u32 stag;
0919     int ret;
0920 
0921     if (!len || len > dev->attrs.max_mr_size)
0922         return ERR_PTR(-EINVAL);
0923 
0924     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0925     if (!mr)
0926         return ERR_PTR(-ENOMEM);
0927 
0928     ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
0929                   SZ_2G - SZ_4K, 0);
0930     if (ret)
0931         goto err_out_free;
0932 
0933     ret = erdma_create_stag(dev, &stag);
0934     if (ret)
0935         goto err_out_put_mtt;
0936 
0937     mr->ibmr.lkey = mr->ibmr.rkey = stag;
0938     mr->ibmr.pd = ibpd;
0939     mr->mem.va = virt;
0940     mr->mem.len = len;
0941     mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
0942     mr->valid = 1;
0943     mr->type = ERDMA_MR_TYPE_NORMAL;
0944 
0945     ret = regmr_cmd(dev, mr);
0946     if (ret)
0947         goto err_out_mr;
0948 
0949     return &mr->ibmr;
0950 
0951 err_out_mr:
0952     erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
0953                mr->ibmr.lkey >> 8);
0954 
0955 err_out_put_mtt:
0956     put_mtt_entries(dev, &mr->mem);
0957 
0958 err_out_free:
0959     kfree(mr);
0960 
0961     return ERR_PTR(ret);
0962 }
0963 
0964 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
0965 {
0966     struct erdma_mr *mr;
0967     struct erdma_dev *dev = to_edev(ibmr->device);
0968     struct erdma_cmdq_dereg_mr_req req;
0969     int ret;
0970 
0971     mr = to_emr(ibmr);
0972 
0973     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
0974                 CMDQ_OPCODE_DEREG_MR);
0975 
0976     req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
0977           FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
0978 
0979     ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
0980                   NULL);
0981     if (ret)
0982         return ret;
0983 
0984     erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
0985 
0986     put_mtt_entries(dev, &mr->mem);
0987 
0988     kfree(mr);
0989     return 0;
0990 }
0991 
0992 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
0993 {
0994     struct erdma_cq *cq = to_ecq(ibcq);
0995     struct erdma_dev *dev = to_edev(ibcq->device);
0996     struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
0997         udata, struct erdma_ucontext, ibucontext);
0998     int err;
0999     struct erdma_cmdq_destroy_cq_req req;
1000 
1001     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1002                 CMDQ_OPCODE_DESTROY_CQ);
1003     req.cqn = cq->cqn;
1004 
1005     err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
1006                   NULL);
1007     if (err)
1008         return err;
1009 
1010     if (rdma_is_kernel_res(&cq->ibcq.res)) {
1011         dma_free_coherent(&dev->pdev->dev,
1012                   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1013                   cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1014     } else {
1015         erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1016         put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1017     }
1018 
1019     xa_erase(&dev->cq_xa, cq->cqn);
1020 
1021     return 0;
1022 }
1023 
1024 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1025 {
1026     struct erdma_qp *qp = to_eqp(ibqp);
1027     struct erdma_dev *dev = to_edev(ibqp->device);
1028     struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1029         udata, struct erdma_ucontext, ibucontext);
1030     struct erdma_qp_attrs qp_attrs;
1031     int err;
1032     struct erdma_cmdq_destroy_qp_req req;
1033 
1034     down_write(&qp->state_lock);
1035     qp_attrs.state = ERDMA_QP_STATE_ERROR;
1036     erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
1037     up_write(&qp->state_lock);
1038 
1039     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1040                 CMDQ_OPCODE_DESTROY_QP);
1041     req.qpn = QP_ID(qp);
1042 
1043     err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
1044                   NULL);
1045     if (err)
1046         return err;
1047 
1048     erdma_qp_put(qp);
1049     wait_for_completion(&qp->safe_free);
1050 
1051     if (rdma_is_kernel_res(&qp->ibqp.res)) {
1052         vfree(qp->kern_qp.swr_tbl);
1053         vfree(qp->kern_qp.rwr_tbl);
1054         dma_free_coherent(
1055             &dev->pdev->dev,
1056             WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
1057             qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
1058         dma_free_coherent(
1059             &dev->pdev->dev,
1060             WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
1061             qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
1062     } else {
1063         put_mtt_entries(dev, &qp->user_qp.sq_mtt);
1064         put_mtt_entries(dev, &qp->user_qp.rq_mtt);
1065         erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1066     }
1067 
1068     if (qp->cep)
1069         erdma_cep_put(qp->cep);
1070     xa_erase(&dev->qp_xa, QP_ID(qp));
1071 
1072     return 0;
1073 }
1074 
1075 void erdma_qp_get_ref(struct ib_qp *ibqp)
1076 {
1077     erdma_qp_get(to_eqp(ibqp));
1078 }
1079 
1080 void erdma_qp_put_ref(struct ib_qp *ibqp)
1081 {
1082     erdma_qp_put(to_eqp(ibqp));
1083 }
1084 
1085 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1086 {
1087     struct rdma_user_mmap_entry *rdma_entry;
1088     struct erdma_user_mmap_entry *entry;
1089     pgprot_t prot;
1090     int err;
1091 
1092     rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1093     if (!rdma_entry)
1094         return -EINVAL;
1095 
1096     entry = to_emmap(rdma_entry);
1097 
1098     switch (entry->mmap_flag) {
1099     case ERDMA_MMAP_IO_NC:
1100         /* map doorbell. */
1101         prot = pgprot_device(vma->vm_page_prot);
1102         break;
1103     default:
1104         return -EINVAL;
1105     }
1106 
1107     err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1108                 prot, rdma_entry);
1109 
1110     rdma_user_mmap_entry_put(rdma_entry);
1111     return err;
1112 }
1113 
1114 void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
1115 {
1116     struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
1117 
1118     kfree(entry);
1119 }
1120 
1121 #define ERDMA_SDB_PAGE 0
1122 #define ERDMA_SDB_ENTRY 1
1123 #define ERDMA_SDB_SHARED 2
1124 
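/*
 * Each user context is given one of three send-doorbell layouts, tried in
 * order: a private SDB page (ERDMA_SDB_PAGE), a single entry in a shared
 * type-1 page (ERDMA_SDB_ENTRY), or the shared doorbell page
 * (ERDMA_SDB_SHARED) when direct WQE (dwqe) is disabled or both bitmaps
 * are full.
 */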
1125 static void alloc_db_resources(struct erdma_dev *dev,
1126                    struct erdma_ucontext *ctx)
1127 {
1128     u32 bitmap_idx;
1129     struct erdma_devattr *attrs = &dev->attrs;
1130 
1131     if (attrs->disable_dwqe)
1132         goto alloc_normal_db;
1133 
1134     /* Try to alloc independent SDB page. */
1135     spin_lock(&dev->db_bitmap_lock);
1136     bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
1137     if (bitmap_idx != attrs->dwqe_pages) {
1138         set_bit(bitmap_idx, dev->sdb_page);
1139         spin_unlock(&dev->db_bitmap_lock);
1140 
1141         ctx->sdb_type = ERDMA_SDB_PAGE;
1142         ctx->sdb_idx = bitmap_idx;
1143         ctx->sdb_page_idx = bitmap_idx;
1144         ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1145                (bitmap_idx << PAGE_SHIFT);
1146         ctx->sdb_page_off = 0;
1147 
1148         return;
1149     }
1150 
1151     bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
1152     if (bitmap_idx != attrs->dwqe_entries) {
1153         set_bit(bitmap_idx, dev->sdb_entry);
1154         spin_unlock(&dev->db_bitmap_lock);
1155 
1156         ctx->sdb_type = ERDMA_SDB_ENTRY;
1157         ctx->sdb_idx = bitmap_idx;
1158         ctx->sdb_page_idx = attrs->dwqe_pages +
1159                     bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1160         ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1161 
1162         ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1163                (ctx->sdb_page_idx << PAGE_SHIFT);
1164 
1165         return;
1166     }
1167 
1168     spin_unlock(&dev->db_bitmap_lock);
1169 
1170 alloc_normal_db:
1171     ctx->sdb_type = ERDMA_SDB_SHARED;
1172     ctx->sdb_idx = 0;
1173     ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
1174     ctx->sdb_page_off = 0;
1175 
1176     ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
1177 }
1178 
1179 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1180 {
1181     rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1182     rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1183     rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1184 }
1185 
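/*
 * alloc_ucontext exposes the SQ/RQ/CQ doorbell regions to userspace as
 * ERDMA_MMAP_IO_NC mmap entries (mapped with pgprot_device() in
 * erdma_mmap()); the offsets returned in uresp are the cookies userspace
 * later passes to mmap().
 */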
1186 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1187 {
1188     struct erdma_ucontext *ctx = to_ectx(ibctx);
1189     struct erdma_dev *dev = to_edev(ibctx->device);
1190     int ret;
1191     struct erdma_uresp_alloc_ctx uresp = {};
1192 
1193     if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1194         ret = -ENOMEM;
1195         goto err_out;
1196     }
1197 
1198     INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1199     mutex_init(&ctx->dbrecords_page_mutex);
1200 
1201     alloc_db_resources(dev, ctx);
1202 
1203     ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1204     ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1205 
1206     if (udata->outlen < sizeof(uresp)) {
1207         ret = -EINVAL;
1208         goto err_out;
1209     }
1210 
1211     ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1212         ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1213     if (!ctx->sq_db_mmap_entry) {
1214         ret = -ENOMEM;
1215         goto err_out;
1216     }
1217 
1218     ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1219         ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1220     if (!ctx->rq_db_mmap_entry) {
1221         ret = -EINVAL;
1222         goto err_out;
1223     }
1224 
1225     ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1226         ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1227     if (!ctx->cq_db_mmap_entry) {
1228         ret = -EINVAL;
1229         goto err_out;
1230     }
1231 
1232     uresp.dev_id = dev->pdev->device;
1233     uresp.sdb_type = ctx->sdb_type;
1234     uresp.sdb_offset = ctx->sdb_page_off;
1235 
1236     ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1237     if (ret)
1238         goto err_out;
1239 
1240     return 0;
1241 
1242 err_out:
1243     erdma_uctx_user_mmap_entries_remove(ctx);
1244     atomic_dec(&dev->num_ctx);
1245     return ret;
1246 }
1247 
1248 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1249 {
1250     struct erdma_ucontext *ctx = to_ectx(ibctx);
1251     struct erdma_dev *dev = to_edev(ibctx->device);
1252 
1253     spin_lock(&dev->db_bitmap_lock);
1254     if (ctx->sdb_type == ERDMA_SDB_PAGE)
1255         clear_bit(ctx->sdb_idx, dev->sdb_page);
1256     else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
1257         clear_bit(ctx->sdb_idx, dev->sdb_entry);
1258 
1259     erdma_uctx_user_mmap_entries_remove(ctx);
1260 
1261     spin_unlock(&dev->db_bitmap_lock);
1262 
1263     atomic_dec(&dev->num_ctx);
1264 }
1265 
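/*
 * IB verbs QP states collapse onto the smaller iWarp-style internal state
 * set (SQD -> CLOSING, SQE -> TERMINATE).  erdma_modify_qp() below only
 * translates IB_QP_STATE into an internal attribute change.
 */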
1266 static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
1267     [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
1268     [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
1269     [IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
1270     [IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
1271     [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
1272     [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
1273     [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
1274 };
1275 
1276 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1277             struct ib_udata *udata)
1278 {
1279     struct erdma_qp_attrs new_attrs;
1280     enum erdma_qp_attr_mask erdma_attr_mask = 0;
1281     struct erdma_qp *qp = to_eqp(ibqp);
1282     int ret = 0;
1283 
1284     if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1285         return -EOPNOTSUPP;
1286 
1287     memset(&new_attrs, 0, sizeof(new_attrs));
1288 
1289     if (attr_mask & IB_QP_STATE) {
1290         new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
1291 
1292         erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
1293     }
1294 
1295     down_write(&qp->state_lock);
1296 
1297     ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
1298 
1299     up_write(&qp->state_lock);
1300 
1301     return ret;
1302 }
1303 
1304 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1305            int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1306 {
1307     struct erdma_qp *qp;
1308     struct erdma_dev *dev;
1309 
1310     if (ibqp && qp_attr && qp_init_attr) {
1311         qp = to_eqp(ibqp);
1312         dev = to_edev(ibqp->device);
1313     } else {
1314         return -EINVAL;
1315     }
1316 
1317     qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1318     qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1319 
1320     qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1321     qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1322     qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1323     qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1324 
1325     qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1326     qp_attr->max_rd_atomic = qp->attrs.irq_size;
1327     qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1328 
1329     qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1330                    IB_ACCESS_REMOTE_WRITE |
1331                    IB_ACCESS_REMOTE_READ;
1332 
1333     qp_init_attr->cap = qp_attr->cap;
1334 
1335     return 0;
1336 }
1337 
1338 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1339                   struct erdma_ureq_create_cq *ureq)
1340 {
1341     int ret;
1342     struct erdma_dev *dev = to_edev(cq->ibcq.device);
1343 
1344     ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
1345                   ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1346                   1);
1347     if (ret)
1348         return ret;
1349 
1350     ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1351                        &cq->user_cq.user_dbr_page,
1352                        &cq->user_cq.db_info_dma_addr);
1353     if (ret)
1354         put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1355 
1356     return ret;
1357 }
1358 
1359 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1360 {
1361     struct erdma_dev *dev = to_edev(cq->ibcq.device);
1362 
1363     cq->kern_cq.qbuf =
1364         dma_alloc_coherent(&dev->pdev->dev,
1365                    WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1366                    &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1367     if (!cq->kern_cq.qbuf)
1368         return -ENOMEM;
1369 
1370     cq->kern_cq.db_record =
1371         (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
1372     spin_lock_init(&cq->kern_cq.lock);
1373     /* use default cqdb addr */
1374     cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1375 
1376     return 0;
1377 }
1378 
1379 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1380             struct ib_udata *udata)
1381 {
1382     struct erdma_cq *cq = to_ecq(ibcq);
1383     struct erdma_dev *dev = to_edev(ibcq->device);
1384     unsigned int depth = attr->cqe;
1385     int ret;
1386     struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1387         udata, struct erdma_ucontext, ibucontext);
1388 
1389     if (depth > dev->attrs.max_cqe)
1390         return -EINVAL;
1391 
1392     depth = roundup_pow_of_two(depth);
1393     cq->ibcq.cqe = depth;
1394     cq->depth = depth;
1395     cq->assoc_eqn = attr->comp_vector + 1;
1396 
1397     ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1398                   XA_LIMIT(1, dev->attrs.max_cq - 1),
1399                   &dev->next_alloc_cqn, GFP_KERNEL);
1400     if (ret < 0)
1401         return ret;
1402 
1403     if (!rdma_is_kernel_res(&ibcq->res)) {
1404         struct erdma_ureq_create_cq ureq;
1405         struct erdma_uresp_create_cq uresp;
1406 
1407         ret = ib_copy_from_udata(&ureq, udata,
1408                      min(udata->inlen, sizeof(ureq)));
1409         if (ret)
1410             goto err_out_xa;
1411 
1412         ret = erdma_init_user_cq(ctx, cq, &ureq);
1413         if (ret)
1414             goto err_out_xa;
1415 
1416         uresp.cq_id = cq->cqn;
1417         uresp.num_cqe = depth;
1418 
1419         ret = ib_copy_to_udata(udata, &uresp,
1420                        min(sizeof(uresp), udata->outlen));
1421         if (ret)
1422             goto err_free_res;
1423     } else {
1424         ret = erdma_init_kernel_cq(cq);
1425         if (ret)
1426             goto err_out_xa;
1427     }
1428 
1429     ret = create_cq_cmd(dev, cq);
1430     if (ret)
1431         goto err_free_res;
1432 
1433     return 0;
1434 
1435 err_free_res:
1436     if (!rdma_is_kernel_res(&ibcq->res)) {
1437         erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1438         put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1439     } else {
1440         dma_free_coherent(&dev->pdev->dev,
1441                   WARPPED_BUFSIZE(depth << CQE_SHIFT),
1442                   cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1443     }
1444 
1445 err_out_xa:
1446     xa_erase(&dev->cq_xa, cq->cqn);
1447 
1448     return ret;
1449 }
1450 
1451 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
1452 {
1453     struct ib_event event;
1454 
1455     event.device = &dev->ibdev;
1456     event.element.port_num = 1;
1457     event.event = reason;
1458 
1459     ib_dispatch_event(&event);
1460 }