0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
0004  */
0005 
0006 #include <linux/dma-buf.h>
0007 #include <linux/dma-resv.h>
0008 #include <linux/vmalloc.h>
0009 #include <linux/log2.h>
0010 
0011 #include <rdma/ib_addr.h>
0012 #include <rdma/ib_umem.h>
0013 #include <rdma/ib_user_verbs.h>
0014 #include <rdma/ib_verbs.h>
0015 #include <rdma/uverbs_ioctl.h>
0016 
0017 #include "efa.h"
0018 
0019 enum {
0020     EFA_MMAP_DMA_PAGE = 0,
0021     EFA_MMAP_IO_WC,
0022     EFA_MMAP_IO_NC,
0023 };
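/*
 * Mmap entry types: EFA_MMAP_DMA_PAGE is backed by host DMA memory (RQ and
 * CQ buffers), EFA_MMAP_IO_WC by write-combined device memory (the LLQ
 * descriptor region in the memory BAR), and EFA_MMAP_IO_NC by non-cached
 * device memory (doorbell pages in the doorbell BAR). The corresponding vma
 * attributes are presumably applied in efa_mmap(), which is not part of
 * this excerpt.
 */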
0024 
0025 #define EFA_AENQ_ENABLED_GROUPS \
0026     (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
0027      BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
0028 
0029 struct efa_user_mmap_entry {
0030     struct rdma_user_mmap_entry rdma_entry;
0031     u64 address;
0032     u8 mmap_flag;
0033 };
0034 
0035 #define EFA_DEFINE_DEVICE_STATS(op) \
0036     op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
0037     op(EFA_COMPLETED_CMDS, "completed_cmds") \
0038     op(EFA_CMDS_ERR, "cmds_err") \
0039     op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
0040     op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
0041     op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
0042     op(EFA_CREATE_QP_ERR, "create_qp_err") \
0043     op(EFA_CREATE_CQ_ERR, "create_cq_err") \
0044     op(EFA_REG_MR_ERR, "reg_mr_err") \
0045     op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
0046     op(EFA_CREATE_AH_ERR, "create_ah_err") \
0047     op(EFA_MMAP_ERR, "mmap_err")
0048 
0049 #define EFA_DEFINE_PORT_STATS(op) \
0050     op(EFA_TX_BYTES, "tx_bytes") \
0051     op(EFA_TX_PKTS, "tx_pkts") \
0052     op(EFA_RX_BYTES, "rx_bytes") \
0053     op(EFA_RX_PKTS, "rx_pkts") \
0054     op(EFA_RX_DROPS, "rx_drops") \
0055     op(EFA_SEND_BYTES, "send_bytes") \
0056     op(EFA_SEND_WRS, "send_wrs") \
0057     op(EFA_RECV_BYTES, "recv_bytes") \
0058     op(EFA_RECV_WRS, "recv_wrs") \
0059     op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
0060     op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
0061     op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
0062     op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
0063 
0064 #define EFA_STATS_ENUM(ename, name) ename,
0065 #define EFA_STATS_STR(ename, nam) \
0066     [ename].name = nam,
0067 
0068 enum efa_hw_device_stats {
0069     EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
0070 };
0071 
0072 static const struct rdma_stat_desc efa_device_stats_descs[] = {
0073     EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
0074 };
0075 
0076 enum efa_hw_port_stats {
0077     EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
0078 };
0079 
0080 static const struct rdma_stat_desc efa_port_stats_descs[] = {
0081     EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
0082 };
0083 
0084 #define EFA_CHUNK_PAYLOAD_SHIFT       12
0085 #define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
0086 #define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
0087 
0088 #define EFA_CHUNK_SHIFT               12
0089 #define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
0090 #define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
0091 
0092 #define EFA_PTRS_PER_CHUNK \
0093     ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
0094 
0095 #define EFA_CHUNK_USED_SIZE \
0096     ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
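/*
 * Worked example, assuming sizeof(struct efa_com_ctrl_buff_info) == 12
 * (a u32 length plus a 64-bit address split into two u32 words):
 * EFA_PTRS_PER_CHUNK = (4096 - 12) / 8 = 510 page pointers per chunk, and
 * EFA_CHUNK_USED_SIZE = 510 * 8 + 12 = 4092 bytes used of each 4KB chunk.
 */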
0097 
0098 struct pbl_chunk {
0099     dma_addr_t dma_addr;
0100     u64 *buf;
0101     u32 length;
0102 };
0103 
0104 struct pbl_chunk_list {
0105     struct pbl_chunk *chunks;
0106     unsigned int size;
0107 };
0108 
0109 struct pbl_context {
0110     union {
0111         struct {
0112             dma_addr_t dma_addr;
0113         } continuous;
0114         struct {
0115             u32 pbl_buf_size_in_pages;
0116             struct scatterlist *sgl;
0117             int sg_dma_cnt;
0118             struct pbl_chunk_list chunk_list;
0119         } indirect;
0120     } phys;
0121     u64 *pbl_buf;
0122     u32 pbl_buf_size_in_bytes;
0123     u8 physically_continuous;
0124 };
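/*
 * pbl_context: physically_continuous selects which union member is valid.
 * When the kvzalloc()ed pbl_buf happens to be physically contiguous it is
 * DMA-mapped as a single region (phys.continuous); otherwise the buffer's
 * pages are described to the device indirectly through a chunk list
 * (phys.indirect).
 */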
0125 
0126 static inline struct efa_dev *to_edev(struct ib_device *ibdev)
0127 {
0128     return container_of(ibdev, struct efa_dev, ibdev);
0129 }
0130 
0131 static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
0132 {
0133     return container_of(ibucontext, struct efa_ucontext, ibucontext);
0134 }
0135 
0136 static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
0137 {
0138     return container_of(ibpd, struct efa_pd, ibpd);
0139 }
0140 
0141 static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
0142 {
0143     return container_of(ibmr, struct efa_mr, ibmr);
0144 }
0145 
0146 static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
0147 {
0148     return container_of(ibqp, struct efa_qp, ibqp);
0149 }
0150 
0151 static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
0152 {
0153     return container_of(ibcq, struct efa_cq, ibcq);
0154 }
0155 
0156 static inline struct efa_ah *to_eah(struct ib_ah *ibah)
0157 {
0158     return container_of(ibah, struct efa_ah, ibah);
0159 }
0160 
0161 static inline struct efa_user_mmap_entry *
0162 to_emmap(struct rdma_user_mmap_entry *rdma_entry)
0163 {
0164     return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
0165 }
0166 
0167 #define EFA_DEV_CAP(dev, cap) \
0168     ((dev)->dev_attr.device_caps & \
0169      EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
0170 
0171 #define is_reserved_cleared(reserved) \
0172     !memchr_inv(reserved, 0, sizeof(reserved))
0173 
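/*
 * efa_zalloc_mapped()/efa_free_mapped() allocate and release zeroed,
 * physically contiguous pages that are DMA-mapped for the device and later
 * exposed to userspace through EFA_MMAP_DMA_PAGE entries (RQ and CQ
 * buffers).
 */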
0174 static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
0175                    size_t size, enum dma_data_direction dir)
0176 {
0177     void *addr;
0178 
0179     addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
0180     if (!addr)
0181         return NULL;
0182 
0183     *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
0184     if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
0185         ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
0186         free_pages_exact(addr, size);
0187         return NULL;
0188     }
0189 
0190     return addr;
0191 }
0192 
0193 static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
0194                 dma_addr_t dma_addr,
0195                 size_t size, enum dma_data_direction dir)
0196 {
0197     dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
0198     free_pages_exact(cpu_addr, size);
0199 }
0200 
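/*
 * efa_query_device() fills ib_device_attr from dev->dev_attr and, when the
 * caller supplied response space, reports extended capabilities (RDMA read,
 * RNR retry, CQ notifications) through the vendor-specific udata response.
 */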
0201 int efa_query_device(struct ib_device *ibdev,
0202              struct ib_device_attr *props,
0203              struct ib_udata *udata)
0204 {
0205     struct efa_com_get_device_attr_result *dev_attr;
0206     struct efa_ibv_ex_query_device_resp resp = {};
0207     struct efa_dev *dev = to_edev(ibdev);
0208     int err;
0209 
0210     if (udata && udata->inlen &&
0211         !ib_is_udata_cleared(udata, 0, udata->inlen)) {
0212         ibdev_dbg(ibdev,
0213               "Incompatible ABI params, udata not cleared\n");
0214         return -EINVAL;
0215     }
0216 
0217     dev_attr = &dev->dev_attr;
0218 
0219     memset(props, 0, sizeof(*props));
0220     props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
0221     props->page_size_cap = dev_attr->page_size_cap;
0222     props->vendor_id = dev->pdev->vendor;
0223     props->vendor_part_id = dev->pdev->device;
0224     props->hw_ver = dev->pdev->subsystem_device;
0225     props->max_qp = dev_attr->max_qp;
0226     props->max_cq = dev_attr->max_cq;
0227     props->max_pd = dev_attr->max_pd;
0228     props->max_mr = dev_attr->max_mr;
0229     props->max_ah = dev_attr->max_ah;
0230     props->max_cqe = dev_attr->max_cq_depth;
0231     props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
0232                  dev_attr->max_rq_depth);
0233     props->max_send_sge = dev_attr->max_sq_sge;
0234     props->max_recv_sge = dev_attr->max_rq_sge;
0235     props->max_sge_rd = dev_attr->max_wr_rdma_sge;
0236     props->max_pkeys = 1;
0237 
0238     if (udata && udata->outlen) {
0239         resp.max_sq_sge = dev_attr->max_sq_sge;
0240         resp.max_rq_sge = dev_attr->max_rq_sge;
0241         resp.max_sq_wr = dev_attr->max_sq_depth;
0242         resp.max_rq_wr = dev_attr->max_rq_depth;
0243         resp.max_rdma_size = dev_attr->max_rdma_size;
0244 
0245         if (EFA_DEV_CAP(dev, RDMA_READ))
0246             resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
0247 
0248         if (EFA_DEV_CAP(dev, RNR_RETRY))
0249             resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
0250 
0251         if (dev->neqs)
0252             resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
0253 
0254         err = ib_copy_to_udata(udata, &resp,
0255                        min(sizeof(resp), udata->outlen));
0256         if (err) {
0257             ibdev_dbg(ibdev,
0258                   "Failed to copy udata for query_device\n");
0259             return err;
0260         }
0261     }
0262 
0263     return 0;
0264 }
0265 
0266 int efa_query_port(struct ib_device *ibdev, u32 port,
0267            struct ib_port_attr *props)
0268 {
0269     struct efa_dev *dev = to_edev(ibdev);
0270 
0271     props->lmc = 1;
0272 
0273     props->state = IB_PORT_ACTIVE;
0274     props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
0275     props->gid_tbl_len = 1;
0276     props->pkey_tbl_len = 1;
0277     props->active_speed = IB_SPEED_EDR;
0278     props->active_width = IB_WIDTH_4X;
0279     props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
0280     props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
0281     props->max_msg_sz = dev->dev_attr.mtu;
0282     props->max_vl_num = 1;
0283 
0284     return 0;
0285 }
0286 
0287 int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
0288          int qp_attr_mask,
0289          struct ib_qp_init_attr *qp_init_attr)
0290 {
0291     struct efa_dev *dev = to_edev(ibqp->device);
0292     struct efa_com_query_qp_params params = {};
0293     struct efa_com_query_qp_result result;
0294     struct efa_qp *qp = to_eqp(ibqp);
0295     int err;
0296 
0297 #define EFA_QUERY_QP_SUPP_MASK \
0298     (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
0299      IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
0300 
0301     if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
0302         ibdev_dbg(&dev->ibdev,
0303               "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
0304               qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
0305         return -EOPNOTSUPP;
0306     }
0307 
0308     memset(qp_attr, 0, sizeof(*qp_attr));
0309     memset(qp_init_attr, 0, sizeof(*qp_init_attr));
0310 
0311     params.qp_handle = qp->qp_handle;
0312     err = efa_com_query_qp(&dev->edev, &params, &result);
0313     if (err)
0314         return err;
0315 
0316     qp_attr->qp_state = result.qp_state;
0317     qp_attr->qkey = result.qkey;
0318     qp_attr->sq_psn = result.sq_psn;
0319     qp_attr->sq_draining = result.sq_draining;
0320     qp_attr->port_num = 1;
0321     qp_attr->rnr_retry = result.rnr_retry;
0322 
0323     qp_attr->cap.max_send_wr = qp->max_send_wr;
0324     qp_attr->cap.max_recv_wr = qp->max_recv_wr;
0325     qp_attr->cap.max_send_sge = qp->max_send_sge;
0326     qp_attr->cap.max_recv_sge = qp->max_recv_sge;
0327     qp_attr->cap.max_inline_data = qp->max_inline_data;
0328 
0329     qp_init_attr->qp_type = ibqp->qp_type;
0330     qp_init_attr->recv_cq = ibqp->recv_cq;
0331     qp_init_attr->send_cq = ibqp->send_cq;
0332     qp_init_attr->qp_context = ibqp->qp_context;
0333     qp_init_attr->cap = qp_attr->cap;
0334 
0335     return 0;
0336 }
0337 
0338 int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
0339           union ib_gid *gid)
0340 {
0341     struct efa_dev *dev = to_edev(ibdev);
0342 
0343     memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
0344 
0345     return 0;
0346 }
0347 
0348 int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
0349            u16 *pkey)
0350 {
0351     if (index > 0)
0352         return -EINVAL;
0353 
0354     *pkey = 0xffff;
0355     return 0;
0356 }
0357 
0358 static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
0359 {
0360     struct efa_com_dealloc_pd_params params = {
0361         .pdn = pdn,
0362     };
0363 
0364     return efa_com_dealloc_pd(&dev->edev, &params);
0365 }
0366 
0367 int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0368 {
0369     struct efa_dev *dev = to_edev(ibpd->device);
0370     struct efa_ibv_alloc_pd_resp resp = {};
0371     struct efa_com_alloc_pd_result result;
0372     struct efa_pd *pd = to_epd(ibpd);
0373     int err;
0374 
0375     if (udata->inlen &&
0376         !ib_is_udata_cleared(udata, 0, udata->inlen)) {
0377         ibdev_dbg(&dev->ibdev,
0378               "Incompatible ABI params, udata not cleared\n");
0379         err = -EINVAL;
0380         goto err_out;
0381     }
0382 
0383     err = efa_com_alloc_pd(&dev->edev, &result);
0384     if (err)
0385         goto err_out;
0386 
0387     pd->pdn = result.pdn;
0388     resp.pdn = result.pdn;
0389 
0390     if (udata->outlen) {
0391         err = ib_copy_to_udata(udata, &resp,
0392                        min(sizeof(resp), udata->outlen));
0393         if (err) {
0394             ibdev_dbg(&dev->ibdev,
0395                   "Failed to copy udata for alloc_pd\n");
0396             goto err_dealloc_pd;
0397         }
0398     }
0399 
0400     ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
0401 
0402     return 0;
0403 
0404 err_dealloc_pd:
0405     efa_pd_dealloc(dev, result.pdn);
0406 err_out:
0407     atomic64_inc(&dev->stats.alloc_pd_err);
0408     return err;
0409 }
0410 
0411 int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0412 {
0413     struct efa_dev *dev = to_edev(ibpd->device);
0414     struct efa_pd *pd = to_epd(ibpd);
0415 
0416     ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
0417     efa_pd_dealloc(dev, pd->pdn);
0418     return 0;
0419 }
0420 
0421 static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
0422 {
0423     struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
0424 
0425     return efa_com_destroy_qp(&dev->edev, &params);
0426 }
0427 
0428 static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
0429 {
0430     rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
0431     rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
0432     rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
0433     rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
0434 }
0435 
0436 int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
0437 {
0438     struct efa_dev *dev = to_edev(ibqp->pd->device);
0439     struct efa_qp *qp = to_eqp(ibqp);
0440     int err;
0441 
0442     ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
0443 
0444     efa_qp_user_mmap_entries_remove(qp);
0445 
0446     err = efa_destroy_qp_handle(dev, qp->qp_handle);
0447     if (err)
0448         return err;
0449 
0450     if (qp->rq_cpu_addr) {
0451         ibdev_dbg(&dev->ibdev,
0452               "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
0453               qp->rq_cpu_addr, qp->rq_size,
0454               &qp->rq_dma_addr);
0455         efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
0456                 qp->rq_size, DMA_TO_DEVICE);
0457     }
0458 
0459     return 0;
0460 }
0461 
0462 static struct rdma_user_mmap_entry*
0463 efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
0464                u64 address, size_t length,
0465                u8 mmap_flag, u64 *offset)
0466 {
0467     struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
0468     int err;
0469 
0470     if (!entry)
0471         return NULL;
0472 
0473     entry->address = address;
0474     entry->mmap_flag = mmap_flag;
0475 
0476     err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
0477                       length);
0478     if (err) {
0479         kfree(entry);
0480         return NULL;
0481     }
0482     *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
0483 
0484     return &entry->rdma_entry;
0485 }
0486 
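/*
 * qp_mmap_entries_setup() publishes up to four mmap entries per QP: the SQ
 * doorbell page and (if an RQ exists) the RQ doorbell page as non-cached
 * I/O, the LLQ descriptor ring as write-combined I/O, and the RQ buffer as
 * host DMA memory. The offsets returned to userspace are reduced to their
 * in-page offset, since the mmap keys identify the pages themselves.
 */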
0487 static int qp_mmap_entries_setup(struct efa_qp *qp,
0488                  struct efa_dev *dev,
0489                  struct efa_ucontext *ucontext,
0490                  struct efa_com_create_qp_params *params,
0491                  struct efa_ibv_create_qp_resp *resp)
0492 {
0493     size_t length;
0494     u64 address;
0495 
0496     address = dev->db_bar_addr + resp->sq_db_offset;
0497     qp->sq_db_mmap_entry =
0498         efa_user_mmap_entry_insert(&ucontext->ibucontext,
0499                        address,
0500                        PAGE_SIZE, EFA_MMAP_IO_NC,
0501                        &resp->sq_db_mmap_key);
0502     if (!qp->sq_db_mmap_entry)
0503         return -ENOMEM;
0504 
0505     resp->sq_db_offset &= ~PAGE_MASK;
0506 
0507     address = dev->mem_bar_addr + resp->llq_desc_offset;
0508     length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
0509                 (resp->llq_desc_offset & ~PAGE_MASK));
0510 
0511     qp->llq_desc_mmap_entry =
0512         efa_user_mmap_entry_insert(&ucontext->ibucontext,
0513                        address, length,
0514                        EFA_MMAP_IO_WC,
0515                        &resp->llq_desc_mmap_key);
0516     if (!qp->llq_desc_mmap_entry)
0517         goto err_remove_mmap;
0518 
0519     resp->llq_desc_offset &= ~PAGE_MASK;
0520 
0521     if (qp->rq_size) {
0522         address = dev->db_bar_addr + resp->rq_db_offset;
0523 
0524         qp->rq_db_mmap_entry =
0525             efa_user_mmap_entry_insert(&ucontext->ibucontext,
0526                            address, PAGE_SIZE,
0527                            EFA_MMAP_IO_NC,
0528                            &resp->rq_db_mmap_key);
0529         if (!qp->rq_db_mmap_entry)
0530             goto err_remove_mmap;
0531 
0532         resp->rq_db_offset &= ~PAGE_MASK;
0533 
0534         address = virt_to_phys(qp->rq_cpu_addr);
0535         qp->rq_mmap_entry =
0536             efa_user_mmap_entry_insert(&ucontext->ibucontext,
0537                            address, qp->rq_size,
0538                            EFA_MMAP_DMA_PAGE,
0539                            &resp->rq_mmap_key);
0540         if (!qp->rq_mmap_entry)
0541             goto err_remove_mmap;
0542 
0543         resp->rq_mmap_size = qp->rq_size;
0544     }
0545 
0546     return 0;
0547 
0548 err_remove_mmap:
0549     efa_qp_user_mmap_entries_remove(qp);
0550 
0551     return -ENOMEM;
0552 }
0553 
0554 static int efa_qp_validate_cap(struct efa_dev *dev,
0555                    struct ib_qp_init_attr *init_attr)
0556 {
0557     if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
0558         ibdev_dbg(&dev->ibdev,
0559               "qp: requested send wr[%u] exceeds the max[%u]\n",
0560               init_attr->cap.max_send_wr,
0561               dev->dev_attr.max_sq_depth);
0562         return -EINVAL;
0563     }
0564     if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
0565         ibdev_dbg(&dev->ibdev,
0566               "qp: requested receive wr[%u] exceeds the max[%u]\n",
0567               init_attr->cap.max_recv_wr,
0568               dev->dev_attr.max_rq_depth);
0569         return -EINVAL;
0570     }
0571     if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
0572         ibdev_dbg(&dev->ibdev,
0573               "qp: requested sge send[%u] exceeds the max[%u]\n",
0574               init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
0575         return -EINVAL;
0576     }
0577     if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
0578         ibdev_dbg(&dev->ibdev,
0579               "qp: requested sge recv[%u] exceeds the max[%u]\n",
0580               init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
0581         return -EINVAL;
0582     }
0583     if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
0584         ibdev_dbg(&dev->ibdev,
0585               "qp: requested inline data[%u] exceeds the max[%u]\n",
0586               init_attr->cap.max_inline_data,
0587               dev->dev_attr.inline_buf_size);
0588         return -EINVAL;
0589     }
0590 
0591     return 0;
0592 }
0593 
0594 static int efa_qp_validate_attr(struct efa_dev *dev,
0595                 struct ib_qp_init_attr *init_attr)
0596 {
0597     if (init_attr->qp_type != IB_QPT_DRIVER &&
0598         init_attr->qp_type != IB_QPT_UD) {
0599         ibdev_dbg(&dev->ibdev,
0600               "Unsupported qp type %d\n", init_attr->qp_type);
0601         return -EOPNOTSUPP;
0602     }
0603 
0604     if (init_attr->srq) {
0605         ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
0606         return -EOPNOTSUPP;
0607     }
0608 
0609     if (init_attr->create_flags) {
0610         ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
0611         return -EOPNOTSUPP;
0612     }
0613 
0614     return 0;
0615 }
0616 
0617 int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
0618           struct ib_udata *udata)
0619 {
0620     struct efa_com_create_qp_params create_qp_params = {};
0621     struct efa_com_create_qp_result create_qp_resp;
0622     struct efa_dev *dev = to_edev(ibqp->device);
0623     struct efa_ibv_create_qp_resp resp = {};
0624     struct efa_ibv_create_qp cmd = {};
0625     struct efa_qp *qp = to_eqp(ibqp);
0626     struct efa_ucontext *ucontext;
0627     int err;
0628 
0629     ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
0630                          ibucontext);
0631 
0632     err = efa_qp_validate_cap(dev, init_attr);
0633     if (err)
0634         goto err_out;
0635 
0636     err = efa_qp_validate_attr(dev, init_attr);
0637     if (err)
0638         goto err_out;
0639 
0640     if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
0641         ibdev_dbg(&dev->ibdev,
0642               "Incompatible ABI params, no input udata\n");
0643         err = -EINVAL;
0644         goto err_out;
0645     }
0646 
0647     if (udata->inlen > sizeof(cmd) &&
0648         !ib_is_udata_cleared(udata, sizeof(cmd),
0649                  udata->inlen - sizeof(cmd))) {
0650         ibdev_dbg(&dev->ibdev,
0651               "Incompatible ABI params, unknown fields in udata\n");
0652         err = -EINVAL;
0653         goto err_out;
0654     }
0655 
0656     err = ib_copy_from_udata(&cmd, udata,
0657                  min(sizeof(cmd), udata->inlen));
0658     if (err) {
0659         ibdev_dbg(&dev->ibdev,
0660               "Cannot copy udata for create_qp\n");
0661         goto err_out;
0662     }
0663 
0664     if (cmd.comp_mask) {
0665         ibdev_dbg(&dev->ibdev,
0666               "Incompatible ABI params, unknown fields in udata\n");
0667         err = -EINVAL;
0668         goto err_out;
0669     }
0670 
0671     create_qp_params.uarn = ucontext->uarn;
0672     create_qp_params.pd = to_epd(ibqp->pd)->pdn;
0673 
0674     if (init_attr->qp_type == IB_QPT_UD) {
0675         create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
0676     } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
0677         create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
0678     } else {
0679         ibdev_dbg(&dev->ibdev,
0680               "Unsupported qp type %d driver qp type %d\n",
0681               init_attr->qp_type, cmd.driver_qp_type);
0682         err = -EOPNOTSUPP;
0683         goto err_out;
0684     }
0685 
0686     ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
0687           init_attr->qp_type, cmd.driver_qp_type);
0688     create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
0689     create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
0690     create_qp_params.sq_depth = init_attr->cap.max_send_wr;
0691     create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
0692 
0693     create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
0694     create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
0695     qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
0696     if (qp->rq_size) {
0697         qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
0698                             qp->rq_size, DMA_TO_DEVICE);
0699         if (!qp->rq_cpu_addr) {
0700             err = -ENOMEM;
0701             goto err_out;
0702         }
0703 
0704         ibdev_dbg(&dev->ibdev,
0705               "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
0706               qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
0707         create_qp_params.rq_base_addr = qp->rq_dma_addr;
0708     }
0709 
0710     err = efa_com_create_qp(&dev->edev, &create_qp_params,
0711                 &create_qp_resp);
0712     if (err)
0713         goto err_free_mapped;
0714 
0715     resp.sq_db_offset = create_qp_resp.sq_db_offset;
0716     resp.rq_db_offset = create_qp_resp.rq_db_offset;
0717     resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
0718     resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
0719     resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
0720 
0721     err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
0722                     &resp);
0723     if (err)
0724         goto err_destroy_qp;
0725 
0726     qp->qp_handle = create_qp_resp.qp_handle;
0727     qp->ibqp.qp_num = create_qp_resp.qp_num;
0728     qp->max_send_wr = init_attr->cap.max_send_wr;
0729     qp->max_recv_wr = init_attr->cap.max_recv_wr;
0730     qp->max_send_sge = init_attr->cap.max_send_sge;
0731     qp->max_recv_sge = init_attr->cap.max_recv_sge;
0732     qp->max_inline_data = init_attr->cap.max_inline_data;
0733 
0734     if (udata->outlen) {
0735         err = ib_copy_to_udata(udata, &resp,
0736                        min(sizeof(resp), udata->outlen));
0737         if (err) {
0738             ibdev_dbg(&dev->ibdev,
0739                   "Failed to copy udata for qp[%u]\n",
0740                   create_qp_resp.qp_num);
0741             goto err_remove_mmap_entries;
0742         }
0743     }
0744 
0745     ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
0746 
0747     return 0;
0748 
0749 err_remove_mmap_entries:
0750     efa_qp_user_mmap_entries_remove(qp);
0751 err_destroy_qp:
0752     efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
0753 err_free_mapped:
0754     if (qp->rq_size)
0755         efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
0756                 qp->rq_size, DMA_TO_DEVICE);
0757 err_out:
0758     atomic64_inc(&dev->stats.create_qp_err);
0759     return err;
0760 }
0761 
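/*
 * State transition table for SRD QPs (exposed as IB_QPT_DRIVER). These are
 * validated against this table rather than the core ib_modify_qp_is_ok();
 * note that the RTR->RTS transition additionally accepts IB_QP_RNR_RETRY.
 */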
0762 static const struct {
0763     int         valid;
0764     enum ib_qp_attr_mask    req_param;
0765     enum ib_qp_attr_mask    opt_param;
0766 } srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
0767     [IB_QPS_RESET] = {
0768         [IB_QPS_RESET] = { .valid = 1 },
0769         [IB_QPS_INIT]  = {
0770             .valid = 1,
0771             .req_param = IB_QP_PKEY_INDEX |
0772                      IB_QP_PORT |
0773                      IB_QP_QKEY,
0774         },
0775     },
0776     [IB_QPS_INIT] = {
0777         [IB_QPS_RESET] = { .valid = 1 },
0778         [IB_QPS_ERR]   = { .valid = 1 },
0779         [IB_QPS_INIT]  = {
0780             .valid = 1,
0781             .opt_param = IB_QP_PKEY_INDEX |
0782                      IB_QP_PORT |
0783                      IB_QP_QKEY,
0784         },
0785         [IB_QPS_RTR]   = {
0786             .valid = 1,
0787             .opt_param = IB_QP_PKEY_INDEX |
0788                      IB_QP_QKEY,
0789         },
0790     },
0791     [IB_QPS_RTR] = {
0792         [IB_QPS_RESET] = { .valid = 1 },
0793         [IB_QPS_ERR]   = { .valid = 1 },
0794         [IB_QPS_RTS]   = {
0795             .valid = 1,
0796             .req_param = IB_QP_SQ_PSN,
0797             .opt_param = IB_QP_CUR_STATE |
0798                      IB_QP_QKEY |
0799                      IB_QP_RNR_RETRY,
0800 
0801         }
0802     },
0803     [IB_QPS_RTS] = {
0804         [IB_QPS_RESET] = { .valid = 1 },
0805         [IB_QPS_ERR]   = { .valid = 1 },
0806         [IB_QPS_RTS]   = {
0807             .valid = 1,
0808             .opt_param = IB_QP_CUR_STATE |
0809                      IB_QP_QKEY,
0810         },
0811         [IB_QPS_SQD] = {
0812             .valid = 1,
0813             .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
0814         },
0815     },
0816     [IB_QPS_SQD] = {
0817         [IB_QPS_RESET] = { .valid = 1 },
0818         [IB_QPS_ERR]   = { .valid = 1 },
0819         [IB_QPS_RTS]   = {
0820             .valid = 1,
0821             .opt_param = IB_QP_CUR_STATE |
0822                      IB_QP_QKEY,
0823         },
0824         [IB_QPS_SQD] = {
0825             .valid = 1,
0826             .opt_param = IB_QP_PKEY_INDEX |
0827                      IB_QP_QKEY,
0828         }
0829     },
0830     [IB_QPS_SQE] = {
0831         [IB_QPS_RESET] = { .valid = 1 },
0832         [IB_QPS_ERR]   = { .valid = 1 },
0833         [IB_QPS_RTS]   = {
0834             .valid = 1,
0835             .opt_param = IB_QP_CUR_STATE |
0836                      IB_QP_QKEY,
0837         }
0838     },
0839     [IB_QPS_ERR] = {
0840         [IB_QPS_RESET] = { .valid = 1 },
0841         [IB_QPS_ERR]   = { .valid = 1 },
0842     }
0843 };
0844 
0845 static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
0846                     enum ib_qp_state next_state,
0847                     enum ib_qp_attr_mask mask)
0848 {
0849     enum ib_qp_attr_mask req_param, opt_param;
0850 
0851     if (mask & IB_QP_CUR_STATE  &&
0852         cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
0853         cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
0854         return false;
0855 
0856     if (!srd_qp_state_table[cur_state][next_state].valid)
0857         return false;
0858 
0859     req_param = srd_qp_state_table[cur_state][next_state].req_param;
0860     opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
0861 
0862     if ((mask & req_param) != req_param)
0863         return false;
0864 
0865     if (mask & ~(req_param | opt_param | IB_QP_STATE))
0866         return false;
0867 
0868     return true;
0869 }
0870 
0871 static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
0872                   struct ib_qp_attr *qp_attr, int qp_attr_mask,
0873                   enum ib_qp_state cur_state,
0874                   enum ib_qp_state new_state)
0875 {
0876     int err;
0877 
0878 #define EFA_MODIFY_QP_SUPP_MASK \
0879     (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
0880      IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
0881      IB_QP_RNR_RETRY)
0882 
0883     if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
0884         ibdev_dbg(&dev->ibdev,
0885               "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
0886               qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
0887         return -EOPNOTSUPP;
0888     }
0889 
0890     if (qp->ibqp.qp_type == IB_QPT_DRIVER)
0891         err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
0892                            qp_attr_mask);
0893     else
0894         err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
0895                       qp_attr_mask);
0896 
0897     if (err) {
0898         ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
0899         return -EINVAL;
0900     }
0901 
0902     if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
0903         ibdev_dbg(&dev->ibdev, "Can't change port num\n");
0904         return -EOPNOTSUPP;
0905     }
0906 
0907     if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
0908         ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
0909         return -EOPNOTSUPP;
0910     }
0911 
0912     return 0;
0913 }
0914 
0915 int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
0916           int qp_attr_mask, struct ib_udata *udata)
0917 {
0918     struct efa_dev *dev = to_edev(ibqp->device);
0919     struct efa_com_modify_qp_params params = {};
0920     struct efa_qp *qp = to_eqp(ibqp);
0921     enum ib_qp_state cur_state;
0922     enum ib_qp_state new_state;
0923     int err;
0924 
0925     if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
0926         return -EOPNOTSUPP;
0927 
0928     if (udata->inlen &&
0929         !ib_is_udata_cleared(udata, 0, udata->inlen)) {
0930         ibdev_dbg(&dev->ibdev,
0931               "Incompatible ABI params, udata not cleared\n");
0932         return -EINVAL;
0933     }
0934 
0935     cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
0936                              qp->state;
0937     new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
0938 
0939     err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
0940                      new_state);
0941     if (err)
0942         return err;
0943 
0944     params.qp_handle = qp->qp_handle;
0945 
0946     if (qp_attr_mask & IB_QP_STATE) {
0947         EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
0948             1);
0949         EFA_SET(&params.modify_mask,
0950             EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
0951         params.cur_qp_state = cur_state;
0952         params.qp_state = new_state;
0953     }
0954 
0955     if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
0956         EFA_SET(&params.modify_mask,
0957             EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
0958         params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
0959     }
0960 
0961     if (qp_attr_mask & IB_QP_QKEY) {
0962         EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
0963         params.qkey = qp_attr->qkey;
0964     }
0965 
0966     if (qp_attr_mask & IB_QP_SQ_PSN) {
0967         EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
0968         params.sq_psn = qp_attr->sq_psn;
0969     }
0970 
0971     if (qp_attr_mask & IB_QP_RNR_RETRY) {
0972         EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
0973             1);
0974         params.rnr_retry = qp_attr->rnr_retry;
0975     }
0976 
0977     err = efa_com_modify_qp(&dev->edev, &params);
0978     if (err)
0979         return err;
0980 
0981     qp->state = new_state;
0982 
0983     return 0;
0984 }
0985 
0986 static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
0987 {
0988     struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
0989 
0990     return efa_com_destroy_cq(&dev->edev, &params);
0991 }
0992 
0993 static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
0994 {
0995     rdma_user_mmap_entry_remove(cq->db_mmap_entry);
0996     rdma_user_mmap_entry_remove(cq->mmap_entry);
0997 }
0998 
0999 int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1000 {
1001     struct efa_dev *dev = to_edev(ibcq->device);
1002     struct efa_cq *cq = to_ecq(ibcq);
1003 
1004     ibdev_dbg(&dev->ibdev,
1005           "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
1006           cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
1007 
1008     efa_cq_user_mmap_entries_remove(cq);
1009     efa_destroy_cq_idx(dev, cq->cq_idx);
1010     if (cq->eq) {
1011         xa_erase(&dev->cqs_xa, cq->cq_idx);
1012         synchronize_irq(cq->eq->irq.irqn);
1013     }
1014     efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
1015             DMA_FROM_DEVICE);
1016     return 0;
1017 }
1018 
1019 static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
1020 {
1021     return &dev->eqs[vec];
1022 }
1023 
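/*
 * cq_mmap_entries_setup() exposes the CQ buffer as a host-memory mmap entry
 * and, when the device reports a valid doorbell offset, a second non-cached
 * entry for the CQ doorbell page; db_off is then reduced to its in-page
 * offset and EFA_CREATE_CQ_RESP_DB_OFF is set in comp_mask.
 */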
1024 static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
1025                  struct efa_ibv_create_cq_resp *resp,
1026                  bool db_valid)
1027 {
1028     resp->q_mmap_size = cq->size;
1029     cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
1030                             virt_to_phys(cq->cpu_addr),
1031                             cq->size, EFA_MMAP_DMA_PAGE,
1032                             &resp->q_mmap_key);
1033     if (!cq->mmap_entry)
1034         return -ENOMEM;
1035 
1036     if (db_valid) {
1037         cq->db_mmap_entry =
1038             efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
1039                            dev->db_bar_addr + resp->db_off,
1040                            PAGE_SIZE, EFA_MMAP_IO_NC,
1041                            &resp->db_mmap_key);
1042         if (!cq->db_mmap_entry) {
1043             rdma_user_mmap_entry_remove(cq->mmap_entry);
1044             return -ENOMEM;
1045         }
1046 
1047         resp->db_off &= ~PAGE_MASK;
1048         resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
1049     }
1050 
1051     return 0;
1052 }
1053 
1054 int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1055           struct ib_udata *udata)
1056 {
1057     struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
1058         udata, struct efa_ucontext, ibucontext);
1059     struct efa_com_create_cq_params params = {};
1060     struct efa_ibv_create_cq_resp resp = {};
1061     struct efa_com_create_cq_result result;
1062     struct ib_device *ibdev = ibcq->device;
1063     struct efa_dev *dev = to_edev(ibdev);
1064     struct efa_ibv_create_cq cmd = {};
1065     struct efa_cq *cq = to_ecq(ibcq);
1066     int entries = attr->cqe;
1067     int err;
1068 
1069     ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
1070 
1071     if (attr->flags)
1072         return -EOPNOTSUPP;
1073 
1074     if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
1075         ibdev_dbg(ibdev,
1076               "cq: requested entries[%u] non-positive or greater than max[%u]\n",
1077               entries, dev->dev_attr.max_cq_depth);
1078         err = -EINVAL;
1079         goto err_out;
1080     }
1081 
1082     if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
1083         ibdev_dbg(ibdev,
1084               "Incompatible ABI params, no input udata\n");
1085         err = -EINVAL;
1086         goto err_out;
1087     }
1088 
1089     if (udata->inlen > sizeof(cmd) &&
1090         !ib_is_udata_cleared(udata, sizeof(cmd),
1091                  udata->inlen - sizeof(cmd))) {
1092         ibdev_dbg(ibdev,
1093               "Incompatible ABI params, unknown fields in udata\n");
1094         err = -EINVAL;
1095         goto err_out;
1096     }
1097 
1098     err = ib_copy_from_udata(&cmd, udata,
1099                  min(sizeof(cmd), udata->inlen));
1100     if (err) {
1101         ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
1102         goto err_out;
1103     }
1104 
1105     if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
1106         ibdev_dbg(ibdev,
1107               "Incompatible ABI params, unknown fields in udata\n");
1108         err = -EINVAL;
1109         goto err_out;
1110     }
1111 
1112     if (!cmd.cq_entry_size) {
1113         ibdev_dbg(ibdev,
1114               "Invalid entry size [%u]\n", cmd.cq_entry_size);
1115         err = -EINVAL;
1116         goto err_out;
1117     }
1118 
1119     if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
1120         ibdev_dbg(ibdev,
1121               "Invalid number of sub cqs[%u] expected[%u]\n",
1122               cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
1123         err = -EINVAL;
1124         goto err_out;
1125     }
1126 
1127     cq->ucontext = ucontext;
1128     cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
1129     cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
1130                      DMA_FROM_DEVICE);
1131     if (!cq->cpu_addr) {
1132         err = -ENOMEM;
1133         goto err_out;
1134     }
1135 
1136     params.uarn = cq->ucontext->uarn;
1137     params.cq_depth = entries;
1138     params.dma_addr = cq->dma_addr;
1139     params.entry_size_in_bytes = cmd.cq_entry_size;
1140     params.num_sub_cqs = cmd.num_sub_cqs;
1141     if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
1142         cq->eq = efa_vec2eq(dev, attr->comp_vector);
1143         params.eqn = cq->eq->eeq.eqn;
1144         params.interrupt_mode_enabled = true;
1145     }
1146 
1147     err = efa_com_create_cq(&dev->edev, &params, &result);
1148     if (err)
1149         goto err_free_mapped;
1150 
1151     resp.db_off = result.db_off;
1152     resp.cq_idx = result.cq_idx;
1153     cq->cq_idx = result.cq_idx;
1154     cq->ibcq.cqe = result.actual_depth;
1155     WARN_ON_ONCE(entries != result.actual_depth);
1156 
1157     err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
1158     if (err) {
1159         ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
1160               cq->cq_idx);
1161         goto err_destroy_cq;
1162     }
1163 
1164     if (cq->eq) {
1165         err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
1166         if (err) {
1167             ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
1168                   cq->cq_idx);
1169             goto err_remove_mmap;
1170         }
1171     }
1172 
1173     if (udata->outlen) {
1174         err = ib_copy_to_udata(udata, &resp,
1175                        min(sizeof(resp), udata->outlen));
1176         if (err) {
1177             ibdev_dbg(ibdev,
1178                   "Failed to copy udata for create_cq\n");
1179             goto err_xa_erase;
1180         }
1181     }
1182 
1183     ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1184           cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
1185 
1186     return 0;
1187 
1188 err_xa_erase:
1189     if (cq->eq)
1190         xa_erase(&dev->cqs_xa, cq->cq_idx);
1191 err_remove_mmap:
1192     efa_cq_user_mmap_entries_remove(cq);
1193 err_destroy_cq:
1194     efa_destroy_cq_idx(dev, cq->cq_idx);
1195 err_free_mapped:
1196     efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
1197             DMA_FROM_DEVICE);
1198 
1199 err_out:
1200     atomic64_inc(&dev->stats.create_cq_err);
1201     return err;
1202 }
1203 
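/*
 * umem_to_page_list() walks the umem in blocks of BIT(hp_shift) bytes and
 * records each block's DMA address in page_list; hp_cnt is only used for
 * the debug print, the walk itself is bounded by the umem length.
 */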
1204 static int umem_to_page_list(struct efa_dev *dev,
1205                  struct ib_umem *umem,
1206                  u64 *page_list,
1207                  u32 hp_cnt,
1208                  u8 hp_shift)
1209 {
1210     u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1211     struct ib_block_iter biter;
1212     unsigned int hp_idx = 0;
1213 
1214     ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1215           hp_cnt, pages_in_hp);
1216 
1217     rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
1218         page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
1219 
1220     return 0;
1221 }
1222 
1223 static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1224 {
1225     struct scatterlist *sglist;
1226     struct page *pg;
1227     int i;
1228 
1229     sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
1230     if (!sglist)
1231         return NULL;
1232     sg_init_table(sglist, page_cnt);
1233     for (i = 0; i < page_cnt; i++) {
1234         pg = vmalloc_to_page(buf);
1235         if (!pg)
1236             goto err;
1237         sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1238         buf += PAGE_SIZE / sizeof(*buf);
1239     }
1240     return sglist;
1241 
1242 err:
1243     kfree(sglist);
1244     return NULL;
1245 }
1246 
1247 /*
1248  * create a chunk list of physical pages dma addresses from the supplied
1249  * scatter gather list
1250  */
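/*
 * Chunks are linked back to front: the trailing efa_com_ctrl_buff_info of
 * each chunk holds the DMA address and length of the next chunk, so only
 * the first chunk's address needs to be handed to the device.
 */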
1251 static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1252 {
1253     struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1254     int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1255     struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1256     unsigned int chunk_list_size, chunk_idx, payload_idx;
1257     int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1258     struct efa_com_ctrl_buff_info *ctrl_buf;
1259     u64 *cur_chunk_buf, *prev_chunk_buf;
1260     struct ib_block_iter biter;
1261     dma_addr_t dma_addr;
1262     int i;
1263 
1264     /* allocate a chunk list that consists of 4KB chunks */
1265     chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1266 
1267     chunk_list->size = chunk_list_size;
1268     chunk_list->chunks = kcalloc(chunk_list_size,
1269                      sizeof(*chunk_list->chunks),
1270                      GFP_KERNEL);
1271     if (!chunk_list->chunks)
1272         return -ENOMEM;
1273 
1274     ibdev_dbg(&dev->ibdev,
1275           "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1276           page_cnt);
1277 
1278     /* allocate chunk buffers: */
1279     for (i = 0; i < chunk_list_size; i++) {
1280         chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1281         if (!chunk_list->chunks[i].buf)
1282             goto chunk_list_dealloc;
1283 
1284         chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1285     }
1286     chunk_list->chunks[chunk_list_size - 1].length =
1287         ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1288             EFA_CHUNK_PTR_SIZE;
1289 
1290     /* fill the dma addresses of sg list pages to chunks: */
1291     chunk_idx = 0;
1292     payload_idx = 0;
1293     cur_chunk_buf = chunk_list->chunks[0].buf;
1294     rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
1295                 EFA_CHUNK_PAYLOAD_SIZE) {
1296         cur_chunk_buf[payload_idx++] =
1297             rdma_block_iter_dma_address(&biter);
1298 
1299         if (payload_idx == EFA_PTRS_PER_CHUNK) {
1300             chunk_idx++;
1301             cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1302             payload_idx = 0;
1303         }
1304     }
1305 
1306     /* map chunks to dma and fill chunks next ptrs */
1307     for (i = chunk_list_size - 1; i >= 0; i--) {
1308         dma_addr = dma_map_single(&dev->pdev->dev,
1309                       chunk_list->chunks[i].buf,
1310                       chunk_list->chunks[i].length,
1311                       DMA_TO_DEVICE);
1312         if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1313             ibdev_err(&dev->ibdev,
1314                   "chunk[%u] dma_map_failed\n", i);
1315             goto chunk_list_unmap;
1316         }
1317 
1318         chunk_list->chunks[i].dma_addr = dma_addr;
1319         ibdev_dbg(&dev->ibdev,
1320               "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1321 
1322         if (!i)
1323             break;
1324 
1325         prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1326 
1327         ctrl_buf = (struct efa_com_ctrl_buff_info *)
1328                 &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1329         ctrl_buf->length = chunk_list->chunks[i].length;
1330 
1331         efa_com_set_dma_addr(dma_addr,
1332                      &ctrl_buf->address.mem_addr_high,
1333                      &ctrl_buf->address.mem_addr_low);
1334     }
1335 
1336     return 0;
1337 
1338 chunk_list_unmap:
1339     for (; i < chunk_list_size; i++) {
1340         dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1341                  chunk_list->chunks[i].length, DMA_TO_DEVICE);
1342     }
1343 chunk_list_dealloc:
1344     for (i = 0; i < chunk_list_size; i++)
1345         kfree(chunk_list->chunks[i].buf);
1346 
1347     kfree(chunk_list->chunks);
1348     return -ENOMEM;
1349 }
1350 
1351 static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1352 {
1353     struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1354     int i;
1355 
1356     for (i = 0; i < chunk_list->size; i++) {
1357         dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1358                  chunk_list->chunks[i].length, DMA_TO_DEVICE);
1359         kfree(chunk_list->chunks[i].buf);
1360     }
1361 
1362     kfree(chunk_list->chunks);
1363 }
1364 
1365 /* initialize pbl continuous mode: map pbl buffer to a dma address. */
1366 static int pbl_continuous_initialize(struct efa_dev *dev,
1367                      struct pbl_context *pbl)
1368 {
1369     dma_addr_t dma_addr;
1370 
1371     dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1372                   pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1373     if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1374         ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1375         return -ENOMEM;
1376     }
1377 
1378     pbl->phys.continuous.dma_addr = dma_addr;
1379     ibdev_dbg(&dev->ibdev,
1380           "pbl continuous - dma_addr = %pad, size[%u]\n",
1381           &dma_addr, pbl->pbl_buf_size_in_bytes);
1382 
1383     return 0;
1384 }
1385 
1386 /*
1387  * initialize pbl indirect mode:
1388  * create a chunk list out of the dma addresses of the physical pages of
1389  * pbl buffer.
1390  */
1391 static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1392 {
1393     u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
1394     struct scatterlist *sgl;
1395     int sg_dma_cnt, err;
1396 
1397     BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1398     sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1399     if (!sgl)
1400         return -ENOMEM;
1401 
1402     sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1403     if (!sg_dma_cnt) {
1404         err = -EINVAL;
1405         goto err_map;
1406     }
1407 
1408     pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1409     pbl->phys.indirect.sgl = sgl;
1410     pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1411     err = pbl_chunk_list_create(dev, pbl);
1412     if (err) {
1413         ibdev_dbg(&dev->ibdev,
1414               "chunk_list creation failed[%d]\n", err);
1415         goto err_chunk;
1416     }
1417 
1418     ibdev_dbg(&dev->ibdev,
1419           "pbl indirect - size[%u], chunks[%u]\n",
1420           pbl->pbl_buf_size_in_bytes,
1421           pbl->phys.indirect.chunk_list.size);
1422 
1423     return 0;
1424 
1425 err_chunk:
1426     dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1427 err_map:
1428     kfree(sgl);
1429     return err;
1430 }
1431 
1432 static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1433 {
1434     pbl_chunk_list_destroy(dev, pbl);
1435     dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1436              pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1437     kfree(pbl->phys.indirect.sgl);
1438 }
1439 
1440 /* create a page buffer list from a mapped user memory region */
1441 static int pbl_create(struct efa_dev *dev,
1442               struct pbl_context *pbl,
1443               struct ib_umem *umem,
1444               int hp_cnt,
1445               u8 hp_shift)
1446 {
1447     int err;
1448 
1449     pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1450     pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
1451     if (!pbl->pbl_buf)
1452         return -ENOMEM;
1453 
1454     if (is_vmalloc_addr(pbl->pbl_buf)) {
1455         pbl->physically_continuous = 0;
1456         err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1457                     hp_shift);
1458         if (err)
1459             goto err_free;
1460 
1461         err = pbl_indirect_initialize(dev, pbl);
1462         if (err)
1463             goto err_free;
1464     } else {
1465         pbl->physically_continuous = 1;
1466         err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1467                     hp_shift);
1468         if (err)
1469             goto err_free;
1470 
1471         err = pbl_continuous_initialize(dev, pbl);
1472         if (err)
1473             goto err_free;
1474     }
1475 
1476     ibdev_dbg(&dev->ibdev,
1477           "user_pbl_created: user_pages[%u], continuous[%u]\n",
1478           hp_cnt, pbl->physically_continuous);
1479 
1480     return 0;
1481 
1482 err_free:
1483     kvfree(pbl->pbl_buf);
1484     return err;
1485 }
1486 
1487 static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1488 {
1489     if (pbl->physically_continuous)
1490         dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1491                  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1492     else
1493         pbl_indirect_terminate(dev, pbl);
1494 
1495     kvfree(pbl->pbl_buf);
1496 }
1497 
1498 static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1499                  struct efa_com_reg_mr_params *params)
1500 {
1501     int err;
1502 
1503     params->inline_pbl = 1;
1504     err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1505                 params->page_num, params->page_shift);
1506     if (err)
1507         return err;
1508 
1509     ibdev_dbg(&dev->ibdev,
1510           "inline_pbl_array - pages[%u]\n", params->page_num);
1511 
1512     return 0;
1513 }
1514 
1515 static int efa_create_pbl(struct efa_dev *dev,
1516               struct pbl_context *pbl,
1517               struct efa_mr *mr,
1518               struct efa_com_reg_mr_params *params)
1519 {
1520     int err;
1521 
1522     err = pbl_create(dev, pbl, mr->umem, params->page_num,
1523              params->page_shift);
1524     if (err) {
1525         ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1526         return err;
1527     }
1528 
1529     params->inline_pbl = 0;
1530     params->indirect = !pbl->physically_continuous;
1531     if (pbl->physically_continuous) {
1532         params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1533 
1534         efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1535                      &params->pbl.pbl.address.mem_addr_high,
1536                      &params->pbl.pbl.address.mem_addr_low);
1537     } else {
1538         params->pbl.pbl.length =
1539             pbl->phys.indirect.chunk_list.chunks[0].length;
1540 
1541         efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1542                      &params->pbl.pbl.address.mem_addr_high,
1543                      &params->pbl.pbl.address.mem_addr_low);
1544     }
1545 
1546     return 0;
1547 }
1548 
1549 static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
1550                    struct ib_udata *udata)
1551 {
1552     struct efa_dev *dev = to_edev(ibpd->device);
1553     int supp_access_flags;
1554     struct efa_mr *mr;
1555 
1556     if (udata && udata->inlen &&
1557         !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
1558         ibdev_dbg(&dev->ibdev,
1559               "Incompatible ABI params, udata not cleared\n");
1560         return ERR_PTR(-EINVAL);
1561     }
1562 
1563     supp_access_flags =
1564         IB_ACCESS_LOCAL_WRITE |
1565         (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0);
1566 
1567     access_flags &= ~IB_ACCESS_OPTIONAL;
1568     if (access_flags & ~supp_access_flags) {
1569         ibdev_dbg(&dev->ibdev,
1570               "Unsupported access flags[%#x], supported[%#x]\n",
1571               access_flags, supp_access_flags);
1572         return ERR_PTR(-EOPNOTSUPP);
1573     }
1574 
1575     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1576     if (!mr)
1577         return ERR_PTR(-ENOMEM);
1578 
1579     return mr;
1580 }
1581 
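/*
 * efa_register_mr() picks the largest page size supported by both the umem
 * layout and the device's page_size_cap, then registers the MR either with
 * an inline array of page addresses (when page_num fits in
 * inline_pbl_array) or with a separately built page buffer list.
 */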
1582 static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
1583                u64 length, u64 virt_addr, int access_flags)
1584 {
1585     struct efa_dev *dev = to_edev(ibpd->device);
1586     struct efa_com_reg_mr_params params = {};
1587     struct efa_com_reg_mr_result result = {};
1588     struct pbl_context pbl;
1589     unsigned int pg_sz;
1590     int inline_size;
1591     int err;
1592 
1593     params.pd = to_epd(ibpd)->pdn;
1594     params.iova = virt_addr;
1595     params.mr_length_in_bytes = length;
1596     params.permissions = access_flags;
1597 
1598     pg_sz = ib_umem_find_best_pgsz(mr->umem,
1599                        dev->dev_attr.page_size_cap,
1600                        virt_addr);
1601     if (!pg_sz) {
1602         ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
1603               dev->dev_attr.page_size_cap);
1604         return -EOPNOTSUPP;
1605     }
1606 
1607     params.page_shift = order_base_2(pg_sz);
1608     params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
1609 
1610     ibdev_dbg(&dev->ibdev,
1611           "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
1612           start, length, params.page_shift, params.page_num);
1613 
1614     inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1615     if (params.page_num <= inline_size) {
1616         err = efa_create_inline_pbl(dev, mr, &params);
1617         if (err)
1618             return err;
1619 
1620         err = efa_com_register_mr(&dev->edev, &params, &result);
1621         if (err)
1622             return err;
1623     } else {
1624         err = efa_create_pbl(dev, &pbl, mr, &params);
1625         if (err)
1626             return err;
1627 
1628         err = efa_com_register_mr(&dev->edev, &params, &result);
1629         pbl_destroy(dev, &pbl);
1630 
1631         if (err)
1632             return err;
1633     }
1634 
1635     mr->ibmr.lkey = result.l_key;
1636     mr->ibmr.rkey = result.r_key;
1637     mr->ibmr.length = length;
1638     ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1639 
1640     return 0;
1641 }
1642 
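/*
 * Register a dma-buf backed MR: the dma-buf pages are pinned for the
 * lifetime of the MR and then registered through the common
 * efa_register_mr() path.
 */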
1643 struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
1644                      u64 length, u64 virt_addr,
1645                      int fd, int access_flags,
1646                      struct ib_udata *udata)
1647 {
1648     struct efa_dev *dev = to_edev(ibpd->device);
1649     struct ib_umem_dmabuf *umem_dmabuf;
1650     struct efa_mr *mr;
1651     int err;
1652 
1653     mr = efa_alloc_mr(ibpd, access_flags, udata);
1654     if (IS_ERR(mr)) {
1655         err = PTR_ERR(mr);
1656         goto err_out;
1657     }
1658 
1659     umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
1660                         access_flags);
1661     if (IS_ERR(umem_dmabuf)) {
1662         err = PTR_ERR(umem_dmabuf);
1663         ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
1664         goto err_free;
1665     }
1666 
1667     mr->umem = &umem_dmabuf->umem;
1668     err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
1669     if (err)
1670         goto err_release;
1671 
1672     return &mr->ibmr;
1673 
1674 err_release:
1675     ib_umem_release(mr->umem);
1676 err_free:
1677     kfree(mr);
1678 err_out:
1679     atomic64_inc(&dev->stats.reg_mr_err);
1680     return ERR_PTR(err);
1681 }
1682 
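/*
 * Register a regular user-memory MR: pin and DMA-map the user pages with
 * ib_umem_get() and register them through the common efa_register_mr() path.
 */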
1683 struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1684              u64 virt_addr, int access_flags,
1685              struct ib_udata *udata)
1686 {
1687     struct efa_dev *dev = to_edev(ibpd->device);
1688     struct efa_mr *mr;
1689     int err;
1690 
1691     mr = efa_alloc_mr(ibpd, access_flags, udata);
1692     if (IS_ERR(mr)) {
1693         err = PTR_ERR(mr);
1694         goto err_out;
1695     }
1696 
1697     mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
1698     if (IS_ERR(mr->umem)) {
1699         err = PTR_ERR(mr->umem);
1700         ibdev_dbg(&dev->ibdev,
1701               "Failed to pin and map user space memory[%d]\n", err);
1702         goto err_free;
1703     }
1704 
1705     err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
1706     if (err)
1707         goto err_release;
1708 
1709     return &mr->ibmr;
1710 
1711 err_release:
1712     ib_umem_release(mr->umem);
1713 err_free:
1714     kfree(mr);
1715 err_out:
1716     atomic64_inc(&dev->stats.reg_mr_err);
1717     return ERR_PTR(err);
1718 }
1719 
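/*
 * Deregister the MR with the device first; the umem is released and the MR
 * freed only after the DEREG_MR admin command succeeds.
 */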
1720 int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1721 {
1722     struct efa_dev *dev = to_edev(ibmr->device);
1723     struct efa_com_dereg_mr_params params;
1724     struct efa_mr *mr = to_emr(ibmr);
1725     int err;
1726 
1727     ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1728 
1729     params.l_key = mr->ibmr.lkey;
1730     err = efa_com_dereg_mr(&dev->edev, &params);
1731     if (err)
1732         return err;
1733 
1734     ib_umem_release(mr->umem);
1735     kfree(mr);
1736 
1737     return 0;
1738 }
1739 
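/*
 * Report the immutable port attributes: the pkey and GID table sizes taken
 * from a fresh ib_query_port().
 */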
1740 int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
1741                struct ib_port_immutable *immutable)
1742 {
1743     struct ib_port_attr attr;
1744     int err;
1745 
1746     err = ib_query_port(ibdev, port_num, &attr);
1747     if (err) {
1748         ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1749         return err;
1750     }
1751 
1752     immutable->pkey_tbl_len = attr.pkey_tbl_len;
1753     immutable->gid_tbl_len = attr.gid_tbl_len;
1754 
1755     return 0;
1756 }
1757 
1758 static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1759 {
1760     struct efa_com_dealloc_uar_params params = {
1761         .uarn = uarn,
1762     };
1763 
1764     return efa_com_dealloc_uar(&dev->edev, &params);
1765 }
1766 
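/*
 * Evaluates to NULL when the device does not expose the attribute, or when
 * userspace acknowledged it via the corresponding comp_mask bit; otherwise
 * it evaluates to the attribute name, signalling a userspace too old to
 * honor the attribute.
 */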
1767 #define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
1768     (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
1769              NULL : #_attr)
1770 
1771 static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
1772                    const struct efa_ibv_alloc_ucontext_cmd *cmd)
1773 {
1774     struct efa_dev *dev = to_edev(ibucontext->device);
1775     char *attr_str;
1776 
1777     if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
1778                 EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
1779         goto err;
1780 
1781     if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
1782                 EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
1783                 attr_str))
1784         goto err;
1785 
1786     return 0;
1787 
1788 err:
1789     ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
1790           attr_str);
1791     return -EOPNOTSUPP;
1792 }
1793 
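/*
 * Allocate a user context: validate the comp_mask handshake, allocate a UAR
 * for the context and report the device capabilities userspace needs
 * (supported udata commands, sub-CQ count, inline buffer and LLQ sizes,
 * TX batch and minimum SQ depth).
 */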
1794 int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1795 {
1796     struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1797     struct efa_dev *dev = to_edev(ibucontext->device);
1798     struct efa_ibv_alloc_ucontext_resp resp = {};
1799     struct efa_ibv_alloc_ucontext_cmd cmd = {};
1800     struct efa_com_alloc_uar_result result;
1801     int err;
1802 
1803     /*
1804      * It's fine if the driver does not know all request fields;
1805      * the input fields it does recognize are acked back in the response.
1806      */
1807 
1808     err = ib_copy_from_udata(&cmd, udata,
1809                  min(sizeof(cmd), udata->inlen));
1810     if (err) {
1811         ibdev_dbg(&dev->ibdev,
1812               "Cannot copy udata for alloc_ucontext\n");
1813         goto err_out;
1814     }
1815 
1816     err = efa_user_comp_handshake(ibucontext, &cmd);
1817     if (err)
1818         goto err_out;
1819 
1820     err = efa_com_alloc_uar(&dev->edev, &result);
1821     if (err)
1822         goto err_out;
1823 
1824     ucontext->uarn = result.uarn;
1825 
1826     resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1827     resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1828     resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1829     resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1830     resp.max_llq_size = dev->dev_attr.max_llq_size;
1831     resp.max_tx_batch = dev->dev_attr.max_tx_batch;
1832     resp.min_sq_wr = dev->dev_attr.min_sq_depth;
1833 
1834     err = ib_copy_to_udata(udata, &resp,
1835                    min(sizeof(resp), udata->outlen));
1836     if (err)
1837         goto err_dealloc_uar;
1838 
1839     return 0;
1840 
1841 err_dealloc_uar:
1842     efa_dealloc_uar(dev, result.uarn);
1843 err_out:
1844     atomic64_inc(&dev->stats.alloc_ucontext_err);
1845     return err;
1846 }
1847 
1848 void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1849 {
1850     struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1851     struct efa_dev *dev = to_edev(ibucontext->device);
1852 
1853     efa_dealloc_uar(dev, ucontext->uarn);
1854 }
1855 
1856 void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
1857 {
1858     struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
1859 
1860     kfree(entry);
1861 }
1862 
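/*
 * Resolve the mmap entry registered for this vm_pgoff and map it according
 * to its type: a non-cached or write-combined I/O mapping, or
 * vm_insert_page() of kernel-allocated DMA pages.
 */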
1863 static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
1864               struct vm_area_struct *vma)
1865 {
1866     struct rdma_user_mmap_entry *rdma_entry;
1867     struct efa_user_mmap_entry *entry;
1868     unsigned long va;
1869     int err = 0;
1870     u64 pfn;
1871 
1872     rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
1873     if (!rdma_entry) {
1874         ibdev_dbg(&dev->ibdev,
1875               "pgoff[%#lx] does not have valid entry\n",
1876               vma->vm_pgoff);
1877         atomic64_inc(&dev->stats.mmap_err);
1878         return -EINVAL;
1879     }
1880     entry = to_emmap(rdma_entry);
1881 
1882     ibdev_dbg(&dev->ibdev,
1883           "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
1884           entry->address, rdma_entry->npages * PAGE_SIZE,
1885           entry->mmap_flag);
1886 
1887     pfn = entry->address >> PAGE_SHIFT;
1888     switch (entry->mmap_flag) {
1889     case EFA_MMAP_IO_NC:
1890         err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
1891                     entry->rdma_entry.npages * PAGE_SIZE,
1892                     pgprot_noncached(vma->vm_page_prot),
1893                     rdma_entry);
1894         break;
1895     case EFA_MMAP_IO_WC:
1896         err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
1897                     entry->rdma_entry.npages * PAGE_SIZE,
1898                     pgprot_writecombine(vma->vm_page_prot),
1899                     rdma_entry);
1900         break;
1901     case EFA_MMAP_DMA_PAGE:
1902         for (va = vma->vm_start; va < vma->vm_end;
1903              va += PAGE_SIZE, pfn++) {
1904             err = vm_insert_page(vma, va, pfn_to_page(pfn));
1905             if (err)
1906                 break;
1907         }
1908         break;
1909     default:
1910         err = -EINVAL;
1911     }
1912 
1913     if (err) {
1914         ibdev_dbg(
1915             &dev->ibdev,
1916             "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
1917             entry->address, rdma_entry->npages * PAGE_SIZE,
1918             entry->mmap_flag, err);
1919         atomic64_inc(&dev->stats.mmap_err);
1920     }
1921 
1922     rdma_user_mmap_entry_put(rdma_entry);
1923     return err;
1924 }
1925 
1926 int efa_mmap(struct ib_ucontext *ibucontext,
1927          struct vm_area_struct *vma)
1928 {
1929     struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1930     struct efa_dev *dev = to_edev(ibucontext->device);
1931     size_t length = vma->vm_end - vma->vm_start;
1932 
1933     ibdev_dbg(&dev->ibdev,
1934           "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
1935           vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
1936 
1937     return __efa_mmap(dev, ucontext, vma);
1938 }
1939 
1940 static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
1941 {
1942     struct efa_com_destroy_ah_params params = {
1943         .ah = ah->ah,
1944         .pdn = to_epd(ah->ibah.pd)->pdn,
1945     };
1946 
1947     return efa_com_destroy_ah(&dev->edev, &params);
1948 }
1949 
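/*
 * Create an address handle for the destination GID. The GID raw bytes are
 * handed to the device; the resulting AH number is stored in the efa_ah and,
 * when a response buffer was provided, returned to userspace as
 * efa_address_handle.
 */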
1950 int efa_create_ah(struct ib_ah *ibah,
1951           struct rdma_ah_init_attr *init_attr,
1952           struct ib_udata *udata)
1953 {
1954     struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
1955     struct efa_dev *dev = to_edev(ibah->device);
1956     struct efa_com_create_ah_params params = {};
1957     struct efa_ibv_create_ah_resp resp = {};
1958     struct efa_com_create_ah_result result;
1959     struct efa_ah *ah = to_eah(ibah);
1960     int err;
1961 
1962     if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
1963         ibdev_dbg(&dev->ibdev,
1964               "Create address handle is not supported in atomic context\n");
1965         err = -EOPNOTSUPP;
1966         goto err_out;
1967     }
1968 
1969     if (udata->inlen &&
1970         !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1971         ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1972         err = -EINVAL;
1973         goto err_out;
1974     }
1975 
1976     memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
1977            sizeof(params.dest_addr));
1978     params.pdn = to_epd(ibah->pd)->pdn;
1979     err = efa_com_create_ah(&dev->edev, &params, &result);
1980     if (err)
1981         goto err_out;
1982 
1983     memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
1984     ah->ah = result.ah;
1985 
1986     resp.efa_address_handle = result.ah;
1987 
1988     if (udata->outlen) {
1989         err = ib_copy_to_udata(udata, &resp,
1990                        min(sizeof(resp), udata->outlen));
1991         if (err) {
1992             ibdev_dbg(&dev->ibdev,
1993                   "Failed to copy udata for create_ah response\n");
1994             goto err_destroy_ah;
1995         }
1996     }
1997     ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
1998 
1999     return 0;
2000 
2001 err_destroy_ah:
2002     efa_ah_destroy(dev, ah);
2003 err_out:
2004     atomic64_inc(&dev->stats.create_ah_err);
2005     return err;
2006 }
2007 
2008 int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
2009 {
2010     struct efa_dev *dev = to_edev(ibah->pd->device);
2011     struct efa_ah *ah = to_eah(ibah);
2012 
2013     ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
2014 
2015     if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
2016         ibdev_dbg(&dev->ibdev,
2017               "Destroy address handle is not supported in atomic context\n");
2018         return -EOPNOTSUPP;
2019     }
2020 
2021     efa_ah_destroy(dev, ah);
2022     return 0;
2023 }
2024 
2025 struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
2026                           u32 port_num)
2027 {
2028     return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
2029                       ARRAY_SIZE(efa_port_stats_descs),
2030                       RDMA_HW_STATS_DEFAULT_LIFESPAN);
2031 }
2032 
2033 struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
2034 {
2035     return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
2036                       ARRAY_SIZE(efa_device_stats_descs),
2037                       RDMA_HW_STATS_DEFAULT_LIFESPAN);
2038 }
2039 
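/*
 * Device-wide stats are served from software counters: admin queue command
 * counters plus the driver's own counters (keep-alive events and per-verb
 * error counts).
 */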
2040 static int efa_fill_device_stats(struct efa_dev *dev,
2041                  struct rdma_hw_stats *stats)
2042 {
2043     struct efa_com_stats_admin *as = &dev->edev.aq.stats;
2044     struct efa_stats *s = &dev->stats;
2045 
2046     stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
2047     stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
2048     stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
2049     stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
2050 
2051     stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
2052     stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
2053     stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
2054     stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
2055     stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
2056     stats->value[EFA_ALLOC_UCONTEXT_ERR] =
2057         atomic64_read(&s->alloc_ucontext_err);
2058     stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
2059     stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
2060 
2061     return ARRAY_SIZE(efa_device_stats_descs);
2062 }
2063 
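/*
 * Port stats are queried from the device with three GET_STATS admin
 * commands: basic (bytes/packets/drops), messages (send/recv bytes and work
 * requests) and RDMA read counters.
 */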
2064 static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
2065                    u32 port_num)
2066 {
2067     struct efa_com_get_stats_params params = {};
2068     union efa_com_get_stats_result result;
2069     struct efa_com_rdma_read_stats *rrs;
2070     struct efa_com_messages_stats *ms;
2071     struct efa_com_basic_stats *bs;
2072     int err;
2073 
2074     params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
2075     params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
2076 
2077     err = efa_com_get_stats(&dev->edev, &params, &result);
2078     if (err)
2079         return err;
2080 
2081     bs = &result.basic_stats;
2082     stats->value[EFA_TX_BYTES] = bs->tx_bytes;
2083     stats->value[EFA_TX_PKTS] = bs->tx_pkts;
2084     stats->value[EFA_RX_BYTES] = bs->rx_bytes;
2085     stats->value[EFA_RX_PKTS] = bs->rx_pkts;
2086     stats->value[EFA_RX_DROPS] = bs->rx_drops;
2087 
2088     params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
2089     err = efa_com_get_stats(&dev->edev, &params, &result);
2090     if (err)
2091         return err;
2092 
2093     ms = &result.messages_stats;
2094     stats->value[EFA_SEND_BYTES] = ms->send_bytes;
2095     stats->value[EFA_SEND_WRS] = ms->send_wrs;
2096     stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
2097     stats->value[EFA_RECV_WRS] = ms->recv_wrs;
2098 
2099     params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
2100     err = efa_com_get_stats(&dev->edev, &params, &result);
2101     if (err)
2102         return err;
2103 
2104     rrs = &result.rdma_read_stats;
2105     stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
2106     stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
2107     stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
2108     stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
2109 
2110     return ARRAY_SIZE(efa_port_stats_descs);
2111 }
2112 
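/*
 * port_num 0 selects the device-wide counter set; any other port number
 * selects the per-port counters queried from the device.
 */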
2113 int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
2114              u32 port_num, int index)
2115 {
2116     if (port_num)
2117         return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
2118     else
2119         return efa_fill_device_stats(to_edev(ibdev), stats);
2120 }
2121 
2122 enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
2123                      u32 port_num)
2124 {
2125     return IB_LINK_LAYER_UNSPECIFIED;
2126 }
2127