0001 /* This file is part of the Emulex RoCE Device Driver for
0002  * RoCE (RDMA over Converged Ethernet) adapters.
0003  * Copyright (C) 2012-2015 Emulex. All rights reserved.
0004  * EMULEX and SLI are trademarks of Emulex.
0005  * www.emulex.com
0006  *
0007  * This software is available to you under a choice of one of two licenses.
0008  * You may choose to be licensed under the terms of the GNU General Public
0009  * License (GPL) Version 2, available from the file COPYING in the main
0010  * directory of this source tree, or the BSD license below:
0011  *
0012  * Redistribution and use in source and binary forms, with or without
0013  * modification, are permitted provided that the following conditions
0014  * are met:
0015  *
0016  * - Redistributions of source code must retain the above copyright notice,
0017  *   this list of conditions and the following disclaimer.
0018  *
0019  * - Redistributions in binary form must reproduce the above copyright
0020  *   notice, this list of conditions and the following disclaimer in
0021  *   the documentation and/or other materials provided with the distribution.
0022  *
0023  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0024  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0025  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0026  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
0027  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0028  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0029  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
0030  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
0031  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
0032  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
0033  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0034  *
0035  * Contact Information:
0036  * linux-drivers@emulex.com
0037  *
0038  * Emulex
0039  * 3333 Susan Street
0040  * Costa Mesa, CA 92626
0041  */
0042 
0043 #include <linux/dma-mapping.h>
0044 #include <net/addrconf.h>
0045 #include <rdma/ib_verbs.h>
0046 #include <rdma/ib_user_verbs.h>
0047 #include <rdma/iw_cm.h>
0048 #include <rdma/ib_umem.h>
0049 #include <rdma/ib_addr.h>
0050 #include <rdma/ib_cache.h>
0051 #include <rdma/uverbs_ioctl.h>
0052 
0053 #include "ocrdma.h"
0054 #include "ocrdma_hw.h"
0055 #include "ocrdma_verbs.h"
0056 #include <rdma/ocrdma-abi.h>
0057 
0058 int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
0059 {
0060     if (index > 0)
0061         return -EINVAL;
0062 
0063     *pkey = 0xffff;
0064     return 0;
0065 }
0066 
0067 int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
0068             struct ib_udata *uhw)
0069 {
0070     struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
0071 
0072     if (uhw->inlen || uhw->outlen)
0073         return -EINVAL;
0074 
0075     memset(attr, 0, sizeof *attr);
0076     memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
0077            min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
0078     addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
0079                 dev->nic_info.mac_addr);
0080     attr->max_mr_size = dev->attr.max_mr_size;
0081     attr->page_size_cap = 0xffff000;
0082     attr->vendor_id = dev->nic_info.pdev->vendor;
0083     attr->vendor_part_id = dev->nic_info.pdev->device;
0084     attr->hw_ver = dev->asic_id;
0085     attr->max_qp = dev->attr.max_qp;
0086     attr->max_ah = OCRDMA_MAX_AH;
0087     attr->max_qp_wr = dev->attr.max_wqe;
0088 
0089     attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
0090                     IB_DEVICE_RC_RNR_NAK_GEN |
0091                     IB_DEVICE_SHUTDOWN_PORT |
0092                     IB_DEVICE_SYS_IMAGE_GUID |
0093                     IB_DEVICE_MEM_MGT_EXTENSIONS;
0094     attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
0095     attr->max_send_sge = dev->attr.max_send_sge;
0096     attr->max_recv_sge = dev->attr.max_recv_sge;
0097     attr->max_sge_rd = dev->attr.max_rdma_sge;
0098     attr->max_cq = dev->attr.max_cq;
0099     attr->max_cqe = dev->attr.max_cqe;
0100     attr->max_mr = dev->attr.max_mr;
0101     attr->max_mw = dev->attr.max_mw;
0102     attr->max_pd = dev->attr.max_pd;
0103     attr->atomic_cap = 0;
0104     attr->max_qp_rd_atom =
0105         min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
0106     attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
0107     attr->max_srq = dev->attr.max_srq;
0108     attr->max_srq_sge = dev->attr.max_srq_sge;
0109     attr->max_srq_wr = dev->attr.max_rqe;
0110     attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
0111     attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
0112     attr->max_pkeys = 1;
0113     return 0;
0114 }
0115 
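/* Map the PHY link speed reported by firmware onto the closest
 * equivalent IB speed/width pair (e.g. 40 Gbps -> QDR x4); unknown or
 * zero speeds fall back to SDR x1.
 */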
0116 static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
0117                         u16 *ib_speed, u8 *ib_width)
0118 {
0119     int status;
0120     u8 speed;
0121 
0122     status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
0123     if (status)
0124         speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
0125 
0126     switch (speed) {
0127     case OCRDMA_PHYS_LINK_SPEED_1GBPS:
0128         *ib_speed = IB_SPEED_SDR;
0129         *ib_width = IB_WIDTH_1X;
0130         break;
0131 
0132     case OCRDMA_PHYS_LINK_SPEED_10GBPS:
0133         *ib_speed = IB_SPEED_QDR;
0134         *ib_width = IB_WIDTH_1X;
0135         break;
0136 
0137     case OCRDMA_PHYS_LINK_SPEED_20GBPS:
0138         *ib_speed = IB_SPEED_DDR;
0139         *ib_width = IB_WIDTH_4X;
0140         break;
0141 
0142     case OCRDMA_PHYS_LINK_SPEED_40GBPS:
0143         *ib_speed = IB_SPEED_QDR;
0144         *ib_width = IB_WIDTH_4X;
0145         break;
0146 
0147     default:
0148         /* Unsupported */
0149         *ib_speed = IB_SPEED_SDR;
0150         *ib_width = IB_WIDTH_1X;
0151     }
0152 }
0153 
0154 int ocrdma_query_port(struct ib_device *ibdev,
0155               u32 port, struct ib_port_attr *props)
0156 {
0157     enum ib_port_state port_state;
0158     struct ocrdma_dev *dev;
0159     struct net_device *netdev;
0160 
0161     /* props is zeroed by the caller; avoid zeroing it here */
0162     dev = get_ocrdma_dev(ibdev);
0163     netdev = dev->nic_info.netdev;
0164     if (netif_running(netdev) && netif_oper_up(netdev)) {
0165         port_state = IB_PORT_ACTIVE;
0166         props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
0167     } else {
0168         port_state = IB_PORT_DOWN;
0169         props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
0170     }
0171     props->max_mtu = IB_MTU_4096;
0172     props->active_mtu = iboe_get_mtu(netdev->mtu);
0173     props->lid = 0;
0174     props->lmc = 0;
0175     props->sm_lid = 0;
0176     props->sm_sl = 0;
0177     props->state = port_state;
0178     props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
0179                 IB_PORT_DEVICE_MGMT_SUP |
0180                 IB_PORT_VENDOR_CLASS_SUP;
0181     props->ip_gids = true;
0182     props->gid_tbl_len = OCRDMA_MAX_SGID;
0183     props->pkey_tbl_len = 1;
0184     props->bad_pkey_cntr = 0;
0185     props->qkey_viol_cntr = 0;
0186     get_link_speed_and_width(dev, &props->active_speed,
0187                  &props->active_width);
0188     props->max_msg_sz = 0x80000000;
0189     props->max_vl_num = 4;
0190     return 0;
0191 }
0192 
0193 static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
0194                unsigned long len)
0195 {
0196     struct ocrdma_mm *mm;
0197 
0198     mm = kzalloc(sizeof(*mm), GFP_KERNEL);
0199     if (mm == NULL)
0200         return -ENOMEM;
0201     mm->key.phy_addr = phy_addr;
0202     mm->key.len = len;
0203     INIT_LIST_HEAD(&mm->entry);
0204 
0205     mutex_lock(&uctx->mm_list_lock);
0206     list_add_tail(&mm->entry, &uctx->mm_head);
0207     mutex_unlock(&uctx->mm_list_lock);
0208     return 0;
0209 }
0210 
0211 static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
0212                 unsigned long len)
0213 {
0214     struct ocrdma_mm *mm, *tmp;
0215 
0216     mutex_lock(&uctx->mm_list_lock);
0217     list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
0218         if (len != mm->key.len && phy_addr != mm->key.phy_addr)
0219             continue;
0220 
0221         list_del(&mm->entry);
0222         kfree(mm);
0223         break;
0224     }
0225     mutex_unlock(&uctx->mm_list_lock);
0226 }
0227 
0228 static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
0229                   unsigned long len)
0230 {
0231     bool found = false;
0232     struct ocrdma_mm *mm;
0233 
0234     mutex_lock(&uctx->mm_list_lock);
0235     list_for_each_entry(mm, &uctx->mm_head, entry) {
0236         if (len != mm->key.len && phy_addr != mm->key.phy_addr)
0237             continue;
0238 
0239         found = true;
0240         break;
0241     }
0242     mutex_unlock(&uctx->mm_list_lock);
0243     return found;
0244 }
0245 
0246 
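/* Reserve a PD slot from the preallocated pool: pick the first free
 * bit in the DPP or normal-PD bitmap, bump the in-use count and track
 * the pool's high-water mark.
 */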
0247 static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
0248 {
0249     u16 pd_bitmap_idx = 0;
0250     unsigned long *pd_bitmap;
0251 
0252     if (dpp_pool) {
0253         pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
0254         pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
0255                             dev->pd_mgr->max_dpp_pd);
0256         __set_bit(pd_bitmap_idx, pd_bitmap);
0257         dev->pd_mgr->pd_dpp_count++;
0258         if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
0259             dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
0260     } else {
0261         pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
0262         pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
0263                             dev->pd_mgr->max_normal_pd);
0264         __set_bit(pd_bitmap_idx, pd_bitmap);
0265         dev->pd_mgr->pd_norm_count++;
0266         if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
0267             dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
0268     }
0269     return pd_bitmap_idx;
0270 }
0271 
0272 static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
0273                     bool dpp_pool)
0274 {
0275     u16 pd_count;
0276     u16 pd_bit_index;
0277 
0278     pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
0279                   dev->pd_mgr->pd_norm_count;
0280     if (pd_count == 0)
0281         return -EINVAL;
0282 
0283     if (dpp_pool) {
0284         pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
0285         if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
0286             return -EINVAL;
0287         } else {
0288             __clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
0289             dev->pd_mgr->pd_dpp_count--;
0290         }
0291     } else {
0292         pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
0293         if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
0294             return -EINVAL;
0295         } else {
0296             __clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
0297             dev->pd_mgr->pd_norm_count--;
0298         }
0299     }
0300 
0301     return 0;
0302 }
0303 
0304 static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
0305                    bool dpp_pool)
0306 {
0307     int status;
0308 
0309     mutex_lock(&dev->dev_lock);
0310     status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
0311     mutex_unlock(&dev->dev_lock);
0312     return status;
0313 }
0314 
0315 static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
0316 {
0317     u16 pd_idx = 0;
0318     int status = 0;
0319 
0320     mutex_lock(&dev->dev_lock);
0321     if (pd->dpp_enabled) {
0322         /* try allocating a DPP PD; if unavailable, fall back to a normal PD */
0323         if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
0324             pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
0325             pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
0326             pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
0327         } else if (dev->pd_mgr->pd_norm_count <
0328                dev->pd_mgr->max_normal_pd) {
0329             pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
0330             pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
0331             pd->dpp_enabled = false;
0332         } else {
0333             status = -EINVAL;
0334         }
0335     } else {
0336         if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
0337             pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
0338             pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
0339         } else {
0340             status = -EINVAL;
0341         }
0342     }
0343     mutex_unlock(&dev->dev_lock);
0344     return status;
0345 }
0346 
0347 /*
0348  * NOTE:
0349  *
0350  * ocrdma_ucontext must be used here because this function is also
0351  * called from ocrdma_alloc_ucontext, where ib_udata does not carry a
0352  * valid ib_ucontext pointer. ib_uverbs_get_context does not call the
0353  * uobj_{alloc|get_xxx} helpers which are used to store the
0354  * ib_ucontext in the uverbs_attr_bundle wrapping the ib_udata, so
0355  * ib_udata does NOT imply a valid ib_ucontext here!
0356  */
0357 static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
0358                 struct ocrdma_ucontext *uctx,
0359                 struct ib_udata *udata)
0360 {
0361     int status;
0362 
0363     if (udata && uctx && dev->attr.max_dpp_pds) {
0364         pd->dpp_enabled =
0365             ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
0366         pd->num_dpp_qp =
0367             pd->dpp_enabled ? (dev->nic_info.db_page_size /
0368                        dev->attr.wqe_size) : 0;
0369     }
0370 
0371     if (dev->pd_mgr->pd_prealloc_valid)
0372         return ocrdma_get_pd_num(dev, pd);
0373 
0374 retry:
0375     status = ocrdma_mbx_alloc_pd(dev, pd);
0376     if (status) {
0377         if (pd->dpp_enabled) {
0378             pd->dpp_enabled = false;
0379             pd->num_dpp_qp = 0;
0380             goto retry;
0381         }
0382         return status;
0383     }
0384 
0385     return 0;
0386 }
0387 
0388 static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
0389                  struct ocrdma_pd *pd)
0390 {
0391     return (uctx->cntxt_pd == pd);
0392 }
0393 
0394 static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
0395                   struct ocrdma_pd *pd)
0396 {
0397     if (dev->pd_mgr->pd_prealloc_valid)
0398         ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
0399     else
0400         ocrdma_mbx_dealloc_pd(dev, pd);
0401 }
0402 
0403 static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
0404                     struct ocrdma_ucontext *uctx,
0405                     struct ib_udata *udata)
0406 {
0407     struct ib_device *ibdev = &dev->ibdev;
0408     struct ib_pd *pd;
0409     int status;
0410 
0411     pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
0412     if (!pd)
0413         return -ENOMEM;
0414 
0415     pd->device  = ibdev;
0416     uctx->cntxt_pd = get_ocrdma_pd(pd);
0417 
0418     status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
0419     if (status) {
0420         kfree(uctx->cntxt_pd);
0421         goto err;
0422     }
0423 
0424     uctx->cntxt_pd->uctx = uctx;
0425     uctx->cntxt_pd->ibpd.device = &dev->ibdev;
0426 err:
0427     return status;
0428 }
0429 
0430 static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
0431 {
0432     struct ocrdma_pd *pd = uctx->cntxt_pd;
0433     struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
0434 
0435     if (uctx->pd_in_use) {
0436         pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
0437                __func__, dev->id, pd->id);
0438     }
0439     uctx->cntxt_pd = NULL;
0440     _ocrdma_dealloc_pd(dev, pd);
0441     kfree(pd);
0442 }
0443 
0444 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
0445 {
0446     struct ocrdma_pd *pd = NULL;
0447 
0448     mutex_lock(&uctx->mm_list_lock);
0449     if (!uctx->pd_in_use) {
0450         uctx->pd_in_use = true;
0451         pd = uctx->cntxt_pd;
0452     }
0453     mutex_unlock(&uctx->mm_list_lock);
0454 
0455     return pd;
0456 }
0457 
0458 static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
0459 {
0460     mutex_lock(&uctx->mm_list_lock);
0461     uctx->pd_in_use = false;
0462     mutex_unlock(&uctx->mm_list_lock);
0463 }
0464 
0465 int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
0466 {
0467     struct ib_device *ibdev = uctx->device;
0468     int status;
0469     struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
0470     struct ocrdma_alloc_ucontext_resp resp = {};
0471     struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
0472     struct pci_dev *pdev = dev->nic_info.pdev;
0473     u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
0474 
0475     if (!udata)
0476         return -EFAULT;
0477     INIT_LIST_HEAD(&ctx->mm_head);
0478     mutex_init(&ctx->mm_list_lock);
0479 
0480     ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
0481                         &ctx->ah_tbl.pa, GFP_KERNEL);
0482     if (!ctx->ah_tbl.va)
0483         return -ENOMEM;
0484 
0485     ctx->ah_tbl.len = map_len;
0486 
0487     resp.ah_tbl_len = ctx->ah_tbl.len;
0488     resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
0489 
0490     status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
0491     if (status)
0492         goto map_err;
0493 
0494     status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
0495     if (status)
0496         goto pd_err;
0497 
0498     resp.dev_id = dev->id;
0499     resp.max_inline_data = dev->attr.max_inline_data;
0500     resp.wqe_size = dev->attr.wqe_size;
0501     resp.rqe_size = dev->attr.rqe_size;
0502     resp.dpp_wqe_size = dev->attr.wqe_size;
0503 
0504     memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
0505     status = ib_copy_to_udata(udata, &resp, sizeof(resp));
0506     if (status)
0507         goto cpy_err;
0508     return 0;
0509 
0510 cpy_err:
0511     ocrdma_dealloc_ucontext_pd(ctx);
0512 pd_err:
0513     ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
0514 map_err:
0515     dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
0516               ctx->ah_tbl.pa);
0517     return status;
0518 }
0519 
0520 void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
0521 {
0522     struct ocrdma_mm *mm, *tmp;
0523     struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
0524     struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
0525     struct pci_dev *pdev = dev->nic_info.pdev;
0526 
0527     ocrdma_dealloc_ucontext_pd(uctx);
0528 
0529     ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
0530     dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
0531               uctx->ah_tbl.pa);
0532 
0533     list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
0534         list_del(&mm->entry);
0535         kfree(mm);
0536     }
0537 }
0538 
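/* Only offsets previously exported through ocrdma_add_mmap() may be
 * mapped.  Doorbell pages are mapped non-cached, DPP pages are mapped
 * write-combined and queue memory is mapped with default caching.
 */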
0539 int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
0540 {
0541     struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
0542     struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
0543     unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
0544     u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
0545     unsigned long len = (vma->vm_end - vma->vm_start);
0546     int status;
0547     bool found;
0548 
0549     if (vma->vm_start & (PAGE_SIZE - 1))
0550         return -EINVAL;
0551     found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
0552     if (!found)
0553         return -EINVAL;
0554 
0555     if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
0556         dev->nic_info.db_total_size)) &&
0557         (len <= dev->nic_info.db_page_size)) {
0558         if (vma->vm_flags & VM_READ)
0559             return -EPERM;
0560 
0561         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
0562         status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
0563                         len, vma->vm_page_prot);
0564     } else if (dev->nic_info.dpp_unmapped_len &&
0565         (vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
0566         (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
0567             dev->nic_info.dpp_unmapped_len)) &&
0568         (len <= dev->nic_info.dpp_unmapped_len)) {
0569         if (vma->vm_flags & VM_READ)
0570             return -EPERM;
0571 
0572         vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
0573         status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
0574                         len, vma->vm_page_prot);
0575     } else {
0576         status = remap_pfn_range(vma, vma->vm_start,
0577                      vma->vm_pgoff, len, vma->vm_page_prot);
0578     }
0579     return status;
0580 }
0581 
0582 static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
0583                 struct ib_udata *udata)
0584 {
0585     int status;
0586     u64 db_page_addr;
0587     u64 dpp_page_addr = 0;
0588     u32 db_page_size;
0589     struct ocrdma_alloc_pd_uresp rsp;
0590     struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
0591         udata, struct ocrdma_ucontext, ibucontext);
0592 
0593     memset(&rsp, 0, sizeof(rsp));
0594     rsp.id = pd->id;
0595     rsp.dpp_enabled = pd->dpp_enabled;
0596     db_page_addr = ocrdma_get_db_addr(dev, pd->id);
0597     db_page_size = dev->nic_info.db_page_size;
0598 
0599     status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
0600     if (status)
0601         return status;
0602 
0603     if (pd->dpp_enabled) {
0604         dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
0605                 (pd->id * PAGE_SIZE);
0606         status = ocrdma_add_mmap(uctx, dpp_page_addr,
0607                  PAGE_SIZE);
0608         if (status)
0609             goto dpp_map_err;
0610         rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
0611         rsp.dpp_page_addr_lo = dpp_page_addr;
0612     }
0613 
0614     status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
0615     if (status)
0616         goto ucopy_err;
0617 
0618     pd->uctx = uctx;
0619     return 0;
0620 
0621 ucopy_err:
0622     if (pd->dpp_enabled)
0623         ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
0624 dpp_map_err:
0625     ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
0626     return status;
0627 }
0628 
0629 int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0630 {
0631     struct ib_device *ibdev = ibpd->device;
0632     struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
0633     struct ocrdma_pd *pd;
0634     int status;
0635     u8 is_uctx_pd = false;
0636     struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
0637         udata, struct ocrdma_ucontext, ibucontext);
0638 
0639     if (udata) {
0640         pd = ocrdma_get_ucontext_pd(uctx);
0641         if (pd) {
0642             is_uctx_pd = true;
0643             goto pd_mapping;
0644         }
0645     }
0646 
0647     pd = get_ocrdma_pd(ibpd);
0648     status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
0649     if (status)
0650         goto exit;
0651 
0652 pd_mapping:
0653     if (udata) {
0654         status = ocrdma_copy_pd_uresp(dev, pd, udata);
0655         if (status)
0656             goto err;
0657     }
0658     return 0;
0659 
0660 err:
0661     if (is_uctx_pd)
0662         ocrdma_release_ucontext_pd(uctx);
0663     else
0664         _ocrdma_dealloc_pd(dev, pd);
0665 exit:
0666     return status;
0667 }
0668 
0669 int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
0670 {
0671     struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
0672     struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
0673     struct ocrdma_ucontext *uctx = NULL;
0674     u64 usr_db;
0675 
0676     uctx = pd->uctx;
0677     if (uctx) {
0678         u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
0679             (pd->id * PAGE_SIZE);
0680         if (pd->dpp_enabled)
0681             ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
0682         usr_db = ocrdma_get_db_addr(dev, pd->id);
0683         ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
0684 
0685         if (is_ucontext_pd(uctx, pd)) {
0686             ocrdma_release_ucontext_pd(uctx);
0687             return 0;
0688         }
0689     }
0690     _ocrdma_dealloc_pd(dev, pd);
0691     return 0;
0692 }
0693 
0694 static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
0695                 u32 pdid, int acc, u32 num_pbls, u32 addr_check)
0696 {
0697     int status;
0698 
0699     mr->hwmr.fr_mr = 0;
0700     mr->hwmr.local_rd = 1;
0701     mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
0702     mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
0703     mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
0704     mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
0705     mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
0706     mr->hwmr.num_pbls = num_pbls;
0707 
0708     status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
0709     if (status)
0710         return status;
0711 
0712     mr->ibmr.lkey = mr->hwmr.lkey;
0713     if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
0714         mr->ibmr.rkey = mr->hwmr.lkey;
0715     return 0;
0716 }
0717 
0718 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
0719 {
0720     int status;
0721     struct ocrdma_mr *mr;
0722     struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
0723     struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
0724 
0725     if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
0726         pr_err("%s err, invalid access rights\n", __func__);
0727         return ERR_PTR(-EINVAL);
0728     }
0729 
0730     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0731     if (!mr)
0732         return ERR_PTR(-ENOMEM);
0733 
0734     status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
0735                    OCRDMA_ADDR_CHECK_DISABLE);
0736     if (status) {
0737         kfree(mr);
0738         return ERR_PTR(status);
0739     }
0740 
0741     return &mr->ibmr;
0742 }
0743 
0744 static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
0745                    struct ocrdma_hw_mr *mr)
0746 {
0747     struct pci_dev *pdev = dev->nic_info.pdev;
0748     int i = 0;
0749 
0750     if (mr->pbl_table) {
0751         for (i = 0; i < mr->num_pbls; i++) {
0752             if (!mr->pbl_table[i].va)
0753                 continue;
0754             dma_free_coherent(&pdev->dev, mr->pbl_size,
0755                       mr->pbl_table[i].va,
0756                       mr->pbl_table[i].pa);
0757         }
0758         kfree(mr->pbl_table);
0759         mr->pbl_table = NULL;
0760     }
0761 }
0762 
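/* Pick the smallest PBL size (a power-of-two multiple of
 * OCRDMA_MIN_HPAGE_SIZE) for which the number of PBLs needed to hold
 * num_pbes page entries stays below the device limit.
 */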
0763 static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
0764                   u32 num_pbes)
0765 {
0766     u32 num_pbls = 0;
0767     u32 idx = 0;
0768     int status = 0;
0769     u32 pbl_size;
0770 
0771     do {
0772         pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
0773         if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
0774             status = -EFAULT;
0775             break;
0776         }
0777         num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
0778         num_pbls = num_pbls / (pbl_size / sizeof(u64));
0779         idx++;
0780     } while (num_pbls >= dev->attr.max_num_mr_pbl);
0781 
0782     mr->hwmr.num_pbes = num_pbes;
0783     mr->hwmr.num_pbls = num_pbls;
0784     mr->hwmr.pbl_size = pbl_size;
0785     return status;
0786 }
0787 
0788 static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
0789 {
0790     int status = 0;
0791     int i;
0792     u32 dma_len = mr->pbl_size;
0793     struct pci_dev *pdev = dev->nic_info.pdev;
0794     void *va;
0795     dma_addr_t pa;
0796 
0797     mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
0798                 GFP_KERNEL);
0799 
0800     if (!mr->pbl_table)
0801         return -ENOMEM;
0802 
0803     for (i = 0; i < mr->num_pbls; i++) {
0804         va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
0805         if (!va) {
0806             ocrdma_free_mr_pbl_tbl(dev, mr);
0807             status = -ENOMEM;
0808             break;
0809         }
0810         mr->pbl_table[i].va = va;
0811         mr->pbl_table[i].pa = pa;
0812     }
0813     return status;
0814 }
0815 
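/* Walk the umem in PAGE_SIZE DMA blocks and write each block address
 * into the PBEs, moving to the next PBL page whenever the current one
 * is full.
 */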
0816 static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
0817 {
0818     struct ocrdma_pbe *pbe;
0819     struct ib_block_iter biter;
0820     struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
0821     int pbe_cnt;
0822     u64 pg_addr;
0823 
0824     if (!mr->hwmr.num_pbes)
0825         return;
0826 
0827     pbe = (struct ocrdma_pbe *)pbl_tbl->va;
0828     pbe_cnt = 0;
0829 
0830     rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
0831         /* store the page address in pbe */
0832         pg_addr = rdma_block_iter_dma_address(&biter);
0833         pbe->pa_lo = cpu_to_le32(pg_addr);
0834         pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
0835         pbe_cnt += 1;
0836         pbe++;
0837 
0838         /* if the current pbl is full of pbes,
0839          * move on to the next pbl.
0840          */
0841         if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) {
0842             pbl_tbl++;
0843             pbe = (struct ocrdma_pbe *)pbl_tbl->va;
0844             pbe_cnt = 0;
0845         }
0846     }
0847 }
0848 
0849 struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
0850                  u64 usr_addr, int acc, struct ib_udata *udata)
0851 {
0852     int status = -ENOMEM;
0853     struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
0854     struct ocrdma_mr *mr;
0855     struct ocrdma_pd *pd;
0856 
0857     pd = get_ocrdma_pd(ibpd);
0858 
0859     if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
0860         return ERR_PTR(-EINVAL);
0861 
0862     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0863     if (!mr)
0864         return ERR_PTR(status);
0865     mr->umem = ib_umem_get(ibpd->device, start, len, acc);
0866     if (IS_ERR(mr->umem)) {
0867         status = -EFAULT;
0868         goto umem_err;
0869     }
0870     status = ocrdma_get_pbl_info(
0871         dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
0872     if (status)
0873         goto umem_err;
0874 
0875     mr->hwmr.pbe_size = PAGE_SIZE;
0876     mr->hwmr.va = usr_addr;
0877     mr->hwmr.len = len;
0878     mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
0879     mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
0880     mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
0881     mr->hwmr.local_rd = 1;
0882     mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
0883     status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
0884     if (status)
0885         goto umem_err;
0886     build_user_pbes(dev, mr);
0887     status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
0888     if (status)
0889         goto mbx_err;
0890     mr->ibmr.lkey = mr->hwmr.lkey;
0891     if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
0892         mr->ibmr.rkey = mr->hwmr.lkey;
0893 
0894     return &mr->ibmr;
0895 
0896 mbx_err:
0897     ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
0898 umem_err:
0899     kfree(mr);
0900     return ERR_PTR(status);
0901 }
0902 
0903 int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
0904 {
0905     struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
0906     struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
0907 
0908     (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
0909 
0910     kfree(mr->pages);
0911     ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
0912 
0913     /* it could be user registered memory. */
0914     ib_umem_release(mr->umem);
0915     kfree(mr);
0916 
0917     /* Don't stop cleanup, in case FW is unresponsive */
0918     if (dev->mqe_ctx.fw_error_state) {
0919         pr_err("%s(%d) fw not responding.\n",
0920                __func__, dev->id);
0921     }
0922     return 0;
0923 }
0924 
0925 static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
0926                 struct ib_udata *udata)
0927 {
0928     int status;
0929     struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
0930         udata, struct ocrdma_ucontext, ibucontext);
0931     struct ocrdma_create_cq_uresp uresp;
0932 
0933     /* this must be user flow! */
0934     if (!udata)
0935         return -EINVAL;
0936 
0937     memset(&uresp, 0, sizeof(uresp));
0938     uresp.cq_id = cq->id;
0939     uresp.page_size = PAGE_ALIGN(cq->len);
0940     uresp.num_pages = 1;
0941     uresp.max_hw_cqe = cq->max_hw_cqe;
0942     uresp.page_addr[0] = virt_to_phys(cq->va);
0943     uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
0944     uresp.db_page_size = dev->nic_info.db_page_size;
0945     uresp.phase_change = cq->phase_change ? 1 : 0;
0946     status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
0947     if (status) {
0948         pr_err("%s(%d) copy error cqid=0x%x.\n",
0949                __func__, dev->id, cq->id);
0950         goto err;
0951     }
0952     status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
0953     if (status)
0954         goto err;
0955     status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
0956     if (status) {
0957         ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
0958         goto err;
0959     }
0960     cq->ucontext = uctx;
0961 err:
0962     return status;
0963 }
0964 
0965 int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
0966              struct ib_udata *udata)
0967 {
0968     struct ib_device *ibdev = ibcq->device;
0969     int entries = attr->cqe;
0970     struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
0971     struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
0972     struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
0973         udata, struct ocrdma_ucontext, ibucontext);
0974     u16 pd_id = 0;
0975     int status;
0976     struct ocrdma_create_cq_ureq ureq;
0977 
0978     if (attr->flags)
0979         return -EOPNOTSUPP;
0980 
0981     if (udata) {
0982         if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
0983             return -EFAULT;
0984     } else
0985         ureq.dpp_cq = 0;
0986 
0987     spin_lock_init(&cq->cq_lock);
0988     spin_lock_init(&cq->comp_handler_lock);
0989     INIT_LIST_HEAD(&cq->sq_head);
0990     INIT_LIST_HEAD(&cq->rq_head);
0991 
0992     if (udata)
0993         pd_id = uctx->cntxt_pd->id;
0994 
0995     status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
0996     if (status)
0997         return status;
0998 
0999     if (udata) {
1000         status = ocrdma_copy_cq_uresp(dev, cq, udata);
1001         if (status)
1002             goto ctx_err;
1003     }
1004     cq->phase = OCRDMA_CQE_VALID;
1005     dev->cq_tbl[cq->id] = cq;
1006     return 0;
1007 
1008 ctx_err:
1009     ocrdma_mbx_destroy_cq(dev, cq);
1010     return status;
1011 }
1012 
1013 int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
1014              struct ib_udata *udata)
1015 {
1016     int status = 0;
1017     struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1018 
1019     if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
1020         status = -EINVAL;
1021         return status;
1022     }
1023     ibcq->cqe = new_cnt;
1024     return status;
1025 }
1026 
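/* Count the CQEs still marked valid and acknowledge them through the
 * CQ doorbell before the CQ is torn down.
 */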
1027 static void ocrdma_flush_cq(struct ocrdma_cq *cq)
1028 {
1029     int cqe_cnt;
1030     int valid_count = 0;
1031     unsigned long flags;
1032 
1033     struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
1034     struct ocrdma_cqe *cqe = NULL;
1035 
1036     cqe = cq->va;
1037     cqe_cnt = cq->cqe_cnt;
1038 
1039     /* The last irq might have scheduled a polling thread;
1040      * sync up with it before hard flushing.
1041      */
1042     spin_lock_irqsave(&cq->cq_lock, flags);
1043     while (cqe_cnt) {
1044         if (is_cqe_valid(cq, cqe))
1045             valid_count++;
1046         cqe++;
1047         cqe_cnt--;
1048     }
1049     ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
1050     spin_unlock_irqrestore(&cq->cq_lock, flags);
1051 }
1052 
1053 int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1054 {
1055     struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1056     struct ocrdma_eq *eq = NULL;
1057     struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
1058     int pdid = 0;
1059     u32 irq, indx;
1060 
1061     dev->cq_tbl[cq->id] = NULL;
1062     indx = ocrdma_get_eq_table_index(dev, cq->eqn);
1063 
1064     eq = &dev->eq_tbl[indx];
1065     irq = ocrdma_get_irq(dev, eq);
1066     synchronize_irq(irq);
1067     ocrdma_flush_cq(cq);
1068 
1069     ocrdma_mbx_destroy_cq(dev, cq);
1070     if (cq->ucontext) {
1071         pdid = cq->ucontext->cntxt_pd->id;
1072         ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
1073                 PAGE_ALIGN(cq->len));
1074         ocrdma_del_mmap(cq->ucontext,
1075                 ocrdma_get_db_addr(dev, pdid),
1076                 dev->nic_info.db_page_size);
1077     }
1078     return 0;
1079 }
1080 
1081 static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1082 {
1083     int status = -EINVAL;
1084 
1085     if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
1086         dev->qp_tbl[qp->id] = qp;
1087         status = 0;
1088     }
1089     return status;
1090 }
1091 
1092 static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1093 {
1094     dev->qp_tbl[qp->id] = NULL;
1095 }
1096 
1097 static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
1098                   struct ib_qp_init_attr *attrs,
1099                   struct ib_udata *udata)
1100 {
1101     if ((attrs->qp_type != IB_QPT_GSI) &&
1102         (attrs->qp_type != IB_QPT_RC) &&
1103         (attrs->qp_type != IB_QPT_UC) &&
1104         (attrs->qp_type != IB_QPT_UD)) {
1105         pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1106                __func__, dev->id, attrs->qp_type);
1107         return -EOPNOTSUPP;
1108     }
1109     /* Skip the check for QP1 to support CM size of 128 */
1110     if ((attrs->qp_type != IB_QPT_GSI) &&
1111         (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
1112         pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1113                __func__, dev->id, attrs->cap.max_send_wr);
1114         pr_err("%s(%d) supported send_wr=0x%x\n",
1115                __func__, dev->id, dev->attr.max_wqe);
1116         return -EINVAL;
1117     }
1118     if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
1119         pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1120                __func__, dev->id, attrs->cap.max_recv_wr);
1121         pr_err("%s(%d) supported recv_wr=0x%x\n",
1122                __func__, dev->id, dev->attr.max_rqe);
1123         return -EINVAL;
1124     }
1125     if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
1126         pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1127                __func__, dev->id, attrs->cap.max_inline_data);
1128         pr_err("%s(%d) supported inline data size=0x%x\n",
1129                __func__, dev->id, dev->attr.max_inline_data);
1130         return -EINVAL;
1131     }
1132     if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
1133         pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1134                __func__, dev->id, attrs->cap.max_send_sge);
1135         pr_err("%s(%d) supported send_sge=0x%x\n",
1136                __func__, dev->id, dev->attr.max_send_sge);
1137         return -EINVAL;
1138     }
1139     if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
1140         pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1141                __func__, dev->id, attrs->cap.max_recv_sge);
1142         pr_err("%s(%d) supported recv_sge=0x%x\n",
1143                __func__, dev->id, dev->attr.max_recv_sge);
1144         return -EINVAL;
1145     }
1146     /* unprivileged user space cannot create special QP */
1147     if (udata && attrs->qp_type == IB_QPT_GSI) {
1148         pr_err
1149             ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
1150              __func__, dev->id, attrs->qp_type);
1151         return -EINVAL;
1152     }
1153     /* allow creating only one GSI type of QP */
1154     if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
1155         pr_err("%s(%d) GSI special QPs already created.\n",
1156                __func__, dev->id);
1157         return -EINVAL;
1158     }
1159     /* verify consumer QPs are not trying to use GSI QP's CQ */
1160     if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
1161         if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
1162             (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
1163             pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1164                 __func__, dev->id);
1165             return -EINVAL;
1166         }
1167     }
1168     return 0;
1169 }
1170 
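/* Build the create-QP response for user space: queue ids and sizes,
 * the physical addresses of the SQ/RQ pages and the doorbell page, and
 * register those addresses in the per-context mmap list.
 */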
1171 static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1172                 struct ib_udata *udata, int dpp_offset,
1173                 int dpp_credit_lmt, int srq)
1174 {
1175     int status;
1176     u64 usr_db;
1177     struct ocrdma_create_qp_uresp uresp;
1178     struct ocrdma_pd *pd = qp->pd;
1179     struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
1180 
1181     memset(&uresp, 0, sizeof(uresp));
1182     usr_db = dev->nic_info.unmapped_db +
1183             (pd->id * dev->nic_info.db_page_size);
1184     uresp.qp_id = qp->id;
1185     uresp.sq_dbid = qp->sq.dbid;
1186     uresp.num_sq_pages = 1;
1187     uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
1188     uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
1189     uresp.num_wqe_allocated = qp->sq.max_cnt;
1190     if (!srq) {
1191         uresp.rq_dbid = qp->rq.dbid;
1192         uresp.num_rq_pages = 1;
1193         uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
1194         uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
1195         uresp.num_rqe_allocated = qp->rq.max_cnt;
1196     }
1197     uresp.db_page_addr = usr_db;
1198     uresp.db_page_size = dev->nic_info.db_page_size;
1199     uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
1200     uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1201     uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
1202 
1203     if (qp->dpp_enabled) {
1204         uresp.dpp_credit = dpp_credit_lmt;
1205         uresp.dpp_offset = dpp_offset;
1206     }
1207     status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1208     if (status) {
1209         pr_err("%s(%d) user copy error.\n", __func__, dev->id);
1210         goto err;
1211     }
1212     status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
1213                  uresp.sq_page_size);
1214     if (status)
1215         goto err;
1216 
1217     if (!srq) {
1218         status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
1219                      uresp.rq_page_size);
1220         if (status)
1221             goto rq_map_err;
1222     }
1223     return status;
1224 rq_map_err:
1225     ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
1226 err:
1227     return status;
1228 }
1229 
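/* Compute the per-PD SQ and RQ doorbell addresses; the register
 * offsets differ between SKH-R (GEN2) and earlier ASIC generations.
 */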
1230 static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
1231                  struct ocrdma_pd *pd)
1232 {
1233     if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1234         qp->sq_db = dev->nic_info.db +
1235             (pd->id * dev->nic_info.db_page_size) +
1236             OCRDMA_DB_GEN2_SQ_OFFSET;
1237         qp->rq_db = dev->nic_info.db +
1238             (pd->id * dev->nic_info.db_page_size) +
1239             OCRDMA_DB_GEN2_RQ_OFFSET;
1240     } else {
1241         qp->sq_db = dev->nic_info.db +
1242             (pd->id * dev->nic_info.db_page_size) +
1243             OCRDMA_DB_SQ_OFFSET;
1244         qp->rq_db = dev->nic_info.db +
1245             (pd->id * dev->nic_info.db_page_size) +
1246             OCRDMA_DB_RQ_OFFSET;
1247     }
1248 }
1249 
1250 static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
1251 {
1252     qp->wqe_wr_id_tbl =
1253         kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
1254             GFP_KERNEL);
1255     if (qp->wqe_wr_id_tbl == NULL)
1256         return -ENOMEM;
1257     qp->rqe_wr_id_tbl =
1258         kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
1259     if (qp->rqe_wr_id_tbl == NULL)
1260         return -ENOMEM;
1261 
1262     return 0;
1263 }
1264 
1265 static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1266                       struct ocrdma_pd *pd,
1267                       struct ib_qp_init_attr *attrs)
1268 {
1269     qp->pd = pd;
1270     spin_lock_init(&qp->q_lock);
1271     INIT_LIST_HEAD(&qp->sq_entry);
1272     INIT_LIST_HEAD(&qp->rq_entry);
1273 
1274     qp->qp_type = attrs->qp_type;
1275     qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1276     qp->max_inline_data = attrs->cap.max_inline_data;
1277     qp->sq.max_sges = attrs->cap.max_send_sge;
1278     qp->rq.max_sges = attrs->cap.max_recv_sge;
1279     qp->state = OCRDMA_QPS_RST;
1280     qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1281 }
1282 
1283 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1284                    struct ib_qp_init_attr *attrs)
1285 {
1286     if (attrs->qp_type == IB_QPT_GSI) {
1287         dev->gsi_qp_created = 1;
1288         dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1289         dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1290     }
1291 }
1292 
1293 int ocrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
1294              struct ib_udata *udata)
1295 {
1296     int status;
1297     struct ib_pd *ibpd = ibqp->pd;
1298     struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1299     struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1300     struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1301     struct ocrdma_create_qp_ureq ureq;
1302     u16 dpp_credit_lmt, dpp_offset;
1303 
1304     if (attrs->create_flags)
1305         return -EOPNOTSUPP;
1306 
1307     status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
1308     if (status)
1309         goto gen_err;
1310 
1311     memset(&ureq, 0, sizeof(ureq));
1312     if (udata) {
1313         if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1314             return -EFAULT;
1315     }
1316     ocrdma_set_qp_init_params(qp, pd, attrs);
1317     if (udata == NULL)
1318         qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1319                     OCRDMA_QP_FAST_REG);
1320 
1321     mutex_lock(&dev->dev_lock);
1322     status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1323                     ureq.dpp_cq_id,
1324                     &dpp_offset, &dpp_credit_lmt);
1325     if (status)
1326         goto mbx_err;
1327 
1328     /* user-space QPs' wr_id tables are managed in the library */
1329     if (udata == NULL) {
1330         status = ocrdma_alloc_wr_id_tbl(qp);
1331         if (status)
1332             goto map_err;
1333     }
1334 
1335     status = ocrdma_add_qpn_map(dev, qp);
1336     if (status)
1337         goto map_err;
1338     ocrdma_set_qp_db(dev, qp, pd);
1339     if (udata) {
1340         status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1341                           dpp_credit_lmt,
1342                           (attrs->srq != NULL));
1343         if (status)
1344             goto cpy_err;
1345     }
1346     ocrdma_store_gsi_qp_cq(dev, attrs);
1347     qp->ibqp.qp_num = qp->id;
1348     mutex_unlock(&dev->dev_lock);
1349     return 0;
1350 
1351 cpy_err:
1352     ocrdma_del_qpn_map(dev, qp);
1353 map_err:
1354     ocrdma_mbx_destroy_qp(dev, qp);
1355 mbx_err:
1356     mutex_unlock(&dev->dev_lock);
1357     kfree(qp->wqe_wr_id_tbl);
1358     kfree(qp->rqe_wr_id_tbl);
1359     pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1360 gen_err:
1361     return status;
1362 }
1363 
1364 int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1365               int attr_mask)
1366 {
1367     int status = 0;
1368     struct ocrdma_qp *qp;
1369     struct ocrdma_dev *dev;
1370     enum ib_qp_state old_qps;
1371 
1372     qp = get_ocrdma_qp(ibqp);
1373     dev = get_ocrdma_dev(ibqp->device);
1374     if (attr_mask & IB_QP_STATE)
1375         status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
1376     /* if the new and previous states are the same, the hw doesn't
1377      * need to know about it.
1378      */
1379     if (status < 0)
1380         return status;
1381     return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1382 }
1383 
1384 int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1385              int attr_mask, struct ib_udata *udata)
1386 {
1387     unsigned long flags;
1388     int status = -EINVAL;
1389     struct ocrdma_qp *qp;
1390     struct ocrdma_dev *dev;
1391     enum ib_qp_state old_qps, new_qps;
1392 
1393     if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1394         return -EOPNOTSUPP;
1395 
1396     qp = get_ocrdma_qp(ibqp);
1397     dev = get_ocrdma_dev(ibqp->device);
1398 
1399     /* synchronize with multiple contexts trying to change/retrieve the qp state */
1400     mutex_lock(&dev->dev_lock);
1401     /* synchronize with wqe, rqe posting and cqe processing contexts */
1402     spin_lock_irqsave(&qp->q_lock, flags);
1403     old_qps = get_ibqp_state(qp->state);
1404     if (attr_mask & IB_QP_STATE)
1405         new_qps = attr->qp_state;
1406     else
1407         new_qps = old_qps;
1408     spin_unlock_irqrestore(&qp->q_lock, flags);
1409 
1410     if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1411         pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1412                "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1413                __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1414                old_qps, new_qps);
1415         goto param_err;
1416     }
1417 
1418     status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1419     if (status > 0)
1420         status = 0;
1421 param_err:
1422     mutex_unlock(&dev->dev_lock);
1423     return status;
1424 }
1425 
1426 static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1427 {
1428     switch (mtu) {
1429     case 256:
1430         return IB_MTU_256;
1431     case 512:
1432         return IB_MTU_512;
1433     case 1024:
1434         return IB_MTU_1024;
1435     case 2048:
1436         return IB_MTU_2048;
1437     case 4096:
1438         return IB_MTU_4096;
1439     default:
1440         return IB_MTU_1024;
1441     }
1442 }
1443 
1444 static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1445 {
1446     int ib_qp_acc_flags = 0;
1447 
1448     if (qp_cap_flags & OCRDMA_QP_INB_WR)
1449         ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1450     if (qp_cap_flags & OCRDMA_QP_INB_RD)
1451         ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1452     return ib_qp_acc_flags;
1453 }
1454 
1455 int ocrdma_query_qp(struct ib_qp *ibqp,
1456             struct ib_qp_attr *qp_attr,
1457             int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1458 {
1459     int status;
1460     u32 qp_state;
1461     struct ocrdma_qp_params params;
1462     struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1463     struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1464 
1465     memset(&params, 0, sizeof(params));
1466     mutex_lock(&dev->dev_lock);
1467     status = ocrdma_mbx_query_qp(dev, qp, &params);
1468     mutex_unlock(&dev->dev_lock);
1469     if (status)
1470         goto mbx_err;
1471     if (qp->qp_type == IB_QPT_UD)
1472         qp_attr->qkey = params.qkey;
1473     qp_attr->path_mtu =
1474         ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
1475                 OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1476                 OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
1477     qp_attr->path_mig_state = IB_MIG_MIGRATED;
1478     qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1479     qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1480     qp_attr->dest_qp_num =
1481         params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1482 
1483     qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1484     qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1485     qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1486     qp_attr->cap.max_send_sge = qp->sq.max_sges;
1487     qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1488     qp_attr->cap.max_inline_data = qp->max_inline_data;
1489     qp_init_attr->cap = qp_attr->cap;
1490     qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
1491 
1492     rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
1493             params.rnt_rc_sl_fl &
1494               OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
1495             qp->sgid_idx,
1496             (params.hop_lmt_rq_psn &
1497              OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1498              OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
1499             (params.tclass_sq_psn &
1500              OCRDMA_QP_PARAMS_TCLASS_MASK) >>
1501              OCRDMA_QP_PARAMS_TCLASS_SHIFT);
1502     rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
1503 
1504     rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
1505     rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
1506                        OCRDMA_QP_PARAMS_SL_MASK) >>
1507                        OCRDMA_QP_PARAMS_SL_SHIFT);
1508     qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1509                 OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1510                 OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1511     qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1512                   OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1513                 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1514     qp_attr->retry_cnt =
1515         (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1516         OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1517     qp_attr->min_rnr_timer = 0;
1518     qp_attr->pkey_index = 0;
1519     qp_attr->port_num = 1;
1520     rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
1521     rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
1522     qp_attr->alt_pkey_index = 0;
1523     qp_attr->alt_port_num = 0;
1524     qp_attr->alt_timeout = 0;
1525     memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1526     qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1527             OCRDMA_QP_PARAMS_STATE_SHIFT;
1528     qp_attr->qp_state = get_ibqp_state(qp_state);
1529     qp_attr->cur_qp_state = qp_attr->qp_state;
1530     qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1531     qp_attr->max_dest_rd_atomic =
1532         params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1533     qp_attr->max_rd_atomic =
1534         params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1535     qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1536                 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1537     /* Sync driver QP state with FW */
1538     ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
1539 mbx_err:
1540     return status;
1541 }
1542 
1543 static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
1544 {
1545     unsigned int i = idx / 32;
1546     u32 mask = (1U << (idx % 32));
1547 
1548     srq->idx_bit_fields[i] ^= mask;
1549 }
1550 
1551 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1552 {
1553     return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
1554 }
1555 
1556 static int is_hw_sq_empty(struct ocrdma_qp *qp)
1557 {
1558     return (qp->sq.tail == qp->sq.head);
1559 }
1560 
1561 static int is_hw_rq_empty(struct ocrdma_qp *qp)
1562 {
1563     return (qp->rq.tail == qp->rq.head);
1564 }
1565 
1566 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1567 {
1568     return q->va + (q->head * q->entry_size);
1569 }
1570 
1571 static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1572                       u32 idx)
1573 {
1574     return q->va + (idx * q->entry_size);
1575 }
1576 
1577 static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1578 {
1579     q->head = (q->head + 1) & q->max_wqe_idx;
1580 }
1581 
1582 static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1583 {
1584     q->tail = (q->tail + 1) & q->max_wqe_idx;
1585 }
1586 
1587 /* discard the cqe for a given QP */
1588 static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1589 {
1590     unsigned long cq_flags;
1591     unsigned long flags;
1592     int discard_cnt = 0;
1593     u32 cur_getp, stop_getp;
1594     struct ocrdma_cqe *cqe;
1595     u32 qpn = 0, wqe_idx = 0;
1596 
1597     spin_lock_irqsave(&cq->cq_lock, cq_flags);
1598 
1599     /* traverse the CQEs in the hw CQ,
1600      * find the CQEs matching the given qp,
1601      * and mark each match discarded by clearing its qpn.
1602      * the doorbell is rung in poll_cq() since
1603      * we don't complete cqes out of order.
1604      */
1605 
1606     cur_getp = cq->getp;
1607     /* find up to where we reap the cq. */
1608     stop_getp = cur_getp;
1609     do {
1610         if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1611             break;
1612 
1613         cqe = cq->va + cur_getp;
1614         /* exit when (a) the whole hw cq has been reaped, or
1615          *           (b) the qp's sq/rq becomes empty.
1616          */
1618         qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1619         /* if a previously discarded cqe is found, skip it too. */
1620         /* check for a matching qp */
1621         if (qpn == 0 || qpn != qp->id)
1622             goto skip_cqe;
1623 
1624         if (is_cqe_for_sq(cqe)) {
1625             ocrdma_hwq_inc_tail(&qp->sq);
1626         } else {
1627             if (qp->srq) {
1628                 wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
1629                     OCRDMA_CQE_BUFTAG_SHIFT) &
1630                     qp->srq->rq.max_wqe_idx;
1631                 BUG_ON(wqe_idx < 1);
1632                 spin_lock_irqsave(&qp->srq->q_lock, flags);
1633                 ocrdma_hwq_inc_tail(&qp->srq->rq);
1634                 ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
1635                 spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1636 
1637             } else {
1638                 ocrdma_hwq_inc_tail(&qp->rq);
1639             }
1640         }
1641         /* mark cqe discarded so that it is not picked up later
1642          * in the poll_cq().
1643          */
1644         discard_cnt += 1;
1645         cqe->cmn.qpn = 0;
1646 skip_cqe:
1647         cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1648     } while (cur_getp != stop_getp);
1649     spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1650 }
1651 
1652 void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1653 {
1654     int found = false;
1655     unsigned long flags;
1656     struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
1657     /* sync with any active CQ poll */
1658 
1659     spin_lock_irqsave(&dev->flush_q_lock, flags);
1660     found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1661     if (found)
1662         list_del(&qp->sq_entry);
1663     if (!qp->srq) {
1664         found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1665         if (found)
1666             list_del(&qp->rq_entry);
1667     }
1668     spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1669 }
1670 
1671 int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1672 {
1673     struct ocrdma_pd *pd;
1674     struct ocrdma_qp *qp;
1675     struct ocrdma_dev *dev;
1676     struct ib_qp_attr attrs;
1677     int attr_mask;
1678     unsigned long flags;
1679 
1680     qp = get_ocrdma_qp(ibqp);
1681     dev = get_ocrdma_dev(ibqp->device);
1682 
1683     pd = qp->pd;
1684 
1685     /* change the QP state to ERROR */
1686     if (qp->state != OCRDMA_QPS_RST) {
1687         attrs.qp_state = IB_QPS_ERR;
1688         attr_mask = IB_QP_STATE;
1689         _ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1690     }
1691     /* ensure that CQEs for a newly created QP (whose id may be the same
1692      * as that of the QP just being destroyed) don't get
1693      * discarded until the old QP's CQEs are discarded.
1694      */
1695     mutex_lock(&dev->dev_lock);
1696     (void) ocrdma_mbx_destroy_qp(dev, qp);
1697 
1698     /*
1699      * acquire CQ lock while destroy is in progress, in order to
1700      * protect against processing in-flight CQEs for this QP.
1701      */
1702     spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1703     if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
1704         spin_lock(&qp->rq_cq->cq_lock);
1705         ocrdma_del_qpn_map(dev, qp);
1706         spin_unlock(&qp->rq_cq->cq_lock);
1707     } else {
1708         ocrdma_del_qpn_map(dev, qp);
1709     }
1710     spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1711 
1712     if (!pd->uctx) {
1713         ocrdma_discard_cqes(qp, qp->sq_cq);
1714         ocrdma_discard_cqes(qp, qp->rq_cq);
1715     }
1716     mutex_unlock(&dev->dev_lock);
1717 
1718     if (pd->uctx) {
1719         ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
1720                 PAGE_ALIGN(qp->sq.len));
1721         if (!qp->srq)
1722             ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
1723                     PAGE_ALIGN(qp->rq.len));
1724     }
1725 
1726     ocrdma_del_flush_qp(qp);
1727 
1728     kfree(qp->wqe_wr_id_tbl);
1729     kfree(qp->rqe_wr_id_tbl);
1730     return 0;
1731 }
1732 
1733 static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
1734                 struct ib_udata *udata)
1735 {
1736     int status;
1737     struct ocrdma_create_srq_uresp uresp;
1738 
1739     memset(&uresp, 0, sizeof(uresp));
1740     uresp.rq_dbid = srq->rq.dbid;
1741     uresp.num_rq_pages = 1;
1742     uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
1743     uresp.rq_page_size = srq->rq.len;
1744     uresp.db_page_addr = dev->nic_info.unmapped_db +
1745         (srq->pd->id * dev->nic_info.db_page_size);
1746     uresp.db_page_size = dev->nic_info.db_page_size;
1747     uresp.num_rqe_allocated = srq->rq.max_cnt;
1748     if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1749         uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1750         uresp.db_shift = 24;
1751     } else {
1752         uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1753         uresp.db_shift = 16;
1754     }
1755 
1756     status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1757     if (status)
1758         return status;
1759     status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1760                  uresp.rq_page_size);
1761     if (status)
1762         return status;
1763     return status;
1764 }
1765 
1766 int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1767               struct ib_udata *udata)
1768 {
1769     int status;
1770     struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd);
1771     struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1772     struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
1773 
1774     if (init_attr->srq_type != IB_SRQT_BASIC)
1775         return -EOPNOTSUPP;
1776 
1777     if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1778         return -EINVAL;
1779     if (init_attr->attr.max_wr > dev->attr.max_rqe)
1780         return -EINVAL;
1781 
1782     spin_lock_init(&srq->q_lock);
1783     srq->pd = pd;
1784     srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1785     status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
1786     if (status)
1787         return status;
1788 
1789     if (!udata) {
1790         srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
1791                          GFP_KERNEL);
1792         if (!srq->rqe_wr_id_tbl) {
1793             status = -ENOMEM;
1794             goto arm_err;
1795         }
1796 
1797         srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1798             (srq->rq.max_cnt % 32 ? 1 : 0);
1799         srq->idx_bit_fields =
1800             kmalloc_array(srq->bit_fields_len, sizeof(u32),
1801                   GFP_KERNEL);
1802         if (!srq->idx_bit_fields) {
1803             status = -ENOMEM;
1804             goto arm_err;
1805         }
1806         memset(srq->idx_bit_fields, 0xff,
1807                srq->bit_fields_len * sizeof(u32));
1808     }
1809 
1810     if (init_attr->attr.srq_limit) {
1811         status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1812         if (status)
1813             goto arm_err;
1814     }
1815 
1816     if (udata) {
1817         status = ocrdma_copy_srq_uresp(dev, srq, udata);
1818         if (status)
1819             goto arm_err;
1820     }
1821 
1822     return 0;
1823 
1824 arm_err:
1825     ocrdma_mbx_destroy_srq(dev, srq);
1826     kfree(srq->rqe_wr_id_tbl);
1827     kfree(srq->idx_bit_fields);
1828     return status;
1829 }
1830 
1831 int ocrdma_modify_srq(struct ib_srq *ibsrq,
1832               struct ib_srq_attr *srq_attr,
1833               enum ib_srq_attr_mask srq_attr_mask,
1834               struct ib_udata *udata)
1835 {
1836     int status;
1837     struct ocrdma_srq *srq;
1838 
1839     srq = get_ocrdma_srq(ibsrq);
1840     if (srq_attr_mask & IB_SRQ_MAX_WR)
1841         status = -EINVAL;
1842     else
1843         status = ocrdma_mbx_modify_srq(srq, srq_attr);
1844     return status;
1845 }
1846 
1847 int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1848 {
1849     struct ocrdma_srq *srq;
1850 
1851     srq = get_ocrdma_srq(ibsrq);
1852     return ocrdma_mbx_query_srq(srq, srq_attr);
1853 }
1854 
1855 int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1856 {
1857     struct ocrdma_srq *srq;
1858     struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1859 
1860     srq = get_ocrdma_srq(ibsrq);
1861 
1862     ocrdma_mbx_destroy_srq(dev, srq);
1863 
1864     if (srq->pd->uctx)
1865         ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
1866                 PAGE_ALIGN(srq->rq.len));
1867 
1868     kfree(srq->idx_bit_fields);
1869     kfree(srq->rqe_wr_id_tbl);
1870     return 0;
1871 }
1872 
1873 /* unprivileged verbs and their support functions. */
1874 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1875                 struct ocrdma_hdr_wqe *hdr,
1876                 const struct ib_send_wr *wr)
1877 {
1878     struct ocrdma_ewqe_ud_hdr *ud_hdr =
1879         (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1880     struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
1881 
1882     ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
1883     if (qp->qp_type == IB_QPT_GSI)
1884         ud_hdr->qkey = qp->qkey;
1885     else
1886         ud_hdr->qkey = ud_wr(wr)->remote_qkey;
1887     ud_hdr->rsvd_ahid = ah->id;
1888     ud_hdr->hdr_type = ah->hdr_type;
1889     if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
1890         hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
1891 }
1892 
1893 static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1894                   struct ocrdma_sge *sge, int num_sge,
1895                   struct ib_sge *sg_list)
1896 {
1897     int i;
1898 
1899     for (i = 0; i < num_sge; i++) {
1900         sge[i].lrkey = sg_list[i].lkey;
1901         sge[i].addr_lo = sg_list[i].addr;
1902         sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1903         sge[i].len = sg_list[i].length;
1904         hdr->total_len += sg_list[i].length;
1905     }
1906     if (num_sge == 0)
1907         memset(sge, 0, sizeof(*sge));
1908 }
1909 
1910 static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
1911 {
1912     uint32_t total_len = 0, i;
1913 
1914     for (i = 0; i < num_sge; i++)
1915         total_len += sg_list[i].length;
1916     return total_len;
1917 }
1918 
1919 
1920 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1921                     struct ocrdma_hdr_wqe *hdr,
1922                     struct ocrdma_sge *sge,
1923                     const struct ib_send_wr *wr, u32 wqe_size)
1924 {
1925     int i;
1926     char *dpp_addr;
1927 
1928     if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
1929         hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
1930         if (unlikely(hdr->total_len > qp->max_inline_data)) {
1931             pr_err("%s() supported_len=0x%x,\n"
1932                    " unsupported len req=0x%x\n", __func__,
1933                 qp->max_inline_data, hdr->total_len);
1934             return -EINVAL;
1935         }
1936         dpp_addr = (char *)sge;
1937         for (i = 0; i < wr->num_sge; i++) {
1938             memcpy(dpp_addr,
1939                    (void *)(unsigned long)wr->sg_list[i].addr,
1940                    wr->sg_list[i].length);
1941             dpp_addr += wr->sg_list[i].length;
1942         }
1943 
1944         wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
1945         if (0 == hdr->total_len)
1946             wqe_size += sizeof(struct ocrdma_sge);
1947         hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1948     } else {
1949         ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1950         if (wr->num_sge)
1951             wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1952         else
1953             wqe_size += sizeof(struct ocrdma_sge);
1954         hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1955     }
1956     hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1957     return 0;
1958 }
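/*
 * Descriptive note: for IB_SEND_INLINE on non-UD QPs the payload itself is
 * copied into the WQE (dpp_addr aliases the SGE area of the WQE), so no lkey
 * is referenced and OCRDMA_TYPE_INLINE is set; inline data larger than
 * qp->max_inline_data is rejected with -EINVAL. In the non-inline path the
 * SGEs are written out instead and OCRDMA_TYPE_LKEY is used. Either way the
 * control word ends up carrying the total WQE size in OCRDMA_WQE_STRIDE
 * units.
 */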
1959 
1960 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1961                  const struct ib_send_wr *wr)
1962 {
1963     struct ocrdma_sge *sge;
1964     u32 wqe_size = sizeof(*hdr);
1965 
1966     if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1967         ocrdma_build_ud_hdr(qp, hdr, wr);
1968         sge = (struct ocrdma_sge *)(hdr + 2);
1969         wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1970     } else {
1971         sge = (struct ocrdma_sge *)(hdr + 1);
1972     }
1973 
1974     return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1975 }
1976 
1977 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1978                   const struct ib_send_wr *wr)
1979 {
1980     int status;
1981     struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1982     struct ocrdma_sge *sge = ext_rw + 1;
1983     u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1984 
1985     status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1986     if (status)
1987         return status;
1988     ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
1989     ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
1990     ext_rw->lrkey = rdma_wr(wr)->rkey;
1991     ext_rw->len = hdr->total_len;
1992     return 0;
1993 }
1994 
1995 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1996                   const struct ib_send_wr *wr)
1997 {
1998     struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1999     struct ocrdma_sge *sge = ext_rw + 1;
2000     u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
2001         sizeof(struct ocrdma_hdr_wqe);
2002 
2003     ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2004     hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2005     hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2006     hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2007 
2008     ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2009     ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2010     ext_rw->lrkey = rdma_wr(wr)->rkey;
2011     ext_rw->len = hdr->total_len;
2012 }
2013 
2014 static int get_encoded_page_size(int pg_sz)
2015 {
2016     /* Max size is 256M (4096 << 16) */
2017     int i = 0;
2018     for (; i < 17; i++)
2019         if (pg_sz == (4096 << i))
2020             break;
2021     return i;
2022 }
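/*
 * Illustrative note: the encoding above is log2(pg_sz / 4096) for page sizes
 * between 4K (-> 0) and 256M (-> 16); for example 8192 encodes to 1 and 2M
 * (4096 << 9) encodes to 9. Assuming pg_sz is always one of those powers of
 * two, an equivalent one-liner would be:
 *
 *	return ilog2(pg_sz) - 12;
 */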
2023 
2024 static int ocrdma_build_reg(struct ocrdma_qp *qp,
2025                 struct ocrdma_hdr_wqe *hdr,
2026                 const struct ib_reg_wr *wr)
2027 {
2028     u64 fbo;
2029     struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2030     struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
2031     struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
2032     struct ocrdma_pbe *pbe;
2033     u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2034     int num_pbes = 0, i;
2035 
2036     wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2037 
2038     hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2039     hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2040 
2041     if (wr->access & IB_ACCESS_LOCAL_WRITE)
2042         hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2043     if (wr->access & IB_ACCESS_REMOTE_WRITE)
2044         hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2045     if (wr->access & IB_ACCESS_REMOTE_READ)
2046         hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2047     hdr->lkey = wr->key;
2048     hdr->total_len = mr->ibmr.length;
2049 
2050     fbo = mr->ibmr.iova - mr->pages[0];
2051 
2052     fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
2053     fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
2054     fast_reg->fbo_hi = upper_32_bits(fbo);
2055     fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2056     fast_reg->num_sges = mr->npages;
2057     fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
2058 
2059     pbe = pbl_tbl->va;
2060     for (i = 0; i < mr->npages; i++) {
2061         u64 buf_addr = mr->pages[i];
2062 
2063         pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2064         pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2065         num_pbes += 1;
2066         pbe++;
2067 
2068         /* if the current pbl is full of pbes,
2069          * move to the next pbl.
2070          */
2071         if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
2072             pbl_tbl++;
2073             pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2074         }
2075     }
2076 
2077     return 0;
2078 }
2079 
2080 static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2081 {
2082     u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
2083 
2084     iowrite32(val, qp->sq_db);
2085 }
2086 
2087 int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2088              const struct ib_send_wr **bad_wr)
2089 {
2090     int status = 0;
2091     struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2092     struct ocrdma_hdr_wqe *hdr;
2093     unsigned long flags;
2094 
2095     spin_lock_irqsave(&qp->q_lock, flags);
2096     if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
2097         spin_unlock_irqrestore(&qp->q_lock, flags);
2098         *bad_wr = wr;
2099         return -EINVAL;
2100     }
2101 
2102     while (wr) {
2103         if (qp->qp_type == IB_QPT_UD &&
2104             (wr->opcode != IB_WR_SEND &&
2105              wr->opcode != IB_WR_SEND_WITH_IMM)) {
2106             *bad_wr = wr;
2107             status = -EINVAL;
2108             break;
2109         }
2110         if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2111             wr->num_sge > qp->sq.max_sges) {
2112             *bad_wr = wr;
2113             status = -ENOMEM;
2114             break;
2115         }
2116         hdr = ocrdma_hwq_head(&qp->sq);
2117         hdr->cw = 0;
2118         if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2119             hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2120         if (wr->send_flags & IB_SEND_FENCE)
2121             hdr->cw |=
2122                 (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
2123         if (wr->send_flags & IB_SEND_SOLICITED)
2124             hdr->cw |=
2125                 (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
2126         hdr->total_len = 0;
2127         switch (wr->opcode) {
2128         case IB_WR_SEND_WITH_IMM:
2129             hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2130             hdr->immdt = ntohl(wr->ex.imm_data);
2131             fallthrough;
2132         case IB_WR_SEND:
2133             hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2134             status = ocrdma_build_send(qp, hdr, wr);
2135             break;
2136         case IB_WR_SEND_WITH_INV:
2137             hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
2138             hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2139             hdr->lkey = wr->ex.invalidate_rkey;
2140             status = ocrdma_build_send(qp, hdr, wr);
2141             break;
2142         case IB_WR_RDMA_WRITE_WITH_IMM:
2143             hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2144             hdr->immdt = ntohl(wr->ex.imm_data);
2145             fallthrough;
2146         case IB_WR_RDMA_WRITE:
2147             hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
2148             status = ocrdma_build_write(qp, hdr, wr);
2149             break;
2150         case IB_WR_RDMA_READ:
2151             ocrdma_build_read(qp, hdr, wr);
2152             break;
2153         case IB_WR_LOCAL_INV:
2154             hdr->cw |=
2155                 (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
2156             hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
2157                     sizeof(struct ocrdma_sge)) /
2158                 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2159             hdr->lkey = wr->ex.invalidate_rkey;
2160             break;
2161         case IB_WR_REG_MR:
2162             status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
2163             break;
2164         default:
2165             status = -EINVAL;
2166             break;
2167         }
2168         if (status) {
2169             *bad_wr = wr;
2170             break;
2171         }
2172         if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2173             qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
2174         else
2175             qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
2176         qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
2177         ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
2178                    OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
2179         /* make sure wqe is written before adapter can access it */
2180         wmb();
2181         /* inform hw to start processing it */
2182         ocrdma_ring_sq_db(qp);
2183 
2184         /* update pointer, counter for next wr */
2185         ocrdma_hwq_inc_head(&qp->sq);
2186         wr = wr->next;
2187     }
2188     spin_unlock_irqrestore(&qp->q_lock, flags);
2189     return status;
2190 }
2191 
2192 static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2193 {
2194     u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
2195 
2196     iowrite32(val, qp->rq_db);
2197 }
2198 
2199 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
2200                  const struct ib_recv_wr *wr, u16 tag)
2201 {
2202     u32 wqe_size = 0;
2203     struct ocrdma_sge *sge;
2204     if (wr->num_sge)
2205         wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
2206     else
2207         wqe_size = sizeof(*sge) + sizeof(*rqe);
2208 
2209     rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
2210                 OCRDMA_WQE_SIZE_SHIFT);
2211     rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2212     rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2213     rqe->total_len = 0;
2214     rqe->rsvd_tag = tag;
2215     sge = (struct ocrdma_sge *)(rqe + 1);
2216     ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
2217     ocrdma_cpu_to_le32(rqe, wqe_size);
2218 }
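/*
 * Illustrative sketch: the RQE control word packs the WQE size in units of
 * OCRDMA_WQE_STRIDE. With purely illustrative numbers (a 16-byte header,
 * 16-byte SGEs and an 8-byte stride, which are assumptions rather than
 * values taken from the headers), a one-SGE recv WQE would encode as:
 *
 *   wqe_size      = 16 + 1 * 16 = 32 bytes
 *   cw size field = 32 / 8 = 4, placed at OCRDMA_WQE_SIZE_SHIFT
 *
 * ocrdma_cpu_to_le32() then converts exactly wqe_size bytes of the WQE to
 * little endian before the doorbell is rung.
 */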
2219 
2220 int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2221              const struct ib_recv_wr **bad_wr)
2222 {
2223     int status = 0;
2224     unsigned long flags;
2225     struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2226     struct ocrdma_hdr_wqe *rqe;
2227 
2228     spin_lock_irqsave(&qp->q_lock, flags);
2229     if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
2230         spin_unlock_irqrestore(&qp->q_lock, flags);
2231         *bad_wr = wr;
2232         return -EINVAL;
2233     }
2234     while (wr) {
2235         if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
2236             wr->num_sge > qp->rq.max_sges) {
2237             *bad_wr = wr;
2238             status = -ENOMEM;
2239             break;
2240         }
2241         rqe = ocrdma_hwq_head(&qp->rq);
2242         ocrdma_build_rqe(rqe, wr, 0);
2243 
2244         qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
2245         /* make sure rqe is written before adapter can access it */
2246         wmb();
2247 
2248         /* inform hw to start processing it */
2249         ocrdma_ring_rq_db(qp);
2250 
2251         /* update pointer, counter for next wr */
2252         ocrdma_hwq_inc_head(&qp->rq);
2253         wr = wr->next;
2254     }
2255     spin_unlock_irqrestore(&qp->q_lock, flags);
2256     return status;
2257 }
2258 
2259 /* cqes for an srq's rqes can potentially arrive out of order.
2260  * the index gives the entry in the shadow table where the
2261  * wr_id is stored. the tag/index is returned in the cqe so the
2262  * original rqe can be referenced back.
2263  */
2264 static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
2265 {
2266     int row = 0;
2267     int indx = 0;
2268 
2269     for (row = 0; row < srq->bit_fields_len; row++) {
2270         if (srq->idx_bit_fields[row]) {
2271             indx = ffs(srq->idx_bit_fields[row]);
2272             indx = (row * 32) + (indx - 1);
2273             BUG_ON(indx >= srq->rq.max_cnt);
2274             ocrdma_srq_toggle_bit(srq, indx);
2275             break;
2276         }
2277     }
2278 
2279     BUG_ON(row == srq->bit_fields_len);
2280     return indx + 1; /* Use from index 1 */
2281 }
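/*
 * Descriptive sketch of the shadow-index lifecycle (for kernel SRQs):
 * ocrdma_create_srq() initialises idx_bit_fields to all ones, so a set bit
 * means "slot free". Posting a recv (ocrdma_post_srq_recv) calls
 * ocrdma_srq_get_idx(), which uses ffs() to find a free slot, toggles its
 * bit to mark it busy and returns index + 1 as the tag carried in the RQE.
 * When the matching CQE arrives (or is discarded at QP destroy),
 * ocrdma_srq_toggle_bit(srq, tag - 1) flips the bit back, freeing the slot.
 * For example, with idx_bit_fields[0] == 0xffffffff the first post gets
 * tag 1 and leaves 0xfffffffe behind; completing it restores 0xffffffff.
 */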
2282 
2283 static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2284 {
2285     u32 val = srq->rq.dbid | (1 << 16);
2286 
2287     iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2288 }
2289 
2290 int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2291              const struct ib_recv_wr **bad_wr)
2292 {
2293     int status = 0;
2294     unsigned long flags;
2295     struct ocrdma_srq *srq;
2296     struct ocrdma_hdr_wqe *rqe;
2297     u16 tag;
2298 
2299     srq = get_ocrdma_srq(ibsrq);
2300 
2301     spin_lock_irqsave(&srq->q_lock, flags);
2302     while (wr) {
2303         if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
2304             wr->num_sge > srq->rq.max_sges) {
2305             status = -ENOMEM;
2306             *bad_wr = wr;
2307             break;
2308         }
2309         tag = ocrdma_srq_get_idx(srq);
2310         rqe = ocrdma_hwq_head(&srq->rq);
2311         ocrdma_build_rqe(rqe, wr, tag);
2312 
2313         srq->rqe_wr_id_tbl[tag] = wr->wr_id;
2314         /* make sure rqe is written before adapter can perform DMA */
2315         wmb();
2316         /* inform hw to start processing it */
2317         ocrdma_ring_srq_db(srq);
2318         /* update pointer, counter for next wr */
2319         ocrdma_hwq_inc_head(&srq->rq);
2320         wr = wr->next;
2321     }
2322     spin_unlock_irqrestore(&srq->q_lock, flags);
2323     return status;
2324 }
2325 
2326 static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
2327 {
2328     enum ib_wc_status ibwc_status;
2329 
2330     switch (status) {
2331     case OCRDMA_CQE_GENERAL_ERR:
2332         ibwc_status = IB_WC_GENERAL_ERR;
2333         break;
2334     case OCRDMA_CQE_LOC_LEN_ERR:
2335         ibwc_status = IB_WC_LOC_LEN_ERR;
2336         break;
2337     case OCRDMA_CQE_LOC_QP_OP_ERR:
2338         ibwc_status = IB_WC_LOC_QP_OP_ERR;
2339         break;
2340     case OCRDMA_CQE_LOC_EEC_OP_ERR:
2341         ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2342         break;
2343     case OCRDMA_CQE_LOC_PROT_ERR:
2344         ibwc_status = IB_WC_LOC_PROT_ERR;
2345         break;
2346     case OCRDMA_CQE_WR_FLUSH_ERR:
2347         ibwc_status = IB_WC_WR_FLUSH_ERR;
2348         break;
2349     case OCRDMA_CQE_MW_BIND_ERR:
2350         ibwc_status = IB_WC_MW_BIND_ERR;
2351         break;
2352     case OCRDMA_CQE_BAD_RESP_ERR:
2353         ibwc_status = IB_WC_BAD_RESP_ERR;
2354         break;
2355     case OCRDMA_CQE_LOC_ACCESS_ERR:
2356         ibwc_status = IB_WC_LOC_ACCESS_ERR;
2357         break;
2358     case OCRDMA_CQE_REM_INV_REQ_ERR:
2359         ibwc_status = IB_WC_REM_INV_REQ_ERR;
2360         break;
2361     case OCRDMA_CQE_REM_ACCESS_ERR:
2362         ibwc_status = IB_WC_REM_ACCESS_ERR;
2363         break;
2364     case OCRDMA_CQE_REM_OP_ERR:
2365         ibwc_status = IB_WC_REM_OP_ERR;
2366         break;
2367     case OCRDMA_CQE_RETRY_EXC_ERR:
2368         ibwc_status = IB_WC_RETRY_EXC_ERR;
2369         break;
2370     case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2371         ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2372         break;
2373     case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2374         ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2375         break;
2376     case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2377         ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2378         break;
2379     case OCRDMA_CQE_REM_ABORT_ERR:
2380         ibwc_status = IB_WC_REM_ABORT_ERR;
2381         break;
2382     case OCRDMA_CQE_INV_EECN_ERR:
2383         ibwc_status = IB_WC_INV_EECN_ERR;
2384         break;
2385     case OCRDMA_CQE_INV_EEC_STATE_ERR:
2386         ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2387         break;
2388     case OCRDMA_CQE_FATAL_ERR:
2389         ibwc_status = IB_WC_FATAL_ERR;
2390         break;
2391     case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2392         ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2393         break;
2394     default:
2395         ibwc_status = IB_WC_GENERAL_ERR;
2396         break;
2397     }
2398     return ibwc_status;
2399 }
2400 
2401 static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2402               u32 wqe_idx)
2403 {
2404     struct ocrdma_hdr_wqe *hdr;
2405     struct ocrdma_sge *rw;
2406     int opcode;
2407 
2408     hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2409 
2410     ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2411     /* Undo the hdr->cw swap */
2412     opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2413     switch (opcode) {
2414     case OCRDMA_WRITE:
2415         ibwc->opcode = IB_WC_RDMA_WRITE;
2416         break;
2417     case OCRDMA_READ:
2418         rw = (struct ocrdma_sge *)(hdr + 1);
2419         ibwc->opcode = IB_WC_RDMA_READ;
2420         ibwc->byte_len = rw->len;
2421         break;
2422     case OCRDMA_SEND:
2423         ibwc->opcode = IB_WC_SEND;
2424         break;
2425     case OCRDMA_FR_MR:
2426         ibwc->opcode = IB_WC_REG_MR;
2427         break;
2428     case OCRDMA_LKEY_INV:
2429         ibwc->opcode = IB_WC_LOCAL_INV;
2430         break;
2431     default:
2432         ibwc->status = IB_WC_GENERAL_ERR;
2433         pr_err("%s() invalid opcode received = 0x%x\n",
2434                __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
2435         break;
2436     }
2437 }
2438 
2439 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2440                         struct ocrdma_cqe *cqe)
2441 {
2442     if (is_cqe_for_sq(cqe)) {
2443         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2444                 cqe->flags_status_srcqpn) &
2445                     ~OCRDMA_CQE_STATUS_MASK);
2446         cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2447                 cqe->flags_status_srcqpn) |
2448                 (OCRDMA_CQE_WR_FLUSH_ERR <<
2449                     OCRDMA_CQE_STATUS_SHIFT));
2450     } else {
2451         if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2452             cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2453                     cqe->flags_status_srcqpn) &
2454                         ~OCRDMA_CQE_UD_STATUS_MASK);
2455             cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2456                     cqe->flags_status_srcqpn) |
2457                     (OCRDMA_CQE_WR_FLUSH_ERR <<
2458                         OCRDMA_CQE_UD_STATUS_SHIFT));
2459         } else {
2460             cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2461                     cqe->flags_status_srcqpn) &
2462                         ~OCRDMA_CQE_STATUS_MASK);
2463             cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2464                     cqe->flags_status_srcqpn) |
2465                     (OCRDMA_CQE_WR_FLUSH_ERR <<
2466                         OCRDMA_CQE_STATUS_SHIFT));
2467         }
2468     }
2469 }
2470 
2471 static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2472                   struct ocrdma_qp *qp, int status)
2473 {
2474     bool expand = false;
2475 
2476     ibwc->byte_len = 0;
2477     ibwc->qp = &qp->ibqp;
2478     ibwc->status = ocrdma_to_ibwc_err(status);
2479 
2480     ocrdma_flush_qp(qp);
2481     ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
2482 
2483     /* if a wqe/rqe is pending for which a cqe needs to be returned,
2484      * trigger expanding it into a flush cqe.
2485      */
2486     if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2487         expand = true;
2488         ocrdma_set_cqe_status_flushed(qp, cqe);
2489     }
2490     return expand;
2491 }
2492 
2493 static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2494                   struct ocrdma_qp *qp, int status)
2495 {
2496     ibwc->opcode = IB_WC_RECV;
2497     ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2498     ocrdma_hwq_inc_tail(&qp->rq);
2499 
2500     return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2501 }
2502 
2503 static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2504                   struct ocrdma_qp *qp, int status)
2505 {
2506     ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2507     ocrdma_hwq_inc_tail(&qp->sq);
2508 
2509     return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2510 }
2511 
2512 
2513 static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2514                  struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2515                  bool *polled, bool *stop)
2516 {
2517     bool expand;
2518     struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2519     int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2520         OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2521     if (status < OCRDMA_MAX_CQE_ERR)
2522         atomic_inc(&dev->cqe_err_stats[status]);
2523 
2524     /* when the hw sq is empty but the rq is not, keep the cqe
2525      * so that the cq event is raised again.
2526      */
2527     if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
2528         /* when the cq for the rq and sq is the same, it is safe to
2529          * return flush cqes for RQEs.
2530          */
2531         if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2532             *polled = true;
2533             status = OCRDMA_CQE_WR_FLUSH_ERR;
2534             expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2535         } else {
2536             /* stop processing further cqes as this cqe is used for
2537              * triggering the cq event on the RQ's buddy cq.
2538              * When the QP is destroyed, this cqe will be removed
2539              * from the cq's hardware queue.
2540              */
2541             *polled = false;
2542             *stop = true;
2543             expand = false;
2544         }
2545     } else if (is_hw_sq_empty(qp)) {
2546         /* Do nothing */
2547         expand = false;
2548         *polled = false;
2549         *stop = false;
2550     } else {
2551         *polled = true;
2552         expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2553     }
2554     return expand;
2555 }
2556 
2557 static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2558                      struct ocrdma_cqe *cqe,
2559                      struct ib_wc *ibwc, bool *polled)
2560 {
2561     bool expand = false;
2562     int tail = qp->sq.tail;
2563     u32 wqe_idx;
2564 
2565     if (!qp->wqe_wr_id_tbl[tail].signaled) {
2566         *polled = false;    /* WC cannot be consumed yet */
2567     } else {
2568         ibwc->status = IB_WC_SUCCESS;
2569         ibwc->wc_flags = 0;
2570         ibwc->qp = &qp->ibqp;
2571         ocrdma_update_wc(qp, ibwc, tail);
2572         *polled = true;
2573     }
2574     wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
2575             OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
2576     if (tail != wqe_idx)
2577         expand = true; /* Coalesced CQE can't be consumed yet */
2578 
2579     ocrdma_hwq_inc_tail(&qp->sq);
2580     return expand;
2581 }
2582 
2583 static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2584                  struct ib_wc *ibwc, bool *polled, bool *stop)
2585 {
2586     int status;
2587     bool expand;
2588 
2589     status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2590         OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2591 
2592     if (status == OCRDMA_CQE_SUCCESS)
2593         expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2594     else
2595         expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2596     return expand;
2597 }
2598 
2599 static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
2600                  struct ocrdma_cqe *cqe)
2601 {
2602     int status;
2603     u16 hdr_type = 0;
2604 
2605     status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2606         OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2607     ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2608                         OCRDMA_CQE_SRCQP_MASK;
2609     ibwc->pkey_index = 0;
2610     ibwc->wc_flags = IB_WC_GRH;
2611     ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2612               OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
2613               OCRDMA_CQE_UD_XFER_LEN_MASK;
2614 
2615     if (ocrdma_is_udp_encap_supported(dev)) {
2616         hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2617                 OCRDMA_CQE_UD_L3TYPE_SHIFT) &
2618                 OCRDMA_CQE_UD_L3TYPE_MASK;
2619         ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
2620         ibwc->network_hdr_type = hdr_type;
2621     }
2622 
2623     return status;
2624 }
2625 
2626 static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2627                        struct ocrdma_cqe *cqe,
2628                        struct ocrdma_qp *qp)
2629 {
2630     unsigned long flags;
2631     struct ocrdma_srq *srq;
2632     u32 wqe_idx;
2633 
2634     srq = get_ocrdma_srq(qp->ibqp.srq);
2635     wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
2636         OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
2637     BUG_ON(wqe_idx < 1);
2638 
2639     ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2640     spin_lock_irqsave(&srq->q_lock, flags);
2641     ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
2642     spin_unlock_irqrestore(&srq->q_lock, flags);
2643     ocrdma_hwq_inc_tail(&srq->rq);
2644 }
2645 
2646 static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2647                 struct ib_wc *ibwc, bool *polled, bool *stop,
2648                 int status)
2649 {
2650     bool expand;
2651     struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2652 
2653     if (status < OCRDMA_MAX_CQE_ERR)
2654         atomic_inc(&dev->cqe_err_stats[status]);
2655 
2656     /* when the hw rq is empty but the sq is not, keep the cqe
2657      * so that the cq event is raised again.
2658      */
2659     if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2660         if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2661             *polled = true;
2662             status = OCRDMA_CQE_WR_FLUSH_ERR;
2663             expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2664         } else {
2665             *polled = false;
2666             *stop = true;
2667             expand = false;
2668         }
2669     } else if (is_hw_rq_empty(qp)) {
2670         /* Do nothing */
2671         expand = false;
2672         *polled = false;
2673         *stop = false;
2674     } else {
2675         *polled = true;
2676         expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2677     }
2678     return expand;
2679 }
2680 
2681 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2682                      struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2683 {
2684     struct ocrdma_dev *dev;
2685 
2686     dev = get_ocrdma_dev(qp->ibqp.device);
2687     ibwc->opcode = IB_WC_RECV;
2688     ibwc->qp = &qp->ibqp;
2689     ibwc->status = IB_WC_SUCCESS;
2690 
2691     if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2692         ocrdma_update_ud_rcqe(dev, ibwc, cqe);
2693     else
2694         ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2695 
2696     if (is_cqe_imm(cqe)) {
2697         ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2698         ibwc->wc_flags |= IB_WC_WITH_IMM;
2699     } else if (is_cqe_wr_imm(cqe)) {
2700         ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2701         ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2702         ibwc->wc_flags |= IB_WC_WITH_IMM;
2703     } else if (is_cqe_invalidated(cqe)) {
2704         ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2705         ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2706     }
2707     if (qp->ibqp.srq) {
2708         ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2709     } else {
2710         ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2711         ocrdma_hwq_inc_tail(&qp->rq);
2712     }
2713 }
2714 
2715 static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2716                  struct ib_wc *ibwc, bool *polled, bool *stop)
2717 {
2718     int status;
2719     bool expand = false;
2720 
2721     ibwc->wc_flags = 0;
2722     if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2723         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2724                     OCRDMA_CQE_UD_STATUS_MASK) >>
2725                     OCRDMA_CQE_UD_STATUS_SHIFT;
2726     } else {
2727         status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2728                  OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2729     }
2730 
2731     if (status == OCRDMA_CQE_SUCCESS) {
2732         *polled = true;
2733         ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2734     } else {
2735         expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2736                           status);
2737     }
2738     return expand;
2739 }
2740 
2741 static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2742                    u16 cur_getp)
2743 {
2744     if (cq->phase_change) {
2745         if (cur_getp == 0)
2746             cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2747     } else {
2748         /* clear valid bit */
2749         cqe->flags_status_srcqpn = 0;
2750     }
2751 }
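/*
 * Descriptive note: two ownership schemes are handled here. On CQs that use
 * phase change, the CQE valid bit is checked against cq->phase by
 * is_cqe_valid(); flipping cq->phase whenever the consumer wraps back to
 * index 0 makes the previous lap's entries appear invalid. On CQs without
 * phase change, the consumed CQE's flags/status word is simply zeroed so it
 * is not mistaken for a new completion on the next pass.
 */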
2752 
2753 static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2754                 struct ib_wc *ibwc)
2755 {
2756     u16 qpn = 0;
2757     int i = 0;
2758     bool expand = false;
2759     int polled_hw_cqes = 0;
2760     struct ocrdma_qp *qp = NULL;
2761     struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
2762     struct ocrdma_cqe *cqe;
2763     u16 cur_getp; bool polled = false; bool stop = false;
2764 
2765     cur_getp = cq->getp;
2766     while (num_entries) {
2767         cqe = cq->va + cur_getp;
2768         /* check whether valid cqe or not */
2769         if (!is_cqe_valid(cq, cqe))
2770             break;
2771         qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2772         /* ignore discarded cqe */
2773         if (qpn == 0)
2774             goto skip_cqe;
2775         qp = dev->qp_tbl[qpn];
2776         BUG_ON(qp == NULL);
2777 
2778         if (is_cqe_for_sq(cqe)) {
2779             expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2780                           &stop);
2781         } else {
2782             expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2783                           &stop);
2784         }
2785         if (expand)
2786             goto expand_cqe;
2787         if (stop)
2788             goto stop_cqe;
2789         /* clear qpn to avoid duplicate processing by discard_cqe() */
2790         cqe->cmn.qpn = 0;
2791 skip_cqe:
2792         polled_hw_cqes += 1;
2793         cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2794         ocrdma_change_cq_phase(cq, cqe, cur_getp);
2795 expand_cqe:
2796         if (polled) {
2797             num_entries -= 1;
2798             i += 1;
2799             ibwc = ibwc + 1;
2800             polled = false;
2801         }
2802     }
2803 stop_cqe:
2804     cq->getp = cur_getp;
2805 
2806     if (polled_hw_cqes)
2807         ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
2808 
2809     return i;
2810 }
2811 
2812 /* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
2813 static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2814                   struct ocrdma_qp *qp, struct ib_wc *ibwc)
2815 {
2816     int err_cqes = 0;
2817 
2818     while (num_entries) {
2819         if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2820             break;
2821         if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2822             ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2823             ocrdma_hwq_inc_tail(&qp->sq);
2824         } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2825             ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2826             ocrdma_hwq_inc_tail(&qp->rq);
2827         } else {
2828             return err_cqes;
2829         }
2830         ibwc->byte_len = 0;
2831         ibwc->status = IB_WC_WR_FLUSH_ERR;
2832         ibwc = ibwc + 1;
2833         err_cqes += 1;
2834         num_entries -= 1;
2835     }
2836     return err_cqes;
2837 }
2838 
2839 int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2840 {
2841     int cqes_to_poll = num_entries;
2842     struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2843     struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2844     int num_os_cqe = 0, err_cqes = 0;
2845     struct ocrdma_qp *qp;
2846     unsigned long flags;
2847 
2848     /* poll cqes from adapter CQ */
2849     spin_lock_irqsave(&cq->cq_lock, flags);
2850     num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2851     spin_unlock_irqrestore(&cq->cq_lock, flags);
2852     cqes_to_poll -= num_os_cqe;
2853 
2854     if (cqes_to_poll) {
2855         wc = wc + num_os_cqe;
2856         /* the adapter returns a single error cqe when a qp moves to
2857          * the error state. So insert error cqes with wc_status
2858          * FLUSHED for the pending WQEs and RQEs of every QP whose
2859          * SQ or RQ uses this CQ.
2860          */
2861         spin_lock_irqsave(&dev->flush_q_lock, flags);
2862         list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2863             if (cqes_to_poll == 0)
2864                 break;
2865             err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2866             cqes_to_poll -= err_cqes;
2867             num_os_cqe += err_cqes;
2868             wc = wc + err_cqes;
2869         }
2870         spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2871     }
2872     return num_os_cqe;
2873 }
2874 
2875 int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2876 {
2877     struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2878     struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2879     u16 cq_id;
2880     unsigned long flags;
2881     bool arm_needed = false, sol_needed = false;
2882 
2883     cq_id = cq->id;
2884 
2885     spin_lock_irqsave(&cq->cq_lock, flags);
2886     if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2887         arm_needed = true;
2888     if (cq_flags & IB_CQ_SOLICITED)
2889         sol_needed = true;
2890 
2891     ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
2892     spin_unlock_irqrestore(&cq->cq_lock, flags);
2893 
2894     return 0;
2895 }
2896 
2897 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2898                   u32 max_num_sg)
2899 {
2900     int status;
2901     struct ocrdma_mr *mr;
2902     struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2903     struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2904 
2905     if (mr_type != IB_MR_TYPE_MEM_REG)
2906         return ERR_PTR(-EINVAL);
2907 
2908     if (max_num_sg > dev->attr.max_pages_per_frmr)
2909         return ERR_PTR(-EINVAL);
2910 
2911     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2912     if (!mr)
2913         return ERR_PTR(-ENOMEM);
2914 
2915     mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
2916     if (!mr->pages) {
2917         status = -ENOMEM;
2918         goto pl_err;
2919     }
2920 
2921     status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
2922     if (status)
2923         goto pbl_err;
2924     mr->hwmr.fr_mr = 1;
2925     mr->hwmr.remote_rd = 0;
2926     mr->hwmr.remote_wr = 0;
2927     mr->hwmr.local_rd = 0;
2928     mr->hwmr.local_wr = 0;
2929     mr->hwmr.mw_bind = 0;
2930     status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
2931     if (status)
2932         goto pbl_err;
2933     status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
2934     if (status)
2935         goto mbx_err;
2936     mr->ibmr.rkey = mr->hwmr.lkey;
2937     mr->ibmr.lkey = mr->hwmr.lkey;
2938     dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
2939         (unsigned long) mr;
2940     return &mr->ibmr;
2941 mbx_err:
2942     ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
2943 pbl_err:
2944     kfree(mr->pages);
2945 pl_err:
2946     kfree(mr);
2947     return ERR_PTR(status);
2948 }
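/*
 * Illustrative sketch (not part of the driver): a kernel ULP typically
 * drives the fast-registration path implemented by ocrdma_alloc_mr() above,
 * ocrdma_map_mr_sg() below and ocrdma_build_reg() earlier roughly like
 * this; "pd", "qp", "sgl" and "nents" are assumed to exist in the caller.
 *
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *	int n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	struct ib_reg_wr reg = {
 *		.wr.opcode	= IB_WR_REG_MR,
 *		.wr.send_flags	= IB_SEND_SIGNALED,
 *		.mr		= mr,
 *		.key		= mr->rkey,
 *		.access		= IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *
 *	if (n == nents)
 *		ib_post_send(qp, &reg.wr, &bad_wr);
 *
 * which reaches ocrdma_post_send() -> ocrdma_build_reg() above.
 */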
2949 
2950 static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
2951 {
2952     struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2953 
2954     if (unlikely(mr->npages == mr->hwmr.num_pbes))
2955         return -ENOMEM;
2956 
2957     mr->pages[mr->npages++] = addr;
2958 
2959     return 0;
2960 }
2961 
2962 int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2963              unsigned int *sg_offset)
2964 {
2965     struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2966 
2967     mr->npages = 0;
2968 
2969     return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
2970 }