0001 /*
0002  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 
0033 #include <linux/module.h>
0034 #include <rdma/uverbs_ioctl.h>
0035 
0036 #include "iw_cxgb4.h"
0037 
0038 static int db_delay_usecs = 1;
0039 module_param(db_delay_usecs, int, 0644);
0040 MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
0041 
0042 static int ocqp_support = 1;
0043 module_param(ocqp_support, int, 0644);
0044 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
0045 
0046 int db_fc_threshold = 1000;
0047 module_param(db_fc_threshold, int, 0644);
0048 MODULE_PARM_DESC(db_fc_threshold,
0049          "QP count/threshold that triggers"
0050          " automatic db flow control mode (default = 1000)");
0051 
0052 int db_coalescing_threshold;
0053 module_param(db_coalescing_threshold, int, 0644);
0054 MODULE_PARM_DESC(db_coalescing_threshold,
0055          "QP count/threshold that triggers"
0056          " disabling db coalescing (default = 0)");
0057 
0058 static int max_fr_immd = T4_MAX_FR_IMMD;
0059 module_param(max_fr_immd, int, 0644);
0060 MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
0061 
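/*
 * alloc_ird()/free_ird(): reserve and return IRD (incoming RDMA read)
 * resources from the per-device pool.  The pool is protected by the
 * qps xarray lock, and alloc_ird() fails with -ENOMEM once avail_ird
 * is exhausted.
 */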
0062 static int alloc_ird(struct c4iw_dev *dev, u32 ird)
0063 {
0064     int ret = 0;
0065 
0066     xa_lock_irq(&dev->qps);
0067     if (ird <= dev->avail_ird)
0068         dev->avail_ird -= ird;
0069     else
0070         ret = -ENOMEM;
0071     xa_unlock_irq(&dev->qps);
0072 
0073     if (ret)
0074         dev_warn(&dev->rdev.lldi.pdev->dev,
0075              "device IRD resources exhausted\n");
0076 
0077     return ret;
0078 }
0079 
0080 static void free_ird(struct c4iw_dev *dev, int ird)
0081 {
0082     xa_lock_irq(&dev->qps);
0083     dev->avail_ird += ird;
0084     xa_unlock_irq(&dev->qps);
0085 }
0086 
0087 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
0088 {
0089     unsigned long flag;
0090     spin_lock_irqsave(&qhp->lock, flag);
0091     qhp->attr.state = state;
0092     spin_unlock_irqrestore(&qhp->lock, flag);
0093 }
0094 
0095 static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
0096 {
0097     c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
0098 }
0099 
0100 static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
0101 {
0102     dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
0103               dma_unmap_addr(sq, mapping));
0104 }
0105 
0106 static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
0107 {
0108     if (t4_sq_onchip(sq))
0109         dealloc_oc_sq(rdev, sq);
0110     else
0111         dealloc_host_sq(rdev, sq);
0112 }
0113 
0114 static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
0115 {
0116     if (!ocqp_support || !ocqp_supported(&rdev->lldi))
0117         return -ENOSYS;
0118     sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
0119     if (!sq->dma_addr)
0120         return -ENOMEM;
0121     sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
0122             rdev->lldi.vr->ocq.start;
0123     sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
0124                         rdev->lldi.vr->ocq.start);
0125     sq->flags |= T4_SQ_ONCHIP;
0126     return 0;
0127 }
0128 
0129 static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
0130 {
0131     sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
0132                        &(sq->dma_addr), GFP_KERNEL);
0133     if (!sq->queue)
0134         return -ENOMEM;
0135     sq->phys_addr = virt_to_phys(sq->queue);
0136     dma_unmap_addr_set(sq, mapping, sq->dma_addr);
0137     return 0;
0138 }
0139 
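/*
 * Allocate SQ memory.  User QPs try the on-chip queue pool first and
 * fall back to host (coherent DMA) memory if on-chip SQs are disabled,
 * unsupported, or exhausted; kernel QPs always use host memory.
 */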
0140 static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
0141 {
0142     int ret = -ENOSYS;
0143     if (user)
0144         ret = alloc_oc_sq(rdev, sq);
0145     if (ret)
0146         ret = alloc_host_sq(rdev, sq);
0147     return ret;
0148 }
0149 
0150 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
0151               struct c4iw_dev_ucontext *uctx, int has_rq)
0152 {
0153     /*
0154      * uP clears EQ contexts when the connection exits rdma mode,
0155      * so no need to post a RESET WR for these EQs.
0156      */
0157     dealloc_sq(rdev, &wq->sq);
0158     kfree(wq->sq.sw_sq);
0159     c4iw_put_qpid(rdev, wq->sq.qid, uctx);
0160 
0161     if (has_rq) {
0162         dma_free_coherent(&rdev->lldi.pdev->dev,
0163                   wq->rq.memsize, wq->rq.queue,
0164                   dma_unmap_addr(&wq->rq, mapping));
0165         c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
0166         kfree(wq->rq.sw_rq);
0167         c4iw_put_qpid(rdev, wq->rq.qid, uctx);
0168     }
0169     return 0;
0170 }
0171 
0172 /*
0173  * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL,
0174  * then this is a user mapping so compute the page-aligned physical address
0175  * for mapping.
0176  */
0177 void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
0178                   enum cxgb4_bar2_qtype qtype,
0179                   unsigned int *pbar2_qid, u64 *pbar2_pa)
0180 {
0181     u64 bar2_qoffset;
0182     int ret;
0183 
0184     ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype,
0185                    pbar2_pa ? 1 : 0,
0186                    &bar2_qoffset, pbar2_qid);
0187     if (ret)
0188         return NULL;
0189 
0190     if (pbar2_pa)
0191         *pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
0192 
0193     if (is_t4(rdev->lldi.adapter_type))
0194         return NULL;
0195 
0196     return rdev->bar2_kva + bar2_qoffset;
0197 }
0198 
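/*
 * Allocate the SQ (and, if need_rq, the RQ and RQT) resources and then
 * post a FW_RI_RES_WR so the firmware writes the egress queue contexts.
 * User QPs must have their queues reachable via BAR2 or the create
 * fails with -EINVAL.
 */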
0199 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
0200              struct t4_cq *rcq, struct t4_cq *scq,
0201              struct c4iw_dev_ucontext *uctx,
0202              struct c4iw_wr_wait *wr_waitp,
0203              int need_rq)
0204 {
0205     int user = (uctx != &rdev->uctx);
0206     struct fw_ri_res_wr *res_wr;
0207     struct fw_ri_res *res;
0208     int wr_len;
0209     struct sk_buff *skb;
0210     int ret = 0;
0211     int eqsize;
0212 
0213     wq->sq.qid = c4iw_get_qpid(rdev, uctx);
0214     if (!wq->sq.qid)
0215         return -ENOMEM;
0216 
0217     if (need_rq) {
0218         wq->rq.qid = c4iw_get_qpid(rdev, uctx);
0219         if (!wq->rq.qid) {
0220             ret = -ENOMEM;
0221             goto free_sq_qid;
0222         }
0223     }
0224 
0225     if (!user) {
0226         wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq),
0227                        GFP_KERNEL);
0228         if (!wq->sq.sw_sq) {
0229             ret = -ENOMEM;
0230             goto free_rq_qid;//FIXME
0231         }
0232 
0233         if (need_rq) {
0234             wq->rq.sw_rq = kcalloc(wq->rq.size,
0235                            sizeof(*wq->rq.sw_rq),
0236                            GFP_KERNEL);
0237             if (!wq->rq.sw_rq) {
0238                 ret = -ENOMEM;
0239                 goto free_sw_sq;
0240             }
0241         }
0242     }
0243 
0244     if (need_rq) {
0245         /*
0246          * RQT must be a power of 2 and at least 16 deep.
0247          */
0248         wq->rq.rqt_size =
0249             roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
0250         wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
0251         if (!wq->rq.rqt_hwaddr) {
0252             ret = -ENOMEM;
0253             goto free_sw_rq;
0254         }
0255     }
0256 
0257     ret = alloc_sq(rdev, &wq->sq, user);
0258     if (ret)
0259         goto free_hwaddr;
0260     memset(wq->sq.queue, 0, wq->sq.memsize);
0261     dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
0262 
0263     if (need_rq) {
0264         wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
0265                           wq->rq.memsize,
0266                           &wq->rq.dma_addr,
0267                           GFP_KERNEL);
0268         if (!wq->rq.queue) {
0269             ret = -ENOMEM;
0270             goto free_sq;
0271         }
0272         pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
0273              wq->sq.queue,
0274              (unsigned long long)virt_to_phys(wq->sq.queue),
0275              wq->rq.queue,
0276              (unsigned long long)virt_to_phys(wq->rq.queue));
0277         dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
0278     }
0279 
0280     wq->db = rdev->lldi.db_reg;
0281 
0282     wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
0283                      CXGB4_BAR2_QTYPE_EGRESS,
0284                      &wq->sq.bar2_qid,
0285                      user ? &wq->sq.bar2_pa : NULL);
0286     if (need_rq)
0287         wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
0288                          CXGB4_BAR2_QTYPE_EGRESS,
0289                          &wq->rq.bar2_qid,
0290                          user ? &wq->rq.bar2_pa : NULL);
0291 
0292     /*
0293      * User mode must have bar2 access.
0294      */
0295     if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
0296         pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
0297             pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
0298         ret = -EINVAL;
0299         goto free_dma;
0300     }
0301 
0302     wq->rdev = rdev;
0303     wq->rq.msn = 1;
0304 
0305     /* build fw_ri_res_wr */
0306     wr_len = sizeof(*res_wr) + 2 * sizeof(*res);
0307     if (need_rq)
0308         wr_len += sizeof(*res);
0309     skb = alloc_skb(wr_len, GFP_KERNEL);
0310     if (!skb) {
0311         ret = -ENOMEM;
0312         goto free_dma;
0313     }
0314     set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
0315 
0316     res_wr = __skb_put_zero(skb, wr_len);
0317     res_wr->op_nres = cpu_to_be32(
0318             FW_WR_OP_V(FW_RI_RES_WR) |
0319             FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
0320             FW_WR_COMPL_F);
0321     res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
0322     res_wr->cookie = (uintptr_t)wr_waitp;
0323     res = res_wr->res;
0324     res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
0325     res->u.sqrq.op = FW_RI_RES_OP_WRITE;
0326 
0327     /*
0328      * eqsize is the number of 64B entries plus the status page size.
0329      */
0330     eqsize = wq->sq.size * T4_SQ_NUM_SLOTS +
0331         rdev->hw_queue.t4_eq_status_entries;
0332 
0333     res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
0334         FW_RI_RES_WR_HOSTFCMODE_V(0) |  /* no host cidx updates */
0335         FW_RI_RES_WR_CPRIO_V(0) |   /* don't keep in chip cache */
0336         FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
0337         (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) |
0338         FW_RI_RES_WR_IQID_V(scq->cqid));
0339     res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
0340         FW_RI_RES_WR_DCAEN_V(0) |
0341         FW_RI_RES_WR_DCACPU_V(0) |
0342         FW_RI_RES_WR_FBMIN_V(2) |
0343         (t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) :
0344                      FW_RI_RES_WR_FBMAX_V(3)) |
0345         FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
0346         FW_RI_RES_WR_CIDXFTHRESH_V(0) |
0347         FW_RI_RES_WR_EQSIZE_V(eqsize));
0348     res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
0349     res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
0350 
0351     if (need_rq) {
0352         res++;
0353         res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
0354         res->u.sqrq.op = FW_RI_RES_OP_WRITE;
0355 
0356         /*
0357          * eqsize is the number of 64B entries plus the status page size
0358          */
0359         eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
0360             rdev->hw_queue.t4_eq_status_entries;
0361         res->u.sqrq.fetchszm_to_iqid =
0362             /* no host cidx updates */
0363             cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
0364             /* don't keep in chip cache */
0365             FW_RI_RES_WR_CPRIO_V(0) |
0366             /* set by uP at ri_init time */
0367             FW_RI_RES_WR_PCIECHN_V(0) |
0368             FW_RI_RES_WR_IQID_V(rcq->cqid));
0369         res->u.sqrq.dcaen_to_eqsize =
0370             cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
0371             FW_RI_RES_WR_DCACPU_V(0) |
0372             FW_RI_RES_WR_FBMIN_V(2) |
0373             FW_RI_RES_WR_FBMAX_V(3) |
0374             FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
0375             FW_RI_RES_WR_CIDXFTHRESH_V(0) |
0376             FW_RI_RES_WR_EQSIZE_V(eqsize));
0377         res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
0378         res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
0379     }
0380 
0381     c4iw_init_wr_wait(wr_waitp);
0382     ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
0383     if (ret)
0384         goto free_dma;
0385 
0386     pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
0387          wq->sq.qid, wq->rq.qid, wq->db,
0388          wq->sq.bar2_va, wq->rq.bar2_va);
0389 
0390     return 0;
0391 free_dma:
0392     if (need_rq)
0393         dma_free_coherent(&rdev->lldi.pdev->dev,
0394                   wq->rq.memsize, wq->rq.queue,
0395                   dma_unmap_addr(&wq->rq, mapping));
0396 free_sq:
0397     dealloc_sq(rdev, &wq->sq);
0398 free_hwaddr:
0399     if (need_rq)
0400         c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
0401 free_sw_rq:
0402     if (need_rq)
0403         kfree(wq->rq.sw_rq);
0404 free_sw_sq:
0405     kfree(wq->sq.sw_sq);
0406 free_rq_qid:
0407     if (need_rq)
0408         c4iw_put_qpid(rdev, wq->rq.qid, uctx);
0409 free_sq_qid:
0410     c4iw_put_qpid(rdev, wq->sq.qid, uctx);
0411     return ret;
0412 }
0413 
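/*
 * Copy the SGE payloads inline into the WQE right after the fw_ri_immd
 * header, wrapping at the end of the SQ and zero-padding the result to
 * a 16-byte boundary.  Returns -EMSGSIZE if the total length exceeds
 * 'max'.
 */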
0414 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
0415               const struct ib_send_wr *wr, int max, u32 *plenp)
0416 {
0417     u8 *dstp, *srcp;
0418     u32 plen = 0;
0419     int i;
0420     int rem, len;
0421 
0422     dstp = (u8 *)immdp->data;
0423     for (i = 0; i < wr->num_sge; i++) {
0424         if ((plen + wr->sg_list[i].length) > max)
0425             return -EMSGSIZE;
0426         srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
0427         plen += wr->sg_list[i].length;
0428         rem = wr->sg_list[i].length;
0429         while (rem) {
0430             if (dstp == (u8 *)&sq->queue[sq->size])
0431                 dstp = (u8 *)sq->queue;
0432             if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
0433                 len = rem;
0434             else
0435                 len = (u8 *)&sq->queue[sq->size] - dstp;
0436             memcpy(dstp, srcp, len);
0437             dstp += len;
0438             srcp += len;
0439             rem -= len;
0440         }
0441     }
0442     len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp));
0443     if (len)
0444         memset(dstp, 0, len);
0445     immdp->op = FW_RI_DATA_IMMD;
0446     immdp->r1 = 0;
0447     immdp->r2 = 0;
0448     immdp->immdlen = cpu_to_be32(plen);
0449     *plenp = plen;
0450     return 0;
0451 }
0452 
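/*
 * Build an immediate SGL (lkey/length and address flits per SGE)
 * directly in queue memory, wrapping from queue_end back to
 * queue_start as needed, and terminate it with a zero flit.  Returns
 * -EMSGSIZE if the accumulated length overflows 32 bits.
 */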
0453 static int build_isgl(__be64 *queue_start, __be64 *queue_end,
0454               struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
0455               int num_sge, u32 *plenp)
0456 
0457 {
0458     int i;
0459     u32 plen = 0;
0460     __be64 *flitp;
0461 
0462     if ((__be64 *)isglp == queue_end)
0463         isglp = (struct fw_ri_isgl *)queue_start;
0464 
0465     flitp = (__be64 *)isglp->sge;
0466 
0467     for (i = 0; i < num_sge; i++) {
0468         if ((plen + sg_list[i].length) < plen)
0469             return -EMSGSIZE;
0470         plen += sg_list[i].length;
0471         *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
0472                      sg_list[i].length);
0473         if (++flitp == queue_end)
0474             flitp = queue_start;
0475         *flitp = cpu_to_be64(sg_list[i].addr);
0476         if (++flitp == queue_end)
0477             flitp = queue_start;
0478     }
0479     *flitp = (__force __be64)0;
0480     isglp->op = FW_RI_DATA_ISGL;
0481     isglp->r1 = 0;
0482     isglp->nsge = cpu_to_be16(num_sge);
0483     isglp->r2 = 0;
0484     if (plenp)
0485         *plenp = plen;
0486     return 0;
0487 }
0488 
0489 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
0490                const struct ib_send_wr *wr, u8 *len16)
0491 {
0492     u32 plen;
0493     int size;
0494     int ret;
0495 
0496     if (wr->num_sge > T4_MAX_SEND_SGE)
0497         return -EINVAL;
0498     switch (wr->opcode) {
0499     case IB_WR_SEND:
0500         if (wr->send_flags & IB_SEND_SOLICITED)
0501             wqe->send.sendop_pkd = cpu_to_be32(
0502                 FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
0503         else
0504             wqe->send.sendop_pkd = cpu_to_be32(
0505                 FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
0506         wqe->send.stag_inv = 0;
0507         break;
0508     case IB_WR_SEND_WITH_INV:
0509         if (wr->send_flags & IB_SEND_SOLICITED)
0510             wqe->send.sendop_pkd = cpu_to_be32(
0511                 FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV));
0512         else
0513             wqe->send.sendop_pkd = cpu_to_be32(
0514                 FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV));
0515         wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
0516         break;
0517 
0518     default:
0519         return -EINVAL;
0520     }
0521     wqe->send.r3 = 0;
0522     wqe->send.r4 = 0;
0523 
0524     plen = 0;
0525     if (wr->num_sge) {
0526         if (wr->send_flags & IB_SEND_INLINE) {
0527             ret = build_immd(sq, wqe->send.u.immd_src, wr,
0528                      T4_MAX_SEND_INLINE, &plen);
0529             if (ret)
0530                 return ret;
0531             size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) +
0532                    plen;
0533         } else {
0534             ret = build_isgl((__be64 *)sq->queue,
0535                      (__be64 *)&sq->queue[sq->size],
0536                      wqe->send.u.isgl_src,
0537                      wr->sg_list, wr->num_sge, &plen);
0538             if (ret)
0539                 return ret;
0540             size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) +
0541                    wr->num_sge * sizeof(struct fw_ri_sge);
0542         }
0543     } else {
0544         wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
0545         wqe->send.u.immd_src[0].r1 = 0;
0546         wqe->send.u.immd_src[0].r2 = 0;
0547         wqe->send.u.immd_src[0].immdlen = 0;
0548         size = sizeof(wqe->send) + sizeof(struct fw_ri_immd);
0549         plen = 0;
0550     }
0551     *len16 = DIV_ROUND_UP(size, 16);
0552     wqe->send.plen = cpu_to_be32(plen);
0553     return 0;
0554 }
0555 
0556 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
0557                 const struct ib_send_wr *wr, u8 *len16)
0558 {
0559     u32 plen;
0560     int size;
0561     int ret;
0562 
0563     if (wr->num_sge > T4_MAX_SEND_SGE)
0564         return -EINVAL;
0565 
0566     /*
0567      * The iWARP protocol supports 64-bit immediate data, but the RDMA
0568      * API limits it to 32 bits.
0569      */
0570     if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
0571         wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
0572     else
0573         wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
0574     wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
0575     wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
0576     if (wr->num_sge) {
0577         if (wr->send_flags & IB_SEND_INLINE) {
0578             ret = build_immd(sq, wqe->write.u.immd_src, wr,
0579                      T4_MAX_WRITE_INLINE, &plen);
0580             if (ret)
0581                 return ret;
0582             size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) +
0583                    plen;
0584         } else {
0585             ret = build_isgl((__be64 *)sq->queue,
0586                      (__be64 *)&sq->queue[sq->size],
0587                      wqe->write.u.isgl_src,
0588                      wr->sg_list, wr->num_sge, &plen);
0589             if (ret)
0590                 return ret;
0591             size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) +
0592                    wr->num_sge * sizeof(struct fw_ri_sge);
0593         }
0594     } else {
0595         wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
0596         wqe->write.u.immd_src[0].r1 = 0;
0597         wqe->write.u.immd_src[0].r2 = 0;
0598         wqe->write.u.immd_src[0].immdlen = 0;
0599         size = sizeof(wqe->write) + sizeof(struct fw_ri_immd);
0600         plen = 0;
0601     }
0602     *len16 = DIV_ROUND_UP(size, 16);
0603     wqe->write.plen = cpu_to_be32(plen);
0604     return 0;
0605 }
0606 
0607 static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
0608                 struct ib_send_wr *wr)
0609 {
0610     memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
0611     memset(immdp->r1, 0, 6);
0612     immdp->op = FW_RI_DATA_IMMD;
0613     immdp->immdlen = 16;
0614 }
0615 
0616 static void build_rdma_write_cmpl(struct t4_sq *sq,
0617                   struct fw_ri_rdma_write_cmpl_wr *wcwr,
0618                   const struct ib_send_wr *wr, u8 *len16)
0619 {
0620     u32 plen;
0621     int size;
0622 
0623     /*
0624      * This code assumes the struct fields preceding the write isgl
0625      * fit in one 64B WR slot.  This is because the WQE is built
0626      * directly in the dma queue, and wrapping is only handled
0627      * by the code building sgls, i.e. the "fixed part" of the wr
0628      * structs must all fit in 64B.  The WQE build code should probably be
0629      * redesigned to avoid this restriction, but for now just add
0630      * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
0631      */
0632     BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
0633 
0634     wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
0635     wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
0636     if (wr->next->opcode == IB_WR_SEND)
0637         wcwr->stag_inv = 0;
0638     else
0639         wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
0640     wcwr->r2 = 0;
0641     wcwr->r3 = 0;
0642 
0643     /* SEND_INV SGL */
0644     if (wr->next->send_flags & IB_SEND_INLINE)
0645         build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
0646     else
0647         build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
0648                &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
0649 
0650     /* WRITE SGL */
0651     build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
0652            wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
0653 
0654     size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
0655         wr->num_sge * sizeof(struct fw_ri_sge);
0656     wcwr->plen = cpu_to_be32(plen);
0657     *len16 = DIV_ROUND_UP(size, 16);
0658 }
0659 
0660 static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
0661                u8 *len16)
0662 {
0663     if (wr->num_sge > 1)
0664         return -EINVAL;
0665     if (wr->num_sge && wr->sg_list[0].length) {
0666         wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
0667         wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
0668                             >> 32));
0669         wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
0670         wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
0671         wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
0672         wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
0673                              >> 32));
0674         wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr));
0675     } else {
0676         wqe->read.stag_src = cpu_to_be32(2);
0677         wqe->read.to_src_hi = 0;
0678         wqe->read.to_src_lo = 0;
0679         wqe->read.stag_sink = cpu_to_be32(2);
0680         wqe->read.plen = 0;
0681         wqe->read.to_sink_hi = 0;
0682         wqe->read.to_sink_lo = 0;
0683     }
0684     wqe->read.r2 = 0;
0685     wqe->read.r5 = 0;
0686     *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16);
0687     return 0;
0688 }
0689 
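/*
 * Coalesce a WRITE + SEND(_WITH_INV) chain into a single
 * FW_RI_RDMA_WRITE_CMPL_WR.  Two sw_sq entries are still consumed so
 * completion processing sees the usual WRITE and SEND opcodes.
 */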
0690 static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
0691 {
0692     bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
0693                  qhp->sq_sig_all;
0694     bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
0695                   qhp->sq_sig_all;
0696     struct t4_swsqe *swsqe;
0697     union t4_wr *wqe;
0698     u16 write_wrid;
0699     u8 len16;
0700     u16 idx;
0701 
0702     /*
0703      * The sw_sq entries still look like a WRITE and a SEND and consume
0704      * 2 slots. The FW WR, however, will be a single uber-WR.
0705      */
0706     wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
0707            qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
0708     build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
0709 
0710     /* WRITE swsqe */
0711     swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
0712     swsqe->opcode = FW_RI_RDMA_WRITE;
0713     swsqe->idx = qhp->wq.sq.pidx;
0714     swsqe->complete = 0;
0715     swsqe->signaled = write_signaled;
0716     swsqe->flushed = 0;
0717     swsqe->wr_id = wr->wr_id;
0718     if (c4iw_wr_log) {
0719         swsqe->sge_ts =
0720             cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
0721         swsqe->host_time = ktime_get();
0722     }
0723 
0724     write_wrid = qhp->wq.sq.pidx;
0725 
0726     /* just bump the sw_sq */
0727     qhp->wq.sq.in_use++;
0728     if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
0729         qhp->wq.sq.pidx = 0;
0730 
0731     /* SEND_WITH_INV swsqe */
0732     swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
0733     if (wr->next->opcode == IB_WR_SEND)
0734         swsqe->opcode = FW_RI_SEND;
0735     else
0736         swsqe->opcode = FW_RI_SEND_WITH_INV;
0737     swsqe->idx = qhp->wq.sq.pidx;
0738     swsqe->complete = 0;
0739     swsqe->signaled = send_signaled;
0740     swsqe->flushed = 0;
0741     swsqe->wr_id = wr->next->wr_id;
0742     if (c4iw_wr_log) {
0743         swsqe->sge_ts =
0744             cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
0745         swsqe->host_time = ktime_get();
0746     }
0747 
0748     wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
0749     wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
0750 
0751     init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
0752             write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
0753     t4_sq_produce(&qhp->wq, len16);
0754     idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
0755 
0756     t4_ring_sq_db(&qhp->wq, idx, wqe);
0757 }
0758 
0759 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
0760                const struct ib_recv_wr *wr, u8 *len16)
0761 {
0762     int ret;
0763 
0764     ret = build_isgl((__be64 *)qhp->wq.rq.queue,
0765              (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
0766              &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
0767     if (ret)
0768         return ret;
0769     *len16 = DIV_ROUND_UP(
0770         sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16);
0771     return 0;
0772 }
0773 
0774 static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
0775               u8 *len16)
0776 {
0777     int ret;
0778 
0779     ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
0780              &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
0781     if (ret)
0782         return ret;
0783     *len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
0784                   wr->num_sge * sizeof(struct fw_ri_sge), 16);
0785     return 0;
0786 }
0787 
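/*
 * Fast-register variant used when the adapter supports
 * FW_RI_FR_NSMR_TPTE_WR: the TPT entry and up to two PBL addresses are
 * built directly into the WR (see the mpl_len <= 2 check in
 * c4iw_post_send()).
 */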
0788 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
0789                   const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
0790                   u8 *len16)
0791 {
0792     __be64 *p = (__be64 *)fr->pbl;
0793 
0794     fr->r2 = cpu_to_be32(0);
0795     fr->stag = cpu_to_be32(mhp->ibmr.rkey);
0796 
0797     fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
0798         FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) |
0799         FW_RI_TPTE_STAGSTATE_V(1) |
0800         FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) |
0801         FW_RI_TPTE_PDID_V(mhp->attr.pdid));
0802     fr->tpte.locread_to_qpid = cpu_to_be32(
0803         FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) |
0804         FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) |
0805         FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12));
0806     fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V(
0807         PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3));
0808     fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0);
0809     fr->tpte.len_hi = cpu_to_be32(0);
0810     fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length);
0811     fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
0812     fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
0813 
0814     p[0] = cpu_to_be64((u64)mhp->mpl[0]);
0815     p[1] = cpu_to_be64((u64)mhp->mpl[1]);
0816 
0817     *len16 = DIV_ROUND_UP(sizeof(*fr), 16);
0818 }
0819 
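/*
 * Standard fast-register WR.  The PBL is carried either as a DSGL
 * (when the adapter supports it and pbllen exceeds max_fr_immd) or as
 * immediate data copied into the SQ, wrapping at the end of the queue.
 */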
0820 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
0821             const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
0822             u8 *len16, bool dsgl_supported)
0823 {
0824     struct fw_ri_immd *imdp;
0825     __be64 *p;
0826     int i;
0827     int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
0828     int rem;
0829 
0830     if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
0831         return -EINVAL;
0832 
0833     wqe->fr.qpbinde_to_dcacpu = 0;
0834     wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
0835     wqe->fr.addr_type = FW_RI_VA_BASED_TO;
0836     wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
0837     wqe->fr.len_hi = 0;
0838     wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
0839     wqe->fr.stag = cpu_to_be32(wr->key);
0840     wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
0841     wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
0842                     0xffffffff);
0843 
0844     if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
0845         struct fw_ri_dsgl *sglp;
0846 
0847         for (i = 0; i < mhp->mpl_len; i++)
0848             mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
0849 
0850         sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
0851         sglp->op = FW_RI_DATA_DSGL;
0852         sglp->r1 = 0;
0853         sglp->nsge = cpu_to_be16(1);
0854         sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
0855         sglp->len0 = cpu_to_be32(pbllen);
0856 
0857         *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
0858     } else {
0859         imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
0860         imdp->op = FW_RI_DATA_IMMD;
0861         imdp->r1 = 0;
0862         imdp->r2 = 0;
0863         imdp->immdlen = cpu_to_be32(pbllen);
0864         p = (__be64 *)(imdp + 1);
0865         rem = pbllen;
0866         for (i = 0; i < mhp->mpl_len; i++) {
0867             *p = cpu_to_be64((u64)mhp->mpl[i]);
0868             rem -= sizeof(*p);
0869             if (++p == (__be64 *)&sq->queue[sq->size])
0870                 p = (__be64 *)sq->queue;
0871         }
0872         while (rem) {
0873             *p = 0;
0874             rem -= sizeof(*p);
0875             if (++p == (__be64 *)&sq->queue[sq->size])
0876                 p = (__be64 *)sq->queue;
0877         }
0878         *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
0879                       + pbllen, 16);
0880     }
0881     return 0;
0882 }
0883 
0884 static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
0885               u8 *len16)
0886 {
0887     wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
0888     wqe->inv.r2 = 0;
0889     *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16);
0890     return 0;
0891 }
0892 
0893 void c4iw_qp_add_ref(struct ib_qp *qp)
0894 {
0895     pr_debug("ib_qp %p\n", qp);
0896     refcount_inc(&to_c4iw_qp(qp)->qp_refcnt);
0897 }
0898 
0899 void c4iw_qp_rem_ref(struct ib_qp *qp)
0900 {
0901     pr_debug("ib_qp %p\n", qp);
0902     if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt))
0903         complete(&to_c4iw_qp(qp)->qp_rel_comp);
0904 }
0905 
0906 static void add_to_fc_list(struct list_head *head, struct list_head *entry)
0907 {
0908     if (list_empty(entry))
0909         list_add_tail(entry, head);
0910 }
0911 
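/*
 * ring_kernel_sq_db()/ring_kernel_rq_db(): ring the doorbell for a
 * kernel QP unless the device is in doorbell flow-control mode, in
 * which case the pidx increment is accumulated and the QP is queued on
 * db_fc_list for a later ring.
 */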
0912 static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
0913 {
0914     unsigned long flags;
0915 
0916     xa_lock_irqsave(&qhp->rhp->qps, flags);
0917     spin_lock(&qhp->lock);
0918     if (qhp->rhp->db_state == NORMAL)
0919         t4_ring_sq_db(&qhp->wq, inc, NULL);
0920     else {
0921         add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
0922         qhp->wq.sq.wq_pidx_inc += inc;
0923     }
0924     spin_unlock(&qhp->lock);
0925     xa_unlock_irqrestore(&qhp->rhp->qps, flags);
0926     return 0;
0927 }
0928 
0929 static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
0930 {
0931     unsigned long flags;
0932 
0933     xa_lock_irqsave(&qhp->rhp->qps, flags);
0934     spin_lock(&qhp->lock);
0935     if (qhp->rhp->db_state == NORMAL)
0936         t4_ring_rq_db(&qhp->wq, inc, NULL);
0937     else {
0938         add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
0939         qhp->wq.rq.wq_pidx_inc += inc;
0940     }
0941     spin_unlock(&qhp->lock);
0942     xa_unlock_irqrestore(&qhp->rhp->qps, flags);
0943     return 0;
0944 }
0945 
0946 static int ib_to_fw_opcode(int ib_opcode)
0947 {
0948     int opcode;
0949 
0950     switch (ib_opcode) {
0951     case IB_WR_SEND_WITH_INV:
0952         opcode = FW_RI_SEND_WITH_INV;
0953         break;
0954     case IB_WR_SEND:
0955         opcode = FW_RI_SEND;
0956         break;
0957     case IB_WR_RDMA_WRITE:
0958         opcode = FW_RI_RDMA_WRITE;
0959         break;
0960     case IB_WR_RDMA_WRITE_WITH_IMM:
0961         opcode = FW_RI_WRITE_IMMEDIATE;
0962         break;
0963     case IB_WR_RDMA_READ:
0964     case IB_WR_RDMA_READ_WITH_INV:
0965         opcode = FW_RI_READ_REQ;
0966         break;
0967     case IB_WR_REG_MR:
0968         opcode = FW_RI_FAST_REGISTER;
0969         break;
0970     case IB_WR_LOCAL_INV:
0971         opcode = FW_RI_LOCAL_INV;
0972         break;
0973     default:
0974         opcode = -EINVAL;
0975     }
0976     return opcode;
0977 }
0978 
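/*
 * The QP has been flushed, so complete this WR by inserting a software
 * CQE with T4_ERR_SWFLUSH status into the send CQ and, if the CQ is
 * armed, calling its completion handler.
 */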
0979 static int complete_sq_drain_wr(struct c4iw_qp *qhp,
0980                 const struct ib_send_wr *wr)
0981 {
0982     struct t4_cqe cqe = {};
0983     struct c4iw_cq *schp;
0984     unsigned long flag;
0985     struct t4_cq *cq;
0986     int opcode;
0987 
0988     schp = to_c4iw_cq(qhp->ibqp.send_cq);
0989     cq = &schp->cq;
0990 
0991     opcode = ib_to_fw_opcode(wr->opcode);
0992     if (opcode < 0)
0993         return opcode;
0994 
0995     cqe.u.drain_cookie = wr->wr_id;
0996     cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
0997                  CQE_OPCODE_V(opcode) |
0998                  CQE_TYPE_V(1) |
0999                  CQE_SWCQE_V(1) |
1000                  CQE_DRAIN_V(1) |
1001                  CQE_QPID_V(qhp->wq.sq.qid));
1002 
1003     spin_lock_irqsave(&schp->lock, flag);
1004     cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1005     cq->sw_queue[cq->sw_pidx] = cqe;
1006     t4_swcq_produce(cq);
1007     spin_unlock_irqrestore(&schp->lock, flag);
1008 
1009     if (t4_clear_cq_armed(&schp->cq)) {
1010         spin_lock_irqsave(&schp->comp_handler_lock, flag);
1011         (*schp->ibcq.comp_handler)(&schp->ibcq,
1012                        schp->ibcq.cq_context);
1013         spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1014     }
1015     return 0;
1016 }
1017 
1018 static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
1019                  const struct ib_send_wr *wr,
1020                  const struct ib_send_wr **bad_wr)
1021 {
1022     int ret = 0;
1023 
1024     while (wr) {
1025         ret = complete_sq_drain_wr(qhp, wr);
1026         if (ret) {
1027             *bad_wr = wr;
1028             break;
1029         }
1030         wr = wr->next;
1031     }
1032     return ret;
1033 }
1034 
1035 static void complete_rq_drain_wr(struct c4iw_qp *qhp,
1036                  const struct ib_recv_wr *wr)
1037 {
1038     struct t4_cqe cqe = {};
1039     struct c4iw_cq *rchp;
1040     unsigned long flag;
1041     struct t4_cq *cq;
1042 
1043     rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1044     cq = &rchp->cq;
1045 
1046     cqe.u.drain_cookie = wr->wr_id;
1047     cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
1048                  CQE_OPCODE_V(FW_RI_SEND) |
1049                  CQE_TYPE_V(0) |
1050                  CQE_SWCQE_V(1) |
1051                  CQE_DRAIN_V(1) |
1052                  CQE_QPID_V(qhp->wq.sq.qid));
1053 
1054     spin_lock_irqsave(&rchp->lock, flag);
1055     cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1056     cq->sw_queue[cq->sw_pidx] = cqe;
1057     t4_swcq_produce(cq);
1058     spin_unlock_irqrestore(&rchp->lock, flag);
1059 
1060     if (t4_clear_cq_armed(&rchp->cq)) {
1061         spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1062         (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1063                        rchp->ibcq.cq_context);
1064         spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1065     }
1066 }
1067 
1068 static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
1069                   const struct ib_recv_wr *wr)
1070 {
1071     while (wr) {
1072         complete_rq_drain_wr(qhp, wr);
1073         wr = wr->next;
1074     }
1075 }
1076 
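/*
 * Post send work requests.  Flushed QPs get software drain completions
 * instead; otherwise each WR is built into the SQ and the doorbell is
 * rung directly, or via ring_kernel_sq_db() when the status page
 * indicates doorbells are off.
 */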
1077 int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1078            const struct ib_send_wr **bad_wr)
1079 {
1080     int err = 0;
1081     u8 len16 = 0;
1082     enum fw_wr_opcodes fw_opcode = 0;
1083     enum fw_ri_wr_flags fw_flags;
1084     struct c4iw_qp *qhp;
1085     struct c4iw_dev *rhp;
1086     union t4_wr *wqe = NULL;
1087     u32 num_wrs;
1088     struct t4_swsqe *swsqe;
1089     unsigned long flag;
1090     u16 idx = 0;
1091 
1092     qhp = to_c4iw_qp(ibqp);
1093     rhp = qhp->rhp;
1094     spin_lock_irqsave(&qhp->lock, flag);
1095 
1096     /*
1097      * If the qp has been flushed, then just insert a special
1098      * drain cqe.
1099      */
1100     if (qhp->wq.flushed) {
1101         spin_unlock_irqrestore(&qhp->lock, flag);
1102         err = complete_sq_drain_wrs(qhp, wr, bad_wr);
1103         return err;
1104     }
1105     num_wrs = t4_sq_avail(&qhp->wq);
1106     if (num_wrs == 0) {
1107         spin_unlock_irqrestore(&qhp->lock, flag);
1108         *bad_wr = wr;
1109         return -ENOMEM;
1110     }
1111 
1112     /*
1113      * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
1114      * the response for small NVMe-oF READ requests.  If the chain is
1115      * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
1116      * and lengths meet the requirements of the fw_ri_write_cmpl_wr work
1117      * request, then build and post the write_cmpl WR. If any of the tests
1118      * below are not true, then we continue on with the traditional WRITE
1119      * and SEND WRs.
1120      */
1121     if (qhp->rhp->rdev.lldi.write_cmpl_support &&
1122         CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
1123         CHELSIO_T5 &&
1124         wr && wr->next && !wr->next->next &&
1125         wr->opcode == IB_WR_RDMA_WRITE &&
1126         wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
1127         (wr->next->opcode == IB_WR_SEND ||
1128         wr->next->opcode == IB_WR_SEND_WITH_INV) &&
1129         wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
1130         wr->next->num_sge == 1 && num_wrs >= 2) {
1131         post_write_cmpl(qhp, wr);
1132         spin_unlock_irqrestore(&qhp->lock, flag);
1133         return 0;
1134     }
1135 
1136     while (wr) {
1137         if (num_wrs == 0) {
1138             err = -ENOMEM;
1139             *bad_wr = wr;
1140             break;
1141         }
1142         wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
1143               qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
1144 
1145         fw_flags = 0;
1146         if (wr->send_flags & IB_SEND_SOLICITED)
1147             fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
1148         if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
1149             fw_flags |= FW_RI_COMPLETION_FLAG;
1150         swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
1151         switch (wr->opcode) {
1152         case IB_WR_SEND_WITH_INV:
1153         case IB_WR_SEND:
1154             if (wr->send_flags & IB_SEND_FENCE)
1155                 fw_flags |= FW_RI_READ_FENCE_FLAG;
1156             fw_opcode = FW_RI_SEND_WR;
1157             if (wr->opcode == IB_WR_SEND)
1158                 swsqe->opcode = FW_RI_SEND;
1159             else
1160                 swsqe->opcode = FW_RI_SEND_WITH_INV;
1161             err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
1162             break;
1163         case IB_WR_RDMA_WRITE_WITH_IMM:
1164             if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
1165                 err = -EINVAL;
1166                 break;
1167             }
1168             fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
1169             fallthrough;
1170         case IB_WR_RDMA_WRITE:
1171             fw_opcode = FW_RI_RDMA_WRITE_WR;
1172             swsqe->opcode = FW_RI_RDMA_WRITE;
1173             err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
1174             break;
1175         case IB_WR_RDMA_READ:
1176         case IB_WR_RDMA_READ_WITH_INV:
1177             fw_opcode = FW_RI_RDMA_READ_WR;
1178             swsqe->opcode = FW_RI_READ_REQ;
1179             if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
1180                 c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
1181                 fw_flags = FW_RI_RDMA_READ_INVALIDATE;
1182             } else {
1183                 fw_flags = 0;
1184             }
1185             err = build_rdma_read(wqe, wr, &len16);
1186             if (err)
1187                 break;
1188             swsqe->read_len = wr->sg_list[0].length;
1189             if (!qhp->wq.sq.oldest_read)
1190                 qhp->wq.sq.oldest_read = swsqe;
1191             break;
1192         case IB_WR_REG_MR: {
1193             struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
1194 
1195             swsqe->opcode = FW_RI_FAST_REGISTER;
1196             if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
1197                 !mhp->attr.state && mhp->mpl_len <= 2) {
1198                 fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
1199                 build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
1200                           mhp, &len16);
1201             } else {
1202                 fw_opcode = FW_RI_FR_NSMR_WR;
1203                 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
1204                        mhp, &len16,
1205                        rhp->rdev.lldi.ulptx_memwrite_dsgl);
1206                 if (err)
1207                     break;
1208             }
1209             mhp->attr.state = 1;
1210             break;
1211         }
1212         case IB_WR_LOCAL_INV:
1213             if (wr->send_flags & IB_SEND_FENCE)
1214                 fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
1215             fw_opcode = FW_RI_INV_LSTAG_WR;
1216             swsqe->opcode = FW_RI_LOCAL_INV;
1217             err = build_inv_stag(wqe, wr, &len16);
1218             c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
1219             break;
1220         default:
1221             pr_warn("%s post of type=%d TBD!\n", __func__,
1222                 wr->opcode);
1223             err = -EINVAL;
1224         }
1225         if (err) {
1226             *bad_wr = wr;
1227             break;
1228         }
1229         swsqe->idx = qhp->wq.sq.pidx;
1230         swsqe->complete = 0;
1231         swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
1232                   qhp->sq_sig_all;
1233         swsqe->flushed = 0;
1234         swsqe->wr_id = wr->wr_id;
1235         if (c4iw_wr_log) {
1236             swsqe->sge_ts = cxgb4_read_sge_timestamp(
1237                     rhp->rdev.lldi.ports[0]);
1238             swsqe->host_time = ktime_get();
1239         }
1240 
1241         init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
1242 
1243         pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
1244              (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
1245              swsqe->opcode, swsqe->read_len);
1246         wr = wr->next;
1247         num_wrs--;
1248         t4_sq_produce(&qhp->wq, len16);
1249         idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1250     }
1251     if (!rhp->rdev.status_page->db_off) {
1252         t4_ring_sq_db(&qhp->wq, idx, wqe);
1253         spin_unlock_irqrestore(&qhp->lock, flag);
1254     } else {
1255         spin_unlock_irqrestore(&qhp->lock, flag);
1256         ring_kernel_sq_db(qhp, idx);
1257     }
1258     return err;
1259 }
1260 
1261 int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1262               const struct ib_recv_wr **bad_wr)
1263 {
1264     int err = 0;
1265     struct c4iw_qp *qhp;
1266     union t4_recv_wr *wqe = NULL;
1267     u32 num_wrs;
1268     u8 len16 = 0;
1269     unsigned long flag;
1270     u16 idx = 0;
1271 
1272     qhp = to_c4iw_qp(ibqp);
1273     spin_lock_irqsave(&qhp->lock, flag);
1274 
1275     /*
1276      * If the qp has been flushed, then just insert a special
1277      * drain cqe.
1278      */
1279     if (qhp->wq.flushed) {
1280         spin_unlock_irqrestore(&qhp->lock, flag);
1281         complete_rq_drain_wrs(qhp, wr);
1282         return err;
1283     }
1284     num_wrs = t4_rq_avail(&qhp->wq);
1285     if (num_wrs == 0) {
1286         spin_unlock_irqrestore(&qhp->lock, flag);
1287         *bad_wr = wr;
1288         return -ENOMEM;
1289     }
1290     while (wr) {
1291         if (wr->num_sge > T4_MAX_RECV_SGE) {
1292             err = -EINVAL;
1293             *bad_wr = wr;
1294             break;
1295         }
1296         wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
1297                        qhp->wq.rq.wq_pidx *
1298                        T4_EQ_ENTRY_SIZE);
1299         if (num_wrs)
1300             err = build_rdma_recv(qhp, wqe, wr, &len16);
1301         else
1302             err = -ENOMEM;
1303         if (err) {
1304             *bad_wr = wr;
1305             break;
1306         }
1307 
1308         qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
1309         if (c4iw_wr_log) {
1310             qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
1311                 cxgb4_read_sge_timestamp(
1312                         qhp->rhp->rdev.lldi.ports[0]);
1313             qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
1314                 ktime_get();
1315         }
1316 
1317         wqe->recv.opcode = FW_RI_RECV_WR;
1318         wqe->recv.r1 = 0;
1319         wqe->recv.wrid = qhp->wq.rq.pidx;
1320         wqe->recv.r2[0] = 0;
1321         wqe->recv.r2[1] = 0;
1322         wqe->recv.r2[2] = 0;
1323         wqe->recv.len16 = len16;
1324         pr_debug("cookie 0x%llx pidx %u\n",
1325              (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
1326         t4_rq_produce(&qhp->wq, len16);
1327         idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1328         wr = wr->next;
1329         num_wrs--;
1330     }
1331     if (!qhp->rhp->rdev.status_page->db_off) {
1332         t4_ring_rq_db(&qhp->wq, idx, wqe);
1333         spin_unlock_irqrestore(&qhp->lock, flag);
1334     } else {
1335         spin_unlock_irqrestore(&qhp->lock, flag);
1336         ring_kernel_rq_db(qhp, idx);
1337     }
1338     return err;
1339 }
1340 
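/*
 * Copy the RECV WR and its metadata onto the SRQ pending_wrs ring
 * instead of the hardware queue; used when the SRQ has out-of-order or
 * already-pending entries ahead of it.
 */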
1341 static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
1342              u64 wr_id, u8 len16)
1343 {
1344     struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
1345 
1346     pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
1347          __func__, srq->cidx, srq->pidx, srq->wq_pidx,
1348          srq->in_use, srq->ooo_count,
1349          (unsigned long long)wr_id, srq->pending_cidx,
1350          srq->pending_pidx, srq->pending_in_use);
1351     pwr->wr_id = wr_id;
1352     pwr->len16 = len16;
1353     memcpy(&pwr->wqe, wqe, len16 * 16);
1354     t4_srq_produce_pending_wr(srq);
1355 }
1356 
1357 int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1358                const struct ib_recv_wr **bad_wr)
1359 {
1360     union t4_recv_wr *wqe, lwqe;
1361     struct c4iw_srq *srq;
1362     unsigned long flag;
1363     u8 len16 = 0;
1364     u16 idx = 0;
1365     int err = 0;
1366     u32 num_wrs;
1367 
1368     srq = to_c4iw_srq(ibsrq);
1369     spin_lock_irqsave(&srq->lock, flag);
1370     num_wrs = t4_srq_avail(&srq->wq);
1371     if (num_wrs == 0) {
1372         spin_unlock_irqrestore(&srq->lock, flag);
1373         return -ENOMEM;
1374     }
1375     while (wr) {
1376         if (wr->num_sge > T4_MAX_RECV_SGE) {
1377             err = -EINVAL;
1378             *bad_wr = wr;
1379             break;
1380         }
1381         wqe = &lwqe;
1382         if (num_wrs)
1383             err = build_srq_recv(wqe, wr, &len16);
1384         else
1385             err = -ENOMEM;
1386         if (err) {
1387             *bad_wr = wr;
1388             break;
1389         }
1390 
1391         wqe->recv.opcode = FW_RI_RECV_WR;
1392         wqe->recv.r1 = 0;
1393         wqe->recv.wrid = srq->wq.pidx;
1394         wqe->recv.r2[0] = 0;
1395         wqe->recv.r2[1] = 0;
1396         wqe->recv.r2[2] = 0;
1397         wqe->recv.len16 = len16;
1398 
1399         if (srq->wq.ooo_count ||
1400             srq->wq.pending_in_use ||
1401             srq->wq.sw_rq[srq->wq.pidx].valid) {
1402             defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
1403         } else {
1404             srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
1405             srq->wq.sw_rq[srq->wq.pidx].valid = 1;
1406             c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
1407             pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
1408                  __func__, srq->wq.cidx,
1409                  srq->wq.pidx, srq->wq.wq_pidx,
1410                  srq->wq.in_use,
1411                  (unsigned long long)wr->wr_id);
1412             t4_srq_produce(&srq->wq, len16);
1413             idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
1414         }
1415         wr = wr->next;
1416         num_wrs--;
1417     }
1418     if (idx)
1419         t4_ring_srq_db(&srq->wq, idx, len16, wqe);
1420     spin_unlock_irqrestore(&srq->lock, flag);
1421     return err;
1422 }
1423 
1424 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
1425                     u8 *ecode)
1426 {
1427     int status;
1428     int tagged;
1429     int opcode;
1430     int rqtype;
1431     int send_inv;
1432 
1433     if (!err_cqe) {
1434         *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1435         *ecode = 0;
1436         return;
1437     }
1438 
1439     status = CQE_STATUS(err_cqe);
1440     opcode = CQE_OPCODE(err_cqe);
1441     rqtype = RQ_TYPE(err_cqe);
1442     send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
1443            (opcode == FW_RI_SEND_WITH_SE_INV);
1444     tagged = (opcode == FW_RI_RDMA_WRITE) ||
1445          (rqtype && (opcode == FW_RI_READ_RESP));
1446 
1447     switch (status) {
1448     case T4_ERR_STAG:
1449         if (send_inv) {
1450             *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1451             *ecode = RDMAP_CANT_INV_STAG;
1452         } else {
1453             *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1454             *ecode = RDMAP_INV_STAG;
1455         }
1456         break;
1457     case T4_ERR_PDID:
1458         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1459         if ((opcode == FW_RI_SEND_WITH_INV) ||
1460             (opcode == FW_RI_SEND_WITH_SE_INV))
1461             *ecode = RDMAP_CANT_INV_STAG;
1462         else
1463             *ecode = RDMAP_STAG_NOT_ASSOC;
1464         break;
1465     case T4_ERR_QPID:
1466         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1467         *ecode = RDMAP_STAG_NOT_ASSOC;
1468         break;
1469     case T4_ERR_ACCESS:
1470         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1471         *ecode = RDMAP_ACC_VIOL;
1472         break;
1473     case T4_ERR_WRAP:
1474         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1475         *ecode = RDMAP_TO_WRAP;
1476         break;
1477     case T4_ERR_BOUND:
1478         if (tagged) {
1479             *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1480             *ecode = DDPT_BASE_BOUNDS;
1481         } else {
1482             *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1483             *ecode = RDMAP_BASE_BOUNDS;
1484         }
1485         break;
1486     case T4_ERR_INVALIDATE_SHARED_MR:
1487     case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
1488         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1489         *ecode = RDMAP_CANT_INV_STAG;
1490         break;
1491     case T4_ERR_ECC:
1492     case T4_ERR_ECC_PSTAG:
1493     case T4_ERR_INTERNAL_ERR:
1494         *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
1495         *ecode = 0;
1496         break;
1497     case T4_ERR_OUT_OF_RQE:
1498         *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1499         *ecode = DDPU_INV_MSN_NOBUF;
1500         break;
1501     case T4_ERR_PBL_ADDR_BOUND:
1502         *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1503         *ecode = DDPT_BASE_BOUNDS;
1504         break;
1505     case T4_ERR_CRC:
1506         *layer_type = LAYER_MPA|DDP_LLP;
1507         *ecode = MPA_CRC_ERR;
1508         break;
1509     case T4_ERR_MARKER:
1510         *layer_type = LAYER_MPA|DDP_LLP;
1511         *ecode = MPA_MARKER_ERR;
1512         break;
1513     case T4_ERR_PDU_LEN_ERR:
1514         *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1515         *ecode = DDPU_MSG_TOOBIG;
1516         break;
1517     case T4_ERR_DDP_VERSION:
1518         if (tagged) {
1519             *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1520             *ecode = DDPT_INV_VERS;
1521         } else {
1522             *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1523             *ecode = DDPU_INV_VERS;
1524         }
1525         break;
1526     case T4_ERR_RDMA_VERSION:
1527         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1528         *ecode = RDMAP_INV_VERS;
1529         break;
1530     case T4_ERR_OPCODE:
1531         *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1532         *ecode = RDMAP_INV_OPCODE;
1533         break;
1534     case T4_ERR_DDP_QUEUE_NUM:
1535         *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1536         *ecode = DDPU_INV_QN;
1537         break;
1538     case T4_ERR_MSN:
1539     case T4_ERR_MSN_GAP:
1540     case T4_ERR_MSN_RANGE:
1541     case T4_ERR_IRD_OVERFLOW:
1542         *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1543         *ecode = DDPU_INV_MSN_RANGE;
1544         break;
1545     case T4_ERR_TBIT:
1546         *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
1547         *ecode = 0;
1548         break;
1549     case T4_ERR_MO:
1550         *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1551         *ecode = DDPU_INV_MO;
1552         break;
1553     default:
1554         *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1555         *ecode = 0;
1556         break;
1557     }
1558 }
1559 
1560 static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
1561                gfp_t gfp)
1562 {
1563     struct fw_ri_wr *wqe;
1564     struct sk_buff *skb;
1565     struct terminate_message *term;
1566 
1567     pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid,
1568          qhp->ep->hwtid);
1569 
1570     skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
1571     if (WARN_ON(!skb))
1572         return;
1573 
1574     set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1575 
1576     wqe = __skb_put_zero(skb, sizeof(*wqe));
1577     wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
1578     wqe->flowid_len16 = cpu_to_be32(
1579         FW_WR_FLOWID_V(qhp->ep->hwtid) |
1580         FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1581 
1582     wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
1583     wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
1584     term = (struct terminate_message *)wqe->u.terminate.termmsg;
1585     if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
1586         term->layer_etype = qhp->attr.layer_etype;
1587         term->ecode = qhp->attr.ecode;
1588     } else
1589         build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
1590     c4iw_ofld_send(&qhp->rhp->rdev, skb);
1591 }
1592 
1593 /*
1594  * Assumes qhp lock is held.
1595  */
1596 static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1597                struct c4iw_cq *schp)
1598 {
1599     int count;
1600     int rq_flushed = 0, sq_flushed;
1601     unsigned long flag;
1602 
1603     pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
1604 
1605     /* locking hierarchy: cqs lock first, then qp lock. */
1606     spin_lock_irqsave(&rchp->lock, flag);
1607     if (schp != rchp)
1608         spin_lock(&schp->lock);
1609     spin_lock(&qhp->lock);
1610 
1611     if (qhp->wq.flushed) {
1612         spin_unlock(&qhp->lock);
1613         if (schp != rchp)
1614             spin_unlock(&schp->lock);
1615         spin_unlock_irqrestore(&rchp->lock, flag);
1616         return;
1617     }
1618     qhp->wq.flushed = 1;
1619     t4_set_wq_in_error(&qhp->wq, 0);
1620 
1621     c4iw_flush_hw_cq(rchp, qhp);
1622     if (!qhp->srq) {
1623         c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
1624         rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
1625     }
1626 
1627     if (schp != rchp)
1628         c4iw_flush_hw_cq(schp, qhp);
1629     sq_flushed = c4iw_flush_sq(qhp);
1630 
1631     spin_unlock(&qhp->lock);
1632     if (schp != rchp)
1633         spin_unlock(&schp->lock);
1634     spin_unlock_irqrestore(&rchp->lock, flag);
1635 
1636     if (schp == rchp) {
1637         if ((rq_flushed || sq_flushed) &&
1638             t4_clear_cq_armed(&rchp->cq)) {
1639             spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1640             (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1641                            rchp->ibcq.cq_context);
1642             spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1643         }
1644     } else {
1645         if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
1646             spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1647             (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1648                            rchp->ibcq.cq_context);
1649             spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1650         }
1651         if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
1652             spin_lock_irqsave(&schp->comp_handler_lock, flag);
1653             (*schp->ibcq.comp_handler)(&schp->ibcq,
1654                            schp->ibcq.cq_context);
1655             spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1656         }
1657     }
1658 }
1659 
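/*
 * Flush a QP that is transitioning to ERROR.  For user QPs only mark
 * the WQ and CQs in error and invoke the completion handlers; the user
 * library is expected to flush its own queues.  Kernel QPs are flushed
 * in software by __flush_qp() under the cq/qp locks.
 */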
1660 static void flush_qp(struct c4iw_qp *qhp)
1661 {
1662     struct c4iw_cq *rchp, *schp;
1663     unsigned long flag;
1664 
1665     rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1666     schp = to_c4iw_cq(qhp->ibqp.send_cq);
1667 
1668     if (qhp->ibqp.uobject) {
1669 
1670         /* for user qps, qhp->wq.flushed is protected by qhp->mutex */
1671         if (qhp->wq.flushed)
1672             return;
1673 
1674         qhp->wq.flushed = 1;
1675         t4_set_wq_in_error(&qhp->wq, 0);
1676         t4_set_cq_in_error(&rchp->cq);
1677         spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1678         (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
1679         spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1680         if (schp != rchp) {
1681             t4_set_cq_in_error(&schp->cq);
1682             spin_lock_irqsave(&schp->comp_handler_lock, flag);
1683             (*schp->ibcq.comp_handler)(&schp->ibcq,
1684                     schp->ibcq.cq_context);
1685             spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1686         }
1687         return;
1688     }
1689     __flush_qp(qhp, rchp, schp);
1690 }
1691 
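/*
 * Post a FW_RI_INIT work request of type FINI to gracefully take the
 * connection out of RDMA mode, then block on the endpoint's wr_waitp
 * until the firmware completes it.
 */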
1692 static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1693              struct c4iw_ep *ep)
1694 {
1695     struct fw_ri_wr *wqe;
1696     int ret;
1697     struct sk_buff *skb;
1698 
1699     pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid);
1700 
1701     skb = skb_dequeue(&ep->com.ep_skb_list);
1702     if (WARN_ON(!skb))
1703         return -ENOMEM;
1704 
1705     set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1706 
1707     wqe = __skb_put_zero(skb, sizeof(*wqe));
1708     wqe->op_compl = cpu_to_be32(
1709         FW_WR_OP_V(FW_RI_INIT_WR) |
1710         FW_WR_COMPL_F);
1711     wqe->flowid_len16 = cpu_to_be32(
1712         FW_WR_FLOWID_V(ep->hwtid) |
1713         FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1714     wqe->cookie = (uintptr_t)ep->com.wr_waitp;
1715 
1716     wqe->u.fini.type = FW_RI_TYPE_FINI;
1717 
1718     ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
1719                  qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1720 
1721     pr_debug("ret %d\n", ret);
1722     return ret;
1723 }
1724 
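/*
 * Build the dummy work request used for the MPA peer-to-peer (RTR)
 * exchange when this side is the initiator: either a 0-byte RDMA WRITE
 * or a 0-byte RDMA READ REQ, depending on the negotiated p2p type.
 */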
1725 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
1726 {
1727     pr_debug("p2p_type = %d\n", p2p_type);
1728     memset(&init->u, 0, sizeof(init->u));
1729     switch (p2p_type) {
1730     case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
1731         init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
1732         init->u.write.stag_sink = cpu_to_be32(1);
1733         init->u.write.to_sink = cpu_to_be64(1);
1734         init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
1735         init->u.write.len16 = DIV_ROUND_UP(
1736             sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16);
1737         break;
1738     case FW_RI_INIT_P2PTYPE_READ_REQ:
1739         init->u.write.opcode = FW_RI_RDMA_READ_WR; /* opcode shares the same offset in u.write and u.read */
1740         init->u.read.stag_src = cpu_to_be32(1);
1741         init->u.read.to_src_lo = cpu_to_be32(1);
1742         init->u.read.stag_sink = cpu_to_be32(1);
1743         init->u.read.to_sink_lo = cpu_to_be32(1);
1744         init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
1745         break;
1746     }
1747 }
1748 
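/*
 * Post a FW_RI_INIT work request of type INIT to move the QP into RDMA
 * mode.  This reserves IRD resources, then programs the firmware with
 * the negotiated MPA attributes, QP capabilities, queue ids (including
 * the SRQ id when one is attached), ORD/IRD limits and the initial
 * send/receive sequence numbers.  The IRD reservation is released if
 * the firmware request fails.
 */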
1749 static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
1750 {
1751     struct fw_ri_wr *wqe;
1752     int ret;
1753     struct sk_buff *skb;
1754 
1755     pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
1756          qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
1757 
1758     skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
1759     if (!skb) {
1760         ret = -ENOMEM;
1761         goto out;
1762     }
1763     ret = alloc_ird(rhp, qhp->attr.max_ird);
1764     if (ret) {
1765         qhp->attr.max_ird = 0;
1766         kfree_skb(skb);
1767         goto out;
1768     }
1769     set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1770 
1771     wqe = __skb_put_zero(skb, sizeof(*wqe));
1772     wqe->op_compl = cpu_to_be32(
1773         FW_WR_OP_V(FW_RI_INIT_WR) |
1774         FW_WR_COMPL_F);
1775     wqe->flowid_len16 = cpu_to_be32(
1776         FW_WR_FLOWID_V(qhp->ep->hwtid) |
1777         FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1778 
1779     wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp;
1780 
1781     wqe->u.init.type = FW_RI_TYPE_INIT;
1782     wqe->u.init.mpareqbit_p2ptype =
1783         FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) |
1784         FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type);
1785     wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
1786     if (qhp->attr.mpa_attr.recv_marker_enabled)
1787         wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
1788     if (qhp->attr.mpa_attr.xmit_marker_enabled)
1789         wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
1790     if (qhp->attr.mpa_attr.crc_enabled)
1791         wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
1792 
1793     wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
1794                 FW_RI_QP_RDMA_WRITE_ENABLE |
1795                 FW_RI_QP_BIND_ENABLE;
1796     if (!qhp->ibqp.uobject)
1797         wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
1798                      FW_RI_QP_STAG0_ENABLE;
1799     wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
1800     wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
1801     wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
1802     wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
1803     if (qhp->srq) {
1804         wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
1805                           qhp->srq->idx);
1806     } else {
1807         wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
1808         wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
1809         wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
1810                            rhp->rdev.lldi.vr->rq.start);
1811     }
1812     wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
1813     wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
1814     wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
1815     wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
1816     wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
1817     wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
1818     if (qhp->attr.mpa_attr.initiator)
1819         build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
1820 
1821     ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
1822                  qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1823     if (!ret)
1824         goto out;
1825 
1826     free_ird(rhp, qhp->attr.max_ird);
1827 out:
1828     pr_debug("ret %d\n", ret);
1829     return ret;
1830 }
1831 
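/*
 * Core QP state machine.  With qhp->mutex held, apply attribute
 * changes (only legal in IDLE) and/or drive a state transition:
 *
 *   IDLE    -> RTS        post FW_RI INIT (rdma_init), ref the endpoint
 *   IDLE    -> ERROR      flush the QP
 *   RTS     -> CLOSING    post FW_RI FINI (graceful close)
 *   RTS     -> TERMINATE  send a TERMINATE message to the peer
 *   RTS     -> ERROR      abortive close
 *   CLOSING -> IDLE       flush, drop the endpoint reference
 *   ERROR   -> IDLE       only if both work queues are empty
 *
 * Failures funnel to the err: label, which disassociates the endpoint,
 * moves the QP to ERROR and flushes it.  The terminate, disconnect and
 * endpoint dereference side effects are performed after the mutex is
 * dropped.
 */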
1832 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1833            enum c4iw_qp_attr_mask mask,
1834            struct c4iw_qp_attributes *attrs,
1835            int internal)
1836 {
1837     int ret = 0;
1838     struct c4iw_qp_attributes newattr = qhp->attr;
1839     int disconnect = 0;
1840     int terminate = 0;
1841     int abort = 0;
1842     int free = 0;
1843     struct c4iw_ep *ep = NULL;
1844 
1845     pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
1846          qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
1847          (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
1848 
1849     mutex_lock(&qhp->mutex);
1850 
1851     /* Process attr changes if in IDLE */
1852     if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
1853         if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
1854             ret = -EIO;
1855             goto out;
1856         }
1857         if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
1858             newattr.enable_rdma_read = attrs->enable_rdma_read;
1859         if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
1860             newattr.enable_rdma_write = attrs->enable_rdma_write;
1861         if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
1862             newattr.enable_bind = attrs->enable_bind;
1863         if (mask & C4IW_QP_ATTR_MAX_ORD) {
1864             if (attrs->max_ord > c4iw_max_read_depth) {
1865                 ret = -EINVAL;
1866                 goto out;
1867             }
1868             newattr.max_ord = attrs->max_ord;
1869         }
1870         if (mask & C4IW_QP_ATTR_MAX_IRD) {
1871             if (attrs->max_ird > cur_max_read_depth(rhp)) {
1872                 ret = -EINVAL;
1873                 goto out;
1874             }
1875             newattr.max_ird = attrs->max_ird;
1876         }
1877         qhp->attr = newattr;
1878     }
1879 
1880     if (mask & C4IW_QP_ATTR_SQ_DB) {
1881         ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
1882         goto out;
1883     }
1884     if (mask & C4IW_QP_ATTR_RQ_DB) {
1885         ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
1886         goto out;
1887     }
1888 
1889     if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
1890         goto out;
1891     if (qhp->attr.state == attrs->next_state)
1892         goto out;
1893 
1894     switch (qhp->attr.state) {
1895     case C4IW_QP_STATE_IDLE:
1896         switch (attrs->next_state) {
1897         case C4IW_QP_STATE_RTS:
1898             if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
1899                 ret = -EINVAL;
1900                 goto out;
1901             }
1902             if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
1903                 ret = -EINVAL;
1904                 goto out;
1905             }
1906             qhp->attr.mpa_attr = attrs->mpa_attr;
1907             qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
1908             qhp->ep = qhp->attr.llp_stream_handle;
1909             set_state(qhp, C4IW_QP_STATE_RTS);
1910 
1911             /*
1912              * Ref the endpoint here and deref when we
1913              * disassociate the endpoint from the QP.  This
1914              * happens in CLOSING->IDLE transition or *->ERROR
1915              * transition.
1916              */
1917             c4iw_get_ep(&qhp->ep->com);
1918             ret = rdma_init(rhp, qhp);
1919             if (ret)
1920                 goto err;
1921             break;
1922         case C4IW_QP_STATE_ERROR:
1923             set_state(qhp, C4IW_QP_STATE_ERROR);
1924             flush_qp(qhp);
1925             break;
1926         default:
1927             ret = -EINVAL;
1928             goto out;
1929         }
1930         break;
1931     case C4IW_QP_STATE_RTS:
1932         switch (attrs->next_state) {
1933         case C4IW_QP_STATE_CLOSING:
1934             t4_set_wq_in_error(&qhp->wq, 0);
1935             set_state(qhp, C4IW_QP_STATE_CLOSING);
1936             ep = qhp->ep;
1937             if (!internal) {
1938                 abort = 0;
1939                 disconnect = 1;
1940                 c4iw_get_ep(&qhp->ep->com);
1941             }
1942             ret = rdma_fini(rhp, qhp, ep);
1943             if (ret)
1944                 goto err;
1945             break;
1946         case C4IW_QP_STATE_TERMINATE:
1947             t4_set_wq_in_error(&qhp->wq, 0);
1948             set_state(qhp, C4IW_QP_STATE_TERMINATE);
1949             qhp->attr.layer_etype = attrs->layer_etype;
1950             qhp->attr.ecode = attrs->ecode;
1951             ep = qhp->ep;
1952             if (!internal) {
1953                 c4iw_get_ep(&ep->com);
1954                 terminate = 1;
1955                 disconnect = 1;
1956             } else {
1957                 terminate = qhp->attr.send_term;
1958                 ret = rdma_fini(rhp, qhp, ep);
1959                 if (ret)
1960                     goto err;
1961             }
1962             break;
1963         case C4IW_QP_STATE_ERROR:
1964             t4_set_wq_in_error(&qhp->wq, 0);
1965             set_state(qhp, C4IW_QP_STATE_ERROR);
1966             if (!internal) {
1967                 disconnect = 1;
1968                 ep = qhp->ep;
1969                 c4iw_get_ep(&qhp->ep->com);
1970             }
1971             goto err;
1972             break;
1973         default:
1974             ret = -EINVAL;
1975             goto out;
1976         }
1977         break;
1978     case C4IW_QP_STATE_CLOSING:
1979 
1980         /*
1981          * Allow kernel users to move to ERROR for qp draining.
1982          */
1983         if (!internal && (qhp->ibqp.uobject || attrs->next_state !=
1984                   C4IW_QP_STATE_ERROR)) {
1985             ret = -EINVAL;
1986             goto out;
1987         }
1988         switch (attrs->next_state) {
1989         case C4IW_QP_STATE_IDLE:
1990             flush_qp(qhp);
1991             set_state(qhp, C4IW_QP_STATE_IDLE);
1992             qhp->attr.llp_stream_handle = NULL;
1993             c4iw_put_ep(&qhp->ep->com);
1994             qhp->ep = NULL;
1995             wake_up(&qhp->wait);
1996             break;
1997         case C4IW_QP_STATE_ERROR:
1998             goto err;
1999         default:
2000             ret = -EINVAL;
2001             goto err;
2002         }
2003         break;
2004     case C4IW_QP_STATE_ERROR:
2005         if (attrs->next_state != C4IW_QP_STATE_IDLE) {
2006             ret = -EINVAL;
2007             goto out;
2008         }
2009         if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
2010             ret = -EINVAL;
2011             goto out;
2012         }
2013         set_state(qhp, C4IW_QP_STATE_IDLE);
2014         break;
2015     case C4IW_QP_STATE_TERMINATE:
2016         if (!internal) {
2017             ret = -EINVAL;
2018             goto out;
2019         }
2020         goto err;
2021         break;
2022     default:
2023         pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
2024         ret = -EINVAL;
2025         goto err;
2026         break;
2027     }
2028     goto out;
2029 err:
2030     pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep,
2031          qhp->wq.sq.qid);
2032 
2033     /* disassociate the LLP connection */
2034     qhp->attr.llp_stream_handle = NULL;
2035     if (!ep)
2036         ep = qhp->ep;
2037     qhp->ep = NULL;
2038     set_state(qhp, C4IW_QP_STATE_ERROR);
2039     free = 1;
2040     abort = 1;
2041     flush_qp(qhp);
2042     wake_up(&qhp->wait);
2043 out:
2044     mutex_unlock(&qhp->mutex);
2045 
2046     if (terminate)
2047         post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
2048 
2049     /*
2050      * If disconnect is 1, then we need to initiate a disconnect
2051      * on the EP.  This can be a normal close (RTS->CLOSING) or
2052      * an abnormal close (RTS/CLOSING->ERROR).
2053      */
2054     if (disconnect) {
2055         c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
2056                              GFP_KERNEL);
2057         c4iw_put_ep(&ep->com);
2058     }
2059 
2060     /*
2061      * If free is 1, then we've disassociated the EP from the QP
2062      * and we need to dereference the EP.
2063      */
2064     if (free)
2065         c4iw_put_ep(&ep->com);
2066     pr_debug("exit state %d\n", qhp->attr.state);
2067     return ret;
2068 }
2069 
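/*
 * Destroy QP verb: move the QP to ERROR (which flushes it), wait for
 * the endpoint to be disassociated, remove the QP from the qps xarray
 * and the db flow control list, return its IRD resources, and free the
 * hardware queues once the last reference has been dropped.
 */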
2070 int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
2071 {
2072     struct c4iw_dev *rhp;
2073     struct c4iw_qp *qhp;
2074     struct c4iw_ucontext *ucontext;
2075     struct c4iw_qp_attributes attrs;
2076 
2077     qhp = to_c4iw_qp(ib_qp);
2078     rhp = qhp->rhp;
2079     ucontext = qhp->ucontext;
2080 
2081     attrs.next_state = C4IW_QP_STATE_ERROR;
2082     if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
2083         c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2084     else
2085         c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
2086     wait_event(qhp->wait, !qhp->ep);
2087 
2088     xa_lock_irq(&rhp->qps);
2089     __xa_erase(&rhp->qps, qhp->wq.sq.qid);
2090     if (!list_empty(&qhp->db_fc_entry))
2091         list_del_init(&qhp->db_fc_entry);
2092     xa_unlock_irq(&rhp->qps);
2093     free_ird(rhp, qhp->attr.max_ird);
2094 
2095     c4iw_qp_rem_ref(ib_qp);
2096 
2097     wait_for_completion(&qhp->qp_rel_comp);
2098 
2099     pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
2100     pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
2101 
2102     destroy_qp(&rhp->rdev, &qhp->wq,
2103            ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
2104 
2105     c4iw_put_wr_wait(qhp->wr_waitp);
2106     return 0;
2107 }
2108 
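/*
 * Create QP verb: validate the requested type and capabilities, size
 * and allocate the SQ (and the RQ when no SRQ is attached), insert the
 * QP into the qps xarray, and for user QPs hand the queue and doorbell
 * mmap keys back to the library through the udata response.
 */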
2109 int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
2110            struct ib_udata *udata)
2111 {
2112     struct ib_pd *pd = qp->pd;
2113     struct c4iw_dev *rhp;
2114     struct c4iw_qp *qhp = to_c4iw_qp(qp);
2115     struct c4iw_pd *php;
2116     struct c4iw_cq *schp;
2117     struct c4iw_cq *rchp;
2118     struct c4iw_create_qp_resp uresp;
2119     unsigned int sqsize, rqsize = 0;
2120     struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
2121         udata, struct c4iw_ucontext, ibucontext);
2122     int ret;
2123     struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
2124     struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
2125 
2126     if (attrs->qp_type != IB_QPT_RC || attrs->create_flags)
2127         return -EOPNOTSUPP;
2128 
2129     php = to_c4iw_pd(pd);
2130     rhp = php->rhp;
2131     schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
2132     rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
2133     if (!schp || !rchp)
2134         return -EINVAL;
2135 
2136     if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
2137         return -EINVAL;
2138 
2139     if (!attrs->srq) {
2140         if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2141             return -E2BIG;
2142         rqsize = attrs->cap.max_recv_wr + 1;
2143         if (rqsize < 8)
2144             rqsize = 8;
2145     }
2146 
2147     if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
2148         return -E2BIG;
2149     sqsize = attrs->cap.max_send_wr + 1;
2150     if (sqsize < 8)
2151         sqsize = 8;
2152 
2153     qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2154     if (!qhp->wr_waitp)
2155         return -ENOMEM;
2156 
2157     qhp->wq.sq.size = sqsize;
2158     qhp->wq.sq.memsize =
2159         (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2160         sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
2161     qhp->wq.sq.flush_cidx = -1;
2162     if (!attrs->srq) {
2163         qhp->wq.rq.size = rqsize;
2164         qhp->wq.rq.memsize =
2165             (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2166             sizeof(*qhp->wq.rq.queue);
2167     }
2168 
2169     if (ucontext) {
2170         qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
2171         if (!attrs->srq)
2172             qhp->wq.rq.memsize =
2173                 roundup(qhp->wq.rq.memsize, PAGE_SIZE);
2174     }
2175 
2176     ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
2177             ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2178             qhp->wr_waitp, !attrs->srq);
2179     if (ret)
2180         goto err_free_wr_wait;
2181 
2182     attrs->cap.max_recv_wr = rqsize - 1;
2183     attrs->cap.max_send_wr = sqsize - 1;
2184     attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
2185 
2186     qhp->rhp = rhp;
2187     qhp->attr.pd = php->pdid;
2188     qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
2189     qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
2190     qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
2191     qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
2192     qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
2193     if (!attrs->srq) {
2194         qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
2195         qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
2196     }
2197     qhp->attr.state = C4IW_QP_STATE_IDLE;
2198     qhp->attr.next_state = C4IW_QP_STATE_IDLE;
2199     qhp->attr.enable_rdma_read = 1;
2200     qhp->attr.enable_rdma_write = 1;
2201     qhp->attr.enable_bind = 1;
2202     qhp->attr.max_ord = 0;
2203     qhp->attr.max_ird = 0;
2204     qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
2205     spin_lock_init(&qhp->lock);
2206     mutex_init(&qhp->mutex);
2207     init_waitqueue_head(&qhp->wait);
2208     init_completion(&qhp->qp_rel_comp);
2209     refcount_set(&qhp->qp_refcnt, 1);
2210 
2211     ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL);
2212     if (ret)
2213         goto err_destroy_qp;
2214 
2215     if (udata && ucontext) {
2216         sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
2217         if (!sq_key_mm) {
2218             ret = -ENOMEM;
2219             goto err_remove_handle;
2220         }
2221         if (!attrs->srq) {
2222             rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
2223             if (!rq_key_mm) {
2224                 ret = -ENOMEM;
2225                 goto err_free_sq_key;
2226             }
2227         }
2228         sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
2229         if (!sq_db_key_mm) {
2230             ret = -ENOMEM;
2231             goto err_free_rq_key;
2232         }
2233         if (!attrs->srq) {
2234             rq_db_key_mm =
2235                 kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
2236             if (!rq_db_key_mm) {
2237                 ret = -ENOMEM;
2238                 goto err_free_sq_db_key;
2239             }
2240         }
2241         memset(&uresp, 0, sizeof(uresp));
2242         if (t4_sq_onchip(&qhp->wq.sq)) {
2243             ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
2244                          GFP_KERNEL);
2245             if (!ma_sync_key_mm) {
2246                 ret = -ENOMEM;
2247                 goto err_free_rq_db_key;
2248             }
2249             uresp.flags = C4IW_QPF_ONCHIP;
2250         }
2251         if (rhp->rdev.lldi.write_w_imm_support)
2252             uresp.flags |= C4IW_QPF_WRITE_W_IMM;
2253         uresp.qid_mask = rhp->rdev.qpmask;
2254         uresp.sqid = qhp->wq.sq.qid;
2255         uresp.sq_size = qhp->wq.sq.size;
2256         uresp.sq_memsize = qhp->wq.sq.memsize;
2257         if (!attrs->srq) {
2258             uresp.rqid = qhp->wq.rq.qid;
2259             uresp.rq_size = qhp->wq.rq.size;
2260             uresp.rq_memsize = qhp->wq.rq.memsize;
2261         }
2262         spin_lock(&ucontext->mmap_lock);
2263         if (ma_sync_key_mm) {
2264             uresp.ma_sync_key = ucontext->key;
2265             ucontext->key += PAGE_SIZE;
2266         }
2267         uresp.sq_key = ucontext->key;
2268         ucontext->key += PAGE_SIZE;
2269         if (!attrs->srq) {
2270             uresp.rq_key = ucontext->key;
2271             ucontext->key += PAGE_SIZE;
2272         }
2273         uresp.sq_db_gts_key = ucontext->key;
2274         ucontext->key += PAGE_SIZE;
2275         if (!attrs->srq) {
2276             uresp.rq_db_gts_key = ucontext->key;
2277             ucontext->key += PAGE_SIZE;
2278         }
2279         spin_unlock(&ucontext->mmap_lock);
2280         ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2281         if (ret)
2282             goto err_free_ma_sync_key;
2283         sq_key_mm->key = uresp.sq_key;
2284         sq_key_mm->addr = qhp->wq.sq.phys_addr;
2285         sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
2286         insert_mmap(ucontext, sq_key_mm);
2287         if (!attrs->srq) {
2288             rq_key_mm->key = uresp.rq_key;
2289             rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
2290             rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
2291             insert_mmap(ucontext, rq_key_mm);
2292         }
2293         sq_db_key_mm->key = uresp.sq_db_gts_key;
2294         sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
2295         sq_db_key_mm->len = PAGE_SIZE;
2296         insert_mmap(ucontext, sq_db_key_mm);
2297         if (!attrs->srq) {
2298             rq_db_key_mm->key = uresp.rq_db_gts_key;
2299             rq_db_key_mm->addr =
2300                 (u64)(unsigned long)qhp->wq.rq.bar2_pa;
2301             rq_db_key_mm->len = PAGE_SIZE;
2302             insert_mmap(ucontext, rq_db_key_mm);
2303         }
2304         if (ma_sync_key_mm) {
2305             ma_sync_key_mm->key = uresp.ma_sync_key;
2306             ma_sync_key_mm->addr =
2307                 (pci_resource_start(rhp->rdev.lldi.pdev, 0) +
2308                 PCIE_MA_SYNC_A) & PAGE_MASK;
2309             ma_sync_key_mm->len = PAGE_SIZE;
2310             insert_mmap(ucontext, ma_sync_key_mm);
2311         }
2312 
2313         qhp->ucontext = ucontext;
2314     }
2315     if (!attrs->srq) {
2316         qhp->wq.qp_errp =
2317             &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
2318     } else {
2319         qhp->wq.qp_errp =
2320             &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
2321         qhp->wq.srqidxp =
2322             &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
2323     }
2324 
2325     qhp->ibqp.qp_num = qhp->wq.sq.qid;
2326     if (attrs->srq)
2327         qhp->srq = to_c4iw_srq(attrs->srq);
2328     INIT_LIST_HEAD(&qhp->db_fc_entry);
2329     pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
2330          qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
2331          attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
2332          qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
2333     return 0;
2334 err_free_ma_sync_key:
2335     kfree(ma_sync_key_mm);
2336 err_free_rq_db_key:
2337     if (!attrs->srq)
2338         kfree(rq_db_key_mm);
2339 err_free_sq_db_key:
2340     kfree(sq_db_key_mm);
2341 err_free_rq_key:
2342     if (!attrs->srq)
2343         kfree(rq_key_mm);
2344 err_free_sq_key:
2345     kfree(sq_key_mm);
2346 err_remove_handle:
2347     xa_erase_irq(&rhp->qps, qhp->wq.sq.qid);
2348 err_destroy_qp:
2349     destroy_qp(&rhp->rdev, &qhp->wq,
2350            ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
2351 err_free_wr_wait:
2352     c4iw_put_wr_wait(qhp->wr_waitp);
2353     return ret;
2354 }
2355 
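/*
 * ib_modify_qp entry point: translate the ib_qp_attr/attr_mask pair
 * into the driver's c4iw_qp_attributes/mask and call c4iw_modify_qp().
 * The RTR state is dropped from the mask since iWARP has no RTR
 * transition.
 */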
2356 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2357               int attr_mask, struct ib_udata *udata)
2358 {
2359     struct c4iw_dev *rhp;
2360     struct c4iw_qp *qhp;
2361     enum c4iw_qp_attr_mask mask = 0;
2362     struct c4iw_qp_attributes attrs = {};
2363 
2364     pr_debug("ib_qp %p\n", ibqp);
2365 
2366     if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
2367         return -EOPNOTSUPP;
2368 
2369     /* iwarp does not support the RTR state */
2370     if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
2371         attr_mask &= ~IB_QP_STATE;
2372 
2373     /* Make sure we still have something left to do */
2374     if (!attr_mask)
2375         return 0;
2376 
2377     qhp = to_c4iw_qp(ibqp);
2378     rhp = qhp->rhp;
2379 
2380     attrs.next_state = c4iw_convert_state(attr->qp_state);
2381     attrs.enable_rdma_read = (attr->qp_access_flags &
2382                    IB_ACCESS_REMOTE_READ) ?  1 : 0;
2383     attrs.enable_rdma_write = (attr->qp_access_flags &
2384                 IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2385     attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
2386 
2387 
2388     mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
2389     mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
2390             (C4IW_QP_ATTR_ENABLE_RDMA_READ |
2391              C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
2392              C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
2393 
2394     /*
2395      * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
2396      * ringing the queue db when we're in DB_FULL mode.
2397      * Only allow this on T4 devices.
2398      */
2399     attrs.sq_db_inc = attr->sq_psn;
2400     attrs.rq_db_inc = attr->rq_psn;
2401     mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
2402     mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
2403     if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
2404         (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
2405         return -EINVAL;
2406 
2407     return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
2408 }
2409 
2410 struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
2411 {
2412     pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn);
2413     return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
2414 }
2415 
2416 void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
2417 {
2418     struct ib_event event = {};
2419 
2420     event.device = &srq->rhp->ibdev;
2421     event.element.srq = &srq->ibsrq;
2422     event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2423     ib_dispatch_event(&event);
2424 }
2425 
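/*
 * Modify SRQ: resizing (IB_SRQ_MAX_WR) is not supported; a kernel
 * caller may arm the SRQ limit, while a zero mask from userspace is
 * treated as a request to re-report the limit-reached event (see the
 * XXX note below).
 */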
2426 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
2427             enum ib_srq_attr_mask srq_attr_mask,
2428             struct ib_udata *udata)
2429 {
2430     struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
2431     int ret = 0;
2432 
2433     /*
2434      * XXX 0 mask == a SW interrupt for srq_limit reached...
2435      */
2436     if (udata && !srq_attr_mask) {
2437         c4iw_dispatch_srq_limit_reached_event(srq);
2438         goto out;
2439     }
2440 
2441     /* no support for this yet */
2442     if (srq_attr_mask & IB_SRQ_MAX_WR) {
2443         ret = -EINVAL;
2444         goto out;
2445     }
2446 
2447     if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
2448         srq->armed = true;
2449         srq->srq_limit = attr->srq_limit;
2450     }
2451 out:
2452     return ret;
2453 }
2454 
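/*
 * Query QP verb: report the cached software state and the
 * creation-time capabilities; no hardware query is performed.
 */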
2455 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2456              int attr_mask, struct ib_qp_init_attr *init_attr)
2457 {
2458     struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
2459 
2460     memset(attr, 0, sizeof(*attr));
2461     memset(init_attr, 0, sizeof(*init_attr));
2462     attr->qp_state = to_ib_qp_state(qhp->attr.state);
2463     attr->cur_qp_state = to_ib_qp_state(qhp->attr.state);
2464     init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
2465     init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
2466     init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
2467     init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
2468     init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
2469     init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
2470     return 0;
2471 }
2472 
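/*
 * Release an SRQ's hardware resources: post a FW_RI_RES_WR RESET for
 * the SRQ using the skb pre-allocated at create time, then free the
 * queue memory, RQT entries, software ring and qid.
 */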
2473 static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2474                struct c4iw_wr_wait *wr_waitp)
2475 {
2476     struct c4iw_rdev *rdev = &srq->rhp->rdev;
2477     struct sk_buff *skb = srq->destroy_skb;
2478     struct t4_srq *wq = &srq->wq;
2479     struct fw_ri_res_wr *res_wr;
2480     struct fw_ri_res *res;
2481     int wr_len;
2482 
2483     wr_len = sizeof(*res_wr) + sizeof(*res);
2484     set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2485 
2486     res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2487     memset(res_wr, 0, wr_len);
2488     res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2489             FW_RI_RES_WR_NRES_V(1) |
2490             FW_WR_COMPL_F);
2491     res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2492     res_wr->cookie = (uintptr_t)wr_waitp;
2493     res = res_wr->res;
2494     res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2495     res->u.srq.op = FW_RI_RES_OP_RESET;
2496     res->u.srq.srqid = cpu_to_be32(srq->idx);
2497     res->u.srq.eqid = cpu_to_be32(wq->qid);
2498 
2499     c4iw_init_wr_wait(wr_waitp);
2500     c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
2501 
2502     dma_free_coherent(&rdev->lldi.pdev->dev,
2503               wq->memsize, wq->queue,
2504             dma_unmap_addr(wq, mapping));
2505     c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2506     kfree(wq->sw_rq);
2507     c4iw_put_qpid(rdev, wq->qid, uctx);
2508 }
2509 
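/*
 * Allocate an SRQ's hardware resources: a qid, the software rings
 * (kernel SRQs only), RQT entries, DMA-coherent queue memory and the
 * BAR2 doorbell mapping, then post a FW_RI_RES_WR WRITE to create the
 * egress queue in hardware and wait for the firmware reply.
 */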
2510 static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2511                struct c4iw_wr_wait *wr_waitp)
2512 {
2513     struct c4iw_rdev *rdev = &srq->rhp->rdev;
2514     int user = (uctx != &rdev->uctx);
2515     struct t4_srq *wq = &srq->wq;
2516     struct fw_ri_res_wr *res_wr;
2517     struct fw_ri_res *res;
2518     struct sk_buff *skb;
2519     int wr_len;
2520     int eqsize;
2521     int ret = -ENOMEM;
2522 
2523     wq->qid = c4iw_get_qpid(rdev, uctx);
2524     if (!wq->qid)
2525         goto err;
2526 
2527     if (!user) {
2528         wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
2529                     GFP_KERNEL);
2530         if (!wq->sw_rq)
2531             goto err_put_qpid;
2532         wq->pending_wrs = kcalloc(srq->wq.size,
2533                       sizeof(*srq->wq.pending_wrs),
2534                       GFP_KERNEL);
2535         if (!wq->pending_wrs)
2536             goto err_free_sw_rq;
2537     }
2538 
2539     wq->rqt_size = wq->size;
2540     wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
2541     if (!wq->rqt_hwaddr)
2542         goto err_free_pending_wrs;
2543     wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
2544         T4_RQT_ENTRY_SHIFT;
2545 
2546     wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, wq->memsize,
2547                        &wq->dma_addr, GFP_KERNEL);
2548     if (!wq->queue)
2549         goto err_free_rqtpool;
2550 
2551     dma_unmap_addr_set(wq, mapping, wq->dma_addr);
2552 
2553     wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
2554                       &wq->bar2_qid,
2555             user ? &wq->bar2_pa : NULL);
2556 
2557     /*
2558      * User mode must have bar2 access.
2559      */
2560 
2561     if (user && !wq->bar2_va) {
2562         pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
2563             pci_name(rdev->lldi.pdev), wq->qid);
2564         ret = -EINVAL;
2565         goto err_free_queue;
2566     }
2567 
2568     /* build fw_ri_res_wr */
2569     wr_len = sizeof(*res_wr) + sizeof(*res);
2570 
2571     skb = alloc_skb(wr_len, GFP_KERNEL);
2572     if (!skb)
2573         goto err_free_queue;
2574     set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2575 
2576     res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2577     memset(res_wr, 0, wr_len);
2578     res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2579             FW_RI_RES_WR_NRES_V(1) |
2580             FW_WR_COMPL_F);
2581     res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2582     res_wr->cookie = (uintptr_t)wr_waitp;
2583     res = res_wr->res;
2584     res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2585     res->u.srq.op = FW_RI_RES_OP_WRITE;
2586 
2587     /*
2588      * eqsize is the number of 64B entries plus the status page size.
2589      */
2590     eqsize = wq->size * T4_RQ_NUM_SLOTS +
2591         rdev->hw_queue.t4_eq_status_entries;
2592     res->u.srq.eqid = cpu_to_be32(wq->qid);
2593     res->u.srq.fetchszm_to_iqid =
2594                         /* no host cidx updates */
2595         cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
2596         FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
2597         FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
2598         FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
2599     res->u.srq.dcaen_to_eqsize =
2600         cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
2601         FW_RI_RES_WR_DCACPU_V(0) |
2602         FW_RI_RES_WR_FBMIN_V(2) |
2603         FW_RI_RES_WR_FBMAX_V(3) |
2604         FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
2605         FW_RI_RES_WR_CIDXFTHRESH_V(0) |
2606         FW_RI_RES_WR_EQSIZE_V(eqsize));
2607     res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
2608     res->u.srq.srqid = cpu_to_be32(srq->idx);
2609     res->u.srq.pdid = cpu_to_be32(srq->pdid);
2610     res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
2611     res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
2612             rdev->lldi.vr->rq.start);
2613 
2614     c4iw_init_wr_wait(wr_waitp);
2615 
2616     ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
2617     if (ret)
2618         goto err_free_queue;
2619 
2620     pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
2621             " bar2_addr %p rqt addr 0x%x size %d\n",
2622             __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
2623             (u64)virt_to_phys(wq->queue), wq->bar2_va,
2624             wq->rqt_hwaddr, wq->rqt_size);
2625 
2626     return 0;
2627 err_free_queue:
2628     dma_free_coherent(&rdev->lldi.pdev->dev,
2629               wq->memsize, wq->queue,
2630             dma_unmap_addr(wq, mapping));
2631 err_free_rqtpool:
2632     c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2633 err_free_pending_wrs:
2634     if (!user)
2635         kfree(wq->pending_wrs);
2636 err_free_sw_rq:
2637     if (!user)
2638         kfree(wq->sw_rq);
2639 err_put_qpid:
2640     c4iw_put_qpid(rdev, wq->qid, uctx);
2641 err:
2642     return ret;
2643 }
2644 
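/*
 * Copy a receive work request into the SRQ ring 16 bytes at a time,
 * wrapping at the end of the queue.  len16 is the WR length in 16-byte
 * units.
 */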
2645 void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
2646 {
2647     u64 *src, *dst;
2648 
2649     src = (u64 *)wqe;
2650     dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
2651     while (len16) {
2652         *dst++ = *src++;
2653         if (dst >= (u64 *)&srq->queue[srq->size])
2654             dst = (u64 *)srq->queue;
2655         *dst++ = *src++;
2656         if (dst >= (u64 *)&srq->queue[srq->size])
2657             dst = (u64 *)srq->queue;
2658         len16--;
2659     }
2660 }
2661 
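/*
 * Create SRQ verb: validate the limits, round the queue depth up to a
 * power of two (minimum 16), allocate an SRQ index, the destroy skb and
 * the hardware queue, and for user SRQs return the queue and doorbell
 * mmap keys via udata.
 */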
2662 int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
2663                    struct ib_udata *udata)
2664 {
2665     struct ib_pd *pd = ib_srq->pd;
2666     struct c4iw_dev *rhp;
2667     struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
2668     struct c4iw_pd *php;
2669     struct c4iw_create_srq_resp uresp;
2670     struct c4iw_ucontext *ucontext;
2671     struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
2672     int rqsize;
2673     int ret;
2674     int wr_len;
2675 
2676     if (attrs->srq_type != IB_SRQT_BASIC)
2677         return -EOPNOTSUPP;
2678 
2679     pr_debug("%s ib_pd %p\n", __func__, pd);
2680 
2681     php = to_c4iw_pd(pd);
2682     rhp = php->rhp;
2683 
2684     if (!rhp->rdev.lldi.vr->srq.size)
2685         return -EINVAL;
2686     if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2687         return -E2BIG;
2688     if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
2689         return -E2BIG;
2690 
2691     /*
2692      * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
2693      */
2694     rqsize = attrs->attr.max_wr + 1;
2695     rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
2696 
2697     ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
2698                          ibucontext);
2699 
2700     srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2701     if (!srq->wr_waitp)
2702         return -ENOMEM;
2703 
2704     srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
2705     if (srq->idx < 0) {
2706         ret = -ENOMEM;
2707         goto err_free_wr_wait;
2708     }
2709 
2710     wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
2711     srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
2712     if (!srq->destroy_skb) {
2713         ret = -ENOMEM;
2714         goto err_free_srq_idx;
2715     }
2716 
2717     srq->rhp = rhp;
2718     srq->pdid = php->pdid;
2719 
2720     srq->wq.size = rqsize;
2721     srq->wq.memsize =
2722         (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2723         sizeof(*srq->wq.queue);
2724     if (ucontext)
2725         srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
2726 
2727     ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
2728             &rhp->rdev.uctx, srq->wr_waitp);
2729     if (ret)
2730         goto err_free_skb;
2731     attrs->attr.max_wr = rqsize - 1;
2732 
2733     if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
2734         srq->flags = T4_SRQ_LIMIT_SUPPORT;
2735 
2736     if (udata) {
2737         srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
2738         if (!srq_key_mm) {
2739             ret = -ENOMEM;
2740             goto err_free_queue;
2741         }
2742         srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
2743         if (!srq_db_key_mm) {
2744             ret = -ENOMEM;
2745             goto err_free_srq_key_mm;
2746         }
2747         memset(&uresp, 0, sizeof(uresp));
2748         uresp.flags = srq->flags;
2749         uresp.qid_mask = rhp->rdev.qpmask;
2750         uresp.srqid = srq->wq.qid;
2751         uresp.srq_size = srq->wq.size;
2752         uresp.srq_memsize = srq->wq.memsize;
2753         uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
2754         spin_lock(&ucontext->mmap_lock);
2755         uresp.srq_key = ucontext->key;
2756         ucontext->key += PAGE_SIZE;
2757         uresp.srq_db_gts_key = ucontext->key;
2758         ucontext->key += PAGE_SIZE;
2759         spin_unlock(&ucontext->mmap_lock);
2760         ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2761         if (ret)
2762             goto err_free_srq_db_key_mm;
2763         srq_key_mm->key = uresp.srq_key;
2764         srq_key_mm->addr = virt_to_phys(srq->wq.queue);
2765         srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
2766         insert_mmap(ucontext, srq_key_mm);
2767         srq_db_key_mm->key = uresp.srq_db_gts_key;
2768         srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
2769         srq_db_key_mm->len = PAGE_SIZE;
2770         insert_mmap(ucontext, srq_db_key_mm);
2771     }
2772 
2773     pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
2774          __func__, srq->wq.qid, srq->idx, srq->wq.size,
2775             (unsigned long)srq->wq.memsize, attrs->attr.max_wr);
2776 
2777     spin_lock_init(&srq->lock);
2778     return 0;
2779 
2780 err_free_srq_db_key_mm:
2781     kfree(srq_db_key_mm);
2782 err_free_srq_key_mm:
2783     kfree(srq_key_mm);
2784 err_free_queue:
2785     free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2786                srq->wr_waitp);
2787 err_free_skb:
2788     kfree_skb(srq->destroy_skb);
2789 err_free_srq_idx:
2790     c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2791 err_free_wr_wait:
2792     c4iw_put_wr_wait(srq->wr_waitp);
2793     return ret;
2794 }
2795 
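/*
 * Destroy SRQ verb: release the hardware queue, the SRQ index and the
 * wait object.
 */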
2796 int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
2797 {
2798     struct c4iw_dev *rhp;
2799     struct c4iw_srq *srq;
2800     struct c4iw_ucontext *ucontext;
2801 
2802     srq = to_c4iw_srq(ibsrq);
2803     rhp = srq->rhp;
2804 
2805     pr_debug("%s id %d\n", __func__, srq->wq.qid);
2806     ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
2807                          ibucontext);
2808     free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2809                srq->wr_waitp);
2810     c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2811     c4iw_put_wr_wait(srq->wr_waitp);
2812     return 0;
2813 }