0001 /*
0002  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
0003  * Copyright (c) 2005 Cisco Systems. All rights reserved.
0004  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
0005  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
0006  *
0007  * This software is available to you under a choice of one of two
0008  * licenses.  You may choose to be licensed under the terms of the GNU
0009  * General Public License (GPL) Version 2, available from the file
0010  * COPYING in the main directory of this source tree, or the
0011  * OpenIB.org BSD license below:
0012  *
0013  *     Redistribution and use in source and binary forms, with or
0014  *     without modification, are permitted provided that the following
0015  *     conditions are met:
0016  *
0017  *      - Redistributions of source code must retain the above
0018  *        copyright notice, this list of conditions and the following
0019  *        disclaimer.
0020  *
0021  *      - Redistributions in binary form must reproduce the above
0022  *        copyright notice, this list of conditions and the following
0023  *        disclaimer in the documentation and/or other materials
0024  *        provided with the distribution.
0025  *
0026  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0027  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0028  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0029  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0030  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0031  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0032  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0033  * SOFTWARE.
0034  */
0035 
0036 #include <linux/string.h>
0037 #include <linux/slab.h>
0038 #include <linux/sched.h>
0039 
0040 #include <asm/io.h>
0041 
0042 #include <rdma/ib_verbs.h>
0043 #include <rdma/ib_cache.h>
0044 #include <rdma/ib_pack.h>
0045 #include <rdma/uverbs_ioctl.h>
0046 
0047 #include "mthca_dev.h"
0048 #include "mthca_cmd.h"
0049 #include "mthca_memfree.h"
0050 #include "mthca_wqe.h"
0051 
0052 enum {
0053     MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
0054     MTHCA_ACK_REQ_FREQ       = 10,
0055     MTHCA_FLIGHT_LIMIT       = 9,
0056     MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
0057     MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
0058     MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */
0059 };
0060 
0061 enum {
0062     MTHCA_QP_STATE_RST  = 0,
0063     MTHCA_QP_STATE_INIT = 1,
0064     MTHCA_QP_STATE_RTR  = 2,
0065     MTHCA_QP_STATE_RTS  = 3,
0066     MTHCA_QP_STATE_SQE  = 4,
0067     MTHCA_QP_STATE_SQD  = 5,
0068     MTHCA_QP_STATE_ERR  = 6,
0069     MTHCA_QP_STATE_DRAINING = 7
0070 };
0071 
0072 enum {
0073     MTHCA_QP_ST_RC  = 0x0,
0074     MTHCA_QP_ST_UC  = 0x1,
0075     MTHCA_QP_ST_RD  = 0x2,
0076     MTHCA_QP_ST_UD  = 0x3,
0077     MTHCA_QP_ST_MLX = 0x7
0078 };
0079 
0080 enum {
0081     MTHCA_QP_PM_MIGRATED = 0x3,
0082     MTHCA_QP_PM_ARMED    = 0x0,
0083     MTHCA_QP_PM_REARM    = 0x1
0084 };
0085 
0086 enum {
0087     /* qp_context flags */
0088     MTHCA_QP_BIT_DE  = 1 <<  8,
0089     /* params1 */
0090     MTHCA_QP_BIT_SRE = 1 << 15,
0091     MTHCA_QP_BIT_SWE = 1 << 14,
0092     MTHCA_QP_BIT_SAE = 1 << 13,
0093     MTHCA_QP_BIT_SIC = 1 <<  4,
0094     MTHCA_QP_BIT_SSC = 1 <<  3,
0095     /* params2 */
0096     MTHCA_QP_BIT_RRE = 1 << 15,
0097     MTHCA_QP_BIT_RWE = 1 << 14,
0098     MTHCA_QP_BIT_RAE = 1 << 13,
0099     MTHCA_QP_BIT_RIC = 1 <<  4,
0100     MTHCA_QP_BIT_RSC = 1 <<  3
0101 };
0102 
0103 enum {
0104     MTHCA_SEND_DOORBELL_FENCE = 1 << 5
0105 };
0106 
0107 struct mthca_qp_path {
0108     __be32 port_pkey;
0109     u8     rnr_retry;
0110     u8     g_mylmc;
0111     __be16 rlid;
0112     u8     ackto;
0113     u8     mgid_index;
0114     u8     static_rate;
0115     u8     hop_limit;
0116     __be32 sl_tclass_flowlabel;
0117     u8     rgid[16];
0118 } __packed;
0119 
0120 struct mthca_qp_context {
0121     __be32 flags;
0122     __be32 tavor_sched_queue; /* Reserved on Arbel */
0123     u8     mtu_msgmax;
0124     u8     rq_size_stride;  /* Reserved on Tavor */
0125     u8     sq_size_stride;  /* Reserved on Tavor */
0126     u8     rlkey_arbel_sched_queue; /* Reserved on Tavor */
0127     __be32 usr_page;
0128     __be32 local_qpn;
0129     __be32 remote_qpn;
0130     u32    reserved1[2];
0131     struct mthca_qp_path pri_path;
0132     struct mthca_qp_path alt_path;
0133     __be32 rdd;
0134     __be32 pd;
0135     __be32 wqe_base;
0136     __be32 wqe_lkey;
0137     __be32 params1;
0138     __be32 reserved2;
0139     __be32 next_send_psn;
0140     __be32 cqn_snd;
0141     __be32 snd_wqe_base_l;  /* Next send WQE on Tavor */
0142     __be32 snd_db_index;    /* (debugging only entries) */
0143     __be32 last_acked_psn;
0144     __be32 ssn;
0145     __be32 params2;
0146     __be32 rnr_nextrecvpsn;
0147     __be32 ra_buff_indx;
0148     __be32 cqn_rcv;
0149     __be32 rcv_wqe_base_l;  /* Next recv WQE on Tavor */
0150     __be32 rcv_db_index;    /* (debugging only entries) */
0151     __be32 qkey;
0152     __be32 srqn;
0153     __be32 rmsn;
0154     __be16 rq_wqe_counter;  /* reserved on Tavor */
0155     __be16 sq_wqe_counter;  /* reserved on Tavor */
0156     u32    reserved3[18];
0157 } __packed;
0158 
0159 struct mthca_qp_param {
0160     __be32 opt_param_mask;
0161     u32    reserved1;
0162     struct mthca_qp_context context;
0163     u32    reserved2[62];
0164 } __packed;
0165 
0166 enum {
0167     MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
0168     MTHCA_QP_OPTPAR_RRE               = 1 << 1,
0169     MTHCA_QP_OPTPAR_RAE               = 1 << 2,
0170     MTHCA_QP_OPTPAR_RWE               = 1 << 3,
0171     MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
0172     MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
0173     MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
0174     MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
0175     MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
0176     MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
0177     MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
0178     MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
0179     MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
0180     MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
0181     MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
0182     MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
0183     MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
0184 };
0185 
0186 static const u8 mthca_opcode[] = {
0187     [IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
0188     [IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
0189     [IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
0190     [IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
0191     [IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
0192     [IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
0193     [IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
0194 };
0195 
0196 static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
0197 {
0198     return qp->qpn >= dev->qp_table.sqp_start &&
0199         qp->qpn <= dev->qp_table.sqp_start + 3;
0200 }
0201 
0202 static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
0203 {
0204     return qp->qpn >= dev->qp_table.sqp_start &&
0205         qp->qpn <= dev->qp_table.sqp_start + 1;
0206 }
0207 
0208 static void *get_recv_wqe(struct mthca_qp *qp, int n)
0209 {
0210     if (qp->is_direct)
0211         return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
0212     else
0213         return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
0214             ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
0215 }
0216 
0217 static void *get_send_wqe(struct mthca_qp *qp, int n)
0218 {
0219     if (qp->is_direct)
0220         return qp->queue.direct.buf + qp->send_wqe_offset +
0221             (n << qp->sq.wqe_shift);
0222     else
0223         return qp->queue.page_list[(qp->send_wqe_offset +
0224                         (n << qp->sq.wqe_shift)) >>
0225                        PAGE_SHIFT].buf +
0226             ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
0227              (PAGE_SIZE - 1));
0228 }
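/*
 * WQE addressing: receive WQEs start at offset 0 of the queue buffer and
 * send WQEs start at send_wqe_offset; entry n lives at n << wqe_shift
 * within its queue.  When the buffer is not a single direct allocation,
 * that byte offset is split into a page-list index (offset >> PAGE_SHIFT)
 * and an offset within that page (offset & (PAGE_SIZE - 1)).  Illustrative
 * numbers: with rq.wqe_shift == 6 (64-byte WQEs) and 4 KiB pages, receive
 * WQE 70 sits at byte 4480, i.e. page_list[1] at offset 384.
 */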
0229 
0230 static void mthca_wq_reset(struct mthca_wq *wq)
0231 {
0232     wq->next_ind  = 0;
0233     wq->last_comp = wq->max - 1;
0234     wq->head      = 0;
0235     wq->tail      = 0;
0236 }
0237 
0238 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
0239             enum ib_event_type event_type)
0240 {
0241     struct mthca_qp *qp;
0242     struct ib_event event;
0243 
0244     spin_lock(&dev->qp_table.lock);
0245     qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
0246     if (qp)
0247         ++qp->refcount;
0248     spin_unlock(&dev->qp_table.lock);
0249 
0250     if (!qp) {
0251         mthca_warn(dev, "Async event %d for bogus QP %08x\n",
0252                event_type, qpn);
0253         return;
0254     }
0255 
0256     if (event_type == IB_EVENT_PATH_MIG)
0257         qp->port = qp->alt_port;
0258 
0259     event.device      = &dev->ib_dev;
0260     event.event       = event_type;
0261     event.element.qp  = &qp->ibqp;
0262     if (qp->ibqp.event_handler)
0263         qp->ibqp.event_handler(&event, qp->ibqp.qp_context);
0264 
0265     spin_lock(&dev->qp_table.lock);
0266     if (!--qp->refcount)
0267         wake_up(&qp->wait);
0268     spin_unlock(&dev->qp_table.lock);
0269 }
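/*
 * The lookup and refcount bump above happen under qp_table.lock, so the QP
 * cannot be freed while its event handler runs; mthca_free_qp() waits for
 * the refcount to drop back before tearing the QP down.  A PATH_MIG event
 * also records that the hardware has switched over to the alternate port.
 */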
0270 
0271 static int to_mthca_state(enum ib_qp_state ib_state)
0272 {
0273     switch (ib_state) {
0274     case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
0275     case IB_QPS_INIT:  return MTHCA_QP_STATE_INIT;
0276     case IB_QPS_RTR:   return MTHCA_QP_STATE_RTR;
0277     case IB_QPS_RTS:   return MTHCA_QP_STATE_RTS;
0278     case IB_QPS_SQD:   return MTHCA_QP_STATE_SQD;
0279     case IB_QPS_SQE:   return MTHCA_QP_STATE_SQE;
0280     case IB_QPS_ERR:   return MTHCA_QP_STATE_ERR;
0281     default:                return -1;
0282     }
0283 }
0284 
0285 enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };
0286 
0287 static int to_mthca_st(int transport)
0288 {
0289     switch (transport) {
0290     case RC:  return MTHCA_QP_ST_RC;
0291     case UC:  return MTHCA_QP_ST_UC;
0292     case UD:  return MTHCA_QP_ST_UD;
0293     case RD:  return MTHCA_QP_ST_RD;
0294     case MLX: return MTHCA_QP_ST_MLX;
0295     default:  return -1;
0296     }
0297 }
0298 
0299 static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
0300             int attr_mask)
0301 {
0302     if (attr_mask & IB_QP_PKEY_INDEX)
0303         sqp->pkey_index = attr->pkey_index;
0304     if (attr_mask & IB_QP_QKEY)
0305         sqp->qkey = attr->qkey;
0306     if (attr_mask & IB_QP_SQ_PSN)
0307         sqp->send_psn = attr->sq_psn;
0308 }
0309 
0310 static void init_port(struct mthca_dev *dev, int port)
0311 {
0312     int err;
0313     struct mthca_init_ib_param param;
0314 
0315     memset(&param, 0, sizeof param);
0316 
0317     param.port_width = dev->limits.port_width_cap;
0318     param.vl_cap     = dev->limits.vl_cap;
0319     param.mtu_cap    = dev->limits.mtu_cap;
0320     param.gid_cap    = dev->limits.gid_table_len;
0321     param.pkey_cap   = dev->limits.pkey_table_len;
0322 
0323     err = mthca_INIT_IB(dev, &param, port);
0324     if (err)
0325         mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
0326 }
0327 
0328 static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
0329                   int attr_mask)
0330 {
0331     u8 dest_rd_atomic;
0332     u32 access_flags;
0333     u32 hw_access_flags = 0;
0334 
0335     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
0336         dest_rd_atomic = attr->max_dest_rd_atomic;
0337     else
0338         dest_rd_atomic = qp->resp_depth;
0339 
0340     if (attr_mask & IB_QP_ACCESS_FLAGS)
0341         access_flags = attr->qp_access_flags;
0342     else
0343         access_flags = qp->atomic_rd_en;
0344 
0345     if (!dest_rd_atomic)
0346         access_flags &= IB_ACCESS_REMOTE_WRITE;
0347 
0348     if (access_flags & IB_ACCESS_REMOTE_READ)
0349         hw_access_flags |= MTHCA_QP_BIT_RRE;
0350     if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
0351         hw_access_flags |= MTHCA_QP_BIT_RAE;
0352     if (access_flags & IB_ACCESS_REMOTE_WRITE)
0353         hw_access_flags |= MTHCA_QP_BIT_RWE;
0354 
0355     return cpu_to_be32(hw_access_flags);
0356 }
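/*
 * get_hw_access_flags() merges the new attributes with the cached ones:
 * each value is taken from *attr only if its bit is set in attr_mask,
 * otherwise from the QP's current resp_depth/atomic_rd_en.  If the
 * resulting responder depth is zero the QP cannot accept RDMA reads or
 * atomics, so everything except IB_ACCESS_REMOTE_WRITE is masked off
 * before translating to the RRE/RAE/RWE context bits.  E.g. flags
 * READ|WRITE with max_dest_rd_atomic == 0 yields only MTHCA_QP_BIT_RWE.
 */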
0357 
0358 static inline enum ib_qp_state to_ib_qp_state(int mthca_state)
0359 {
0360     switch (mthca_state) {
0361     case MTHCA_QP_STATE_RST:      return IB_QPS_RESET;
0362     case MTHCA_QP_STATE_INIT:     return IB_QPS_INIT;
0363     case MTHCA_QP_STATE_RTR:      return IB_QPS_RTR;
0364     case MTHCA_QP_STATE_RTS:      return IB_QPS_RTS;
0365     case MTHCA_QP_STATE_DRAINING:
0366     case MTHCA_QP_STATE_SQD:      return IB_QPS_SQD;
0367     case MTHCA_QP_STATE_SQE:      return IB_QPS_SQE;
0368     case MTHCA_QP_STATE_ERR:      return IB_QPS_ERR;
0369     default:                      return -1;
0370     }
0371 }
0372 
0373 static inline enum ib_mig_state to_ib_mig_state(int mthca_mig_state)
0374 {
0375     switch (mthca_mig_state) {
0376     case 0:  return IB_MIG_ARMED;
0377     case 1:  return IB_MIG_REARM;
0378     case 3:  return IB_MIG_MIGRATED;
0379     default: return -1;
0380     }
0381 }
0382 
0383 static int to_ib_qp_access_flags(int mthca_flags)
0384 {
0385     int ib_flags = 0;
0386 
0387     if (mthca_flags & MTHCA_QP_BIT_RRE)
0388         ib_flags |= IB_ACCESS_REMOTE_READ;
0389     if (mthca_flags & MTHCA_QP_BIT_RWE)
0390         ib_flags |= IB_ACCESS_REMOTE_WRITE;
0391     if (mthca_flags & MTHCA_QP_BIT_RAE)
0392         ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
0393 
0394     return ib_flags;
0395 }
0396 
0397 static void to_rdma_ah_attr(struct mthca_dev *dev,
0398                 struct rdma_ah_attr *ah_attr,
0399                 struct mthca_qp_path *path)
0400 {
0401     u8 port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
0402 
0403     memset(ah_attr, 0, sizeof(*ah_attr));
0404 
0405     if (port_num == 0 || port_num > dev->limits.num_ports)
0406         return;
0407     ah_attr->type = rdma_ah_find_type(&dev->ib_dev, port_num);
0408     rdma_ah_set_port_num(ah_attr, port_num);
0409 
0410     rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
0411     rdma_ah_set_sl(ah_attr, be32_to_cpu(path->sl_tclass_flowlabel) >> 28);
0412     rdma_ah_set_path_bits(ah_attr, path->g_mylmc & 0x7f);
0413     rdma_ah_set_static_rate(ah_attr,
0414                 mthca_rate_to_ib(dev,
0415                          path->static_rate & 0xf,
0416                          port_num));
0417     if (path->g_mylmc & (1 << 7)) {
0418         u32 tc_fl = be32_to_cpu(path->sl_tclass_flowlabel);
0419 
0420         rdma_ah_set_grh(ah_attr, NULL,
0421                 tc_fl & 0xfffff,
0422                 path->mgid_index &
0423                 (dev->limits.gid_table_len - 1),
0424                 path->hop_limit,
0425                 (tc_fl >> 20) & 0xff);
0426         rdma_ah_set_dgid_raw(ah_attr, path->rgid);
0427     }
0428 }
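/*
 * Decoding the hardware path: the port number is in bits 25:24 of
 * port_pkey, the SL in the top nibble of sl_tclass_flowlabel, the flow
 * label in its low 20 bits and the traffic class in bits 27:20.  Bit 7 of
 * g_mylmc flags the presence of a GRH (its low 7 bits are the source path
 * bits), and the low 4 bits of static_rate are translated back to an IB
 * rate with mthca_rate_to_ib().
 */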
0429 
0430 int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
0431            struct ib_qp_init_attr *qp_init_attr)
0432 {
0433     struct mthca_dev *dev = to_mdev(ibqp->device);
0434     struct mthca_qp *qp = to_mqp(ibqp);
0435     int err = 0;
0436     struct mthca_mailbox *mailbox = NULL;
0437     struct mthca_qp_param *qp_param;
0438     struct mthca_qp_context *context;
0439     int mthca_state;
0440 
0441     mutex_lock(&qp->mutex);
0442 
0443     if (qp->state == IB_QPS_RESET) {
0444         qp_attr->qp_state = IB_QPS_RESET;
0445         goto done;
0446     }
0447 
0448     mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
0449     if (IS_ERR(mailbox)) {
0450         err = PTR_ERR(mailbox);
0451         goto out;
0452     }
0453 
0454     err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox);
0455     if (err) {
0456         mthca_warn(dev, "QUERY_QP failed (%d)\n", err);
0457         goto out_mailbox;
0458     }
0459 
0460     qp_param    = mailbox->buf;
0461     context     = &qp_param->context;
0462     mthca_state = be32_to_cpu(context->flags) >> 28;
0463 
0464     qp->state            = to_ib_qp_state(mthca_state);
0465     qp_attr->qp_state        = qp->state;
0466     qp_attr->path_mtu        = context->mtu_msgmax >> 5;
0467     qp_attr->path_mig_state      =
0468         to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
0469     qp_attr->qkey            = be32_to_cpu(context->qkey);
0470     qp_attr->rq_psn          = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
0471     qp_attr->sq_psn          = be32_to_cpu(context->next_send_psn) & 0xffffff;
0472     qp_attr->dest_qp_num         = be32_to_cpu(context->remote_qpn) & 0xffffff;
0473     qp_attr->qp_access_flags     =
0474         to_ib_qp_access_flags(be32_to_cpu(context->params2));
0475 
0476     if (qp->transport == RC || qp->transport == UC) {
0477         to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
0478         to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
0479         qp_attr->alt_pkey_index =
0480             be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
0481         qp_attr->alt_port_num   =
0482             rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
0483     }
0484 
0485     qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
0486     qp_attr->port_num   =
0487         (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
0488 
0489     /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
0490     qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
0491 
0492     qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
0493 
0494     qp_attr->max_dest_rd_atomic =
0495         1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
0496     qp_attr->min_rnr_timer      =
0497         (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
0498     qp_attr->timeout        = context->pri_path.ackto >> 3;
0499     qp_attr->retry_cnt      = (be32_to_cpu(context->params1) >> 16) & 0x7;
0500     qp_attr->rnr_retry      = context->pri_path.rnr_retry >> 5;
0501     qp_attr->alt_timeout        = context->alt_path.ackto >> 3;
0502 
0503 done:
0504     qp_attr->cur_qp_state        = qp_attr->qp_state;
0505     qp_attr->cap.max_send_wr     = qp->sq.max;
0506     qp_attr->cap.max_recv_wr     = qp->rq.max;
0507     qp_attr->cap.max_send_sge    = qp->sq.max_gs;
0508     qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
0509     qp_attr->cap.max_inline_data = qp->max_inline_data;
0510 
0511     qp_init_attr->cap        = qp_attr->cap;
0512     qp_init_attr->sq_sig_type    = qp->sq_policy;
0513 
0514 out_mailbox:
0515     mthca_free_mailbox(dev, mailbox);
0516 
0517 out:
0518     mutex_unlock(&qp->mutex);
0519     return err;
0520 }
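/*
 * Unless the QP is in RESET (in which case only the software state is
 * reported), the attributes come from a QUERY_QP firmware snapshot: the
 * state is the top nibble of context->flags, the path MTU is
 * mtu_msgmax >> 5, PSNs are masked to 24 bits, and max_rd_atomic and
 * max_dest_rd_atomic are stored as log2 values in bits 23:21 of
 * params1/params2 (hence the 1 << extraction).  The capability fields
 * filled in at "done:" always reflect the driver's cached queue sizes
 * rather than the firmware copy.
 */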
0521 
0522 static int mthca_path_set(struct mthca_dev *dev, const struct rdma_ah_attr *ah,
0523               struct mthca_qp_path *path, u8 port)
0524 {
0525     path->g_mylmc     = rdma_ah_get_path_bits(ah) & 0x7f;
0526     path->rlid        = cpu_to_be16(rdma_ah_get_dlid(ah));
0527     path->static_rate = mthca_get_rate(dev, rdma_ah_get_static_rate(ah),
0528                        port);
0529 
0530     if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
0531         const struct ib_global_route *grh = rdma_ah_read_grh(ah);
0532 
0533         if (grh->sgid_index >= dev->limits.gid_table_len) {
0534             mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
0535                   grh->sgid_index,
0536                   dev->limits.gid_table_len - 1);
0537             return -1;
0538         }
0539 
0540         path->g_mylmc   |= 1 << 7;
0541         path->mgid_index = grh->sgid_index;
0542         path->hop_limit  = grh->hop_limit;
0543         path->sl_tclass_flowlabel =
0544             cpu_to_be32((rdma_ah_get_sl(ah) << 28) |
0545                     (grh->traffic_class << 20) |
0546                     (grh->flow_label));
0547         memcpy(path->rgid, grh->dgid.raw, 16);
0548     } else {
0549         path->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah) <<
0550                             28);
0551     }
0552 
0553     return 0;
0554 }
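/*
 * mthca_path_set() packs the address vector the way the QP context
 * expects: sl_tclass_flowlabel is SL << 28 | traffic_class << 20 |
 * flow_label, and bit 7 of g_mylmc marks that the GRH fields are valid.
 * For example (illustrative values), SL 3 with traffic class 0x10 and a
 * zero flow label packs to 0x31000000.  An sgid_index beyond the port's
 * GID table is rejected before anything is written.
 */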
0555 
0556 static int __mthca_modify_qp(struct ib_qp *ibqp,
0557                  const struct ib_qp_attr *attr, int attr_mask,
0558                  enum ib_qp_state cur_state,
0559                  enum ib_qp_state new_state,
0560                  struct ib_udata *udata)
0561 {
0562     struct mthca_dev *dev = to_mdev(ibqp->device);
0563     struct mthca_qp *qp = to_mqp(ibqp);
0564     struct mthca_ucontext *context = rdma_udata_to_drv_context(
0565         udata, struct mthca_ucontext, ibucontext);
0566     struct mthca_mailbox *mailbox;
0567     struct mthca_qp_param *qp_param;
0568     struct mthca_qp_context *qp_context;
0569     u32 sqd_event = 0;
0570     int err = -EINVAL;
0571 
0572     mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
0573     if (IS_ERR(mailbox)) {
0574         err = PTR_ERR(mailbox);
0575         goto out;
0576     }
0577     qp_param = mailbox->buf;
0578     qp_context = &qp_param->context;
0579     memset(qp_param, 0, sizeof *qp_param);
0580 
0581     qp_context->flags      = cpu_to_be32((to_mthca_state(new_state) << 28) |
0582                          (to_mthca_st(qp->transport) << 16));
0583     qp_context->flags     |= cpu_to_be32(MTHCA_QP_BIT_DE);
0584     if (!(attr_mask & IB_QP_PATH_MIG_STATE))
0585         qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
0586     else {
0587         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
0588         switch (attr->path_mig_state) {
0589         case IB_MIG_MIGRATED:
0590             qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
0591             break;
0592         case IB_MIG_REARM:
0593             qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
0594             break;
0595         case IB_MIG_ARMED:
0596             qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
0597             break;
0598         }
0599     }
0600 
0601     /* leave tavor_sched_queue as 0 */
0602 
0603     if (qp->transport == MLX || qp->transport == UD)
0604         qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
0605     else if (attr_mask & IB_QP_PATH_MTU) {
0606         if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_2048) {
0607             mthca_dbg(dev, "path MTU (%u) is invalid\n",
0608                   attr->path_mtu);
0609             goto out_mailbox;
0610         }
0611         qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;
0612     }
0613 
0614     if (mthca_is_memfree(dev)) {
0615         if (qp->rq.max)
0616             qp_context->rq_size_stride = ilog2(qp->rq.max) << 3;
0617         qp_context->rq_size_stride |= qp->rq.wqe_shift - 4;
0618 
0619         if (qp->sq.max)
0620             qp_context->sq_size_stride = ilog2(qp->sq.max) << 3;
0621         qp_context->sq_size_stride |= qp->sq.wqe_shift - 4;
0622     }
0623 
0624     /* leave arbel_sched_queue as 0 */
0625 
0626     if (qp->ibqp.uobject)
0627         qp_context->usr_page = cpu_to_be32(context->uar.index);
0628     else
0629         qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
0630     qp_context->local_qpn  = cpu_to_be32(qp->qpn);
0631     if (attr_mask & IB_QP_DEST_QPN) {
0632         qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
0633     }
0634 
0635     if (qp->transport == MLX)
0636         qp_context->pri_path.port_pkey |=
0637             cpu_to_be32(qp->port << 24);
0638     else {
0639         if (attr_mask & IB_QP_PORT) {
0640             qp_context->pri_path.port_pkey |=
0641                 cpu_to_be32(attr->port_num << 24);
0642             qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
0643         }
0644     }
0645 
0646     if (attr_mask & IB_QP_PKEY_INDEX) {
0647         qp_context->pri_path.port_pkey |=
0648             cpu_to_be32(attr->pkey_index);
0649         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
0650     }
0651 
0652     if (attr_mask & IB_QP_RNR_RETRY) {
0653         qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
0654             attr->rnr_retry << 5;
0655         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY |
0656                             MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
0657     }
0658 
0659     if (attr_mask & IB_QP_AV) {
0660         if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
0661                    attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
0662             goto out_mailbox;
0663 
0664         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
0665     }
0666 
0667     if (ibqp->qp_type == IB_QPT_RC &&
0668         cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
0669         u8 sched_queue = ibqp->uobject ? 0x2 : 0x1;
0670 
0671         if (mthca_is_memfree(dev))
0672             qp_context->rlkey_arbel_sched_queue |= sched_queue;
0673         else
0674             qp_context->tavor_sched_queue |= cpu_to_be32(sched_queue);
0675 
0676         qp_param->opt_param_mask |=
0677             cpu_to_be32(MTHCA_QP_OPTPAR_SCHED_QUEUE);
0678     }
0679 
0680     if (attr_mask & IB_QP_TIMEOUT) {
0681         qp_context->pri_path.ackto = attr->timeout << 3;
0682         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
0683     }
0684 
0685     if (attr_mask & IB_QP_ALT_PATH) {
0686         if (attr->alt_pkey_index >= dev->limits.pkey_table_len) {
0687             mthca_dbg(dev, "Alternate P_Key index (%u) too large. max is %d\n",
0688                   attr->alt_pkey_index, dev->limits.pkey_table_len-1);
0689             goto out_mailbox;
0690         }
0691 
0692         if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
0693             mthca_dbg(dev, "Alternate port number (%u) is invalid\n",
0694                 attr->alt_port_num);
0695             goto out_mailbox;
0696         }
0697 
0698         if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
0699                    rdma_ah_get_port_num(&attr->alt_ah_attr)))
0700             goto out_mailbox;
0701 
0702         qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
0703                                   attr->alt_port_num << 24);
0704         qp_context->alt_path.ackto = attr->alt_timeout << 3;
0705         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
0706     }
0707 
0708     /* leave rdd as 0 */
0709     qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
0710     /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
0711     qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
0712     qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
0713                          (MTHCA_FLIGHT_LIMIT << 24) |
0714                          MTHCA_QP_BIT_SWE);
0715     if (qp->sq_policy == IB_SIGNAL_ALL_WR)
0716         qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
0717     if (attr_mask & IB_QP_RETRY_CNT) {
0718         qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
0719         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
0720     }
0721 
0722     if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
0723         if (attr->max_rd_atomic) {
0724             qp_context->params1 |=
0725                 cpu_to_be32(MTHCA_QP_BIT_SRE |
0726                         MTHCA_QP_BIT_SAE);
0727             qp_context->params1 |=
0728                 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
0729         }
0730         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
0731     }
0732 
0733     if (attr_mask & IB_QP_SQ_PSN)
0734         qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
0735     qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);
0736 
0737     if (mthca_is_memfree(dev)) {
0738         qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
0739         qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
0740     }
0741 
0742     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
0743         if (attr->max_dest_rd_atomic)
0744             qp_context->params2 |=
0745                 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
0746 
0747         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
0748     }
0749 
0750     if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
0751         qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
0752         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
0753                             MTHCA_QP_OPTPAR_RRE |
0754                             MTHCA_QP_OPTPAR_RAE);
0755     }
0756 
0757     qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
0758 
0759     if (ibqp->srq)
0760         qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC);
0761 
0762     if (attr_mask & IB_QP_MIN_RNR_TIMER) {
0763         qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
0764         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
0765     }
0766     if (attr_mask & IB_QP_RQ_PSN)
0767         qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
0768 
0769     qp_context->ra_buff_indx =
0770         cpu_to_be32(dev->qp_table.rdb_base +
0771                 ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
0772                  dev->qp_table.rdb_shift));
0773 
0774     qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);
0775 
0776     if (mthca_is_memfree(dev))
0777         qp_context->rcv_db_index   = cpu_to_be32(qp->rq.db_index);
0778 
0779     if (attr_mask & IB_QP_QKEY) {
0780         qp_context->qkey = cpu_to_be32(attr->qkey);
0781         qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
0782     }
0783 
0784     if (ibqp->srq)
0785         qp_context->srqn = cpu_to_be32(1 << 24 |
0786                            to_msrq(ibqp->srq)->srqn);
0787 
0788     if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD  &&
0789         attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY       &&
0790         attr->en_sqd_async_notify)
0791         sqd_event = 1 << 31;
0792 
0793     err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0,
0794                   mailbox, sqd_event);
0795     if (err) {
0796         mthca_warn(dev, "modify QP %d->%d returned %d.\n",
0797                cur_state, new_state, err);
0798         goto out_mailbox;
0799     }
0800 
0801     qp->state = new_state;
0802     if (attr_mask & IB_QP_ACCESS_FLAGS)
0803         qp->atomic_rd_en = attr->qp_access_flags;
0804     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
0805         qp->resp_depth = attr->max_dest_rd_atomic;
0806     if (attr_mask & IB_QP_PORT)
0807         qp->port = attr->port_num;
0808     if (attr_mask & IB_QP_ALT_PATH)
0809         qp->alt_port = attr->alt_port_num;
0810 
0811     if (is_sqp(dev, qp))
0812         store_attrs(qp->sqp, attr, attr_mask);
0813 
0814     /*
0815      * If we moved QP0 to RTR, bring the IB link up; if we moved
0816      * QP0 to RESET or ERROR, bring the link back down.
0817      */
0818     if (is_qp0(dev, qp)) {
0819         if (cur_state != IB_QPS_RTR &&
0820             new_state == IB_QPS_RTR)
0821             init_port(dev, qp->port);
0822 
0823         if (cur_state != IB_QPS_RESET &&
0824             cur_state != IB_QPS_ERR &&
0825             (new_state == IB_QPS_RESET ||
0826              new_state == IB_QPS_ERR))
0827             mthca_CLOSE_IB(dev, qp->port);
0828     }
0829 
0830     /*
0831      * If we moved a kernel QP to RESET, clean up all old CQ
0832      * entries and reinitialize the QP.
0833      */
0834     if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
0835         mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
0836                    qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
0837         if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
0838             mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
0839 
0840         mthca_wq_reset(&qp->sq);
0841         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
0842 
0843         mthca_wq_reset(&qp->rq);
0844         qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
0845 
0846         if (mthca_is_memfree(dev)) {
0847             *qp->sq.db = 0;
0848             *qp->rq.db = 0;
0849         }
0850     }
0851 
0852 out_mailbox:
0853     mthca_free_mailbox(dev, mailbox);
0854 out:
0855     return err;
0856 }
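/*
 * __mthca_modify_qp() builds the whole QP context in a zeroed command
 * mailbox: the target state and service type go into the top of
 * context->flags (state << 28, transport << 16), and every optional
 * attribute that is present both fills its context field and sets the
 * matching MTHCA_QP_OPTPAR_* bit so the firmware only applies those
 * fields.  Once MODIFY_QP succeeds, the cached software state (state,
 * atomic_rd_en, resp_depth, port) is updated, QP0 transitions bring the
 * IB link up or down, and moving a kernel QP to RESET flushes its CQ
 * entries and reinitializes the work queues and doorbell records.
 */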
0857 
0858 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
0859             struct ib_udata *udata)
0860 {
0861     struct mthca_dev *dev = to_mdev(ibqp->device);
0862     struct mthca_qp *qp = to_mqp(ibqp);
0863     enum ib_qp_state cur_state, new_state;
0864     int err = -EINVAL;
0865 
0866     if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
0867         return -EOPNOTSUPP;
0868 
0869     mutex_lock(&qp->mutex);
0870     if (attr_mask & IB_QP_CUR_STATE) {
0871         cur_state = attr->cur_qp_state;
0872     } else {
0873         spin_lock_irq(&qp->sq.lock);
0874         spin_lock(&qp->rq.lock);
0875         cur_state = qp->state;
0876         spin_unlock(&qp->rq.lock);
0877         spin_unlock_irq(&qp->sq.lock);
0878     }
0879 
0880     new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
0881 
0882     if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
0883                 attr_mask)) {
0884         mthca_dbg(dev, "Bad QP transition (transport %d) "
0885               "%d->%d with attr 0x%08x\n",
0886               qp->transport, cur_state, new_state,
0887               attr_mask);
0888         goto out;
0889     }
0890 
0891     if ((attr_mask & IB_QP_PKEY_INDEX) &&
0892          attr->pkey_index >= dev->limits.pkey_table_len) {
0893         mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
0894               attr->pkey_index, dev->limits.pkey_table_len-1);
0895         goto out;
0896     }
0897 
0898     if ((attr_mask & IB_QP_PORT) &&
0899         (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
0900         mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
0901         goto out;
0902     }
0903 
0904     if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
0905         attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
0906         mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
0907               attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
0908         goto out;
0909     }
0910 
0911     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
0912         attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
0913         mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
0914               attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
0915         goto out;
0916     }
0917 
0918     if (cur_state == new_state && cur_state == IB_QPS_RESET) {
0919         err = 0;
0920         goto out;
0921     }
0922 
0923     err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state,
0924                 udata);
0925 
0926 out:
0927     mutex_unlock(&qp->mutex);
0928     return err;
0929 }
0930 
0931 static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
0932 {
0933     /*
0934      * Calculate the maximum size of WQE s/g segments, excluding
0935      * the next segment and other non-data segments.
0936      */
0937     int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
0938 
0939     switch (qp->transport) {
0940     case MLX:
0941         max_data_size -= 2 * sizeof (struct mthca_data_seg);
0942         break;
0943 
0944     case UD:
0945         if (mthca_is_memfree(dev))
0946             max_data_size -= sizeof (struct mthca_arbel_ud_seg);
0947         else
0948             max_data_size -= sizeof (struct mthca_tavor_ud_seg);
0949         break;
0950 
0951     default:
0952         max_data_size -= sizeof (struct mthca_raddr_seg);
0953         break;
0954     }
0955 
0956     return max_data_size;
0957 }
0958 
0959 static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
0960 {
0961     /* We don't support inline data for kernel QPs (yet). */
0962     return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
0963 }
0964 
0965 static void mthca_adjust_qp_caps(struct mthca_dev *dev,
0966                  struct mthca_pd *pd,
0967                  struct mthca_qp *qp)
0968 {
0969     int max_data_size = mthca_max_data_size(dev, qp,
0970                         min(dev->limits.max_desc_sz,
0971                             1 << qp->sq.wqe_shift));
0972 
0973     qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
0974 
0975     qp->sq.max_gs = min_t(int, dev->limits.max_sg,
0976                   max_data_size / sizeof (struct mthca_data_seg));
0977     qp->rq.max_gs = min_t(int, dev->limits.max_sg,
0978                    (min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) -
0979                 sizeof (struct mthca_next_seg)) /
0980                    sizeof (struct mthca_data_seg));
0981 }
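/*
 * The usable scatter/gather space in a WQE is the descriptor size minus
 * the next segment and the per-transport control segments: two data
 * segments are reserved on MLX QPs (UD header plus checksum), a UD
 * address segment on UD QPs, and an RDMA remote-address segment
 * otherwise.  Inline data is only offered to userspace QPs and loses
 * MTHCA_INLINE_HEADER_SIZE bytes to the inline header.  Rough example,
 * assuming the 16-byte next/remote-address/data segments defined in
 * mthca_wqe.h: a 128-byte RC send descriptor leaves 128 - 16 - 16 = 96
 * bytes, i.e. at most 6 gather entries.
 */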
0982 
0983 /*
0984  * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
0985  * rq.max_gs and sq.max_gs must all be assigned.
0986  * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
0987  * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
0988  * queue)
0989  */
0990 static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
0991                    struct mthca_pd *pd,
0992                    struct mthca_qp *qp,
0993                    struct ib_udata *udata)
0994 {
0995     int size;
0996     int err = -ENOMEM;
0997 
0998     size = sizeof (struct mthca_next_seg) +
0999         qp->rq.max_gs * sizeof (struct mthca_data_seg);
1000 
1001     if (size > dev->limits.max_desc_sz)
1002         return -EINVAL;
1003 
1004     for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
1005          qp->rq.wqe_shift++)
1006         ; /* nothing */
1007 
1008     size = qp->sq.max_gs * sizeof (struct mthca_data_seg);
1009     switch (qp->transport) {
1010     case MLX:
1011         size += 2 * sizeof (struct mthca_data_seg);
1012         break;
1013 
1014     case UD:
1015         size += mthca_is_memfree(dev) ?
1016             sizeof (struct mthca_arbel_ud_seg) :
1017             sizeof (struct mthca_tavor_ud_seg);
1018         break;
1019 
1020     case UC:
1021         size += sizeof (struct mthca_raddr_seg);
1022         break;
1023 
1024     case RC:
1025         size += sizeof (struct mthca_raddr_seg);
1026         /*
1027          * An atomic op will require an atomic segment, a
1028          * remote address segment and one scatter entry.
1029          */
1030         size = max_t(int, size,
1031                  sizeof (struct mthca_atomic_seg) +
1032                  sizeof (struct mthca_raddr_seg) +
1033                  sizeof (struct mthca_data_seg));
1034         break;
1035 
1036     default:
1037         break;
1038     }
1039 
1040     /* Make sure that we have enough space for a bind request */
1041     size = max_t(int, size, sizeof (struct mthca_bind_seg));
1042 
1043     size += sizeof (struct mthca_next_seg);
1044 
1045     if (size > dev->limits.max_desc_sz)
1046         return -EINVAL;
1047 
1048     for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
1049          qp->sq.wqe_shift++)
1050         ; /* nothing */
1051 
1052     qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1053                     1 << qp->sq.wqe_shift);
1054 
1055     /*
1056      * If this is a userspace QP, we don't actually have to
1057      * allocate anything.  All we need is to calculate the WQE
1058      * sizes and the send_wqe_offset, so we're done now.
1059      */
1060     if (udata)
1061         return 0;
1062 
1063     size = PAGE_ALIGN(qp->send_wqe_offset +
1064               (qp->sq.max << qp->sq.wqe_shift));
1065 
1066     qp->wrid = kmalloc_array(qp->rq.max + qp->sq.max, sizeof(u64),
1067                  GFP_KERNEL);
1068     if (!qp->wrid)
1069         goto err_out;
1070 
1071     err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE,
1072                   &qp->queue, &qp->is_direct, pd, 0, &qp->mr);
1073     if (err)
1074         goto err_out;
1075 
1076     return 0;
1077 
1078 err_out:
1079     kfree(qp->wrid);
1080     return err;
1081 }
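/*
 * The WQE strides are derived from the worst-case descriptor for each
 * queue and rounded up to a power of two with a 64-byte minimum (the
 * shift loops start at 6); e.g. an 80-byte worst case gives
 * wqe_shift == 7, i.e. 128-byte WQEs.  The send queue is laid out after
 * the receive queue, with send_wqe_offset aligned to the send stride.
 * For userspace QPs only these sizes are computed; kernel QPs also get
 * the wrid array and the WQE buffer itself (a single direct buffer when
 * it fits within MTHCA_MAX_DIRECT_QP_SIZE, a page list otherwise).
 */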
1082 
1083 static void mthca_free_wqe_buf(struct mthca_dev *dev,
1084                    struct mthca_qp *qp)
1085 {
1086     mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset +
1087                        (qp->sq.max << qp->sq.wqe_shift)),
1088                &qp->queue, qp->is_direct, &qp->mr);
1089     kfree(qp->wrid);
1090 }
1091 
1092 static int mthca_map_memfree(struct mthca_dev *dev,
1093                  struct mthca_qp *qp)
1094 {
1095     int ret;
1096 
1097     if (mthca_is_memfree(dev)) {
1098         ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
1099         if (ret)
1100             return ret;
1101 
1102         ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
1103         if (ret)
1104             goto err_qpc;
1105 
1106         ret = mthca_table_get(dev, dev->qp_table.rdb_table,
1107                       qp->qpn << dev->qp_table.rdb_shift);
1108         if (ret)
1109             goto err_eqpc;
1110 
1111     }
1112 
1113     return 0;
1114 
1115 err_eqpc:
1116     mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1117 
1118 err_qpc:
1119     mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1120 
1121     return ret;
1122 }
1123 
1124 static void mthca_unmap_memfree(struct mthca_dev *dev,
1125                 struct mthca_qp *qp)
1126 {
1127     mthca_table_put(dev, dev->qp_table.rdb_table,
1128             qp->qpn << dev->qp_table.rdb_shift);
1129     mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
1130     mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
1131 }
1132 
1133 static int mthca_alloc_memfree(struct mthca_dev *dev,
1134                    struct mthca_qp *qp)
1135 {
1136     if (mthca_is_memfree(dev)) {
1137         qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
1138                          qp->qpn, &qp->rq.db);
1139         if (qp->rq.db_index < 0)
1140             return -ENOMEM;
1141 
1142         qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
1143                          qp->qpn, &qp->sq.db);
1144         if (qp->sq.db_index < 0) {
1145             mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1146             return -ENOMEM;
1147         }
1148     }
1149 
1150     return 0;
1151 }
1152 
1153 static void mthca_free_memfree(struct mthca_dev *dev,
1154                    struct mthca_qp *qp)
1155 {
1156     if (mthca_is_memfree(dev)) {
1157         mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
1158         mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
1159     }
1160 }
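/*
 * Doorbell records only exist on mem-free (Arbel-mode) HCAs, which read
 * the ring state from these host-memory entries; on Tavor the driver
 * rings doorbells by writing directly to the UAR, so there is nothing to
 * allocate or free here.
 */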
1161 
1162 static int mthca_alloc_qp_common(struct mthca_dev *dev,
1163                  struct mthca_pd *pd,
1164                  struct mthca_cq *send_cq,
1165                  struct mthca_cq *recv_cq,
1166                  enum ib_sig_type send_policy,
1167                  struct mthca_qp *qp,
1168                  struct ib_udata *udata)
1169 {
1170     int ret;
1171     int i;
1172     struct mthca_next_seg *next;
1173 
1174     qp->refcount = 1;
1175     init_waitqueue_head(&qp->wait);
1176     mutex_init(&qp->mutex);
1177     qp->state        = IB_QPS_RESET;
1178     qp->atomic_rd_en = 0;
1179     qp->resp_depth   = 0;
1180     qp->sq_policy    = send_policy;
1181     mthca_wq_reset(&qp->sq);
1182     mthca_wq_reset(&qp->rq);
1183 
1184     spin_lock_init(&qp->sq.lock);
1185     spin_lock_init(&qp->rq.lock);
1186 
1187     ret = mthca_map_memfree(dev, qp);
1188     if (ret)
1189         return ret;
1190 
1191     ret = mthca_alloc_wqe_buf(dev, pd, qp, udata);
1192     if (ret) {
1193         mthca_unmap_memfree(dev, qp);
1194         return ret;
1195     }
1196 
1197     mthca_adjust_qp_caps(dev, pd, qp);
1198 
1199     /*
1200      * If this is a userspace QP, we're done now.  The doorbells
1201      * will be allocated and buffers will be initialized in
1202      * userspace.
1203      */
1204     if (udata)
1205         return 0;
1206 
1207     ret = mthca_alloc_memfree(dev, qp);
1208     if (ret) {
1209         mthca_free_wqe_buf(dev, qp);
1210         mthca_unmap_memfree(dev, qp);
1211         return ret;
1212     }
1213 
1214     if (mthca_is_memfree(dev)) {
1215         struct mthca_data_seg *scatter;
1216         int size = (sizeof (struct mthca_next_seg) +
1217                 qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
1218 
1219         for (i = 0; i < qp->rq.max; ++i) {
1220             next = get_recv_wqe(qp, i);
1221             next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
1222                            qp->rq.wqe_shift);
1223             next->ee_nds = cpu_to_be32(size);
1224 
1225             for (scatter = (void *) (next + 1);
1226                  (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
1227                  ++scatter)
1228                 scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
1229         }
1230 
1231         for (i = 0; i < qp->sq.max; ++i) {
1232             next = get_send_wqe(qp, i);
1233             next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
1234                             qp->sq.wqe_shift) +
1235                            qp->send_wqe_offset);
1236         }
1237     } else {
1238         for (i = 0; i < qp->rq.max; ++i) {
1239             next = get_recv_wqe(qp, i);
1240             next->nda_op = htonl((((i + 1) % qp->rq.max) <<
1241                           qp->rq.wqe_shift) | 1);
1242         }
1243 
1244     }
1245 
1246     qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
1247     qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
1248 
1249     return 0;
1250 }
1251 
1252 static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
1253                  struct mthca_pd *pd, struct mthca_qp *qp)
1254 {
1255     int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
1256 
1257     /* Sanity check QP size before proceeding */
1258     if (cap->max_send_wr     > dev->limits.max_wqes ||
1259         cap->max_recv_wr     > dev->limits.max_wqes ||
1260         cap->max_send_sge    > dev->limits.max_sg   ||
1261         cap->max_recv_sge    > dev->limits.max_sg   ||
1262         cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
1263         return -EINVAL;
1264 
1265     /*
1266      * For MLX transport we need 2 extra send gather entries:
1267      * one for the header and one for the checksum at the end
1268      */
1269     if (qp->transport == MLX && cap->max_send_sge + 2 > dev->limits.max_sg)
1270         return -EINVAL;
1271 
1272     if (mthca_is_memfree(dev)) {
1273         qp->rq.max = cap->max_recv_wr ?
1274             roundup_pow_of_two(cap->max_recv_wr) : 0;
1275         qp->sq.max = cap->max_send_wr ?
1276             roundup_pow_of_two(cap->max_send_wr) : 0;
1277     } else {
1278         qp->rq.max = cap->max_recv_wr;
1279         qp->sq.max = cap->max_send_wr;
1280     }
1281 
1282     qp->rq.max_gs = cap->max_recv_sge;
1283     qp->sq.max_gs = max_t(int, cap->max_send_sge,
1284                   ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
1285                     MTHCA_INLINE_CHUNK_SIZE) /
1286                   sizeof (struct mthca_data_seg));
1287 
1288     return 0;
1289 }
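/*
 * Queue depths are rounded up to powers of two on mem-free HCAs (Tavor
 * accepts the requested counts as-is), and MLX QPs need two spare gather
 * entries beyond the caller's request for the software-built UD header
 * and the trailing checksum.  sq.max_gs is also raised so the requested
 * inline data fits: inline payload travels in 16-byte chunks behind a
 * 4-byte inline header, so e.g. 60 bytes of inline data needs
 * ALIGN(60 + 4, 16) / 16 = 4 segments.
 */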
1290 
1291 int mthca_alloc_qp(struct mthca_dev *dev,
1292            struct mthca_pd *pd,
1293            struct mthca_cq *send_cq,
1294            struct mthca_cq *recv_cq,
1295            enum ib_qp_type type,
1296            enum ib_sig_type send_policy,
1297            struct ib_qp_cap *cap,
1298            struct mthca_qp *qp,
1299            struct ib_udata *udata)
1300 {
1301     int err;
1302 
1303     switch (type) {
1304     case IB_QPT_RC: qp->transport = RC; break;
1305     case IB_QPT_UC: qp->transport = UC; break;
1306     case IB_QPT_UD: qp->transport = UD; break;
1307     default: return -EINVAL;
1308     }
1309 
1310     err = mthca_set_qp_size(dev, cap, pd, qp);
1311     if (err)
1312         return err;
1313 
1314     qp->qpn = mthca_alloc(&dev->qp_table.alloc);
1315     if (qp->qpn == -1)
1316         return -ENOMEM;
1317 
1318     /* initialize port to zero for error-catching. */
1319     qp->port = 0;
1320 
1321     err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1322                     send_policy, qp, udata);
1323     if (err) {
1324         mthca_free(&dev->qp_table.alloc, qp->qpn);
1325         return err;
1326     }
1327 
1328     spin_lock_irq(&dev->qp_table.lock);
1329     mthca_array_set(&dev->qp_table.qp,
1330             qp->qpn & (dev->limits.num_qps - 1), qp);
1331     spin_unlock_irq(&dev->qp_table.lock);
1332 
1333     return 0;
1334 }
1335 
1336 static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1337     __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
1338 {
1339     if (send_cq == recv_cq) {
1340         spin_lock_irq(&send_cq->lock);
1341         __acquire(&recv_cq->lock);
1342     } else if (send_cq->cqn < recv_cq->cqn) {
1343         spin_lock_irq(&send_cq->lock);
1344         spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1345     } else {
1346         spin_lock_irq(&recv_cq->lock);
1347         spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
1348     }
1349 }
1350 
1351 static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1352     __releases(&send_cq->lock) __releases(&recv_cq->lock)
1353 {
1354     if (send_cq == recv_cq) {
1355         __release(&recv_cq->lock);
1356         spin_unlock_irq(&send_cq->lock);
1357     } else if (send_cq->cqn < recv_cq->cqn) {
1358         spin_unlock(&recv_cq->lock);
1359         spin_unlock_irq(&send_cq->lock);
1360     } else {
1361         spin_unlock(&send_cq->lock);
1362         spin_unlock_irq(&recv_cq->lock);
1363     }
1364 }
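/*
 * When the send and receive CQs differ they are always locked in
 * ascending CQN order (and unlocked in reverse), so two paths locking the
 * same pair of CQs in opposite roles cannot deadlock.  The equal-CQ case
 * takes the lock once; the __acquire/__release calls only keep sparse's
 * lock annotations balanced.
 */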
1365 
1366 int mthca_alloc_sqp(struct mthca_dev *dev,
1367             struct mthca_pd *pd,
1368             struct mthca_cq *send_cq,
1369             struct mthca_cq *recv_cq,
1370             enum ib_sig_type send_policy,
1371             struct ib_qp_cap *cap,
1372             int qpn,
1373             u32 port,
1374             struct mthca_qp *qp,
1375             struct ib_udata *udata)
1376 {
1377     u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
1378     int err;
1379 
1380     qp->transport = MLX;
1381     err = mthca_set_qp_size(dev, cap, pd, qp);
1382     if (err)
1383         return err;
1384 
1385     qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE;
1386     qp->sqp->header_buf =
1387         dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
1388                    &qp->sqp->header_dma, GFP_KERNEL);
1389     if (!qp->sqp->header_buf)
1390         return -ENOMEM;
1391 
1392     spin_lock_irq(&dev->qp_table.lock);
1393     if (mthca_array_get(&dev->qp_table.qp, mqpn))
1394         err = -EBUSY;
1395     else
1396         mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
1397     spin_unlock_irq(&dev->qp_table.lock);
1398 
1399     if (err)
1400         goto err_out;
1401 
1402     qp->port      = port;
1403     qp->qpn       = mqpn;
1404     qp->transport = MLX;
1405 
1406     err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
1407                     send_policy, qp, udata);
1408     if (err)
1409         goto err_out_free;
1410 
1411     atomic_inc(&pd->sqp_count);
1412 
1413     return 0;
1414 
1415  err_out_free:
1416     /*
1417      * Lock CQs here, so that CQ polling code can do QP lookup
1418      * without taking a lock.
1419      */
1420     mthca_lock_cqs(send_cq, recv_cq);
1421 
1422     spin_lock(&dev->qp_table.lock);
1423     mthca_array_clear(&dev->qp_table.qp, mqpn);
1424     spin_unlock(&dev->qp_table.lock);
1425 
1426     mthca_unlock_cqs(send_cq, recv_cq);
1427 
1428 err_out:
1429     dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
1430               qp->sqp->header_buf, qp->sqp->header_dma);
1431     return err;
1432 }
1433 
1434 static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp)
1435 {
1436     int c;
1437 
1438     spin_lock_irq(&dev->qp_table.lock);
1439     c = qp->refcount;
1440     spin_unlock_irq(&dev->qp_table.lock);
1441 
1442     return c;
1443 }
1444 
1445 void mthca_free_qp(struct mthca_dev *dev,
1446            struct mthca_qp *qp)
1447 {
1448     struct mthca_cq *send_cq;
1449     struct mthca_cq *recv_cq;
1450 
1451     send_cq = to_mcq(qp->ibqp.send_cq);
1452     recv_cq = to_mcq(qp->ibqp.recv_cq);
1453 
1454     /*
1455      * Lock CQs here, so that CQ polling code can do QP lookup
1456      * without taking a lock.
1457      */
1458     mthca_lock_cqs(send_cq, recv_cq);
1459 
1460     spin_lock(&dev->qp_table.lock);
1461     mthca_array_clear(&dev->qp_table.qp,
1462               qp->qpn & (dev->limits.num_qps - 1));
1463     --qp->refcount;
1464     spin_unlock(&dev->qp_table.lock);
1465 
1466     mthca_unlock_cqs(send_cq, recv_cq);
1467 
1468     wait_event(qp->wait, !get_qp_refcount(dev, qp));
1469 
1470     if (qp->state != IB_QPS_RESET)
1471         mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0,
1472                 NULL, 0);
1473 
1474     /*
1475      * If this is a userspace QP, the buffers, MR, CQs and so on
1476      * will be cleaned up in userspace, so all we have to do is
1477      * unref the mem-free tables and free the QPN in our table.
1478      */
1479     if (!qp->ibqp.uobject) {
1480         mthca_cq_clean(dev, recv_cq, qp->qpn,
1481                    qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1482         if (send_cq != recv_cq)
1483             mthca_cq_clean(dev, send_cq, qp->qpn, NULL);
1484 
1485         mthca_free_memfree(dev, qp);
1486         mthca_free_wqe_buf(dev, qp);
1487     }
1488 
1489     mthca_unmap_memfree(dev, qp);
1490 
1491     if (is_sqp(dev, qp)) {
1492         atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
1493         dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
1494                   qp->sqp->header_buf, qp->sqp->header_dma);
1495     } else
1496         mthca_free(&dev->qp_table.alloc, qp->qpn);
1497 }
1498 
1499 /* Create UD header for an MLX send and build a data segment for it */
1500 static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind,
1501                 const struct ib_ud_wr *wr,
1502                 struct mthca_mlx_seg *mlx,
1503                 struct mthca_data_seg *data)
1504 {
1505     struct mthca_sqp *sqp = qp->sqp;
1506     int header_size;
1507     int err;
1508     u16 pkey;
1509 
1510     ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
1511               mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
1512               &sqp->ud_header);
1513 
1514     err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
1515     if (err)
1516         return err;
1517     mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
1518     mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
1519                   (sqp->ud_header.lrh.destination_lid ==
1520                    IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
1521                   (sqp->ud_header.lrh.service_level << 8));
1522     mlx->rlid = sqp->ud_header.lrh.destination_lid;
1523     mlx->vcrc = 0;
1524 
1525     switch (wr->wr.opcode) {
1526     case IB_WR_SEND:
1527         sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1528         sqp->ud_header.immediate_present = 0;
1529         break;
1530     case IB_WR_SEND_WITH_IMM:
1531         sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1532         sqp->ud_header.immediate_present = 1;
1533         sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
1534         break;
1535     default:
1536         return -EINVAL;
1537     }
1538 
1539     sqp->ud_header.lrh.virtual_lane    = !qp->ibqp.qp_num ? 15 : 0;
1540     if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1541         sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1542     sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
1543     if (!qp->ibqp.qp_num)
1544         ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index,
1545                    &pkey);
1546     else
1547         ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index,
1548                    &pkey);
1549     sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1550     sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
1551     sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1552     sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
1553                            sqp->qkey : wr->remote_qkey);
1554     sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
1555 
1556     header_size = ib_ud_header_pack(&sqp->ud_header,
1557                     sqp->header_buf +
1558                     ind * MTHCA_UD_HEADER_SIZE);
1559 
1560     data->byte_count = cpu_to_be32(header_size);
1561     data->lkey       = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey);
1562     data->addr       = cpu_to_be64(sqp->header_dma +
1563                        ind * MTHCA_UD_HEADER_SIZE);
1564 
1565     return 0;
1566 }
1567 
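/*
 * Check whether posting nreq more WQEs would overflow the work queue.
 * wq->tail is only advanced by completion processing, which runs with
 * the CQ lock held, so if the unlocked check suggests the queue is
 * full we recheck under cq->lock to pick up an up-to-date tail.
 */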
1568 static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
1569                     struct ib_cq *ib_cq)
1570 {
1571     unsigned cur;
1572     struct mthca_cq *cq;
1573 
1574     cur = wq->head - wq->tail;
1575     if (likely(cur + nreq < wq->max))
1576         return 0;
1577 
1578     cq = to_mcq(ib_cq);
1579     spin_lock(&cq->lock);
1580     cur = wq->head - wq->tail;
1581     spin_unlock(&cq->lock);
1582 
1583     return cur + nreq >= wq->max;
1584 }
1585 
1586 static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
1587                       u64 remote_addr, u32 rkey)
1588 {
1589     rseg->raddr    = cpu_to_be64(remote_addr);
1590     rseg->rkey     = cpu_to_be32(rkey);
1591     rseg->reserved = 0;
1592 }
1593 
1594 static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
1595                        const struct ib_atomic_wr *wr)
1596 {
1597     if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1598         aseg->swap_add = cpu_to_be64(wr->swap);
1599         aseg->compare  = cpu_to_be64(wr->compare_add);
1600     } else {
1601         aseg->swap_add = cpu_to_be64(wr->compare_add);
1602         aseg->compare  = 0;
1603     }
1605 }
1606 
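/*
 * UD address handling differs between the two generations: Tavor WQEs
 * reference the address vector by its DMA address and lkey so the
 * hardware can fetch it, while Arbel WQEs (below) carry a copy of the
 * MTHCA_AV_SIZE-byte address vector inline.
 */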
1607 static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
1608                  const struct ib_ud_wr *wr)
1609 {
1610     useg->lkey    = cpu_to_be32(to_mah(wr->ah)->key);
1611     useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
1612     useg->dqpn    = cpu_to_be32(wr->remote_qpn);
1613     useg->qkey    = cpu_to_be32(wr->remote_qkey);
1615 }
1616 
1617 static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
1618                  const struct ib_ud_wr *wr)
1619 {
1620     memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
1621     useg->dqpn = cpu_to_be32(wr->remote_qpn);
1622     useg->qkey = cpu_to_be32(wr->remote_qkey);
1623 }
1624 
1625 int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1626               const struct ib_send_wr **bad_wr)
1627 {
1628     struct mthca_dev *dev = to_mdev(ibqp->device);
1629     struct mthca_qp *qp = to_mqp(ibqp);
1630     void *wqe;
1631     void *prev_wqe;
1632     unsigned long flags;
1633     int err = 0;
1634     int nreq;
1635     int i;
1636     int size;
1637     /*
1638      * f0 and size0 are only used if nreq != 0, and they will
1639      * always be initialized the first time through the main loop
1640      * before nreq is incremented.  So nreq cannot become non-zero
1641      * without initializing f0 and size0, and they are in fact
1642      * never used uninitialized.
1643      */
1644     int size0;
1645     u32 f0;
1646     int ind;
1647     u8 op0 = 0;
1648 
1649     spin_lock_irqsave(&qp->sq.lock, flags);
1650 
1651     /* XXX check that state is OK to post send */
1652 
1653     ind = qp->sq.next_ind;
1654 
1655     for (nreq = 0; wr; ++nreq, wr = wr->next) {
1656         if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1657             mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1658                     " %d max, %d nreq)\n", qp->qpn,
1659                     qp->sq.head, qp->sq.tail,
1660                     qp->sq.max, nreq);
1661             err = -ENOMEM;
1662             *bad_wr = wr;
1663             goto out;
1664         }
1665 
1666         wqe = get_send_wqe(qp, ind);
1667         prev_wqe = qp->sq.last;
1668         qp->sq.last = wqe;
1669 
1670         ((struct mthca_next_seg *) wqe)->nda_op = 0;
1671         ((struct mthca_next_seg *) wqe)->ee_nds = 0;
1672         ((struct mthca_next_seg *) wqe)->flags =
1673             ((wr->send_flags & IB_SEND_SIGNALED) ?
1674              cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
1675             ((wr->send_flags & IB_SEND_SOLICITED) ?
1676              cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
1677             cpu_to_be32(1);
1678         if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1679             wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1680             ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
1681 
1682         wqe += sizeof (struct mthca_next_seg);
1683         size = sizeof (struct mthca_next_seg) / 16;
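        /*
         * WQE segment sizes are accounted in 16-byte units: "size"
         * accumulates sizeof(segment) / 16 for every segment added
         * below and ends up in the ee_nds word of the previous WQE.
         */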
1684 
1685         switch (qp->transport) {
1686         case RC:
1687             switch (wr->opcode) {
1688             case IB_WR_ATOMIC_CMP_AND_SWP:
1689             case IB_WR_ATOMIC_FETCH_AND_ADD:
1690                 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
1691                           atomic_wr(wr)->rkey);
1692                 wqe += sizeof (struct mthca_raddr_seg);
1693 
1694                 set_atomic_seg(wqe, atomic_wr(wr));
1695                 wqe += sizeof (struct mthca_atomic_seg);
1696                 size += (sizeof (struct mthca_raddr_seg) +
1697                      sizeof (struct mthca_atomic_seg)) / 16;
1698                 break;
1699 
1700             case IB_WR_RDMA_WRITE:
1701             case IB_WR_RDMA_WRITE_WITH_IMM:
1702             case IB_WR_RDMA_READ:
1703                 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
1704                           rdma_wr(wr)->rkey);
1705                 wqe  += sizeof (struct mthca_raddr_seg);
1706                 size += sizeof (struct mthca_raddr_seg) / 16;
1707                 break;
1708 
1709             default:
1710                 /* No extra segments required for sends */
1711                 break;
1712             }
1713 
1714             break;
1715 
1716         case UC:
1717             switch (wr->opcode) {
1718             case IB_WR_RDMA_WRITE:
1719             case IB_WR_RDMA_WRITE_WITH_IMM:
1720                 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
1721                           rdma_wr(wr)->rkey);
1722                 wqe  += sizeof (struct mthca_raddr_seg);
1723                 size += sizeof (struct mthca_raddr_seg) / 16;
1724                 break;
1725 
1726             default:
1727                 /* No extra segments required for sends */
1728                 break;
1729             }
1730 
1731             break;
1732 
1733         case UD:
1734             set_tavor_ud_seg(wqe, ud_wr(wr));
1735             wqe  += sizeof (struct mthca_tavor_ud_seg);
1736             size += sizeof (struct mthca_tavor_ud_seg) / 16;
1737             break;
1738 
1739         case MLX:
1740             err = build_mlx_header(
1741                 dev, qp, ind, ud_wr(wr),
1742                 wqe - sizeof(struct mthca_next_seg), wqe);
1743             if (err) {
1744                 *bad_wr = wr;
1745                 goto out;
1746             }
1747             wqe += sizeof (struct mthca_data_seg);
1748             size += sizeof (struct mthca_data_seg) / 16;
1749             break;
1750         }
1751 
1752         if (wr->num_sge > qp->sq.max_gs) {
1753             mthca_err(dev, "too many gathers\n");
1754             err = -EINVAL;
1755             *bad_wr = wr;
1756             goto out;
1757         }
1758 
1759         for (i = 0; i < wr->num_sge; ++i) {
1760             mthca_set_data_seg(wqe, wr->sg_list + i);
1761             wqe  += sizeof (struct mthca_data_seg);
1762             size += sizeof (struct mthca_data_seg) / 16;
1763         }
1764 
1765         /* Add one more inline data segment for ICRC */
1766         if (qp->transport == MLX) {
1767             ((struct mthca_data_seg *) wqe)->byte_count =
1768                 cpu_to_be32((1 << 31) | 4);
1769             ((u32 *) wqe)[1] = 0;
1770             wqe += sizeof (struct mthca_data_seg);
1771             size += sizeof (struct mthca_data_seg) / 16;
1772         }
1773 
1774         qp->wrid[ind + qp->rq.max] = wr->wr_id;
1775 
1776         if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
1777             mthca_err(dev, "opcode invalid\n");
1778             err = -EINVAL;
1779             *bad_wr = wr;
1780             goto out;
1781         }
1782 
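        /*
         * Link the new WQE into the previous one: write nda_op (next
         * WQE address and opcode) first, then, after a write barrier,
         * ee_nds (size, doorbell and fence bits), so that the hardware
         * does not follow the link before the descriptor is fully
         * written.
         */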
1783         ((struct mthca_next_seg *) prev_wqe)->nda_op =
1784             cpu_to_be32(((ind << qp->sq.wqe_shift) +
1785                      qp->send_wqe_offset) |
1786                     mthca_opcode[wr->opcode]);
1787         wmb();
1788         ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1789             cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
1790                     ((wr->send_flags & IB_SEND_FENCE) ?
1791                     MTHCA_NEXT_FENCE : 0));
1792 
1793         if (!nreq) {
1794             size0 = size;
1795             op0   = mthca_opcode[wr->opcode];
1796             f0    = wr->send_flags & IB_SEND_FENCE ?
1797                 MTHCA_SEND_DOORBELL_FENCE : 0;
1798         }
1799 
1800         ++ind;
1801         if (unlikely(ind >= qp->sq.max))
1802             ind -= qp->sq.max;
1803     }
1804 
1805 out:
1806     if (likely(nreq)) {
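        /*
         * Make sure the WQEs are written before ringing the MMIO send
         * doorbell, which tells the HCA where the first new WQE lives
         * (offset, fence flag and opcode) along with the QP number and
         * the WQE size in 16-byte chunks.
         */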
1807         wmb();
1808 
1809         mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
1810                    qp->send_wqe_offset) | f0 | op0,
1811                   (qp->qpn << 8) | size0,
1812                   dev->kar + MTHCA_SEND_DOORBELL,
1813                   MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1814     }
1815 
1816     qp->sq.next_ind = ind;
1817     qp->sq.head    += nreq;
1818 
1819     spin_unlock_irqrestore(&qp->sq.lock, flags);
1820     return err;
1821 }
1822 
1823 int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1824                  const struct ib_recv_wr **bad_wr)
1825 {
1826     struct mthca_dev *dev = to_mdev(ibqp->device);
1827     struct mthca_qp *qp = to_mqp(ibqp);
1828     unsigned long flags;
1829     int err = 0;
1830     int nreq;
1831     int i;
1832     int size;
1833     /*
1834      * size0 is only used if nreq != 0, and it will always be
1835      * initialized the first time through the main loop before
1836      * nreq is incremented.  So nreq cannot become non-zero
1837      * without initializing size0, and it is in fact never used
1838      * uninitialized.
1839      */
1840     int size0;
1841     int ind;
1842     void *wqe;
1843     void *prev_wqe;
1844 
1845     spin_lock_irqsave(&qp->rq.lock, flags);
1846 
1847     /* XXX check that state is OK to post receive */
1848 
1849     ind = qp->rq.next_ind;
1850 
1851     for (nreq = 0; wr; wr = wr->next) {
1852         if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1853             mthca_err(dev, "RQ %06x full (%u head, %u tail,"
1854                     " %d max, %d nreq)\n", qp->qpn,
1855                     qp->rq.head, qp->rq.tail,
1856                     qp->rq.max, nreq);
1857             err = -ENOMEM;
1858             *bad_wr = wr;
1859             goto out;
1860         }
1861 
1862         wqe = get_recv_wqe(qp, ind);
1863         prev_wqe = qp->rq.last;
1864         qp->rq.last = wqe;
1865 
1866         ((struct mthca_next_seg *) wqe)->ee_nds =
1867             cpu_to_be32(MTHCA_NEXT_DBD);
1868         ((struct mthca_next_seg *) wqe)->flags = 0;
1869 
1870         wqe += sizeof (struct mthca_next_seg);
1871         size = sizeof (struct mthca_next_seg) / 16;
1872 
1873         if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1874             err = -EINVAL;
1875             *bad_wr = wr;
1876             goto out;
1877         }
1878 
1879         for (i = 0; i < wr->num_sge; ++i) {
1880             mthca_set_data_seg(wqe, wr->sg_list + i);
1881             wqe  += sizeof (struct mthca_data_seg);
1882             size += sizeof (struct mthca_data_seg) / 16;
1883         }
1884 
1885         qp->wrid[ind] = wr->wr_id;
1886 
1887         ((struct mthca_next_seg *) prev_wqe)->ee_nds =
1888             cpu_to_be32(MTHCA_NEXT_DBD | size);
1889 
1890         if (!nreq)
1891             size0 = size;
1892 
1893         ++ind;
1894         if (unlikely(ind >= qp->rq.max))
1895             ind -= qp->rq.max;
1896 
1897         ++nreq;
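        /*
         * The receive doorbell carries the WQE count in its low byte
         * (see the post-loop doorbell below), so ring an intermediate
         * doorbell and restart the count once we reach
         * MTHCA_TAVOR_MAX_WQES_PER_RECV_DB requests.
         */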
1898         if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
1899             nreq = 0;
1900 
1901             wmb();
1902 
1903             mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
1904                       qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
1905                       MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1906 
1907             qp->rq.next_ind = ind;
1908             qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
1909         }
1910     }
1911 
1912 out:
1913     if (likely(nreq)) {
1914         wmb();
1915 
1916         mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
1917                   qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
1918                   MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1919     }
1920 
1921     qp->rq.next_ind = ind;
1922     qp->rq.head    += nreq;
1923 
1924     spin_unlock_irqrestore(&qp->rq.lock, flags);
1925     return err;
1926 }
1927 
1928 int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1929               const struct ib_send_wr **bad_wr)
1930 {
1931     struct mthca_dev *dev = to_mdev(ibqp->device);
1932     struct mthca_qp *qp = to_mqp(ibqp);
1933     u32 dbhi;
1934     void *wqe;
1935     void *prev_wqe;
1936     unsigned long flags;
1937     int err = 0;
1938     int nreq;
1939     int i;
1940     int size;
1941     /*
1942      * f0 and size0 are only used if nreq != 0, and they will
1943      * always be initialized the first time through the main loop
1944      * before nreq is incremented.  So nreq cannot become non-zero
1945      * without initializing f0 and size0, and they are in fact
1946      * never used uninitialized.
1947      */
1948     int size0;
1949     u32 f0;
1950     int ind;
1951     u8 op0 = 0;
1952 
1953     spin_lock_irqsave(&qp->sq.lock, flags);
1954 
1955     /* XXX check that state is OK to post send */
1956 
1957     ind = qp->sq.head & (qp->sq.max - 1);
1958 
1959     for (nreq = 0; wr; ++nreq, wr = wr->next) {
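        /*
         * The send doorbell encodes the request count in the top byte
         * of dbhi, so flush a full batch with an intermediate doorbell
         * record update and doorbell write every
         * MTHCA_ARBEL_MAX_WQES_PER_SEND_DB requests.
         */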
1960         if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
1961             nreq = 0;
1962 
1963             dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
1964                 ((qp->sq.head & 0xffff) << 8) | f0 | op0;
1965 
1966             qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
1967 
1968             /*
1969              * Make sure that descriptors are written before
1970              * doorbell record.
1971              */
1972             wmb();
1973             *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
1974 
1975             /*
1976              * Make sure doorbell record is written before we
1977              * write MMIO send doorbell.
1978              */
1979             wmb();
1980 
1981             mthca_write64(dbhi, (qp->qpn << 8) | size0,
1982                       dev->kar + MTHCA_SEND_DOORBELL,
1983                       MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
1984         }
1985 
1986         if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1987             mthca_err(dev, "SQ %06x full (%u head, %u tail,"
1988                     " %d max, %d nreq)\n", qp->qpn,
1989                     qp->sq.head, qp->sq.tail,
1990                     qp->sq.max, nreq);
1991             err = -ENOMEM;
1992             *bad_wr = wr;
1993             goto out;
1994         }
1995 
1996         wqe = get_send_wqe(qp, ind);
1997         prev_wqe = qp->sq.last;
1998         qp->sq.last = wqe;
1999 
2000         ((struct mthca_next_seg *) wqe)->flags =
2001             ((wr->send_flags & IB_SEND_SIGNALED) ?
2002              cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
2003             ((wr->send_flags & IB_SEND_SOLICITED) ?
2004              cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
2005             ((wr->send_flags & IB_SEND_IP_CSUM) ?
2006              cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
2007             cpu_to_be32(1);
2008         if (wr->opcode == IB_WR_SEND_WITH_IMM ||
2009             wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
2010             ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
2011 
2012         wqe += sizeof (struct mthca_next_seg);
2013         size = sizeof (struct mthca_next_seg) / 16;
2014 
2015         switch (qp->transport) {
2016         case RC:
2017             switch (wr->opcode) {
2018             case IB_WR_ATOMIC_CMP_AND_SWP:
2019             case IB_WR_ATOMIC_FETCH_AND_ADD:
2020                 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
2021                           atomic_wr(wr)->rkey);
2022                 wqe += sizeof (struct mthca_raddr_seg);
2023 
2024                 set_atomic_seg(wqe, atomic_wr(wr));
2025                 wqe  += sizeof (struct mthca_atomic_seg);
2026                 size += (sizeof (struct mthca_raddr_seg) +
2027                      sizeof (struct mthca_atomic_seg)) / 16;
2028                 break;
2029 
2030             case IB_WR_RDMA_READ:
2031             case IB_WR_RDMA_WRITE:
2032             case IB_WR_RDMA_WRITE_WITH_IMM:
2033                 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
2034                           rdma_wr(wr)->rkey);
2035                 wqe  += sizeof (struct mthca_raddr_seg);
2036                 size += sizeof (struct mthca_raddr_seg) / 16;
2037                 break;
2038 
2039             default:
2040                 /* No extra segments required for sends */
2041                 break;
2042             }
2043 
2044             break;
2045 
2046         case UC:
2047             switch (wr->opcode) {
2048             case IB_WR_RDMA_WRITE:
2049             case IB_WR_RDMA_WRITE_WITH_IMM:
2050                 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
2051                           rdma_wr(wr)->rkey);
2052                 wqe  += sizeof (struct mthca_raddr_seg);
2053                 size += sizeof (struct mthca_raddr_seg) / 16;
2054                 break;
2055 
2056             default:
2057                 /* No extra segments required for sends */
2058                 break;
2059             }
2060 
2061             break;
2062 
2063         case UD:
2064             set_arbel_ud_seg(wqe, ud_wr(wr));
2065             wqe  += sizeof (struct mthca_arbel_ud_seg);
2066             size += sizeof (struct mthca_arbel_ud_seg) / 16;
2067             break;
2068 
2069         case MLX:
2070             err = build_mlx_header(
2071                 dev, qp, ind, ud_wr(wr),
2072                 wqe - sizeof(struct mthca_next_seg), wqe);
2073             if (err) {
2074                 *bad_wr = wr;
2075                 goto out;
2076             }
2077             wqe += sizeof (struct mthca_data_seg);
2078             size += sizeof (struct mthca_data_seg) / 16;
2079             break;
2080         }
2081 
2082         if (wr->num_sge > qp->sq.max_gs) {
2083             mthca_err(dev, "too many gathers\n");
2084             err = -EINVAL;
2085             *bad_wr = wr;
2086             goto out;
2087         }
2088 
2089         for (i = 0; i < wr->num_sge; ++i) {
2090             mthca_set_data_seg(wqe, wr->sg_list + i);
2091             wqe  += sizeof (struct mthca_data_seg);
2092             size += sizeof (struct mthca_data_seg) / 16;
2093         }
2094 
2095         /* Add one more inline data segment for ICRC */
2096         if (qp->transport == MLX) {
2097             ((struct mthca_data_seg *) wqe)->byte_count =
2098                 cpu_to_be32((1 << 31) | 4);
2099             ((u32 *) wqe)[1] = 0;
2100             wqe += sizeof (struct mthca_data_seg);
2101             size += sizeof (struct mthca_data_seg) / 16;
2102         }
2103 
2104         qp->wrid[ind + qp->rq.max] = wr->wr_id;
2105 
2106         if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
2107             mthca_err(dev, "opcode invalid\n");
2108             err = -EINVAL;
2109             *bad_wr = wr;
2110             goto out;
2111         }
2112 
2113         ((struct mthca_next_seg *) prev_wqe)->nda_op =
2114             cpu_to_be32(((ind << qp->sq.wqe_shift) +
2115                      qp->send_wqe_offset) |
2116                     mthca_opcode[wr->opcode]);
2117         wmb();
2118         ((struct mthca_next_seg *) prev_wqe)->ee_nds =
2119             cpu_to_be32(MTHCA_NEXT_DBD | size |
2120                     ((wr->send_flags & IB_SEND_FENCE) ?
2121                      MTHCA_NEXT_FENCE : 0));
2122 
2123         if (!nreq) {
2124             size0 = size;
2125             op0   = mthca_opcode[wr->opcode];
2126             f0    = wr->send_flags & IB_SEND_FENCE ?
2127                 MTHCA_SEND_DOORBELL_FENCE : 0;
2128         }
2129 
2130         ++ind;
2131         if (unlikely(ind >= qp->sq.max))
2132             ind -= qp->sq.max;
2133     }
2134 
2135 out:
2136     if (likely(nreq)) {
2137         dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
2138 
2139         qp->sq.head += nreq;
2140 
2141         /*
2142          * Make sure that descriptors are written before
2143          * doorbell record.
2144          */
2145         wmb();
2146         *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
2147 
2148         /*
2149          * Make sure doorbell record is written before we
2150          * write MMIO send doorbell.
2151          */
2152         wmb();
2153 
2154         mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
2155                   MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
2156     }
2157 
2158     spin_unlock_irqrestore(&qp->sq.lock, flags);
2159     return err;
2160 }
2161 
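/*
 * On mem-free (Arbel mode) hardware, posting receive WQEs only
 * requires bumping the doorbell record in memory; there is no MMIO
 * receive doorbell as in the Tavor path above.
 */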
2162 int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2163                  const struct ib_recv_wr **bad_wr)
2164 {
2165     struct mthca_dev *dev = to_mdev(ibqp->device);
2166     struct mthca_qp *qp = to_mqp(ibqp);
2167     unsigned long flags;
2168     int err = 0;
2169     int nreq;
2170     int ind;
2171     int i;
2172     void *wqe;
2173 
2174     spin_lock_irqsave(&qp->rq.lock, flags);
2175 
2176     /* XXX check that state is OK to post receive */
2177 
2178     ind = qp->rq.head & (qp->rq.max - 1);
2179 
2180     for (nreq = 0; wr; ++nreq, wr = wr->next) {
2181         if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2182             mthca_err(dev, "RQ %06x full (%u head, %u tail,"
2183                     " %d max, %d nreq)\n", qp->qpn,
2184                     qp->rq.head, qp->rq.tail,
2185                     qp->rq.max, nreq);
2186             err = -ENOMEM;
2187             *bad_wr = wr;
2188             goto out;
2189         }
2190 
2191         wqe = get_recv_wqe(qp, ind);
2192 
2193         ((struct mthca_next_seg *) wqe)->flags = 0;
2194 
2195         wqe += sizeof (struct mthca_next_seg);
2196 
2197         if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2198             err = -EINVAL;
2199             *bad_wr = wr;
2200             goto out;
2201         }
2202 
2203         for (i = 0; i < wr->num_sge; ++i) {
2204             mthca_set_data_seg(wqe, wr->sg_list + i);
2205             wqe += sizeof (struct mthca_data_seg);
2206         }
2207 
2208         if (i < qp->rq.max_gs)
2209             mthca_set_data_seg_inval(wqe);
2210 
2211         qp->wrid[ind] = wr->wr_id;
2212 
2213         ++ind;
2214         if (unlikely(ind >= qp->rq.max))
2215             ind -= qp->rq.max;
2216     }
2217 out:
2218     if (likely(nreq)) {
2219         qp->rq.head += nreq;
2220 
2221         /*
2222          * Make sure that descriptors are written before
2223          * doorbell record.
2224          */
2225         wmb();
2226         *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
2227     }
2228 
2229     spin_unlock_irqrestore(&qp->rq.lock, flags);
2230     return err;
2231 }
2232 
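/*
 * Used by the CQ error handling when a WQE completes in error: reports
 * whether the WQE had its doorbell (DBD) bit set (*dbd) and rebuilds
 * the link word (*new_wqe) so the rest of the chain can still be
 * followed.
 */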
2233 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2234             int index, int *dbd, __be32 *new_wqe)
2235 {
2236     struct mthca_next_seg *next;
2237 
2238     /*
2239      * For SRQs, all receive WQEs generate a CQE, so we're always
2240      * at the end of the doorbell chain.
2241      */
2242     if (qp->ibqp.srq && !is_send) {
2243         *new_wqe = 0;
2244         return;
2245     }
2246 
2247     if (is_send)
2248         next = get_send_wqe(qp, index);
2249     else
2250         next = get_recv_wqe(qp, index);
2251 
2252     *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
2253     if (next->ee_nds & cpu_to_be32(0x3f))
2254         *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
2255             (next->ee_nds & cpu_to_be32(0x3f));
2256     else
2257         *new_wqe = 0;
2258 }
2259 
2260 int mthca_init_qp_table(struct mthca_dev *dev)
2261 {
2262     int err;
2263     int i;
2264 
2265     spin_lock_init(&dev->qp_table.lock);
2266 
2267     /*
2268      * We reserve 2 extra QPs per port for the special QPs.  The
2269      * special QP for port 1 has to be even, so round up.
2270      */
2271     dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
2272     err = mthca_alloc_init(&dev->qp_table.alloc,
2273                    dev->limits.num_qps,
2274                    (1 << 24) - 1,
2275                    dev->qp_table.sqp_start +
2276                    MTHCA_MAX_PORTS * 2);
2277     if (err)
2278         return err;
2279 
2280     err = mthca_array_init(&dev->qp_table.qp,
2281                    dev->limits.num_qps);
2282     if (err) {
2283         mthca_alloc_cleanup(&dev->qp_table.alloc);
2284         return err;
2285     }
2286 
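    /*
     * Tell the HCA which QPNs to use for the special QPs: sqp_start
     * and sqp_start + 1 for the SMI QPs (QP0 of each port),
     * sqp_start + 2 and + 3 for the GSI QPs.
     */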
2287     for (i = 0; i < 2; ++i) {
2288         err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
2289                     dev->qp_table.sqp_start + i * 2);
2290         if (err) {
2291             mthca_warn(dev, "CONF_SPECIAL_QP returned "
2292                    "%d, aborting.\n", err);
2293             goto err_out;
2294         }
2295     }
2296     return 0;
2297 
2298  err_out:
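    /* i doubles as the QP type here: IB_QPT_SMI == 0, IB_QPT_GSI == 1. */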
2299     for (i = 0; i < 2; ++i)
2300         mthca_CONF_SPECIAL_QP(dev, i, 0);
2301 
2302     mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2303     mthca_alloc_cleanup(&dev->qp_table.alloc);
2304 
2305     return err;
2306 }
2307 
2308 void mthca_cleanup_qp_table(struct mthca_dev *dev)
2309 {
2310     int i;
2311 
2312     for (i = 0; i < 2; ++i)
2313         mthca_CONF_SPECIAL_QP(dev, i, 0);
2314 
2315     mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
2316     mthca_alloc_cleanup(&dev->qp_table.alloc);
2317 }