0001 /*
0002  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 
0033 #include <linux/etherdevice.h>
0034 #include <rdma/ib_umem.h>
0035 #include <rdma/ib_cache.h>
0036 #include <rdma/ib_user_verbs.h>
0037 #include <rdma/rdma_counter.h>
0038 #include <linux/mlx5/fs.h>
0039 #include "mlx5_ib.h"
0040 #include "ib_rep.h"
0041 #include "counters.h"
0042 #include "cmd.h"
0043 #include "umr.h"
0044 #include "qp.h"
0045 #include "wr.h"
0046 
0047 enum {
0048     MLX5_IB_ACK_REQ_FREQ    = 8,
0049 };
0050 
0051 enum {
0052     MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
0053     MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
0054     MLX5_IB_LINK_TYPE_IB        = 0,
0055     MLX5_IB_LINK_TYPE_ETH       = 1
0056 };
0057 
0058 enum raw_qp_set_mask_map {
0059     MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID     = 1UL << 0,
0060     MLX5_RAW_QP_RATE_LIMIT          = 1UL << 1,
0061 };
0062 
0063 struct mlx5_modify_raw_qp_param {
0064     u16 operation;
0065 
0066     u32 set_mask; /* raw_qp_set_mask_map */
0067 
0068     struct mlx5_rate_limit rl;
0069 
0070     u8 rq_q_ctr_id;
0071     u32 port;
0072 };
0073 
0074 static void get_cqs(enum ib_qp_type qp_type,
0075             struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
0076             struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
0077 
0078 static int is_qp0(enum ib_qp_type qp_type)
0079 {
0080     return qp_type == IB_QPT_SMI;
0081 }
0082 
0083 static int is_sqp(enum ib_qp_type qp_type)
0084 {
0085     return is_qp0(qp_type) || is_qp1(qp_type);
0086 }
0087 
0088 /**
0089  * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of) from user WQ
0090  * to kernel buffer
0091  *
0092  * @umem: User space memory where the WQ is
0093  * @buffer: buffer to copy to
0094  * @buflen: buffer length
0095  * @wqe_index: index of WQE to copy from
0096  * @wq_offset: offset to start of WQ
0097  * @wq_wqe_cnt: number of WQEs in WQ
0098  * @wq_wqe_shift: log2 of WQE size
0099  * @bcnt: number of bytes to copy
0100  * @bytes_copied: number of bytes actually copied (output)
0101  *
0102  * Copies at most bcnt bytes from the start of the WQE.
0103  * Does not guarantee to copy the entire WQE.
0104  *
0105  * Return: zero on success, or an error code.
0106  */
0107 static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
0108                     size_t buflen, int wqe_index,
0109                     int wq_offset, int wq_wqe_cnt,
0110                     int wq_wqe_shift, int bcnt,
0111                     size_t *bytes_copied)
0112 {
0113     size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
0114     size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
0115     size_t copy_length;
0116     int ret;
0117 
0118     /* don't copy more than requested, more than the buffer length, or
0119      * beyond the WQ end
0120      */
0121     copy_length = min_t(u32, buflen, wq_end - offset);
0122     copy_length = min_t(u32, copy_length, bcnt);
0123 
0124     ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
0125     if (ret)
0126         return ret;
0127 
0128     if (bytes_copied)
0129         *bytes_copied = copy_length;
0130 
0131     return 0;
0132 }
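/*
 * Worked example (hypothetical values) of the clamping above: with
 * wq_offset = 0, wq_wqe_cnt = 8 and wq_wqe_shift = 6 (64-byte strides),
 * reading wqe_index 10 gives
 *   offset      = 0 + ((10 % 8) << 6) = 128
 *   wq_end      = 0 + (8 << 6)        = 512
 *   copy_length = min(buflen, 512 - 128, bcnt)
 * so at most 384 bytes are copied regardless of bcnt or buflen.
 */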
0133 
0134 static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
0135                       void *buffer, size_t buflen, size_t *bc)
0136 {
0137     struct mlx5_wqe_ctrl_seg *ctrl;
0138     size_t bytes_copied = 0;
0139     size_t wqe_length;
0140     void *p;
0141     int ds;
0142 
0143     wqe_index = wqe_index & qp->sq.fbc.sz_m1;
0144 
0145     /* read the control segment first */
0146     p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
0147     ctrl = p;
0148     ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
0149     wqe_length = ds * MLX5_WQE_DS_UNITS;
0150 
0151     /* read the rest of the WQE if it spans more than one stride */
0152     while (bytes_copied < wqe_length) {
0153         size_t copy_length =
0154             min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);
0155 
0156         if (!copy_length)
0157             break;
0158 
0159         memcpy(buffer + bytes_copied, p, copy_length);
0160         bytes_copied += copy_length;
0161 
0162         wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
0163         p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
0164     }
0165     *bc = bytes_copied;
0166     return 0;
0167 }
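/*
 * Note on the loop above (illustrative, assuming the usual 16-byte DS units
 * and 64-byte send-WQE basic blocks): the ds field counts 16-byte data
 * segments, so ds = 6 means wqe_length = 96 bytes, which spans two basic
 * blocks; the loop then copies two full 64-byte strides (so *bc = 128,
 * buffer space permitting), wrapping wqe_index with the sz_m1 mask if the
 * WQE crosses the end of the ring.
 */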
0168 
0169 static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
0170                     void *buffer, size_t buflen, size_t *bc)
0171 {
0172     struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
0173     struct ib_umem *umem = base->ubuffer.umem;
0174     struct mlx5_ib_wq *wq = &qp->sq;
0175     struct mlx5_wqe_ctrl_seg *ctrl;
0176     size_t bytes_copied;
0177     size_t bytes_copied2;
0178     size_t wqe_length;
0179     int ret;
0180     int ds;
0181 
0182     /* first, read as much as possible */
0183     ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
0184                        wq->offset, wq->wqe_cnt,
0185                        wq->wqe_shift, buflen,
0186                        &bytes_copied);
0187     if (ret)
0188         return ret;
0189 
0190     /* we need at least control segment size to proceed */
0191     if (bytes_copied < sizeof(*ctrl))
0192         return -EINVAL;
0193 
0194     ctrl = buffer;
0195     ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
0196     wqe_length = ds * MLX5_WQE_DS_UNITS;
0197 
0198     /* if we copied enough then we are done */
0199     if (bytes_copied >= wqe_length) {
0200         *bc = bytes_copied;
0201         return 0;
0202     }
0203 
0204     /* otherwise this is a wrapped-around WQE,
0205      * so read the remaining bytes starting
0206      * from wqe_index 0
0207      */
0208     ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
0209                        buflen - bytes_copied, 0, wq->offset,
0210                        wq->wqe_cnt, wq->wqe_shift,
0211                        wqe_length - bytes_copied,
0212                        &bytes_copied2);
0213 
0214     if (ret)
0215         return ret;
0216     *bc = bytes_copied + bytes_copied2;
0217     return 0;
0218 }
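/*
 * Wrap-around example (hypothetical values): with 8 SQ slots of 64 bytes,
 * a 128-byte WQE (ds = 8) posted at wqe_index 7 is split across the end of
 * the ring.  The first copy above is clamped at wq_end and returns 64
 * bytes; since 64 < wqe_length, the second call re-reads from wqe_index 0
 * for the remaining 64 bytes, and *bc ends up as 128 (assuming a large
 * enough caller buffer).
 */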
0219 
0220 int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
0221             size_t buflen, size_t *bc)
0222 {
0223     struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
0224     struct ib_umem *umem = base->ubuffer.umem;
0225 
0226     if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
0227         return -EINVAL;
0228 
0229     if (!umem)
0230         return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
0231                           buflen, bc);
0232 
0233     return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
0234 }
0235 
0236 static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
0237                     void *buffer, size_t buflen, size_t *bc)
0238 {
0239     struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
0240     struct ib_umem *umem = base->ubuffer.umem;
0241     struct mlx5_ib_wq *wq = &qp->rq;
0242     size_t bytes_copied;
0243     int ret;
0244 
0245     ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
0246                        wq->offset, wq->wqe_cnt,
0247                        wq->wqe_shift, buflen,
0248                        &bytes_copied);
0249 
0250     if (ret)
0251         return ret;
0252     *bc = bytes_copied;
0253     return 0;
0254 }
0255 
0256 int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
0257             size_t buflen, size_t *bc)
0258 {
0259     struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
0260     struct ib_umem *umem = base->ubuffer.umem;
0261     struct mlx5_ib_wq *wq = &qp->rq;
0262     size_t wqe_size = 1 << wq->wqe_shift;
0263 
0264     if (buflen < wqe_size)
0265         return -EINVAL;
0266 
0267     if (!umem)
0268         return -EOPNOTSUPP;
0269 
0270     return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
0271 }
0272 
0273 static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
0274                      void *buffer, size_t buflen, size_t *bc)
0275 {
0276     struct ib_umem *umem = srq->umem;
0277     size_t bytes_copied;
0278     int ret;
0279 
0280     ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
0281                        srq->msrq.max, srq->msrq.wqe_shift,
0282                        buflen, &bytes_copied);
0283 
0284     if (ret)
0285         return ret;
0286     *bc = bytes_copied;
0287     return 0;
0288 }
0289 
0290 int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
0291              size_t buflen, size_t *bc)
0292 {
0293     struct ib_umem *umem = srq->umem;
0294     size_t wqe_size = 1 << srq->msrq.wqe_shift;
0295 
0296     if (buflen < wqe_size)
0297         return -EINVAL;
0298 
0299     if (!umem)
0300         return -EOPNOTSUPP;
0301 
0302     return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
0303 }
0304 
0305 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
0306 {
0307     struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
0308     struct ib_event event;
0309 
0310     if (type == MLX5_EVENT_TYPE_PATH_MIG) {
0311         /* This event is only valid for trans_qps */
0312         to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
0313     }
0314 
0315     if (ibqp->event_handler) {
0316         event.device     = ibqp->device;
0317         event.element.qp = ibqp;
0318         switch (type) {
0319         case MLX5_EVENT_TYPE_PATH_MIG:
0320             event.event = IB_EVENT_PATH_MIG;
0321             break;
0322         case MLX5_EVENT_TYPE_COMM_EST:
0323             event.event = IB_EVENT_COMM_EST;
0324             break;
0325         case MLX5_EVENT_TYPE_SQ_DRAINED:
0326             event.event = IB_EVENT_SQ_DRAINED;
0327             break;
0328         case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
0329             event.event = IB_EVENT_QP_LAST_WQE_REACHED;
0330             break;
0331         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
0332             event.event = IB_EVENT_QP_FATAL;
0333             break;
0334         case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
0335             event.event = IB_EVENT_PATH_MIG_ERR;
0336             break;
0337         case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
0338             event.event = IB_EVENT_QP_REQ_ERR;
0339             break;
0340         case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
0341             event.event = IB_EVENT_QP_ACCESS_ERR;
0342             break;
0343         default:
0344             pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
0345             return;
0346         }
0347 
0348         ibqp->event_handler(&event, ibqp->qp_context);
0349     }
0350 }
0351 
0352 static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
0353                int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
0354 {
0355     int wqe_size;
0356     int wq_size;
0357 
0358     /* Sanity check RQ size before proceeding */
0359     if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
0360         return -EINVAL;
0361 
0362     if (!has_rq) {
0363         qp->rq.max_gs = 0;
0364         qp->rq.wqe_cnt = 0;
0365         qp->rq.wqe_shift = 0;
0366         cap->max_recv_wr = 0;
0367         cap->max_recv_sge = 0;
0368     } else {
0369         int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);
0370 
0371         if (ucmd) {
0372             qp->rq.wqe_cnt = ucmd->rq_wqe_count;
0373             if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
0374                 return -EINVAL;
0375             qp->rq.wqe_shift = ucmd->rq_wqe_shift;
0376             if ((1 << qp->rq.wqe_shift) /
0377                     sizeof(struct mlx5_wqe_data_seg) <
0378                 wq_sig)
0379                 return -EINVAL;
0380             qp->rq.max_gs =
0381                 (1 << qp->rq.wqe_shift) /
0382                     sizeof(struct mlx5_wqe_data_seg) -
0383                 wq_sig;
0384             qp->rq.max_post = qp->rq.wqe_cnt;
0385         } else {
0386             wqe_size =
0387                 wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
0388                      0;
0389             wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
0390             wqe_size = roundup_pow_of_two(wqe_size);
0391             wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
0392             wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
0393             qp->rq.wqe_cnt = wq_size / wqe_size;
0394             if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
0395                 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
0396                         wqe_size,
0397                         MLX5_CAP_GEN(dev->mdev,
0398                              max_wqe_sz_rq));
0399                 return -EINVAL;
0400             }
0401             qp->rq.wqe_shift = ilog2(wqe_size);
0402             qp->rq.max_gs =
0403                 (1 << qp->rq.wqe_shift) /
0404                     sizeof(struct mlx5_wqe_data_seg) -
0405                 wq_sig;
0406             qp->rq.max_post = qp->rq.wqe_cnt;
0407         }
0408     }
0409 
0410     return 0;
0411 }
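/*
 * Kernel-path sizing example (hypothetical values, no WQ signature): with
 * cap->max_recv_sge = 3 and cap->max_recv_wr = 100, each data segment is
 * 16 bytes, so wqe_size = 48 rounded up to 64; wq_size =
 * roundup_pow_of_two(100) * 64 = 8192; hence rq.wqe_cnt = 128,
 * rq.wqe_shift = 6, rq.max_gs = 64 / 16 = 4 and rq.max_post = 128.
 */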
0412 
0413 static int sq_overhead(struct ib_qp_init_attr *attr)
0414 {
0415     int size = 0;
0416 
0417     switch (attr->qp_type) {
0418     case IB_QPT_XRC_INI:
0419         size += sizeof(struct mlx5_wqe_xrc_seg);
0420         fallthrough;
0421     case IB_QPT_RC:
0422         size += sizeof(struct mlx5_wqe_ctrl_seg) +
0423             max(sizeof(struct mlx5_wqe_atomic_seg) +
0424                 sizeof(struct mlx5_wqe_raddr_seg),
0425                 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
0426                 sizeof(struct mlx5_mkey_seg) +
0427                 MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
0428                 MLX5_IB_UMR_OCTOWORD);
0429         break;
0430 
0431     case IB_QPT_XRC_TGT:
0432         return 0;
0433 
0434     case IB_QPT_UC:
0435         size += sizeof(struct mlx5_wqe_ctrl_seg) +
0436             max(sizeof(struct mlx5_wqe_raddr_seg),
0437                 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
0438                 sizeof(struct mlx5_mkey_seg));
0439         break;
0440 
0441     case IB_QPT_UD:
0442         if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
0443             size += sizeof(struct mlx5_wqe_eth_pad) +
0444                 sizeof(struct mlx5_wqe_eth_seg);
0445         fallthrough;
0446     case IB_QPT_SMI:
0447     case MLX5_IB_QPT_HW_GSI:
0448         size += sizeof(struct mlx5_wqe_ctrl_seg) +
0449             sizeof(struct mlx5_wqe_datagram_seg);
0450         break;
0451 
0452     case MLX5_IB_QPT_REG_UMR:
0453         size += sizeof(struct mlx5_wqe_ctrl_seg) +
0454             sizeof(struct mlx5_wqe_umr_ctrl_seg) +
0455             sizeof(struct mlx5_mkey_seg);
0456         break;
0457 
0458     default:
0459         return -EINVAL;
0460     }
0461 
0462     return size;
0463 }
0464 
0465 static int calc_send_wqe(struct ib_qp_init_attr *attr)
0466 {
0467     int inl_size = 0;
0468     int size;
0469 
0470     size = sq_overhead(attr);
0471     if (size < 0)
0472         return size;
0473 
0474     if (attr->cap.max_inline_data) {
0475         inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
0476             attr->cap.max_inline_data;
0477     }
0478 
0479     size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
0480     if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
0481         ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
0482         return MLX5_SIG_WQE_SIZE;
0483     else
0484         return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
0485 }
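/*
 * In short: the send WQE size is the type-dependent overhead from
 * sq_overhead() plus either one 16-byte data segment per SGE or the inline
 * header and payload, whichever is larger, aligned up to the 64-byte
 * MLX5_SEND_WQE_BB; QPs created with IB_QP_CREATE_INTEGRITY_EN are bumped
 * to at least MLX5_SIG_WQE_SIZE.
 */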
0486 
0487 static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
0488 {
0489     int max_sge;
0490 
0491     if (attr->qp_type == IB_QPT_RC)
0492         max_sge = (min_t(int, wqe_size, 512) -
0493                sizeof(struct mlx5_wqe_ctrl_seg) -
0494                sizeof(struct mlx5_wqe_raddr_seg)) /
0495             sizeof(struct mlx5_wqe_data_seg);
0496     else if (attr->qp_type == IB_QPT_XRC_INI)
0497         max_sge = (min_t(int, wqe_size, 512) -
0498                sizeof(struct mlx5_wqe_ctrl_seg) -
0499                sizeof(struct mlx5_wqe_xrc_seg) -
0500                sizeof(struct mlx5_wqe_raddr_seg)) /
0501             sizeof(struct mlx5_wqe_data_seg);
0502     else
0503         max_sge = (wqe_size - sq_overhead(attr)) /
0504             sizeof(struct mlx5_wqe_data_seg);
0505 
0506     return min_t(int, max_sge, wqe_size - sq_overhead(attr) /
0507              sizeof(struct mlx5_wqe_data_seg));
0508 }
0509 
0510 static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
0511             struct mlx5_ib_qp *qp)
0512 {
0513     int wqe_size;
0514     int wq_size;
0515 
0516     if (!attr->cap.max_send_wr)
0517         return 0;
0518 
0519     wqe_size = calc_send_wqe(attr);
0520     mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
0521     if (wqe_size < 0)
0522         return wqe_size;
0523 
0524     if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
0525         mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
0526                 wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
0527         return -EINVAL;
0528     }
0529 
0530     qp->max_inline_data = wqe_size - sq_overhead(attr) -
0531                   sizeof(struct mlx5_wqe_inline_seg);
0532     attr->cap.max_inline_data = qp->max_inline_data;
0533 
0534     wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
0535     qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
0536     if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
0537         mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
0538                 attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
0539                 qp->sq.wqe_cnt,
0540                 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
0541         return -ENOMEM;
0542     }
0543     qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
0544     qp->sq.max_gs = get_send_sge(attr, wqe_size);
0545     if (qp->sq.max_gs < attr->cap.max_send_sge)
0546         return -ENOMEM;
0547 
0548     attr->cap.max_send_sge = qp->sq.max_gs;
0549     qp->sq.max_post = wq_size / wqe_size;
0550     attr->cap.max_send_wr = qp->sq.max_post;
0551 
0552     return wq_size;
0553 }
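/*
 * SQ sizing example (hypothetical values): with attr->cap.max_send_wr = 64
 * and calc_send_wqe() returning 192, wq_size = roundup_pow_of_two(64 * 192)
 * = 16384, so sq.wqe_cnt = 16384 / 64 = 256 basic blocks and sq.max_post =
 * 16384 / 192 = 85 WQEs.  The returned wq_size feeds the buffer-size
 * calculation in _create_kernel_qp().
 */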
0554 
0555 static int set_user_buf_size(struct mlx5_ib_dev *dev,
0556                 struct mlx5_ib_qp *qp,
0557                 struct mlx5_ib_create_qp *ucmd,
0558                 struct mlx5_ib_qp_base *base,
0559                 struct ib_qp_init_attr *attr)
0560 {
0561     int desc_sz = 1 << qp->sq.wqe_shift;
0562 
0563     if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
0564         mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
0565                  desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
0566         return -EINVAL;
0567     }
0568 
0569     if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) {
0570         mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
0571                  ucmd->sq_wqe_count);
0572         return -EINVAL;
0573     }
0574 
0575     qp->sq.wqe_cnt = ucmd->sq_wqe_count;
0576 
0577     if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
0578         mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
0579                  qp->sq.wqe_cnt,
0580                  1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
0581         return -EINVAL;
0582     }
0583 
0584     if (attr->qp_type == IB_QPT_RAW_PACKET ||
0585         qp->flags & IB_QP_CREATE_SOURCE_QPN) {
0586         base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
0587         qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
0588     } else {
0589         base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
0590                      (qp->sq.wqe_cnt << 6);
0591     }
0592 
0593     return 0;
0594 }
0595 
0596 static int qp_has_rq(struct ib_qp_init_attr *attr)
0597 {
0598     if (attr->qp_type == IB_QPT_XRC_INI ||
0599         attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
0600         attr->qp_type == MLX5_IB_QPT_REG_UMR ||
0601         !attr->cap.max_recv_wr)
0602         return 0;
0603 
0604     return 1;
0605 }
0606 
0607 enum {
0608     /* this is the first blue flame register in the array of bfregs assigned
0609      * to a process. Since we do not use it for blue flame but rather
0610      * regular 64 bit doorbells, we do not need a lock for maintaining
0611      * "odd/even" order
0612      */
0613     NUM_NON_BLUE_FLAME_BFREGS = 1,
0614 };
0615 
0616 static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
0617 {
0618     return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
0619            bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR;
0620 }
0621 
0622 static int num_med_bfreg(struct mlx5_ib_dev *dev,
0623              struct mlx5_bfreg_info *bfregi)
0624 {
0625     int n;
0626 
0627     n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
0628         NUM_NON_BLUE_FLAME_BFREGS;
0629 
0630     return n >= 0 ? n : 0;
0631 }
0632 
0633 static int first_med_bfreg(struct mlx5_ib_dev *dev,
0634                struct mlx5_bfreg_info *bfregi)
0635 {
0636     return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
0637 }
0638 
0639 static int first_hi_bfreg(struct mlx5_ib_dev *dev,
0640               struct mlx5_bfreg_info *bfregi)
0641 {
0642     int med;
0643 
0644     med = num_med_bfreg(dev, bfregi);
0645     return ++med;
0646 }
0647 
0648 static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
0649                   struct mlx5_bfreg_info *bfregi)
0650 {
0651     int i;
0652 
0653     for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
0654         if (!bfregi->count[i]) {
0655             bfregi->count[i]++;
0656             return i;
0657         }
0658     }
0659 
0660     return -ENOMEM;
0661 }
0662 
0663 static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
0664                  struct mlx5_bfreg_info *bfregi)
0665 {
0666     int minidx = first_med_bfreg(dev, bfregi);
0667     int i;
0668 
0669     if (minidx < 0)
0670         return minidx;
0671 
0672     for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
0673         if (bfregi->count[i] < bfregi->count[minidx])
0674             minidx = i;
0675         if (!bfregi->count[minidx])
0676             break;
0677     }
0678 
0679     bfregi->count[minidx]++;
0680     return minidx;
0681 }
0682 
0683 static int alloc_bfreg(struct mlx5_ib_dev *dev,
0684                struct mlx5_bfreg_info *bfregi)
0685 {
0686     int bfregn = -ENOMEM;
0687 
0688     if (bfregi->lib_uar_dyn)
0689         return -EINVAL;
0690 
0691     mutex_lock(&bfregi->lock);
0692     if (bfregi->ver >= 2) {
0693         bfregn = alloc_high_class_bfreg(dev, bfregi);
0694         if (bfregn < 0)
0695             bfregn = alloc_med_class_bfreg(dev, bfregi);
0696     }
0697 
0698     if (bfregn < 0) {
0699         BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
0700         bfregn = 0;
0701         bfregi->count[bfregn]++;
0702     }
0703     mutex_unlock(&bfregi->lock);
0704 
0705     return bfregn;
0706 }
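/*
 * Allocation policy summary: bfreg 0 is the shared non-blue-flame register
 * used for regular doorbells and is always available as a fallback (only
 * its reference count is bumped).  For ver >= 2 contexts the high class is
 * tried first (an exclusive, currently unused bfreg above the medium
 * range), then the medium class (the least-used bfreg in its range).
 */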
0707 
0708 void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
0709 {
0710     mutex_lock(&bfregi->lock);
0711     bfregi->count[bfregn]--;
0712     mutex_unlock(&bfregi->lock);
0713 }
0714 
0715 static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
0716 {
0717     switch (state) {
0718     case IB_QPS_RESET:  return MLX5_QP_STATE_RST;
0719     case IB_QPS_INIT:   return MLX5_QP_STATE_INIT;
0720     case IB_QPS_RTR:    return MLX5_QP_STATE_RTR;
0721     case IB_QPS_RTS:    return MLX5_QP_STATE_RTS;
0722     case IB_QPS_SQD:    return MLX5_QP_STATE_SQD;
0723     case IB_QPS_SQE:    return MLX5_QP_STATE_SQER;
0724     case IB_QPS_ERR:    return MLX5_QP_STATE_ERR;
0725     default:        return -1;
0726     }
0727 }
0728 
0729 static int to_mlx5_st(enum ib_qp_type type)
0730 {
0731     switch (type) {
0732     case IB_QPT_RC:         return MLX5_QP_ST_RC;
0733     case IB_QPT_UC:         return MLX5_QP_ST_UC;
0734     case IB_QPT_UD:         return MLX5_QP_ST_UD;
0735     case MLX5_IB_QPT_REG_UMR:   return MLX5_QP_ST_REG_UMR;
0736     case IB_QPT_XRC_INI:
0737     case IB_QPT_XRC_TGT:        return MLX5_QP_ST_XRC;
0738     case IB_QPT_SMI:        return MLX5_QP_ST_QP0;
0739     case MLX5_IB_QPT_HW_GSI:    return MLX5_QP_ST_QP1;
0740     case MLX5_IB_QPT_DCI:       return MLX5_QP_ST_DCI;
0741     case IB_QPT_RAW_PACKET:     return MLX5_QP_ST_RAW_ETHERTYPE;
0742     default:        return -EINVAL;
0743     }
0744 }
0745 
0746 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
0747                  struct mlx5_ib_cq *recv_cq);
0748 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
0749                    struct mlx5_ib_cq *recv_cq);
0750 
0751 int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
0752             struct mlx5_bfreg_info *bfregi, u32 bfregn,
0753             bool dyn_bfreg)
0754 {
0755     unsigned int bfregs_per_sys_page;
0756     u32 index_of_sys_page;
0757     u32 offset;
0758 
0759     if (bfregi->lib_uar_dyn)
0760         return -EINVAL;
0761 
0762     bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
0763                 MLX5_NON_FP_BFREGS_PER_UAR;
0764     index_of_sys_page = bfregn / bfregs_per_sys_page;
0765 
0766     if (dyn_bfreg) {
0767         index_of_sys_page += bfregi->num_static_sys_pages;
0768 
0769         if (index_of_sys_page >= bfregi->num_sys_pages)
0770             return -EINVAL;
0771 
0772         if (bfregn > bfregi->num_dyn_bfregs ||
0773             bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
0774             mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
0775             return -EINVAL;
0776         }
0777     }
0778 
0779     offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
0780     return bfregi->sys_pages[index_of_sys_page] + offset;
0781 }
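/*
 * Index-mapping example (all values hypothetical): suppose
 * get_uars_per_sys_page() returns 4 and MLX5_NON_FP_BFREGS_PER_UAR is 2,
 * so bfregs_per_sys_page = 8.  A static bfregn of 11 then lands in system
 * page 11 / 8 = 1 with UAR offset (11 % 8) / 2 = 1, i.e. the returned UAR
 * index is bfregi->sys_pages[1] + 1.
 */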
0782 
0783 static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
0784                 struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
0785 {
0786     struct mlx5_ib_ucontext *context =
0787         rdma_udata_to_drv_context(
0788             udata,
0789             struct mlx5_ib_ucontext,
0790             ibucontext);
0791 
0792     if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
0793         atomic_dec(&dev->delay_drop.rqs_cnt);
0794 
0795     mlx5_ib_db_unmap_user(context, &rwq->db);
0796     ib_umem_release(rwq->umem);
0797 }
0798 
0799 static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
0800               struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
0801               struct mlx5_ib_create_wq *ucmd)
0802 {
0803     struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
0804         udata, struct mlx5_ib_ucontext, ibucontext);
0805     unsigned long page_size = 0;
0806     u32 offset = 0;
0807     int err;
0808 
0809     if (!ucmd->buf_addr)
0810         return -EINVAL;
0811 
0812     rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
0813     if (IS_ERR(rwq->umem)) {
0814         mlx5_ib_dbg(dev, "umem_get failed\n");
0815         err = PTR_ERR(rwq->umem);
0816         return err;
0817     }
0818 
0819     page_size = mlx5_umem_find_best_quantized_pgoff(
0820         rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
0821         page_offset, 64, &rwq->rq_page_offset);
0822     if (!page_size) {
0823         mlx5_ib_warn(dev, "bad offset\n");
0824         err = -EINVAL;
0825         goto err_umem;
0826     }
0827 
0828     rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
0829     rwq->page_shift = order_base_2(page_size);
0830     rwq->log_page_size =  rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
0831     rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
0832 
0833     mlx5_ib_dbg(
0834         dev,
0835         "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
0836         (unsigned long long)ucmd->buf_addr, rwq->buf_size,
0837         ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
0838         offset);
0839 
0840     err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
0841     if (err) {
0842         mlx5_ib_dbg(dev, "map failed\n");
0843         goto err_umem;
0844     }
0845 
0846     return 0;
0847 
0848 err_umem:
0849     ib_umem_release(rwq->umem);
0850     return err;
0851 }
0852 
0853 static int adjust_bfregn(struct mlx5_ib_dev *dev,
0854              struct mlx5_bfreg_info *bfregi, int bfregn)
0855 {
0856     return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
0857                 bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
0858 }
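/*
 * Example (assuming 2 non-fast-path bfregs out of 4 per UAR): the driver's
 * dense numbering skips the fast-path slots, so bfregn = 5 is reported to
 * userspace as 5 / 2 * 4 + 5 % 2 = 9, the position of that bfreg in the
 * full per-UAR layout.
 */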
0859 
0860 static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
0861                struct mlx5_ib_qp *qp, struct ib_udata *udata,
0862                struct ib_qp_init_attr *attr, u32 **in,
0863                struct mlx5_ib_create_qp_resp *resp, int *inlen,
0864                struct mlx5_ib_qp_base *base,
0865                struct mlx5_ib_create_qp *ucmd)
0866 {
0867     struct mlx5_ib_ucontext *context;
0868     struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
0869     unsigned int page_offset_quantized = 0;
0870     unsigned long page_size = 0;
0871     int uar_index = 0;
0872     int bfregn;
0873     int ncont = 0;
0874     __be64 *pas;
0875     void *qpc;
0876     int err;
0877     u16 uid;
0878     u32 uar_flags;
0879 
0880     context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
0881                         ibucontext);
0882     uar_flags = qp->flags_en &
0883             (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
0884     switch (uar_flags) {
0885     case MLX5_QP_FLAG_UAR_PAGE_INDEX:
0886         uar_index = ucmd->bfreg_index;
0887         bfregn = MLX5_IB_INVALID_BFREG;
0888         break;
0889     case MLX5_QP_FLAG_BFREG_INDEX:
0890         uar_index = bfregn_to_uar_index(dev, &context->bfregi,
0891                         ucmd->bfreg_index, true);
0892         if (uar_index < 0)
0893             return uar_index;
0894         bfregn = MLX5_IB_INVALID_BFREG;
0895         break;
0896     case 0:
0897         if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
0898             return -EINVAL;
0899         bfregn = alloc_bfreg(dev, &context->bfregi);
0900         if (bfregn < 0)
0901             return bfregn;
0902         break;
0903     default:
0904         return -EINVAL;
0905     }
0906 
0907     mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
0908     if (bfregn != MLX5_IB_INVALID_BFREG)
0909         uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
0910                         false);
0911 
0912     qp->rq.offset = 0;
0913     qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
0914     qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
0915 
0916     err = set_user_buf_size(dev, qp, ucmd, base, attr);
0917     if (err)
0918         goto err_bfreg;
0919 
0920     if (ucmd->buf_addr && ubuffer->buf_size) {
0921         ubuffer->buf_addr = ucmd->buf_addr;
0922         ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
0923                         ubuffer->buf_size, 0);
0924         if (IS_ERR(ubuffer->umem)) {
0925             err = PTR_ERR(ubuffer->umem);
0926             goto err_bfreg;
0927         }
0928         page_size = mlx5_umem_find_best_quantized_pgoff(
0929             ubuffer->umem, qpc, log_page_size,
0930             MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
0931             &page_offset_quantized);
0932         if (!page_size) {
0933             err = -EINVAL;
0934             goto err_umem;
0935         }
0936         ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
0937     } else {
0938         ubuffer->umem = NULL;
0939     }
0940 
0941     *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
0942          MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
0943     *in = kvzalloc(*inlen, GFP_KERNEL);
0944     if (!*in) {
0945         err = -ENOMEM;
0946         goto err_umem;
0947     }
0948 
0949     uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
0950     MLX5_SET(create_qp_in, *in, uid, uid);
0951     qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
0952     pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
0953     if (ubuffer->umem) {
0954         mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
0955         MLX5_SET(qpc, qpc, log_page_size,
0956              order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
0957         MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
0958     }
0959     MLX5_SET(qpc, qpc, uar_page, uar_index);
0960     if (bfregn != MLX5_IB_INVALID_BFREG)
0961         resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
0962     else
0963         resp->bfreg_index = MLX5_IB_INVALID_BFREG;
0964     qp->bfregn = bfregn;
0965 
0966     err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
0967     if (err) {
0968         mlx5_ib_dbg(dev, "map failed\n");
0969         goto err_free;
0970     }
0971 
0972     return 0;
0973 
0974 err_free:
0975     kvfree(*in);
0976 
0977 err_umem:
0978     ib_umem_release(ubuffer->umem);
0979 
0980 err_bfreg:
0981     if (bfregn != MLX5_IB_INVALID_BFREG)
0982         mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
0983     return err;
0984 }
0985 
0986 static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
0987                struct mlx5_ib_qp_base *base, struct ib_udata *udata)
0988 {
0989     struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
0990         udata, struct mlx5_ib_ucontext, ibucontext);
0991 
0992     if (udata) {
0993         /* User QP */
0994         mlx5_ib_db_unmap_user(context, &qp->db);
0995         ib_umem_release(base->ubuffer.umem);
0996 
0997         /*
0998          * Free only the BFREGs which are handled by the kernel.
0999          * BFREGs of UARs allocated dynamically are handled by user.
1000          */
1001         if (qp->bfregn != MLX5_IB_INVALID_BFREG)
1002             mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
1003         return;
1004     }
1005 
1006     /* Kernel QP */
1007     kvfree(qp->sq.wqe_head);
1008     kvfree(qp->sq.w_list);
1009     kvfree(qp->sq.wrid);
1010     kvfree(qp->sq.wr_data);
1011     kvfree(qp->rq.wrid);
1012     if (qp->db.db)
1013         mlx5_db_free(dev->mdev, &qp->db);
1014     if (qp->buf.frags)
1015         mlx5_frag_buf_free(dev->mdev, &qp->buf);
1016 }
1017 
1018 static int _create_kernel_qp(struct mlx5_ib_dev *dev,
1019                  struct ib_qp_init_attr *init_attr,
1020                  struct mlx5_ib_qp *qp, u32 **in, int *inlen,
1021                  struct mlx5_ib_qp_base *base)
1022 {
1023     int uar_index;
1024     void *qpc;
1025     int err;
1026 
1027     if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
1028         qp->bf.bfreg = &dev->fp_bfreg;
1029     else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
1030         qp->bf.bfreg = &dev->wc_bfreg;
1031     else
1032         qp->bf.bfreg = &dev->bfreg;
1033 
1034     /* We need to divide by two since each register consists of
1035      * two buffers of identical size, namely odd and even
1036      */
1037     qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
1038     uar_index = qp->bf.bfreg->index;
1039 
1040     err = calc_sq_size(dev, init_attr, qp);
1041     if (err < 0) {
1042         mlx5_ib_dbg(dev, "err %d\n", err);
1043         return err;
1044     }
1045 
1046     qp->rq.offset = 0;
1047     qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
1048     base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
1049 
1050     err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
1051                        &qp->buf, dev->mdev->priv.numa_node);
1052     if (err) {
1053         mlx5_ib_dbg(dev, "err %d\n", err);
1054         return err;
1055     }
1056 
1057     if (qp->rq.wqe_cnt)
1058         mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
1059                   ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
1060 
1061     if (qp->sq.wqe_cnt) {
1062         int sq_strides_offset = (qp->sq.offset  & (PAGE_SIZE - 1)) /
1063                     MLX5_SEND_WQE_BB;
1064         mlx5_init_fbc_offset(qp->buf.frags +
1065                      (qp->sq.offset / PAGE_SIZE),
1066                      ilog2(MLX5_SEND_WQE_BB),
1067                      ilog2(qp->sq.wqe_cnt),
1068                      sq_strides_offset, &qp->sq.fbc);
1069 
1070         qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
1071     }
1072 
1073     *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
1074          MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
1075     *in = kvzalloc(*inlen, GFP_KERNEL);
1076     if (!*in) {
1077         err = -ENOMEM;
1078         goto err_buf;
1079     }
1080 
1081     qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
1082     MLX5_SET(qpc, qpc, uar_page, uar_index);
1083     MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
1084     MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
1085 
1086     /* Set "fast registration enabled" for all kernel QPs */
1087     MLX5_SET(qpc, qpc, fre, 1);
1088     MLX5_SET(qpc, qpc, rlky, 1);
1089 
1090     if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
1091         MLX5_SET(qpc, qpc, deth_sqpn, 1);
1092 
1093     mlx5_fill_page_frag_array(&qp->buf,
1094                   (__be64 *)MLX5_ADDR_OF(create_qp_in,
1095                              *in, pas));
1096 
1097     err = mlx5_db_alloc(dev->mdev, &qp->db);
1098     if (err) {
1099         mlx5_ib_dbg(dev, "err %d\n", err);
1100         goto err_free;
1101     }
1102 
1103     qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
1104                      sizeof(*qp->sq.wrid), GFP_KERNEL);
1105     qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt,
1106                     sizeof(*qp->sq.wr_data), GFP_KERNEL);
1107     qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
1108                      sizeof(*qp->rq.wrid), GFP_KERNEL);
1109     qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt,
1110                        sizeof(*qp->sq.w_list), GFP_KERNEL);
1111     qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt,
1112                      sizeof(*qp->sq.wqe_head), GFP_KERNEL);
1113 
1114     if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
1115         !qp->sq.w_list || !qp->sq.wqe_head) {
1116         err = -ENOMEM;
1117         goto err_wrid;
1118     }
1119 
1120     return 0;
1121 
1122 err_wrid:
1123     kvfree(qp->sq.wqe_head);
1124     kvfree(qp->sq.w_list);
1125     kvfree(qp->sq.wrid);
1126     kvfree(qp->sq.wr_data);
1127     kvfree(qp->rq.wrid);
1128     mlx5_db_free(dev->mdev, &qp->db);
1129 
1130 err_free:
1131     kvfree(*in);
1132 
1133 err_buf:
1134     mlx5_frag_buf_free(dev->mdev, &qp->buf);
1135     return err;
1136 }
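/*
 * Buffer layout note: the kernel QP buffer places the RQ at offset 0 and
 * the SQ right after it, so with (hypothetical) rq.wqe_cnt = 128 and
 * 64-byte RQ strides the SQ starts at offset 8192; the two fragment-buffer
 * controls (rq.fbc and sq.fbc) are then initialized over their respective
 * regions of qp->buf.
 */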
1137 
1138 static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
1139 {
1140     if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
1141         (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
1142         return MLX5_SRQ_RQ;
1143     else if (!qp->has_rq)
1144         return MLX5_ZERO_LEN_RQ;
1145 
1146     return MLX5_NON_ZERO_RQ;
1147 }
1148 
1149 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1150                     struct mlx5_ib_qp *qp,
1151                     struct mlx5_ib_sq *sq, u32 tdn,
1152                     struct ib_pd *pd)
1153 {
1154     u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
1155     void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1156 
1157     MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
1158     MLX5_SET(tisc, tisc, transport_domain, tdn);
1159     if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
1160         MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
1161 
1162     return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
1163 }
1164 
1165 static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1166                       struct mlx5_ib_sq *sq, struct ib_pd *pd)
1167 {
1168     mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
1169 }
1170 
1171 static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
1172 {
1173     if (sq->flow_rule)
1174         mlx5_del_flow_rules(sq->flow_rule);
1175     sq->flow_rule = NULL;
1176 }
1177 
1178 static bool fr_supported(int ts_cap)
1179 {
1180     return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
1181            ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
1182 }
1183 
1184 static int get_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
1185              bool fr_sup, bool rt_sup)
1186 {
1187     if (cq->private_flags & MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS) {
1188         if (!rt_sup) {
1189             mlx5_ib_dbg(dev,
1190                     "Real time TS format is not supported\n");
1191             return -EOPNOTSUPP;
1192         }
1193         return MLX5_TIMESTAMP_FORMAT_REAL_TIME;
1194     }
1195     if (cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
1196         if (!fr_sup) {
1197             mlx5_ib_dbg(dev,
1198                     "Free running TS format is not supported\n");
1199             return -EOPNOTSUPP;
1200         }
1201         return MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
1202     }
1203     return fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
1204             MLX5_TIMESTAMP_FORMAT_DEFAULT;
1205 }
1206 
1207 static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *recv_cq)
1208 {
1209     u8 ts_cap = MLX5_CAP_GEN(dev->mdev, rq_ts_format);
1210 
1211     return get_ts_format(dev, recv_cq, fr_supported(ts_cap),
1212                  rt_supported(ts_cap));
1213 }
1214 
1215 static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
1216 {
1217     u8 ts_cap = MLX5_CAP_GEN(dev->mdev, sq_ts_format);
1218 
1219     return get_ts_format(dev, send_cq, fr_supported(ts_cap),
1220                  rt_supported(ts_cap));
1221 }
1222 
1223 static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
1224                 struct mlx5_ib_cq *recv_cq)
1225 {
1226     u8 ts_cap = MLX5_CAP_ROCE(dev->mdev, qp_ts_format);
1227     bool fr_sup = fr_supported(ts_cap);
1228     bool rt_sup = rt_supported(ts_cap);
1229     u8 default_ts = fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
1230                  MLX5_TIMESTAMP_FORMAT_DEFAULT;
1231     int send_ts_format =
1232         send_cq ? get_ts_format(dev, send_cq, fr_sup, rt_sup) :
1233               default_ts;
1234     int recv_ts_format =
1235         recv_cq ? get_ts_format(dev, recv_cq, fr_sup, rt_sup) :
1236               default_ts;
1237 
1238     if (send_ts_format < 0 || recv_ts_format < 0)
1239         return -EOPNOTSUPP;
1240 
1241     if (send_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
1242         recv_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
1243         send_ts_format != recv_ts_format) {
1244         mlx5_ib_dbg(
1245             dev,
1246             "The send ts_format does not match the receive ts_format\n");
1247         return -EOPNOTSUPP;
1248     }
1249 
1250     return send_ts_format == default_ts ? recv_ts_format : send_ts_format;
1251 }
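/*
 * Resolution examples: if both CQs request real-time completion
 * timestamps the QP is created with MLX5_TIMESTAMP_FORMAT_REAL_TIME; if
 * the two CQs resolve to different non-default formats the create fails
 * with -EOPNOTSUPP; a missing CQ simply contributes default_ts.
 */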
1252 
1253 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1254                    struct ib_udata *udata,
1255                    struct mlx5_ib_sq *sq, void *qpin,
1256                    struct ib_pd *pd, struct mlx5_ib_cq *cq)
1257 {
1258     struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
1259     __be64 *pas;
1260     void *in;
1261     void *sqc;
1262     void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1263     void *wq;
1264     int inlen;
1265     int err;
1266     unsigned int page_offset_quantized;
1267     unsigned long page_size;
1268     int ts_format;
1269 
1270     ts_format = get_sq_ts_format(dev, cq);
1271     if (ts_format < 0)
1272         return ts_format;
1273 
1274     sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
1275                        ubuffer->buf_size, 0);
1276     if (IS_ERR(sq->ubuffer.umem))
1277         return PTR_ERR(sq->ubuffer.umem);
1278     page_size = mlx5_umem_find_best_quantized_pgoff(
1279         ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
1280         page_offset, 64, &page_offset_quantized);
1281     if (!page_size) {
1282         err = -EINVAL;
1283         goto err_umem;
1284     }
1285 
1286     inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1287         sizeof(u64) *
1288             ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
1289     in = kvzalloc(inlen, GFP_KERNEL);
1290     if (!in) {
1291         err = -ENOMEM;
1292         goto err_umem;
1293     }
1294 
1295     MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
1296     sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1297     MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1298     if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
1299         MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
1300     MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1301     MLX5_SET(sqc, sqc, ts_format, ts_format);
1302     MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
1303     MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
1304     MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1305     MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
1306     if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
1307         MLX5_CAP_ETH(dev->mdev, swp))
1308         MLX5_SET(sqc, sqc, allow_swp, 1);
1309 
1310     wq = MLX5_ADDR_OF(sqc, sqc, wq);
1311     MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1312     MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1313     MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
1314     MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1315     MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1316     MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
1317     MLX5_SET(wq, wq, log_wq_pg_sz,
1318          order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
1319     MLX5_SET(wq, wq, page_offset, page_offset_quantized);
1320 
1321     pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1322     mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
1323 
1324     err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
1325 
1326     kvfree(in);
1327 
1328     if (err)
1329         goto err_umem;
1330 
1331     return 0;
1332 
1333 err_umem:
1334     ib_umem_release(sq->ubuffer.umem);
1335     sq->ubuffer.umem = NULL;
1336 
1337     return err;
1338 }
1339 
1340 static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1341                      struct mlx5_ib_sq *sq)
1342 {
1343     destroy_flow_rule_vport_sq(sq);
1344     mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
1345     ib_umem_release(sq->ubuffer.umem);
1346 }
1347 
1348 static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1349                    struct mlx5_ib_rq *rq, void *qpin,
1350                    struct ib_pd *pd, struct mlx5_ib_cq *cq)
1351 {
1352     struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
1353     __be64 *pas;
1354     void *in;
1355     void *rqc;
1356     void *wq;
1357     void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1358     struct ib_umem *umem = rq->base.ubuffer.umem;
1359     unsigned int page_offset_quantized;
1360     unsigned long page_size = 0;
1361     int ts_format;
1362     size_t inlen;
1363     int err;
1364 
1365     ts_format = get_rq_ts_format(dev, cq);
1366     if (ts_format < 0)
1367         return ts_format;
1368 
1369     page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
1370                             MLX5_ADAPTER_PAGE_SHIFT,
1371                             page_offset, 64,
1372                             &page_offset_quantized);
1373     if (!page_size)
1374         return -EINVAL;
1375 
1376     inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
1377         sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
1378     in = kvzalloc(inlen, GFP_KERNEL);
1379     if (!in)
1380         return -ENOMEM;
1381 
1382     MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
1383     rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1384     if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
1385         MLX5_SET(rqc, rqc, vsd, 1);
1386     MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
1387     MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
1388     MLX5_SET(rqc, rqc, ts_format, ts_format);
1389     MLX5_SET(rqc, rqc, flush_in_error_en, 1);
1390     MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
1391     MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
1392 
1393     if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
1394         MLX5_SET(rqc, rqc, scatter_fcs, 1);
1395 
1396     wq = MLX5_ADDR_OF(rqc, rqc, wq);
1397     MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1398     if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
1399         MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1400     MLX5_SET(wq, wq, page_offset, page_offset_quantized);
1401     MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1402     MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1403     MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
1404     MLX5_SET(wq, wq, log_wq_pg_sz,
1405          order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
1406     MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
1407 
1408     pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1409     mlx5_ib_populate_pas(umem, page_size, pas, 0);
1410 
1411     err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
1412 
1413     kvfree(in);
1414 
1415     return err;
1416 }
1417 
1418 static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1419                      struct mlx5_ib_rq *rq)
1420 {
1421     mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
1422 }
1423 
1424 static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1425                       struct mlx5_ib_rq *rq,
1426                       u32 qp_flags_en,
1427                       struct ib_pd *pd)
1428 {
1429     if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1430                MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1431         mlx5_ib_disable_lb(dev, false, true);
1432     mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
1433 }
1434 
1435 static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1436                     struct mlx5_ib_rq *rq, u32 tdn,
1437                     u32 *qp_flags_en, struct ib_pd *pd,
1438                     u32 *out)
1439 {
1440     u8 lb_flag = 0;
1441     u32 *in;
1442     void *tirc;
1443     int inlen;
1444     int err;
1445 
1446     inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1447     in = kvzalloc(inlen, GFP_KERNEL);
1448     if (!in)
1449         return -ENOMEM;
1450 
1451     MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1452     tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1453     MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
1454     MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
1455     MLX5_SET(tirc, tirc, transport_domain, tdn);
1456     if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1457         MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1458 
1459     if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
1460         lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1461 
1462     if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
1463         lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1464 
1465     if (dev->is_rep) {
1466         lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1467         *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1468     }
1469 
1470     MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1471     MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
1472     err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
1473     rq->tirn = MLX5_GET(create_tir_out, out, tirn);
1474     if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1475         err = mlx5_ib_enable_lb(dev, false, true);
1476 
1477         if (err)
1478             destroy_raw_packet_qp_tir(dev, rq, 0, pd);
1479     }
1480     kvfree(in);
1481 
1482     return err;
1483 }
1484 
1485 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1486                 u32 *in, size_t inlen, struct ib_pd *pd,
1487                 struct ib_udata *udata,
1488                 struct mlx5_ib_create_qp_resp *resp,
1489                 struct ib_qp_init_attr *init_attr)
1490 {
1491     struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1492     struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1493     struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1494     struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
1495         udata, struct mlx5_ib_ucontext, ibucontext);
1496     int err;
1497     u32 tdn = mucontext->tdn;
1498     u16 uid = to_mpd(pd)->uid;
1499     u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
1500 
1501     if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
1502         return -EINVAL;
1503     if (qp->sq.wqe_cnt) {
1504         err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
1505         if (err)
1506             return err;
1507 
1508         err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
1509                           to_mcq(init_attr->send_cq));
1510         if (err)
1511             goto err_destroy_tis;
1512 
1513         if (uid) {
1514             resp->tisn = sq->tisn;
1515             resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
1516             resp->sqn = sq->base.mqp.qpn;
1517             resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
1518         }
1519 
1520         sq->base.container_mibqp = qp;
1521         sq->base.mqp.event = mlx5_ib_qp_event;
1522     }
1523 
1524     if (qp->rq.wqe_cnt) {
1525         rq->base.container_mibqp = qp;
1526 
1527         if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
1528             rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
1529         if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
1530             rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
1531         err = create_raw_packet_qp_rq(dev, rq, in, pd,
1532                           to_mcq(init_attr->recv_cq));
1533         if (err)
1534             goto err_destroy_sq;
1535 
1536         err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
1537                            out);
1538         if (err)
1539             goto err_destroy_rq;
1540 
1541         if (uid) {
1542             resp->rqn = rq->base.mqp.qpn;
1543             resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
1544             resp->tirn = rq->tirn;
1545             resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1546             if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
1547                 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
1548                 resp->tir_icm_addr = MLX5_GET(
1549                     create_tir_out, out, icm_address_31_0);
1550                 resp->tir_icm_addr |=
1551                     (u64)MLX5_GET(create_tir_out, out,
1552                               icm_address_39_32)
1553                     << 32;
1554                 resp->tir_icm_addr |=
1555                     (u64)MLX5_GET(create_tir_out, out,
1556                               icm_address_63_40)
1557                     << 40;
1558                 resp->comp_mask |=
1559                     MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
1560             }
1561         }
1562     }
1563 
1564     qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
1565                              rq->base.mqp.qpn;
1566     return 0;
1567 
1568 err_destroy_rq:
1569     destroy_raw_packet_qp_rq(dev, rq);
1570 err_destroy_sq:
1571     if (!qp->sq.wqe_cnt)
1572         return err;
1573     destroy_raw_packet_qp_sq(dev, sq);
1574 err_destroy_tis:
1575     destroy_raw_packet_qp_tis(dev, sq, pd);
1576 
1577     return err;
1578 }
1579 
1580 static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
1581                   struct mlx5_ib_qp *qp)
1582 {
1583     struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1584     struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1585     struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1586 
1587     if (qp->rq.wqe_cnt) {
1588         destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
1589         destroy_raw_packet_qp_rq(dev, rq);
1590     }
1591 
1592     if (qp->sq.wqe_cnt) {
1593         destroy_raw_packet_qp_sq(dev, sq);
1594         destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
1595     }
1596 }
1597 
1598 static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1599                     struct mlx5_ib_raw_packet_qp *raw_packet_qp)
1600 {
1601     struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1602     struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1603 
1604     sq->sq = &qp->sq;
1605     rq->rq = &qp->rq;
1606     sq->doorbell = &qp->db;
1607     rq->doorbell = &qp->db;
1608 }
1609 
1610 static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1611 {
1612     if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1613                 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1614         mlx5_ib_disable_lb(dev, false, true);
1615     mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1616                  to_mpd(qp->ibqp.pd)->uid);
1617 }
1618 
1619 struct mlx5_create_qp_params {
1620     struct ib_udata *udata;
1621     size_t inlen;
1622     size_t outlen;
1623     size_t ucmd_size;
1624     void *ucmd;
1625     u8 is_rss_raw : 1;
1626     struct ib_qp_init_attr *attr;
1627     u32 uidx;
1628     struct mlx5_ib_create_qp_resp resp;
1629 };
1630 
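/*
 * An RSS raw QP is realised purely as a TIR pointing at the user's RWQ
 * indirection table.  The RX hash function, key and selected L3/L4 fields
 * come from the user command and are validated here (only Toeplitz is
 * accepted; IPv4 vs. IPv6 and TCP vs. UDP selections are mutually
 * exclusive) before the CREATE_TIR command is issued.
 */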
1631 static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1632                  struct mlx5_ib_qp *qp,
1633                  struct mlx5_create_qp_params *params)
1634 {
1635     struct ib_qp_init_attr *init_attr = params->attr;
1636     struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
1637     struct ib_udata *udata = params->udata;
1638     struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
1639         udata, struct mlx5_ib_ucontext, ibucontext);
1640     int inlen;
1641     int outlen;
1642     int err;
1643     u32 *in;
1644     u32 *out;
1645     void *tirc;
1646     void *hfso;
1647     u32 selected_fields = 0;
1648     u32 outer_l4;
1649     u32 tdn = mucontext->tdn;
1650     u8 lb_flag = 0;
1651 
1652     if (ucmd->comp_mask) {
1653         mlx5_ib_dbg(dev, "invalid comp mask\n");
1654         return -EOPNOTSUPP;
1655     }
1656 
1657     if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
1658         !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
1659         mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
1660         return -EOPNOTSUPP;
1661     }
1662 
1663     if (dev->is_rep)
1664         qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1665 
1666     if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
1667         lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1668 
1669     if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
1670         lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1671 
1672     inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1673     outlen = MLX5_ST_SZ_BYTES(create_tir_out);
1674     in = kvzalloc(inlen + outlen, GFP_KERNEL);
1675     if (!in)
1676         return -ENOMEM;
1677 
1678     out = in + MLX5_ST_SZ_DW(create_tir_in);
1679     MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1680     tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1681     MLX5_SET(tirc, tirc, disp_type,
1682          MLX5_TIRC_DISP_TYPE_INDIRECT);
1683     MLX5_SET(tirc, tirc, indirect_table,
1684          init_attr->rwq_ind_tbl->ind_tbl_num);
1685     MLX5_SET(tirc, tirc, transport_domain, tdn);
1686 
1687     hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1688 
1689     if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1690         MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1691 
1692     MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1693 
1694     if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
1695         hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
1696     else
1697         hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1698 
1699     switch (ucmd->rx_hash_function) {
1700     case MLX5_RX_HASH_FUNC_TOEPLITZ:
1701     {
1702         void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1703         size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
1704 
1705         if (len != ucmd->rx_key_len) {
1706             err = -EINVAL;
1707             goto err;
1708         }
1709 
1710         MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1711         memcpy(rss_key, ucmd->rx_hash_key, len);
1712         break;
1713     }
1714     default:
1715         err = -EOPNOTSUPP;
1716         goto err;
1717     }
1718 
1719     if (!ucmd->rx_hash_fields_mask) {
1720         /* Special case: this TIR serves as a steering entry without hashing */
1721         if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
1722             goto create_tir;
1723         err = -EINVAL;
1724         goto err;
1725     }
1726 
1727     if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1728          (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
1729          ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1730          (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
1731         err = -EINVAL;
1732         goto err;
1733     }
1734 
1735     /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
1736     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1737         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
1738         MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1739              MLX5_L3_PROT_TYPE_IPV4);
1740     else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1741          (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1742         MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1743              MLX5_L3_PROT_TYPE_IPV6);
1744 
1745     outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1746             (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1747                << 0 |
1748            ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1749             (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1750                << 1 |
1751            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
1752 
1753     /* Check that only one l4 protocol is set */
1754     if (outer_l4 & (outer_l4 - 1)) {
1755         err = -EINVAL;
1756         goto err;
1757     }
1758 
1759     /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
1760     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1761         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1762         MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1763              MLX5_L4_PROT_TYPE_TCP);
1764     else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1765          (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1766         MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1767              MLX5_L4_PROT_TYPE_UDP);
1768 
1769     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1770         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
1771         selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
1772 
1773     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
1774         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1775         selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
1776 
1777     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1778         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
1779         selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
1780 
1781     if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
1782         (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1783         selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
1784 
1785     if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
1786         selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
1787 
1788     MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
1789 
1790 create_tir:
1791     MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
1792     err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
1793 
1794     qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
1795     if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1796         err = mlx5_ib_enable_lb(dev, false, true);
1797 
1798         if (err)
1799             mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1800                          to_mpd(pd)->uid);
1801     }
1802 
1803     if (err)
1804         goto err;
1805 
1806     if (mucontext->devx_uid) {
1807         params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1808         params->resp.tirn = qp->rss_qp.tirn;
1809         if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
1810             MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
1811             params->resp.tir_icm_addr =
1812                 MLX5_GET(create_tir_out, out, icm_address_31_0);
1813             params->resp.tir_icm_addr |=
1814                 (u64)MLX5_GET(create_tir_out, out,
1815                           icm_address_39_32)
1816                 << 32;
1817             params->resp.tir_icm_addr |=
1818                 (u64)MLX5_GET(create_tir_out, out,
1819                           icm_address_63_40)
1820                 << 40;
1821             params->resp.comp_mask |=
1822                 MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
1823         }
1824     }
1825 
1826     kvfree(in);
1827     /* qpn is reserved for that QP */
1828     qp->trans_qp.base.mqp.qpn = 0;
1829     qp->is_rss = true;
1830     return 0;
1831 
1832 err:
1833     kvfree(in);
1834     return err;
1835 }
1836 
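/*
 * Requester-side scatter to CQE: choose 64B or 32B scatter based on the
 * send CQ's CQE size.  Skipped unless the QP explicitly allows scatter to
 * CQE or signals all work requests.
 */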
1837 static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
1838                      struct mlx5_ib_qp *qp,
1839                      struct ib_qp_init_attr *init_attr,
1840                      void *qpc)
1841 {
1842     int scqe_sz;
1843     bool allow_scat_cqe = false;
1844 
1845     allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
1846 
1847     if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
1848         return;
1849 
1850     scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
1851     if (scqe_sz == 128) {
1852         MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
1853         return;
1854     }
1855 
1856     if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
1857         MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
1858         MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
1859 }
1860 
1861 static int atomic_size_to_mode(int size_mask)
1862 {
1863     /* driver does not support atomic_size > 256B
1864      * and does not know how to translate bigger sizes
1865      */
1866     int supported_size_mask = size_mask & 0x1ff;
1867     int log_max_size;
1868 
1869     if (!supported_size_mask)
1870         return -EOPNOTSUPP;
1871 
1872     log_max_size = __fls(supported_size_mask);
1873 
1874     if (log_max_size > 3)
1875         return log_max_size;
1876 
1877     return MLX5_ATOMIC_MODE_8B;
1878 }
1879 
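/*
 * Derive the atomic mode from the device capabilities: prefer the
 * size-based modes when extended compare-swap/fetch-add are supported,
 * otherwise fall back to the IB-spec compliant mode when plain
 * compare-swap and fetch-add are both available.
 */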
1880 static int get_atomic_mode(struct mlx5_ib_dev *dev,
1881                enum ib_qp_type qp_type)
1882 {
1883     u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
1884     u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
1885     int atomic_mode = -EOPNOTSUPP;
1886     int atomic_size_mask;
1887 
1888     if (!atomic)
1889         return -EOPNOTSUPP;
1890 
1891     if (qp_type == MLX5_IB_QPT_DCT)
1892         atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
1893     else
1894         atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
1895 
1896     if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
1897         (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
1898         atomic_mode = atomic_size_to_mode(atomic_size_mask);
1899 
1900     if (atomic_mode <= 0 &&
1901         (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
1902          atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
1903         atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
1904 
1905     return atomic_mode;
1906 }
1907 
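/*
 * XRC target QPs have no SQ and take their PD, CQ and SRQ from the
 * device's default resources; only the XRCD comes from the caller.  ECE
 * options are returned to user space when the device supports them.
 */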
1908 static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1909                  struct mlx5_create_qp_params *params)
1910 {
1911     struct ib_qp_init_attr *attr = params->attr;
1912     u32 uidx = params->uidx;
1913     struct mlx5_ib_resources *devr = &dev->devr;
1914     u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
1915     int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1916     struct mlx5_core_dev *mdev = dev->mdev;
1917     struct mlx5_ib_qp_base *base;
1918     unsigned long flags;
1919     void *qpc;
1920     u32 *in;
1921     int err;
1922 
1923     if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1924         qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1925 
1926     in = kvzalloc(inlen, GFP_KERNEL);
1927     if (!in)
1928         return -ENOMEM;
1929 
1930     qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1931 
1932     MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
1933     MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
1934     MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);
1935 
1936     if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
1937         MLX5_SET(qpc, qpc, block_lb_mc, 1);
1938     if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
1939         MLX5_SET(qpc, qpc, cd_master, 1);
1940     if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
1941         MLX5_SET(qpc, qpc, cd_slave_send, 1);
1942     if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
1943         MLX5_SET(qpc, qpc, cd_slave_receive, 1);
1944 
1945     MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
1946     MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
1947     MLX5_SET(qpc, qpc, no_sq, 1);
1948     MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
1949     MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
1950     MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
1951     MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
1952     MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
1953 
1954     /* 0xffffff means we ask to work with cqe version 0 */
1955     if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
1956         MLX5_SET(qpc, qpc, user_index, uidx);
1957 
1958     if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
1959         MLX5_SET(qpc, qpc, end_padding_mode,
1960              MLX5_WQ_END_PAD_MODE_ALIGN);
1961         /* Special case to clean flag */
1962         qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
1963     }
1964 
1965     base = &qp->trans_qp.base;
1966     err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
1967     kvfree(in);
1968     if (err)
1969         return err;
1970 
1971     base->container_mibqp = qp;
1972     base->mqp.event = mlx5_ib_qp_event;
1973     if (MLX5_CAP_GEN(mdev, ece_support))
1974         params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
1975 
1976     spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1977     list_add_tail(&qp->qps_list, &dev->qp_list);
1978     spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1979 
1980     qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
1981     return 0;
1982 }
1983 
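/*
 * DCI creation follows the same flow as a regular user QP (user WQ
 * buffers, QPC setup, CQ list bookkeeping) plus the DC-specific extras
 * such as the optional DCI stream channel configuration.
 */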
1984 static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1985               struct mlx5_ib_qp *qp,
1986               struct mlx5_create_qp_params *params)
1987 {
1988     struct ib_qp_init_attr *init_attr = params->attr;
1989     struct mlx5_ib_create_qp *ucmd = params->ucmd;
1990     u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
1991     struct ib_udata *udata = params->udata;
1992     u32 uidx = params->uidx;
1993     struct mlx5_ib_resources *devr = &dev->devr;
1994     int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1995     struct mlx5_core_dev *mdev = dev->mdev;
1996     struct mlx5_ib_cq *send_cq;
1997     struct mlx5_ib_cq *recv_cq;
1998     unsigned long flags;
1999     struct mlx5_ib_qp_base *base;
2000     int ts_format;
2001     int mlx5_st;
2002     void *qpc;
2003     u32 *in;
2004     int err;
2005 
2006     spin_lock_init(&qp->sq.lock);
2007     spin_lock_init(&qp->rq.lock);
2008 
2009     mlx5_st = to_mlx5_st(qp->type);
2010     if (mlx5_st < 0)
2011         return -EINVAL;
2012 
2013     if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2014         qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2015 
2016     base = &qp->trans_qp.base;
2017 
2018     qp->has_rq = qp_has_rq(init_attr);
2019     err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
2020     if (err) {
2021         mlx5_ib_dbg(dev, "err %d\n", err);
2022         return err;
2023     }
2024 
2025     if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
2026         ucmd->rq_wqe_count != qp->rq.wqe_cnt)
2027         return -EINVAL;
2028 
2029     if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
2030         return -EINVAL;
2031 
2032     ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
2033                      to_mcq(init_attr->recv_cq));
2034 
2035     if (ts_format < 0)
2036         return ts_format;
2037 
2038     err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
2039                   &inlen, base, ucmd);
2040     if (err)
2041         return err;
2042 
2043     if (MLX5_CAP_GEN(mdev, ece_support))
2044         MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
2045     qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2046 
2047     MLX5_SET(qpc, qpc, st, mlx5_st);
2048     MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2049     MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
2050 
2051     if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
2052         MLX5_SET(qpc, qpc, wq_signature, 1);
2053 
2054     if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
2055         MLX5_SET(qpc, qpc, cd_master, 1);
2056     if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
2057         MLX5_SET(qpc, qpc, cd_slave_send, 1);
2058     if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
2059         configure_requester_scat_cqe(dev, qp, init_attr, qpc);
2060 
2061     if (qp->rq.wqe_cnt) {
2062         MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2063         MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2064     }
2065 
2066     if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
2067         MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
2068              ucmd->dci_streams.log_num_concurent);
2069         MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
2070              ucmd->dci_streams.log_num_errored);
2071     }
2072 
2073     MLX5_SET(qpc, qpc, ts_format, ts_format);
2074     MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
2075 
2076     MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2077 
2078     /* Set default resources */
2079     if (init_attr->srq) {
2080         MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2081         MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2082              to_msrq(init_attr->srq)->msrq.srqn);
2083     } else {
2084         MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2085         MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2086              to_msrq(devr->s1)->msrq.srqn);
2087     }
2088 
2089     if (init_attr->send_cq)
2090         MLX5_SET(qpc, qpc, cqn_snd,
2091              to_mcq(init_attr->send_cq)->mcq.cqn);
2092 
2093     if (init_attr->recv_cq)
2094         MLX5_SET(qpc, qpc, cqn_rcv,
2095              to_mcq(init_attr->recv_cq)->mcq.cqn);
2096 
2097     MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2098 
2099     /* 0xffffff means we ask to work with cqe version 0 */
2100     if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2101         MLX5_SET(qpc, qpc, user_index, uidx);
2102 
2103     if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
2104         MLX5_SET(qpc, qpc, end_padding_mode,
2105              MLX5_WQ_END_PAD_MODE_ALIGN);
2106         /* Special case to clean flag */
2107         qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
2108     }
2109 
2110     err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2111 
2112     kvfree(in);
2113     if (err)
2114         goto err_create;
2115 
2116     base->container_mibqp = qp;
2117     base->mqp.event = mlx5_ib_qp_event;
2118     if (MLX5_CAP_GEN(mdev, ece_support))
2119         params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
2120 
2121     get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
2122         &send_cq, &recv_cq);
2123     spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2124     mlx5_ib_lock_cqs(send_cq, recv_cq);
2125     /* Maintain device-to-QP access, needed for further handling via the
2126      * reset flow
2127      */
2128     list_add_tail(&qp->qps_list, &dev->qp_list);
2129     /* Maintain CQ-to-QP access, needed for further handling via the
2130      * reset flow */
2131     if (send_cq)
2132         list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2133     if (recv_cq)
2134         list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2135     mlx5_ib_unlock_cqs(send_cq, recv_cq);
2136     spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2137 
2138     return 0;
2139 
2140 err_create:
2141     destroy_qp(dev, qp, base, udata);
2142     return err;
2143 }
2144 
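/*
 * Common creation path for user-space QPs: the WQ buffers and doorbell
 * come from the user command via _create_user_qp().  Raw packet QPs and
 * underlay (SOURCE_QPN) QPs are then materialised through
 * create_raw_packet_qp(); everything else goes through a CREATE_QP
 * command.
 */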
2145 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2146               struct mlx5_ib_qp *qp,
2147               struct mlx5_create_qp_params *params)
2148 {
2149     struct ib_qp_init_attr *init_attr = params->attr;
2150     struct mlx5_ib_create_qp *ucmd = params->ucmd;
2151     u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
2152     struct ib_udata *udata = params->udata;
2153     u32 uidx = params->uidx;
2154     struct mlx5_ib_resources *devr = &dev->devr;
2155     int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
2156     struct mlx5_core_dev *mdev = dev->mdev;
2157     struct mlx5_ib_cq *send_cq;
2158     struct mlx5_ib_cq *recv_cq;
2159     unsigned long flags;
2160     struct mlx5_ib_qp_base *base;
2161     int ts_format;
2162     int mlx5_st;
2163     void *qpc;
2164     u32 *in;
2165     int err;
2166 
2167     spin_lock_init(&qp->sq.lock);
2168     spin_lock_init(&qp->rq.lock);
2169 
2170     mlx5_st = to_mlx5_st(qp->type);
2171     if (mlx5_st < 0)
2172         return -EINVAL;
2173 
2174     if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2175         qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2176 
2177     if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
2178         qp->underlay_qpn = init_attr->source_qpn;
2179 
2180     base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
2181         qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
2182            &qp->raw_packet_qp.rq.base :
2183            &qp->trans_qp.base;
2184 
2185     qp->has_rq = qp_has_rq(init_attr);
2186     err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
2187     if (err) {
2188         mlx5_ib_dbg(dev, "err %d\n", err);
2189         return err;
2190     }
2191 
2192     if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
2193         ucmd->rq_wqe_count != qp->rq.wqe_cnt)
2194         return -EINVAL;
2195 
2196     if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
2197         return -EINVAL;
2198 
2199     if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
2200         ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
2201                          to_mcq(init_attr->recv_cq));
2202         if (ts_format < 0)
2203             return ts_format;
2204     }
2205 
2206     err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
2207                   &inlen, base, ucmd);
2208     if (err)
2209         return err;
2210 
2211     if (is_sqp(init_attr->qp_type))
2212         qp->port = init_attr->port_num;
2213 
2214     if (MLX5_CAP_GEN(mdev, ece_support))
2215         MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
2216     qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2217 
2218     MLX5_SET(qpc, qpc, st, mlx5_st);
2219     MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2220     MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
2221 
2222     if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
2223         MLX5_SET(qpc, qpc, wq_signature, 1);
2224 
2225     if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
2226         MLX5_SET(qpc, qpc, block_lb_mc, 1);
2227 
2228     if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
2229         MLX5_SET(qpc, qpc, cd_master, 1);
2230     if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
2231         MLX5_SET(qpc, qpc, cd_slave_send, 1);
2232     if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
2233         MLX5_SET(qpc, qpc, cd_slave_receive, 1);
2234     if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
2235         MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
2236     if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
2237         (init_attr->qp_type == IB_QPT_RC ||
2238          init_attr->qp_type == IB_QPT_UC)) {
2239         int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
2240 
2241         MLX5_SET(qpc, qpc, cs_res,
2242              rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
2243                       MLX5_RES_SCAT_DATA32_CQE);
2244     }
2245     if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
2246         (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
2247         configure_requester_scat_cqe(dev, qp, init_attr, qpc);
2248 
2249     if (qp->rq.wqe_cnt) {
2250         MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2251         MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2252     }
2253 
2254     if (init_attr->qp_type != IB_QPT_RAW_PACKET)
2255         MLX5_SET(qpc, qpc, ts_format, ts_format);
2256 
2257     MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
2258 
2259     if (qp->sq.wqe_cnt) {
2260         MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2261     } else {
2262         MLX5_SET(qpc, qpc, no_sq, 1);
2263         if (init_attr->srq &&
2264             init_attr->srq->srq_type == IB_SRQT_TM)
2265             MLX5_SET(qpc, qpc, offload_type,
2266                  MLX5_QPC_OFFLOAD_TYPE_RNDV);
2267     }
2268 
2269     /* Set default resources */
2270     switch (init_attr->qp_type) {
2271     case IB_QPT_XRC_INI:
2272         MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
2273         MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2274         MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
2275         break;
2276     default:
2277         if (init_attr->srq) {
2278             MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2279             MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
2280         } else {
2281             MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2282             MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
2283         }
2284     }
2285 
2286     if (init_attr->send_cq)
2287         MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
2288 
2289     if (init_attr->recv_cq)
2290         MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
2291 
2292     MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2293 
2294     /* 0xffffff means we ask to work with cqe version 0 */
2295     if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2296         MLX5_SET(qpc, qpc, user_index, uidx);
2297 
2298     if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
2299         init_attr->qp_type != IB_QPT_RAW_PACKET) {
2300         MLX5_SET(qpc, qpc, end_padding_mode,
2301              MLX5_WQ_END_PAD_MODE_ALIGN);
2302         /* Special case to clean flag */
2303         qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
2304     }
2305 
2306     if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
2307         qp->flags & IB_QP_CREATE_SOURCE_QPN) {
2308         qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
2309         raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
2310         err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
2311                        &params->resp, init_attr);
2312     } else
2313         err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2314 
2315     kvfree(in);
2316     if (err)
2317         goto err_create;
2318 
2319     base->container_mibqp = qp;
2320     base->mqp.event = mlx5_ib_qp_event;
2321     if (MLX5_CAP_GEN(mdev, ece_support))
2322         params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
2323 
2324     get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
2325         &send_cq, &recv_cq);
2326     spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2327     mlx5_ib_lock_cqs(send_cq, recv_cq);
2328     /* Maintain device-to-QP access, needed for further handling via the
2329      * reset flow
2330      */
2331     list_add_tail(&qp->qps_list, &dev->qp_list);
2332     /* Maintain CQ-to-QP access, needed for further handling via the
2333      * reset flow */
2334     if (send_cq)
2335         list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2336     if (recv_cq)
2337         list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2338     mlx5_ib_unlock_cqs(send_cq, recv_cq);
2339     spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2340 
2341     return 0;
2342 
2343 err_create:
2344     destroy_qp(dev, qp, base, udata);
2345     return err;
2346 }
2347 
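/*
 * Kernel QPs allocate their WQ buffers in the kernel and carry no user
 * command; the UMR QP additionally skips the PD setting and is marked
 * latency sensitive instead.
 */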
2348 static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2349                 struct mlx5_ib_qp *qp,
2350                 struct mlx5_create_qp_params *params)
2351 {
2352     struct ib_qp_init_attr *attr = params->attr;
2353     u32 uidx = params->uidx;
2354     struct mlx5_ib_resources *devr = &dev->devr;
2355     u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
2356     int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
2357     struct mlx5_core_dev *mdev = dev->mdev;
2358     struct mlx5_ib_cq *send_cq;
2359     struct mlx5_ib_cq *recv_cq;
2360     unsigned long flags;
2361     struct mlx5_ib_qp_base *base;
2362     int mlx5_st;
2363     void *qpc;
2364     u32 *in;
2365     int err;
2366 
2367     spin_lock_init(&qp->sq.lock);
2368     spin_lock_init(&qp->rq.lock);
2369 
2370     mlx5_st = to_mlx5_st(qp->type);
2371     if (mlx5_st < 0)
2372         return -EINVAL;
2373 
2374     if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2375         qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2376 
2377     base = &qp->trans_qp.base;
2378 
2379     qp->has_rq = qp_has_rq(attr);
2380     err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
2381     if (err) {
2382         mlx5_ib_dbg(dev, "err %d\n", err);
2383         return err;
2384     }
2385 
2386     err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
2387     if (err)
2388         return err;
2389 
2390     if (is_sqp(attr->qp_type))
2391         qp->port = attr->port_num;
2392 
2393     qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2394 
2395     MLX5_SET(qpc, qpc, st, mlx5_st);
2396     MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2397 
2398     if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
2399         MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
2400     else
2401         MLX5_SET(qpc, qpc, latency_sensitive, 1);
2402 
2403 
2404     if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
2405         MLX5_SET(qpc, qpc, block_lb_mc, 1);
2406 
2407     if (qp->rq.wqe_cnt) {
2408         MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2409         MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2410     }
2411 
2412     MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));
2413 
2414     if (qp->sq.wqe_cnt)
2415         MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2416     else
2417         MLX5_SET(qpc, qpc, no_sq, 1);
2418 
2419     if (attr->srq) {
2420         MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2421         MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2422              to_msrq(attr->srq)->msrq.srqn);
2423     } else {
2424         MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2425         MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2426              to_msrq(devr->s1)->msrq.srqn);
2427     }
2428 
2429     if (attr->send_cq)
2430         MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);
2431 
2432     if (attr->recv_cq)
2433         MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);
2434 
2435     MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2436 
2437     /* 0xffffff means we ask to work with cqe version 0 */
2438     if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2439         MLX5_SET(qpc, qpc, user_index, uidx);
2440 
2441     /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an IPoIB QP */
2442     if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
2443         MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
2444 
2445     err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2446     kvfree(in);
2447     if (err)
2448         goto err_create;
2449 
2450     base->container_mibqp = qp;
2451     base->mqp.event = mlx5_ib_qp_event;
2452 
2453     get_cqs(qp->type, attr->send_cq, attr->recv_cq,
2454         &send_cq, &recv_cq);
2455     spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2456     mlx5_ib_lock_cqs(send_cq, recv_cq);
2457     /* Maintain device-to-QP access, needed for further handling via the
2458      * reset flow
2459      */
2460     list_add_tail(&qp->qps_list, &dev->qp_list);
2461     /* Maintain CQ-to-QP access, needed for further handling via the
2462      * reset flow */
2463     if (send_cq)
2464         list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2465     if (recv_cq)
2466         list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2467     mlx5_ib_unlock_cqs(send_cq, recv_cq);
2468     spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2469 
2470     return 0;
2471 
2472 err_create:
2473     destroy_qp(dev, qp, base, NULL);
2474     return err;
2475 }
2476 
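/*
 * Take both CQ locks in a consistent order (lowest CQN first) so that
 * concurrent create/destroy paths cannot deadlock.  The __acquire()
 * annotations keep sparse happy when a CQ is absent or when send and
 * receive share the same CQ.
 */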
2477 static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
2478     __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
2479 {
2480     if (send_cq) {
2481         if (recv_cq) {
2482             if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
2483                 spin_lock(&send_cq->lock);
2484                 spin_lock_nested(&recv_cq->lock,
2485                          SINGLE_DEPTH_NESTING);
2486             } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
2487                 spin_lock(&send_cq->lock);
2488                 __acquire(&recv_cq->lock);
2489             } else {
2490                 spin_lock(&recv_cq->lock);
2491                 spin_lock_nested(&send_cq->lock,
2492                          SINGLE_DEPTH_NESTING);
2493             }
2494         } else {
2495             spin_lock(&send_cq->lock);
2496             __acquire(&recv_cq->lock);
2497         }
2498     } else if (recv_cq) {
2499         spin_lock(&recv_cq->lock);
2500         __acquire(&send_cq->lock);
2501     } else {
2502         __acquire(&send_cq->lock);
2503         __acquire(&recv_cq->lock);
2504     }
2505 }
2506 
2507 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
2508     __releases(&send_cq->lock) __releases(&recv_cq->lock)
2509 {
2510     if (send_cq) {
2511         if (recv_cq) {
2512             if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
2513                 spin_unlock(&recv_cq->lock);
2514                 spin_unlock(&send_cq->lock);
2515             } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
2516                 __release(&recv_cq->lock);
2517                 spin_unlock(&send_cq->lock);
2518             } else {
2519                 spin_unlock(&send_cq->lock);
2520                 spin_unlock(&recv_cq->lock);
2521             }
2522         } else {
2523             __release(&recv_cq->lock);
2524             spin_unlock(&send_cq->lock);
2525         }
2526     } else if (recv_cq) {
2527         __release(&send_cq->lock);
2528         spin_unlock(&recv_cq->lock);
2529     } else {
2530         __release(&recv_cq->lock);
2531         __release(&send_cq->lock);
2532     }
2533 }
2534 
2535 static void get_cqs(enum ib_qp_type qp_type,
2536             struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
2537             struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
2538 {
2539     switch (qp_type) {
2540     case IB_QPT_XRC_TGT:
2541         *send_cq = NULL;
2542         *recv_cq = NULL;
2543         break;
2544     case MLX5_IB_QPT_REG_UMR:
2545     case IB_QPT_XRC_INI:
2546         *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
2547         *recv_cq = NULL;
2548         break;
2549 
2550     case IB_QPT_SMI:
2551     case MLX5_IB_QPT_HW_GSI:
2552     case IB_QPT_RC:
2553     case IB_QPT_UC:
2554     case IB_QPT_UD:
2555     case IB_QPT_RAW_PACKET:
2556         *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
2557         *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
2558         break;
2559     default:
2560         *send_cq = NULL;
2561         *recv_cq = NULL;
2562         break;
2563     }
2564 }
2565 
2566 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2567                 const struct mlx5_modify_raw_qp_param *raw_qp_param,
2568                 u8 lag_tx_affinity);
2569 
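/*
 * Common teardown: move the QP to RESET if needed, unlink it from the
 * device and CQ lists under the reset-flow locks, clean residual CQEs for
 * kernel QPs, then destroy the hardware object and free its buffers.
 */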
2570 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2571                   struct ib_udata *udata)
2572 {
2573     struct mlx5_ib_cq *send_cq, *recv_cq;
2574     struct mlx5_ib_qp_base *base;
2575     unsigned long flags;
2576     int err;
2577 
2578     if (qp->is_rss) {
2579         destroy_rss_raw_qp_tir(dev, qp);
2580         return;
2581     }
2582 
2583     base = (qp->type == IB_QPT_RAW_PACKET ||
2584         qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
2585                &qp->raw_packet_qp.rq.base :
2586                &qp->trans_qp.base;
2587 
2588     if (qp->state != IB_QPS_RESET) {
2589         if (qp->type != IB_QPT_RAW_PACKET &&
2590             !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
2591             err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
2592                           NULL, &base->mqp, NULL);
2593         } else {
2594             struct mlx5_modify_raw_qp_param raw_qp_param = {
2595                 .operation = MLX5_CMD_OP_2RST_QP
2596             };
2597 
2598             err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
2599         }
2600         if (err)
2601             mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
2602                      base->mqp.qpn);
2603     }
2604 
2605     get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
2606         &recv_cq);
2607 
2608     spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2609     mlx5_ib_lock_cqs(send_cq, recv_cq);
2610     /* del from lists under both locks above to protect reset flow paths */
2611     list_del(&qp->qps_list);
2612     if (send_cq)
2613         list_del(&qp->cq_send_list);
2614 
2615     if (recv_cq)
2616         list_del(&qp->cq_recv_list);
2617 
2618     if (!udata) {
2619         __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
2620                    qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
2621         if (send_cq != recv_cq)
2622             __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
2623                        NULL);
2624     }
2625     mlx5_ib_unlock_cqs(send_cq, recv_cq);
2626     spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2627 
2628     if (qp->type == IB_QPT_RAW_PACKET ||
2629         qp->flags & IB_QP_CREATE_SOURCE_QPN) {
2630         destroy_raw_packet_qp(dev, qp);
2631     } else {
2632         err = mlx5_core_destroy_qp(dev, &base->mqp);
2633         if (err)
2634             mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
2635                      base->mqp.qpn);
2636     }
2637 
2638     destroy_qp(dev, qp, base, udata);
2639 }
2640 
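/*
 * A DCT is not created in firmware at this point; the CREATE_DCT mailbox
 * is only prepared and stashed in qp->dct.in, to be executed later when
 * the QP is modified to RTR and the remaining parameters are known.
 */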
2641 static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2642               struct mlx5_ib_qp *qp,
2643               struct mlx5_create_qp_params *params)
2644 {
2645     struct ib_qp_init_attr *attr = params->attr;
2646     struct mlx5_ib_create_qp *ucmd = params->ucmd;
2647     u32 uidx = params->uidx;
2648     void *dctc;
2649 
2650     if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
2651         return -EOPNOTSUPP;
2652 
2653     qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
2654     if (!qp->dct.in)
2655         return -ENOMEM;
2656 
2657     MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
2658     dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
2659     MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
2660     MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
2661     MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
2662     MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
2663     MLX5_SET(dctc, dctc, user_index, uidx);
2664     if (MLX5_CAP_GEN(dev->mdev, ece_support))
2665         MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
2666 
2667     if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
2668         int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);
2669 
2670         if (rcqe_sz == 128)
2671             MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
2672     }
2673 
2674     qp->state = IB_QPS_RESET;
2675     return 0;
2676 }
2677 
2678 static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
2679              enum ib_qp_type *type)
2680 {
2681     if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
2682         goto out;
2683 
2684     switch (attr->qp_type) {
2685     case IB_QPT_XRC_TGT:
2686     case IB_QPT_XRC_INI:
2687         if (!MLX5_CAP_GEN(dev->mdev, xrc))
2688             goto out;
2689         fallthrough;
2690     case IB_QPT_RC:
2691     case IB_QPT_UC:
2692     case IB_QPT_SMI:
2693     case MLX5_IB_QPT_HW_GSI:
2694     case IB_QPT_DRIVER:
2695     case IB_QPT_GSI:
2696     case IB_QPT_RAW_PACKET:
2697     case IB_QPT_UD:
2698     case MLX5_IB_QPT_REG_UMR:
2699         break;
2700     default:
2701         goto out;
2702     }
2703 
2704     *type = attr->qp_type;
2705     return 0;
2706 
2707 out:
2708     mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
2709     return -EOPNOTSUPP;
2710 }
2711 
2712 static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2713                 struct ib_qp_init_attr *attr,
2714                 struct ib_udata *udata)
2715 {
2716     struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
2717         udata, struct mlx5_ib_ucontext, ibucontext);
2718 
2719     if (!udata) {
2720         /* Kernel create_qp callers */
2721         if (attr->rwq_ind_tbl)
2722             return -EOPNOTSUPP;
2723 
2724         switch (attr->qp_type) {
2725         case IB_QPT_RAW_PACKET:
2726         case IB_QPT_DRIVER:
2727             return -EOPNOTSUPP;
2728         default:
2729             return 0;
2730         }
2731     }
2732 
2733     /* Userspace create_qp callers */
2734     if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
2735         mlx5_ib_dbg(dev,
2736             "Raw Packet QP is only supported for CQE version > 0\n");
2737         return -EINVAL;
2738     }
2739 
2740     if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
2741         mlx5_ib_dbg(dev,
2742                 "Wrong QP type %d for the RWQ indirect table\n",
2743                 attr->qp_type);
2744         return -EINVAL;
2745     }
2746 
2747     /*
2748      * We don't need to see this warning; it means that kernel code is
2749      * missing an ib_pd. Placed here to catch developers' mistakes.
2750      */
2751     WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
2752           "There is a missing PD pointer assignment\n");
2753     return 0;
2754 }
2755 
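/*
 * Vendor (mlx5-specific) create flags arrive in the user command.  A flag
 * is consumed only when the matching device capability is present; any
 * flag still set afterwards fails the create with -EINVAL, except for the
 * scatter-to-CQE flags, which are silently dropped.
 */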
2756 static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
2757                 bool cond, struct mlx5_ib_qp *qp)
2758 {
2759     if (!(*flags & flag))
2760         return;
2761 
2762     if (cond) {
2763         qp->flags_en |= flag;
2764         *flags &= ~flag;
2765         return;
2766     }
2767 
2768     switch (flag) {
2769     case MLX5_QP_FLAG_SCATTER_CQE:
2770     case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
2771         /*
2772          * We don't return an error if these flags were provided but
2773          * mlx5 doesn't have the right capability.
2774          */
2775         *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
2776                 MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
2777         return;
2778     default:
2779         break;
2780     }
2781     mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
2782 }
2783 
2784 static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2785                 void *ucmd, struct ib_qp_init_attr *attr)
2786 {
2787     struct mlx5_core_dev *mdev = dev->mdev;
2788     bool cond;
2789     int flags;
2790 
2791     if (attr->rwq_ind_tbl)
2792         flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
2793     else
2794         flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;
2795 
2796     switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
2797     case MLX5_QP_FLAG_TYPE_DCI:
2798         qp->type = MLX5_IB_QPT_DCI;
2799         break;
2800     case MLX5_QP_FLAG_TYPE_DCT:
2801         qp->type = MLX5_IB_QPT_DCT;
2802         break;
2803     default:
2804         if (qp->type != IB_QPT_DRIVER)
2805             break;
2806         /*
2807          * It is IB_QPT_DRIVER and either no subtype or a
2808          * wrong subtype was provided.
2809          */
2810         return -EINVAL;
2811     }
2812 
2813     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
2814     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
2815     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
2816                 MLX5_CAP_GEN(mdev, log_max_dci_stream_channels),
2817                 qp);
2818 
2819     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
2820     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
2821                 MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
2822     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
2823                 MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
2824 
2825     if (qp->type == IB_QPT_RAW_PACKET) {
2826         cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
2827                MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
2828                MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
2829         process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
2830                     cond, qp);
2831         process_vendor_flag(dev, &flags,
2832                     MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
2833                     qp);
2834         process_vendor_flag(dev, &flags,
2835                     MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
2836                     qp);
2837     }
2838 
2839     if (qp->type == IB_QPT_RC)
2840         process_vendor_flag(dev, &flags,
2841                     MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
2842                     MLX5_CAP_GEN(mdev, qp_packet_based), qp);
2843 
2844     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
2845     process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);
2846 
2847     cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
2848                 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
2849                 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
2850     if (attr->rwq_ind_tbl && cond) {
2851         mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
2852                 cond);
2853         return -EINVAL;
2854     }
2855 
2856     if (flags)
2857         mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);
2858 
2859     return (flags) ? -EINVAL : 0;
2860 }
2861 
2862 static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
2863                 bool cond, struct mlx5_ib_qp *qp)
2864 {
2865     if (!(*flags & flag))
2866         return;
2867 
2868     if (cond) {
2869         qp->flags |= flag;
2870         *flags &= ~flag;
2871         return;
2872     }
2873 
2874     if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
2875         /*
2876          * Special case: if the condition isn't met it is not an error,
2877          * just a different in-kernel flow.
2878          */
2879         *flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
2880         return;
2881     }
2882     mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
2883 }
2884 
2885 static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2886                 struct ib_qp_init_attr *attr)
2887 {
2888     enum ib_qp_type qp_type = qp->type;
2889     struct mlx5_core_dev *mdev = dev->mdev;
2890     int create_flags = attr->create_flags;
2891     bool cond;
2892 
2893     if (qp_type == MLX5_IB_QPT_DCT)
2894         return (create_flags) ? -EINVAL : 0;
2895 
2896     if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
2897         return (create_flags) ? -EINVAL : 0;
2898 
2899     process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
2900                 mlx5_get_flow_namespace(dev->mdev,
2901                             MLX5_FLOW_NAMESPACE_BYPASS),
2902                 qp);
2903     process_create_flag(dev, &create_flags,
2904                 IB_QP_CREATE_INTEGRITY_EN,
2905                 MLX5_CAP_GEN(mdev, sho), qp);
2906     process_create_flag(dev, &create_flags,
2907                 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
2908                 MLX5_CAP_GEN(mdev, block_lb_mc), qp);
2909     process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
2910                 MLX5_CAP_GEN(mdev, cd), qp);
2911     process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
2912                 MLX5_CAP_GEN(mdev, cd), qp);
2913     process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
2914                 MLX5_CAP_GEN(mdev, cd), qp);
2915 
2916     if (qp_type == IB_QPT_UD) {
2917         process_create_flag(dev, &create_flags,
2918                     IB_QP_CREATE_IPOIB_UD_LSO,
2919                     MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
2920                     qp);
2921         cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
2922         process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
2923                     cond, qp);
2924     }
2925 
2926     if (qp_type == IB_QPT_RAW_PACKET) {
2927         cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
2928                MLX5_CAP_ETH(mdev, scatter_fcs);
2929         process_create_flag(dev, &create_flags,
2930                     IB_QP_CREATE_SCATTER_FCS, cond, qp);
2931 
2932         cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
2933                MLX5_CAP_ETH(mdev, vlan_cap);
2934         process_create_flag(dev, &create_flags,
2935                     IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
2936     }
2937 
2938     process_create_flag(dev, &create_flags,
2939                 IB_QP_CREATE_PCI_WRITE_END_PADDING,
2940                 MLX5_CAP_GEN(mdev, end_pad), qp);
2941 
2942     process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
2943                 qp_type != MLX5_IB_QPT_REG_UMR, qp);
2944     process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
2945                 true, qp);
2946 
2947     if (create_flags) {
2948         mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
2949                 create_flags);
2950         return -EOPNOTSUPP;
2951     }
2952     return 0;
2953 }
2954 
2955 static int process_udata_size(struct mlx5_ib_dev *dev,
2956                   struct mlx5_create_qp_params *params)
2957 {
2958     size_t ucmd = sizeof(struct mlx5_ib_create_qp);
2959     struct ib_udata *udata = params->udata;
2960     size_t outlen = udata->outlen;
2961     size_t inlen = udata->inlen;
2962 
2963     params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
2964     params->ucmd_size = ucmd;
2965     if (!params->is_rss_raw) {
2966         /* User has old rdma-core, which doesn't support ECE */
2967         size_t min_inlen =
2968             offsetof(struct mlx5_ib_create_qp, ece_options);
2969 
2970         /*
2971          * We will check in check_ucmd_data() that user
2972          * We will check in check_ucmd_data() that the user
2973          */
2974         params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd);
2975         goto out;
2976     }
2977 
2978     /* RSS RAW QP */
2979     if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
2980         return -EINVAL;
2981 
2982     if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
2983         return -EINVAL;
2984 
2985     ucmd = sizeof(struct mlx5_ib_create_qp_rss);
2986     params->ucmd_size = ucmd;
2987     if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
2988         return -EINVAL;
2989 
2990     params->inlen = min(ucmd, inlen);
2991 out:
2992     if (!params->inlen)
2993         mlx5_ib_dbg(dev, "udata is too small\n");
2994 
2995     return (params->inlen) ? 0 : -EINVAL;
2996 }
2997 
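/*
 * Dispatch QP creation by type: RSS raw QPs only need a TIR; DCT, DCI and
 * XRC targets have dedicated paths; GSI goes through the software GSI
 * implementation; everything else takes the user or kernel path depending
 * on whether udata was supplied.
 */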
2998 static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2999              struct mlx5_ib_qp *qp,
3000              struct mlx5_create_qp_params *params)
3001 {
3002     int err;
3003 
3004     if (params->is_rss_raw) {
3005         err = create_rss_raw_qp_tir(dev, pd, qp, params);
3006         goto out;
3007     }
3008 
3009     switch (qp->type) {
3010     case MLX5_IB_QPT_DCT:
3011         err = create_dct(dev, pd, qp, params);
3012         rdma_restrack_no_track(&qp->ibqp.res);
3013         break;
3014     case MLX5_IB_QPT_DCI:
3015         err = create_dci(dev, pd, qp, params);
3016         break;
3017     case IB_QPT_XRC_TGT:
3018         err = create_xrc_tgt_qp(dev, qp, params);
3019         break;
3020     case IB_QPT_GSI:
3021         err = mlx5_ib_create_gsi(pd, qp, params->attr);
3022         break;
3023     case MLX5_IB_QPT_HW_GSI:
3024     case MLX5_IB_QPT_REG_UMR:
3025         rdma_restrack_no_track(&qp->ibqp.res);
3026         fallthrough;
3027     default:
3028         if (params->udata)
3029             err = create_user_qp(dev, pd, qp, params);
3030         else
3031             err = create_kernel_qp(dev, pd, qp, params);
3032     }
3033 
3034 out:
3035     if (err) {
3036         mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
3037         return err;
3038     }
3039 
3040     if (is_qp0(qp->type))
3041         qp->ibqp.qp_num = 0;
3042     else if (is_qp1(qp->type))
3043         qp->ibqp.qp_num = 1;
3044     else
3045         qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
3046 
3047     mlx5_ib_dbg(dev,
3048         "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n",
3049         qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
3050         params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
3051                     -1,
3052         params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
3053                     -1,
3054         params->resp.ece_options);
3055 
3056     return 0;
3057 }
3058 
3059 static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3060              struct ib_qp_init_attr *attr)
3061 {
3062     int ret = 0;
3063 
3064     switch (qp->type) {
3065     case MLX5_IB_QPT_DCT:
3066         ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
3067         break;
3068     case MLX5_IB_QPT_DCI:
3069         ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
3070                   -EINVAL :
3071                   0;
3072         break;
3073     case IB_QPT_RAW_PACKET:
3074         ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
3075         break;
3076     default:
3077         break;
3078     }
3079 
3080     if (ret)
3081         mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);
3082 
3083     return ret;
3084 }
3085 
3086 static int get_qp_uidx(struct mlx5_ib_qp *qp,
3087                struct mlx5_create_qp_params *params)
3088 {
3089     struct mlx5_ib_create_qp *ucmd = params->ucmd;
3090     struct ib_udata *udata = params->udata;
3091     struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3092         udata, struct mlx5_ib_ucontext, ibucontext);
3093 
3094     if (params->is_rss_raw)
3095         return 0;
3096 
3097     return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
3098 }
3099 
3100 static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
3101 {
3102     struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
3103 
3104     if (mqp->state == IB_QPS_RTR) {
3105         int err;
3106 
3107         err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
3108         if (err) {
3109             mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
3110             return err;
3111         }
3112     }
3113 
3114     kfree(mqp->dct.in);
3115     return 0;
3116 }
3117 
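/*
 * check_ucmd_data() rejects a create ucmd whose bytes past the portion of
 * the ABI struct this kernel knows about are not zero, so newer userspace
 * fields can never be silently ignored.
 */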
3118 static int check_ucmd_data(struct mlx5_ib_dev *dev,
3119                struct mlx5_create_qp_params *params)
3120 {
3121     struct ib_udata *udata = params->udata;
3122     size_t size, last;
3123     int ret;
3124 
3125     if (params->is_rss_raw)
3126         /*
3127          * These QPs don't have a "reserved" field in their
3128          * create_qp input struct, so their data is always valid.
3129          */
3130         last = sizeof(struct mlx5_ib_create_qp_rss);
3131     else
3132         last = offsetof(struct mlx5_ib_create_qp, reserved);
3133 
3134     if (udata->inlen <= last)
3135         return 0;
3136 
3137     /*
3138      * Userspace provides different create_qp structures depending on
3139      * the flow, so verify that the memory past the end of our struct
3140      * create_qp was cleared.
3141      */
3142     size = udata->inlen - last;
3143     ret = ib_is_udata_cleared(params->udata, last, size);
3144     if (!ret)
3145         mlx5_ib_dbg(
3146             dev,
3147             "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
3148             udata->inlen, params->ucmd_size, last, size);
3149     return ret ? 0 : -EINVAL;
3150 }
3151 
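/*
 * Top-level QP creation: validate the requested type and flow, size and
 * sanity-check the user command, copy it in, process vendor/create flags,
 * then build the QP and copy the response back.  Any failure after the
 * hardware object exists tears it down with the type-specific destroy path.
 */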
3152 int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
3153               struct ib_udata *udata)
3154 {
3155     struct mlx5_create_qp_params params = {};
3156     struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3157     struct mlx5_ib_qp *qp = to_mqp(ibqp);
3158     struct ib_pd *pd = ibqp->pd;
3159     enum ib_qp_type type;
3160     int err;
3161 
3162     err = check_qp_type(dev, attr, &type);
3163     if (err)
3164         return err;
3165 
3166     err = check_valid_flow(dev, pd, attr, udata);
3167     if (err)
3168         return err;
3169 
3170     params.udata = udata;
3171     params.uidx = MLX5_IB_DEFAULT_UIDX;
3172     params.attr = attr;
3173     params.is_rss_raw = !!attr->rwq_ind_tbl;
3174 
3175     if (udata) {
3176         err = process_udata_size(dev, &params);
3177         if (err)
3178             return err;
3179 
3180         err = check_ucmd_data(dev, &params);
3181         if (err)
3182             return err;
3183 
3184         params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
3185         if (!params.ucmd)
3186             return -ENOMEM;
3187 
3188         err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
3189         if (err)
3190             goto free_ucmd;
3191     }
3192 
3193     mutex_init(&qp->mutex);
3194     qp->type = type;
3195     if (udata) {
3196         err = process_vendor_flags(dev, qp, params.ucmd, attr);
3197         if (err)
3198             goto free_ucmd;
3199 
3200         err = get_qp_uidx(qp, &params);
3201         if (err)
3202             goto free_ucmd;
3203     }
3204     err = process_create_flags(dev, qp, attr);
3205     if (err)
3206         goto free_ucmd;
3207 
3208     err = check_qp_attr(dev, qp, attr);
3209     if (err)
3210         goto free_ucmd;
3211 
3212     err = create_qp(dev, pd, qp, &params);
3213     if (err)
3214         goto free_ucmd;
3215 
3216     kfree(params.ucmd);
3217     params.ucmd = NULL;
3218 
3219     if (udata)
3220         /*
3221          * It is safe to copy response for all user create QP flows,
3222          * including MLX5_IB_QPT_DCT, which doesn't need it.
3223          * In that case, resp will be filled with zeros.
3224          */
3225         err = ib_copy_to_udata(udata, &params.resp, params.outlen);
3226     if (err)
3227         goto destroy_qp;
3228 
3229     return 0;
3230 
3231 destroy_qp:
3232     switch (qp->type) {
3233     case MLX5_IB_QPT_DCT:
3234         mlx5_ib_destroy_dct(qp);
3235         break;
3236     case IB_QPT_GSI:
3237         mlx5_ib_destroy_gsi(qp);
3238         break;
3239     default:
3240         destroy_qp_common(dev, qp, udata);
3241     }
3242 
3243 free_ucmd:
3244     kfree(params.ucmd);
3245     return err;
3246 }
3247 
3248 int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
3249 {
3250     struct mlx5_ib_dev *dev = to_mdev(qp->device);
3251     struct mlx5_ib_qp *mqp = to_mqp(qp);
3252 
3253     if (mqp->type == IB_QPT_GSI)
3254         return mlx5_ib_destroy_gsi(mqp);
3255 
3256     if (mqp->type == MLX5_IB_QPT_DCT)
3257         return mlx5_ib_destroy_dct(mqp);
3258 
3259     destroy_qp_common(dev, mqp, udata);
3260     return 0;
3261 }
3262 
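/*
 * Derive the effective remote access flags for the QPC, falling back to the
 * values cached on the QP when the attribute mask doesn't carry them.  With
 * a zero responder resource depth only remote write remains enabled, and
 * remote atomics additionally require a supported atomic mode.
 */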
3263 static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
3264                 const struct ib_qp_attr *attr, int attr_mask,
3265                 void *qpc)
3266 {
3267     struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
3268     u8 dest_rd_atomic;
3269     u32 access_flags;
3270 
3271     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
3272         dest_rd_atomic = attr->max_dest_rd_atomic;
3273     else
3274         dest_rd_atomic = qp->trans_qp.resp_depth;
3275 
3276     if (attr_mask & IB_QP_ACCESS_FLAGS)
3277         access_flags = attr->qp_access_flags;
3278     else
3279         access_flags = qp->trans_qp.atomic_rd_en;
3280 
3281     if (!dest_rd_atomic)
3282         access_flags &= IB_ACCESS_REMOTE_WRITE;
3283 
3284     MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ));
3285 
3286     if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
3287         int atomic_mode;
3288 
3289         atomic_mode = get_atomic_mode(dev, qp->type);
3290         if (atomic_mode < 0)
3291             return -EOPNOTSUPP;
3292 
3293         MLX5_SET(qpc, qpc, rae, 1);
3294         MLX5_SET(qpc, qpc, atomic_mode, atomic_mode);
3295     }
3296 
3297     MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
3298     return 0;
3299 }
3300 
3301 enum {
3302     MLX5_PATH_FLAG_FL   = 1 << 0,
3303     MLX5_PATH_FLAG_FREE_AR  = 1 << 1,
3304     MLX5_PATH_FLAG_COUNTER  = 1 << 2,
3305 };
3306 
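/*
 * The device encodes static rates in two ranges: values 0-6 index the table
 * below (port current, 56/25/100/200/50/400 Gbps), while legacy IB rates are
 * carried as rate + MLX5_STAT_RATE_OFFSET.  ib_rate_to_mlx5() additionally
 * walks the requested rate down until it reaches one advertised by the
 * stat_rate_support capability before programming the path.
 */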
3307 static int mlx5_to_ib_rate_map(u8 rate)
3308 {
3309     static const int rates[] = { IB_RATE_PORT_CURRENT, IB_RATE_56_GBPS,
3310                      IB_RATE_25_GBPS,      IB_RATE_100_GBPS,
3311                      IB_RATE_200_GBPS,     IB_RATE_50_GBPS,
3312                      IB_RATE_400_GBPS };
3313 
3314     if (rate < ARRAY_SIZE(rates))
3315         return rates[rate];
3316 
3317     return rate - MLX5_STAT_RATE_OFFSET;
3318 }
3319 
3320 static int ib_to_mlx5_rate_map(u8 rate)
3321 {
3322     switch (rate) {
3323     case IB_RATE_PORT_CURRENT:
3324         return 0;
3325     case IB_RATE_56_GBPS:
3326         return 1;
3327     case IB_RATE_25_GBPS:
3328         return 2;
3329     case IB_RATE_100_GBPS:
3330         return 3;
3331     case IB_RATE_200_GBPS:
3332         return 4;
3333     case IB_RATE_50_GBPS:
3334         return 5;
3335     case IB_RATE_400_GBPS:
3336         return 6;
3337     default:
3338         return rate + MLX5_STAT_RATE_OFFSET;
3339     }
3340 
3341     return 0;
3342 }
3343 
3344 static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
3345 {
3346     u32 stat_rate_support;
3347 
3348     if (rate == IB_RATE_PORT_CURRENT)
3349         return 0;
3350 
3351     if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
3352         return -EINVAL;
3353 
3354     stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
3355     while (rate != IB_RATE_PORT_CURRENT &&
3356            !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
3357         --rate;
3358 
3359     return ib_to_mlx5_rate_map(rate);
3360 }
3361 
3362 static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
3363                       struct mlx5_ib_sq *sq, u8 sl,
3364                       struct ib_pd *pd)
3365 {
3366     void *in;
3367     void *tisc;
3368     int inlen;
3369     int err;
3370 
3371     inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
3372     in = kvzalloc(inlen, GFP_KERNEL);
3373     if (!in)
3374         return -ENOMEM;
3375 
3376     MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
3377     MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
3378 
3379     tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
3380     MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
3381 
3382     err = mlx5_core_modify_tis(dev, sq->tisn, in);
3383 
3384     kvfree(in);
3385 
3386     return err;
3387 }
3388 
3389 static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
3390                      struct mlx5_ib_sq *sq, u8 tx_affinity,
3391                      struct ib_pd *pd)
3392 {
3393     void *in;
3394     void *tisc;
3395     int inlen;
3396     int err;
3397 
3398     inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
3399     in = kvzalloc(inlen, GFP_KERNEL);
3400     if (!in)
3401         return -ENOMEM;
3402 
3403     MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
3404     MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
3405 
3406     tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
3407     MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
3408 
3409     err = mlx5_core_modify_tis(dev, sq->tisn, in);
3410 
3411     kvfree(in);
3412 
3413     return err;
3414 }
3415 
3416 static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah,
3417                     u32 lqpn, u32 rqpn)
3418 
3419 {
3420     u32 fl = ah->grh.flow_label;
3421 
3422     if (!fl)
3423         fl = rdma_calc_flow_label(lqpn, rqpn);
3424 
3425     MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl));
3426 }
3427 
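/*
 * Translate an rdma_ah_attr into an address-path ("ads") entry of the QPC.
 * RoCE paths get the destination MAC, a DSCP stripped of the ECN bits and,
 * for connected QPs over RoCEv2, a UDP source port derived from the flow
 * label; IB paths get LID/SL/free-AR bits instead.  For RAW packet QPs the
 * SL is applied to the underlying TIS rather than the QPC.
 */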
3428 static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3429              const struct rdma_ah_attr *ah, void *path, u8 port,
3430              int attr_mask, u32 path_flags,
3431              const struct ib_qp_attr *attr, bool alt)
3432 {
3433     const struct ib_global_route *grh = rdma_ah_read_grh(ah);
3434     int err;
3435     enum ib_gid_type gid_type;
3436     u8 ah_flags = rdma_ah_get_ah_flags(ah);
3437     u8 sl = rdma_ah_get_sl(ah);
3438 
3439     if (attr_mask & IB_QP_PKEY_INDEX)
3440         MLX5_SET(ads, path, pkey_index,
3441              alt ? attr->alt_pkey_index : attr->pkey_index);
3442 
3443     if (ah_flags & IB_AH_GRH) {
3444         const struct ib_port_immutable *immutable;
3445 
3446         immutable = ib_port_immutable_read(&dev->ib_dev, port);
3447         if (grh->sgid_index >= immutable->gid_tbl_len) {
3448             pr_err("sgid_index (%u) too large. max is %d\n",
3449                    grh->sgid_index,
3450                    immutable->gid_tbl_len);
3451             return -EINVAL;
3452         }
3453     }
3454 
3455     if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
3456         if (!(ah_flags & IB_AH_GRH))
3457             return -EINVAL;
3458 
3459         ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32),
3460                 ah->roce.dmac);
3461         if ((qp->type == IB_QPT_RC ||
3462              qp->type == IB_QPT_UC ||
3463              qp->type == IB_QPT_XRC_INI ||
3464              qp->type == IB_QPT_XRC_TGT) &&
3465             (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
3466             (attr_mask & IB_QP_DEST_QPN))
3467             mlx5_set_path_udp_sport(path, ah,
3468                         qp->ibqp.qp_num,
3469                         attr->dest_qp_num);
3470         MLX5_SET(ads, path, eth_prio, sl & 0x7);
3471         gid_type = ah->grh.sgid_attr->gid_type;
3472         if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3473             MLX5_SET(ads, path, dscp, grh->traffic_class >> 2);
3474     } else {
3475         MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL));
3476         MLX5_SET(ads, path, free_ar,
3477              !!(path_flags & MLX5_PATH_FLAG_FREE_AR));
3478         MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah));
3479         MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah));
3480         MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH));
3481         MLX5_SET(ads, path, sl, sl);
3482     }
3483 
3484     if (ah_flags & IB_AH_GRH) {
3485         MLX5_SET(ads, path, src_addr_index, grh->sgid_index);
3486         MLX5_SET(ads, path, hop_limit, grh->hop_limit);
3487         MLX5_SET(ads, path, tclass, grh->traffic_class);
3488         MLX5_SET(ads, path, flow_label, grh->flow_label);
3489         memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw,
3490                sizeof(grh->dgid.raw));
3491     }
3492 
3493     err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
3494     if (err < 0)
3495         return err;
3496     MLX5_SET(ads, path, stat_rate, err);
3497     MLX5_SET(ads, path, vhca_port_num, port);
3498 
3499     if (attr_mask & IB_QP_TIMEOUT)
3500         MLX5_SET(ads, path, ack_timeout,
3501              alt ? attr->alt_timeout : attr->timeout);
3502 
3503     if ((qp->type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
3504         return modify_raw_packet_eth_prio(dev->mdev,
3505                           &qp->raw_packet_qp.sq,
3506                           sl & 0xf, qp->ibqp.pd);
3507 
3508     return 0;
3509 }
3510 
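/*
 * opt_mask[cur_state][new_state][service_type] lists the QPC optional
 * parameters firmware accepts for a given transition.  The mask built from
 * the caller's attr_mask by ib_mask_to_mlx5_opt() is ANDed with the matching
 * entry before the modify command is issued; for example,
 * opt_mask[MLX5_QP_STATE_RTR][MLX5_QP_STATE_RTS][MLX5_QP_ST_RC] permits RNR
 * timeout and alternate-path updates but not a PKEY index change.
 */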
3511 static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
3512     [MLX5_QP_STATE_INIT] = {
3513         [MLX5_QP_STATE_INIT] = {
3514             [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE        |
3515                       MLX5_QP_OPTPAR_RAE        |
3516                       MLX5_QP_OPTPAR_RWE        |
3517                       MLX5_QP_OPTPAR_PKEY_INDEX |
3518                       MLX5_QP_OPTPAR_PRI_PORT   |
3519                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3520             [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE        |
3521                       MLX5_QP_OPTPAR_PKEY_INDEX |
3522                       MLX5_QP_OPTPAR_PRI_PORT   |
3523                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3524             [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
3525                       MLX5_QP_OPTPAR_Q_KEY      |
3526                       MLX5_QP_OPTPAR_PRI_PORT,
3527             [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE       |
3528                       MLX5_QP_OPTPAR_RAE        |
3529                       MLX5_QP_OPTPAR_RWE        |
3530                       MLX5_QP_OPTPAR_PKEY_INDEX |
3531                       MLX5_QP_OPTPAR_PRI_PORT   |
3532                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3533         },
3534         [MLX5_QP_STATE_RTR] = {
3535             [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3536                       MLX5_QP_OPTPAR_RRE            |
3537                       MLX5_QP_OPTPAR_RAE            |
3538                       MLX5_QP_OPTPAR_RWE            |
3539                       MLX5_QP_OPTPAR_PKEY_INDEX |
3540                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3541             [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3542                       MLX5_QP_OPTPAR_RWE            |
3543                       MLX5_QP_OPTPAR_PKEY_INDEX |
3544                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3545             [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
3546                       MLX5_QP_OPTPAR_Q_KEY,
3547             [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
3548                        MLX5_QP_OPTPAR_Q_KEY,
3549             [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
3550                       MLX5_QP_OPTPAR_RRE            |
3551                       MLX5_QP_OPTPAR_RAE            |
3552                       MLX5_QP_OPTPAR_RWE            |
3553                       MLX5_QP_OPTPAR_PKEY_INDEX |
3554                       MLX5_QP_OPTPAR_LAG_TX_AFF,
3555         },
3556     },
3557     [MLX5_QP_STATE_RTR] = {
3558         [MLX5_QP_STATE_RTS] = {
3559             [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3560                       MLX5_QP_OPTPAR_RRE        |
3561                       MLX5_QP_OPTPAR_RAE        |
3562                       MLX5_QP_OPTPAR_RWE        |
3563                       MLX5_QP_OPTPAR_PM_STATE   |
3564                       MLX5_QP_OPTPAR_RNR_TIMEOUT,
3565             [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3566                       MLX5_QP_OPTPAR_RWE        |
3567                       MLX5_QP_OPTPAR_PM_STATE,
3568             [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
3569             [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
3570                       MLX5_QP_OPTPAR_RRE        |
3571                       MLX5_QP_OPTPAR_RAE        |
3572                       MLX5_QP_OPTPAR_RWE        |
3573                       MLX5_QP_OPTPAR_PM_STATE   |
3574                       MLX5_QP_OPTPAR_RNR_TIMEOUT,
3575         },
3576     },
3577     [MLX5_QP_STATE_RTS] = {
3578         [MLX5_QP_STATE_RTS] = {
3579             [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE        |
3580                       MLX5_QP_OPTPAR_RAE        |
3581                       MLX5_QP_OPTPAR_RWE        |
3582                       MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3583                       MLX5_QP_OPTPAR_PM_STATE   |
3584                       MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3585             [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE        |
3586                       MLX5_QP_OPTPAR_PM_STATE   |
3587                       MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3588             [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY      |
3589                       MLX5_QP_OPTPAR_SRQN       |
3590                       MLX5_QP_OPTPAR_CQN_RCV,
3591             [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE       |
3592                       MLX5_QP_OPTPAR_RAE        |
3593                       MLX5_QP_OPTPAR_RWE        |
3594                       MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3595                       MLX5_QP_OPTPAR_PM_STATE   |
3596                       MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3597         },
3598     },
3599     [MLX5_QP_STATE_SQER] = {
3600         [MLX5_QP_STATE_RTS] = {
3601             [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
3602             [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
3603             [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
3604             [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
3605                        MLX5_QP_OPTPAR_RWE       |
3606                        MLX5_QP_OPTPAR_RAE       |
3607                        MLX5_QP_OPTPAR_RRE,
3608             [MLX5_QP_ST_XRC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT  |
3609                        MLX5_QP_OPTPAR_RWE       |
3610                        MLX5_QP_OPTPAR_RAE       |
3611                        MLX5_QP_OPTPAR_RRE,
3612         },
3613     },
3614     [MLX5_QP_STATE_SQD] = {
3615         [MLX5_QP_STATE_RTS] = {
3616             [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
3617             [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
3618             [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
3619             [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3620                       MLX5_QP_OPTPAR_RWE        |
3621                       MLX5_QP_OPTPAR_RAE        |
3622                       MLX5_QP_OPTPAR_RRE,
3623         },
3624     },
3625 };
3626 
3627 static int ib_nr_to_mlx5_nr(int ib_mask)
3628 {
3629     switch (ib_mask) {
3630     case IB_QP_STATE:
3631         return 0;
3632     case IB_QP_CUR_STATE:
3633         return 0;
3634     case IB_QP_EN_SQD_ASYNC_NOTIFY:
3635         return 0;
3636     case IB_QP_ACCESS_FLAGS:
3637         return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
3638             MLX5_QP_OPTPAR_RAE;
3639     case IB_QP_PKEY_INDEX:
3640         return MLX5_QP_OPTPAR_PKEY_INDEX;
3641     case IB_QP_PORT:
3642         return MLX5_QP_OPTPAR_PRI_PORT;
3643     case IB_QP_QKEY:
3644         return MLX5_QP_OPTPAR_Q_KEY;
3645     case IB_QP_AV:
3646         return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
3647             MLX5_QP_OPTPAR_PRI_PORT;
3648     case IB_QP_PATH_MTU:
3649         return 0;
3650     case IB_QP_TIMEOUT:
3651         return MLX5_QP_OPTPAR_ACK_TIMEOUT;
3652     case IB_QP_RETRY_CNT:
3653         return MLX5_QP_OPTPAR_RETRY_COUNT;
3654     case IB_QP_RNR_RETRY:
3655         return MLX5_QP_OPTPAR_RNR_RETRY;
3656     case IB_QP_RQ_PSN:
3657         return 0;
3658     case IB_QP_MAX_QP_RD_ATOMIC:
3659         return MLX5_QP_OPTPAR_SRA_MAX;
3660     case IB_QP_ALT_PATH:
3661         return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
3662     case IB_QP_MIN_RNR_TIMER:
3663         return MLX5_QP_OPTPAR_RNR_TIMEOUT;
3664     case IB_QP_SQ_PSN:
3665         return 0;
3666     case IB_QP_MAX_DEST_RD_ATOMIC:
3667         return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
3668             MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
3669     case IB_QP_PATH_MIG_STATE:
3670         return MLX5_QP_OPTPAR_PM_STATE;
3671     case IB_QP_CAP:
3672         return 0;
3673     case IB_QP_DEST_QPN:
3674         return 0;
3675     }
3676     return 0;
3677 }
3678 
3679 static int ib_mask_to_mlx5_opt(int ib_mask)
3680 {
3681     int result = 0;
3682     int i;
3683 
3684     for (i = 0; i < 8 * sizeof(int); i++) {
3685         if ((1 << i) & ib_mask)
3686             result |= ib_nr_to_mlx5_nr(1 << i);
3687     }
3688 
3689     return result;
3690 }
3691 
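/*
 * RAW packet QPs are built from separate RQ and SQ objects, so their state
 * changes are issued as MODIFY_RQ/MODIFY_SQ commands instead of a QP modify.
 * The RQ side also carries the counter set id when the device supports
 * updating it here.
 */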
3692 static int modify_raw_packet_qp_rq(
3693     struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
3694     const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
3695 {
3696     void *in;
3697     void *rqc;
3698     int inlen;
3699     int err;
3700 
3701     inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
3702     in = kvzalloc(inlen, GFP_KERNEL);
3703     if (!in)
3704         return -ENOMEM;
3705 
3706     MLX5_SET(modify_rq_in, in, rq_state, rq->state);
3707     MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
3708 
3709     rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
3710     MLX5_SET(rqc, rqc, state, new_state);
3711 
3712     if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
3713         if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
3714             MLX5_SET64(modify_rq_in, in, modify_bitmask,
3715                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
3716             MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
3717         } else
3718             dev_info_once(
3719                 &dev->ib_dev.dev,
3720                 "RAW PACKET QP counters are not supported on current FW\n");
3721     }
3722 
3723     err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
3724     if (err)
3725         goto out;
3726 
3727     rq->state = new_state;
3728 
3729 out:
3730     kvfree(in);
3731     return err;
3732 }
3733 
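/*
 * The SQ side owns the packet-pacing rate limit.  A new rate index is
 * reserved before MODIFY_SQ and the old one is released only after the
 * command succeeds, so a failed modify never strands the SQ without its
 * previous limit; moving the SQ out of RDY drops the limit entirely.
 */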
3734 static int modify_raw_packet_qp_sq(
3735     struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
3736     const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
3737 {
3738     struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
3739     struct mlx5_rate_limit old_rl = ibqp->rl;
3740     struct mlx5_rate_limit new_rl = old_rl;
3741     bool new_rate_added = false;
3742     u16 rl_index = 0;
3743     void *in;
3744     void *sqc;
3745     int inlen;
3746     int err;
3747 
3748     inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
3749     in = kvzalloc(inlen, GFP_KERNEL);
3750     if (!in)
3751         return -ENOMEM;
3752 
3753     MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
3754     MLX5_SET(modify_sq_in, in, sq_state, sq->state);
3755 
3756     sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
3757     MLX5_SET(sqc, sqc, state, new_state);
3758 
3759     if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
3760         if (new_state != MLX5_SQC_STATE_RDY)
3761             pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
3762                 __func__);
3763         else
3764             new_rl = raw_qp_param->rl;
3765     }
3766 
3767     if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
3768         if (new_rl.rate) {
3769             err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
3770             if (err) {
3771                 pr_err("Failed configuring rate limit (err %d): "
3772                        "rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
3773                        err, new_rl.rate, new_rl.max_burst_sz,
3774                        new_rl.typical_pkt_sz);
3775 
3776                 goto out;
3777             }
3778             new_rate_added = true;
3779         }
3780 
3781         MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
3782         /* index 0 means no limit */
3783         MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
3784     }
3785 
3786     err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
3787     if (err) {
3788         /* Remove new rate from table if failed */
3789         if (new_rate_added)
3790             mlx5_rl_remove_rate(dev, &new_rl);
3791         goto out;
3792     }
3793 
3794     /* Only remove the old rate after new rate was set */
3795     if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
3796         (new_state != MLX5_SQC_STATE_RDY)) {
3797         mlx5_rl_remove_rate(dev, &old_rl);
3798         if (new_state != MLX5_SQC_STATE_RDY)
3799             memset(&new_rl, 0, sizeof(new_rl));
3800     }
3801 
3802     ibqp->rl = new_rl;
3803     sq->state = new_state;
3804 
3805 out:
3806     kvfree(in);
3807     return err;
3808 }
3809 
3810 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3811                 const struct mlx5_modify_raw_qp_param *raw_qp_param,
3812                 u8 tx_affinity)
3813 {
3814     struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
3815     struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
3816     struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
3817     int modify_rq = !!qp->rq.wqe_cnt;
3818     int modify_sq = !!qp->sq.wqe_cnt;
3819     int rq_state;
3820     int sq_state;
3821     int err;
3822 
3823     switch (raw_qp_param->operation) {
3824     case MLX5_CMD_OP_RST2INIT_QP:
3825         rq_state = MLX5_RQC_STATE_RDY;
3826         sq_state = MLX5_SQC_STATE_RST;
3827         break;
3828     case MLX5_CMD_OP_2ERR_QP:
3829         rq_state = MLX5_RQC_STATE_ERR;
3830         sq_state = MLX5_SQC_STATE_ERR;
3831         break;
3832     case MLX5_CMD_OP_2RST_QP:
3833         rq_state = MLX5_RQC_STATE_RST;
3834         sq_state = MLX5_SQC_STATE_RST;
3835         break;
3836     case MLX5_CMD_OP_RTR2RTS_QP:
3837     case MLX5_CMD_OP_RTS2RTS_QP:
3838         if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
3839             return -EINVAL;
3840 
3841         modify_rq = 0;
3842         sq_state = MLX5_SQC_STATE_RDY;
3843         break;
3844     case MLX5_CMD_OP_INIT2INIT_QP:
3845     case MLX5_CMD_OP_INIT2RTR_QP:
3846         if (raw_qp_param->set_mask)
3847             return -EINVAL;
3848         else
3849             return 0;
3850     default:
3851         WARN_ON(1);
3852         return -EINVAL;
3853     }
3854 
3855     if (modify_rq) {
3856         err =  modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
3857                            qp->ibqp.pd);
3858         if (err)
3859             return err;
3860     }
3861 
3862     if (modify_sq) {
3863         struct mlx5_flow_handle *flow_rule;
3864 
3865         if (tx_affinity) {
3866             err = modify_raw_packet_tx_affinity(dev->mdev, sq,
3867                                 tx_affinity,
3868                                 qp->ibqp.pd);
3869             if (err)
3870                 return err;
3871         }
3872 
3873         flow_rule = create_flow_rule_vport_sq(dev, sq,
3874                               raw_qp_param->port);
3875         if (IS_ERR(flow_rule))
3876             return PTR_ERR(flow_rule);
3877 
3878         err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
3879                           raw_qp_param, qp->ibqp.pd);
3880         if (err) {
3881             if (flow_rule)
3882                 mlx5_del_flow_rules(flow_rule);
3883             return err;
3884         }
3885 
3886         if (flow_rule) {
3887             destroy_flow_rule_vport_sq(sq);
3888             sq->flow_rule = flow_rule;
3889         }
3890 
3891         return err;
3892     }
3893 
3894     return 0;
3895 }
3896 
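/*
 * When LAG affinity should be assigned, spread send traffic across the
 * bonded ports: user QPs round-robin over a per-ucontext counter, kernel
 * QPs over a per-port one.  The result is a 1-based port affinity written
 * into the TIS/QPC.
 */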
3897 static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
3898                        struct ib_udata *udata)
3899 {
3900     struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3901         udata, struct mlx5_ib_ucontext, ibucontext);
3902     u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
3903     atomic_t *tx_port_affinity;
3904 
3905     if (ucontext)
3906         tx_port_affinity = &ucontext->tx_port_affinity;
3907     else
3908         tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
3909 
3910     return (unsigned int)atomic_add_return(1, tx_port_affinity) %
3911         (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
3912 }
3913 
3914 static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
3915 {
3916     if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
3917         (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
3918         (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
3919         (qp->type == MLX5_IB_QPT_DCI))
3920         return true;
3921     return false;
3922 }
3923 
3924 static unsigned int get_tx_affinity(struct ib_qp *qp,
3925                     const struct ib_qp_attr *attr,
3926                     int attr_mask, u8 init,
3927                     struct ib_udata *udata)
3928 {
3929     struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3930         udata, struct mlx5_ib_ucontext, ibucontext);
3931     struct mlx5_ib_dev *dev = to_mdev(qp->device);
3932     struct mlx5_ib_qp *mqp = to_mqp(qp);
3933     struct mlx5_ib_qp_base *qp_base;
3934     unsigned int tx_affinity;
3935 
3936     if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
3937           qp_supports_affinity(mqp)))
3938         return 0;
3939 
3940     if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
3941         tx_affinity = mqp->gsi_lag_port;
3942     else if (init)
3943         tx_affinity = get_tx_affinity_rr(dev, udata);
3944     else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
3945         tx_affinity =
3946             mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
3947     else
3948         return 0;
3949 
3950     qp_base = &mqp->trans_qp.base;
3951     if (ucontext)
3952         mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
3953                 tx_affinity, qp_base->mqp.qpn, ucontext);
3954     else
3955         mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
3956                 tx_affinity, qp_base->mqp.qpn);
3957     return tx_affinity;
3958 }
3959 
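/*
 * Rebind the QP to a counter set with a self-transition: an RTS2RTS modify
 * that carries only the COUNTER_SET_ID optional parameter, leaving the rest
 * of the QP context untouched.
 */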
3960 static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
3961                     struct rdma_counter *counter)
3962 {
3963     struct mlx5_ib_dev *dev = to_mdev(qp->device);
3964     u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {};
3965     struct mlx5_ib_qp *mqp = to_mqp(qp);
3966     struct mlx5_ib_qp_base *base;
3967     u32 set_id;
3968     u32 *qpc;
3969 
3970     if (counter)
3971         set_id = counter->id;
3972     else
3973         set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
3974 
3975     base = &mqp->trans_qp.base;
3976     MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
3977     MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
3978     MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid);
3979     MLX5_SET(rts2rts_qp_in, in, opt_param_mask,
3980          MLX5_QP_OPTPAR_COUNTER_SET_ID);
3981 
3982     qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc);
3983     MLX5_SET(qpc, qpc, counter_set_id, set_id);
3984     return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in);
3985 }
3986 
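/*
 * Core modify path: build a fresh QPC from the attribute mask, pick the
 * firmware opcode from the state-transition table below, and mask the
 * optional parameters against opt_mask[].  RAW packet and underlay QPs are
 * diverted to modify_raw_packet_qp(); when a kernel QP is moved back to
 * RESET its CQs are cleaned and the work queue indices are reset.
 */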
3987 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
3988                    const struct ib_qp_attr *attr, int attr_mask,
3989                    enum ib_qp_state cur_state,
3990                    enum ib_qp_state new_state,
3991                    const struct mlx5_ib_modify_qp *ucmd,
3992                    struct mlx5_ib_modify_qp_resp *resp,
3993                    struct ib_udata *udata)
3994 {
3995     static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
3996         [MLX5_QP_STATE_RST] = {
3997             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
3998             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
3999             [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
4000         },
4001         [MLX5_QP_STATE_INIT]  = {
4002             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4003             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4004             [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
4005             [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
4006         },
4007         [MLX5_QP_STATE_RTR]   = {
4008             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4009             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4010             [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
4011         },
4012         [MLX5_QP_STATE_RTS]   = {
4013             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4014             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4015             [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
4016         },
4017         [MLX5_QP_STATE_SQD] = {
4018             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4019             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4020             [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD_RTS_QP,
4021         },
4022         [MLX5_QP_STATE_SQER] = {
4023             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4024             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4025             [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
4026         },
4027         [MLX5_QP_STATE_ERR] = {
4028             [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
4029             [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
4030         }
4031     };
4032 
4033     struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4034     struct mlx5_ib_qp *qp = to_mqp(ibqp);
4035     struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
4036     struct mlx5_ib_cq *send_cq, *recv_cq;
4037     struct mlx5_ib_pd *pd;
4038     enum mlx5_qp_state mlx5_cur, mlx5_new;
4039     void *qpc, *pri_path, *alt_path;
4040     enum mlx5_qp_optpar optpar = 0;
4041     u32 set_id = 0;
4042     int mlx5_st;
4043     int err;
4044     u16 op;
4045     u8 tx_affinity = 0;
4046 
4047     mlx5_st = to_mlx5_st(qp->type);
4048     if (mlx5_st < 0)
4049         return -EINVAL;
4050 
4051     qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
4052     if (!qpc)
4053         return -ENOMEM;
4054 
4055     pd = to_mpd(qp->ibqp.pd);
4056     MLX5_SET(qpc, qpc, st, mlx5_st);
4057 
4058     if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
4059         MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
4060     } else {
4061         switch (attr->path_mig_state) {
4062         case IB_MIG_MIGRATED:
4063             MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
4064             break;
4065         case IB_MIG_REARM:
4066             MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM);
4067             break;
4068         case IB_MIG_ARMED:
4069             MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED);
4070             break;
4071         }
4072     }
4073 
4074     tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
4075                       cur_state == IB_QPS_RESET &&
4076                       new_state == IB_QPS_INIT, udata);
4077 
4078     MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity);
4079     if (tx_affinity && new_state == IB_QPS_RTR &&
4080         MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
4081         optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
4082 
4083     if (is_sqp(qp->type)) {
4084         MLX5_SET(qpc, qpc, mtu, IB_MTU_256);
4085         MLX5_SET(qpc, qpc, log_msg_max, 8);
4086     } else if ((qp->type == IB_QPT_UD &&
4087             !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
4088            qp->type == MLX5_IB_QPT_REG_UMR) {
4089         MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
4090         MLX5_SET(qpc, qpc, log_msg_max, 12);
4091     } else if (attr_mask & IB_QP_PATH_MTU) {
4092         if (attr->path_mtu < IB_MTU_256 ||
4093             attr->path_mtu > IB_MTU_4096) {
4094             mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
4095             err = -EINVAL;
4096             goto out;
4097         }
4098         MLX5_SET(qpc, qpc, mtu, attr->path_mtu);
4099         MLX5_SET(qpc, qpc, log_msg_max,
4100              MLX5_CAP_GEN(dev->mdev, log_max_msg));
4101     }
4102 
4103     if (attr_mask & IB_QP_DEST_QPN)
4104         MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num);
4105 
4106     pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
4107     alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
4108 
4109     if (attr_mask & IB_QP_PKEY_INDEX)
4110         MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index);
4111 
4112     /* todo implement counter_index functionality */
4113 
4114     if (is_sqp(qp->type))
4115         MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
4116 
4117     if (attr_mask & IB_QP_PORT)
4118         MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num);
4119 
4120     if (attr_mask & IB_QP_AV) {
4121         err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path,
4122                     attr_mask & IB_QP_PORT ? attr->port_num :
4123                                  qp->port,
4124                     attr_mask, 0, attr, false);
4125         if (err)
4126             goto out;
4127     }
4128 
4129     if (attr_mask & IB_QP_TIMEOUT)
4130         MLX5_SET(ads, pri_path, ack_timeout, attr->timeout);
4131 
4132     if (attr_mask & IB_QP_ALT_PATH) {
4133         err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path,
4134                     attr->alt_port_num,
4135                     attr_mask | IB_QP_PKEY_INDEX |
4136                         IB_QP_TIMEOUT,
4137                     0, attr, true);
4138         if (err)
4139             goto out;
4140     }
4141 
4142     get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
4143         &send_cq, &recv_cq);
4144 
4145     MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
4146     if (send_cq)
4147         MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn);
4148     if (recv_cq)
4149         MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn);
4150 
4151     MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ);
4152 
4153     if (attr_mask & IB_QP_RNR_RETRY)
4154         MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
4155 
4156     if (attr_mask & IB_QP_RETRY_CNT)
4157         MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
4158 
4159     if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic)
4160         MLX5_SET(qpc, qpc, log_sra_max, ilog2(attr->max_rd_atomic));
4161 
4162     if (attr_mask & IB_QP_SQ_PSN)
4163         MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn);
4164 
4165     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic)
4166         MLX5_SET(qpc, qpc, log_rra_max,
4167              ilog2(attr->max_dest_rd_atomic));
4168 
4169     if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
4170         err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc);
4171         if (err)
4172             goto out;
4173     }
4174 
4175     if (attr_mask & IB_QP_MIN_RNR_TIMER)
4176         MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer);
4177 
4178     if (attr_mask & IB_QP_RQ_PSN)
4179         MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn);
4180 
4181     if (attr_mask & IB_QP_QKEY)
4182         MLX5_SET(qpc, qpc, q_key, attr->qkey);
4183 
4184     if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
4185         MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
4186 
4187     if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4188         u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
4189                    qp->port) - 1;
4190 
4191         /* Underlay port should be used - index 0 function per port */
4192         if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
4193             port_num = 0;
4194 
4195         if (ibqp->counter)
4196             set_id = ibqp->counter->id;
4197         else
4198             set_id = mlx5_ib_get_counters_id(dev, port_num);
4199         MLX5_SET(qpc, qpc, counter_set_id, set_id);
4200     }
4201 
4202     if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
4203         MLX5_SET(qpc, qpc, rlky, 1);
4204 
4205     if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
4206         MLX5_SET(qpc, qpc, deth_sqpn, 1);
4207 
4208     mlx5_cur = to_mlx5_state(cur_state);
4209     mlx5_new = to_mlx5_state(new_state);
4210 
4211     if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
4212         !optab[mlx5_cur][mlx5_new]) {
4213         err = -EINVAL;
4214         goto out;
4215     }
4216 
4217     op = optab[mlx5_cur][mlx5_new];
4218     optpar |= ib_mask_to_mlx5_opt(attr_mask);
4219     optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
4220 
4221     if (qp->type == IB_QPT_RAW_PACKET ||
4222         qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4223         struct mlx5_modify_raw_qp_param raw_qp_param = {};
4224 
4225         raw_qp_param.operation = op;
4226         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4227             raw_qp_param.rq_q_ctr_id = set_id;
4228             raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
4229         }
4230 
4231         if (attr_mask & IB_QP_PORT)
4232             raw_qp_param.port = attr->port_num;
4233 
4234         if (attr_mask & IB_QP_RATE_LIMIT) {
4235             raw_qp_param.rl.rate = attr->rate_limit;
4236 
4237             if (ucmd->burst_info.max_burst_sz) {
4238                 if (attr->rate_limit &&
4239                     MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
4240                     raw_qp_param.rl.max_burst_sz =
4241                         ucmd->burst_info.max_burst_sz;
4242                 } else {
4243                     err = -EINVAL;
4244                     goto out;
4245                 }
4246             }
4247 
4248             if (ucmd->burst_info.typical_pkt_sz) {
4249                 if (attr->rate_limit &&
4250                     MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
4251                     raw_qp_param.rl.typical_pkt_sz =
4252                         ucmd->burst_info.typical_pkt_sz;
4253                 } else {
4254                     err = -EINVAL;
4255                     goto out;
4256                 }
4257             }
4258 
4259             raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
4260         }
4261 
4262         err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
4263     } else {
4264         if (udata) {
4265             /* For the kernel flows, the resp will stay zero */
4266             resp->ece_options =
4267                 MLX5_CAP_GEN(dev->mdev, ece_support) ?
4268                     ucmd->ece_options : 0;
4269             resp->response_length = sizeof(*resp);
4270         }
4271         err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp,
4272                       &resp->ece_options);
4273     }
4274 
4275     if (err)
4276         goto out;
4277 
4278     qp->state = new_state;
4279 
4280     if (attr_mask & IB_QP_ACCESS_FLAGS)
4281         qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
4282     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
4283         qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
4284     if (attr_mask & IB_QP_PORT)
4285         qp->port = attr->port_num;
4286     if (attr_mask & IB_QP_ALT_PATH)
4287         qp->trans_qp.alt_port = attr->alt_port_num;
4288 
4289     /*
4290      * If we moved a kernel QP to RESET, clean up all old CQ
4291      * entries and reinitialize the QP.
4292      */
4293     if (new_state == IB_QPS_RESET &&
4294         !ibqp->uobject && qp->type != IB_QPT_XRC_TGT) {
4295         mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
4296                  ibqp->srq ? to_msrq(ibqp->srq) : NULL);
4297         if (send_cq != recv_cq)
4298             mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
4299 
4300         qp->rq.head = 0;
4301         qp->rq.tail = 0;
4302         qp->sq.head = 0;
4303         qp->sq.tail = 0;
4304         qp->sq.cur_post = 0;
4305         if (qp->sq.wqe_cnt)
4306             qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
4307         qp->sq.last_poll = 0;
4308         qp->db.db[MLX5_RCV_DBR] = 0;
4309         qp->db.db[MLX5_SND_DBR] = 0;
4310     }
4311 
4312     if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
4313         err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
4314         if (!err)
4315             qp->counter_pending = 0;
4316     }
4317 
4318 out:
4319     kfree(qpc);
4320     return err;
4321 }
4322 
4323 static inline bool is_valid_mask(int mask, int req, int opt)
4324 {
4325     if ((mask & req) != req)
4326         return false;
4327 
4328     if (mask & ~(req | opt))
4329         return false;
4330 
4331     return true;
4332 }
4333 
4334 /* Check valid state transitions for driver-specific QP types.
4335  * For now, the only QP type this function supports is DCI.
4336  */
4337 static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
4338                 enum ib_qp_attr_mask attr_mask)
4339 {
4340     int req = IB_QP_STATE;
4341     int opt = 0;
4342 
4343     if (new_state == IB_QPS_RESET) {
4344         return is_valid_mask(attr_mask, req, opt);
4345     } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4346         req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
4347         return is_valid_mask(attr_mask, req, opt);
4348     } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
4349         opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
4350         return is_valid_mask(attr_mask, req, opt);
4351     } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
4352         req |= IB_QP_PATH_MTU;
4353         opt = IB_QP_PKEY_INDEX | IB_QP_AV;
4354         return is_valid_mask(attr_mask, req, opt);
4355     } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
4356         req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4357                IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
4358         opt = IB_QP_MIN_RNR_TIMER;
4359         return is_valid_mask(attr_mask, req, opt);
4360     } else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
4361         opt = IB_QP_MIN_RNR_TIMER;
4362         return is_valid_mask(attr_mask, req, opt);
4363     } else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
4364         return is_valid_mask(attr_mask, req, opt);
4365     }
4366     return false;
4367 }
4368 
4369 /* mlx5_ib_modify_dct: modify a DCT QP
4370  * valid transitions are:
4371  * RESET to INIT: must set access_flags, pkey_index and port
4372  * INIT  to RTR : must set min_rnr_timer, tclass, flow_label,
4373  *             mtu, gid_index and hop_limit
4374  * Other transitions and attributes are illegal
4375  */
4376 static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
4377                   int attr_mask, struct mlx5_ib_modify_qp *ucmd,
4378                   struct ib_udata *udata)
4379 {
4380     struct mlx5_ib_qp *qp = to_mqp(ibqp);
4381     struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4382     enum ib_qp_state cur_state, new_state;
4383     int required = IB_QP_STATE;
4384     void *dctc;
4385     int err;
4386 
4387     if (!(attr_mask & IB_QP_STATE))
4388         return -EINVAL;
4389 
4390     cur_state = qp->state;
4391     new_state = attr->qp_state;
4392 
4393     dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
4394     if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options)
4395         /*
4396          * A DCT doesn't initialize the QP until the modify command is
4397          * executed, so we need to overwrite the previously set ECE field
4398          * if the user provided any value except zero, which means not
4399          * set/not valid.
4400          */
4401         MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
4402 
4403     if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4404         u16 set_id;
4405 
4406         required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
4407         if (!is_valid_mask(attr_mask, required, 0))
4408             return -EINVAL;
4409 
4410         if (attr->port_num == 0 ||
4411             attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
4412             mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
4413                     attr->port_num, dev->num_ports);
4414             return -EINVAL;
4415         }
4416         if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
4417             MLX5_SET(dctc, dctc, rre, 1);
4418         if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
4419             MLX5_SET(dctc, dctc, rwe, 1);
4420         if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
4421             int atomic_mode;
4422 
4423             atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
4424             if (atomic_mode < 0)
4425                 return -EOPNOTSUPP;
4426 
4427             MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
4428             MLX5_SET(dctc, dctc, rae, 1);
4429         }
4430         MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
4431         if (mlx5_lag_is_active(dev->mdev))
4432             MLX5_SET(dctc, dctc, port,
4433                  get_tx_affinity_rr(dev, udata));
4434         else
4435             MLX5_SET(dctc, dctc, port, attr->port_num);
4436 
4437         set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
4438         MLX5_SET(dctc, dctc, counter_set_id, set_id);
4439     } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
4440         struct mlx5_ib_modify_qp_resp resp = {};
4441         u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
4442         u32 min_resp_len = offsetofend(typeof(resp), dctn);
4443 
4444         if (udata->outlen < min_resp_len)
4445             return -EINVAL;
4446         /*
4447          * If we don't have enough space for the ECE options,
4448          * simply indicate it with resp.response_length.
4449          */
4450         resp.response_length = (udata->outlen < sizeof(resp)) ?
4451                            min_resp_len :
4452                            sizeof(resp);
4453 
4454         required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
4455         if (!is_valid_mask(attr_mask, required, 0))
4456             return -EINVAL;
4457         MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
4458         MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
4459         MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
4460         MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
4461         MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
4462         MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
4463         if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
4464             MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
4465 
4466         err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
4467                        MLX5_ST_SZ_BYTES(create_dct_in), out,
4468                        sizeof(out));
4469         err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out);
4470         if (err)
4471             return err;
4472         resp.dctn = qp->dct.mdct.mqp.qpn;
4473         if (MLX5_CAP_GEN(dev->mdev, ece_support))
4474             resp.ece_options = MLX5_GET(create_dct_out, out, ece);
4475         err = ib_copy_to_udata(udata, &resp, resp.response_length);
4476         if (err) {
4477             mlx5_core_destroy_dct(dev, &qp->dct.mdct);
4478             return err;
4479         }
4480     } else {
4481         mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
4482         return -EINVAL;
4483     }
4484 
4485     qp->state = new_state;
4486     return 0;
4487 }
4488 
4489 static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
4490                       struct mlx5_ib_qp *qp)
4491 {
4492     if (dev->profile != &raw_eth_profile)
4493         return true;
4494 
4495     if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
4496         return true;
4497 
4498     /* Internal QP used for write-combining (WC) testing, with NOPs in the WQ */
4499     if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
4500         return true;
4501 
4502     return false;
4503 }
4504 
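/*
 * Verbs entry point for modify QP: copy and validate the optional user
 * command, hand GSI and DCT QPs to their dedicated handlers, enforce the
 * IB-spec state machine (or the DCI-specific one above), bounds-check
 * port/pkey/rd_atomic attributes and only then call __mlx5_ib_modify_qp().
 */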
4505 int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
4506               int attr_mask, struct ib_udata *udata)
4507 {
4508     struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4509     struct mlx5_ib_modify_qp_resp resp = {};
4510     struct mlx5_ib_qp *qp = to_mqp(ibqp);
4511     struct mlx5_ib_modify_qp ucmd = {};
4512     enum ib_qp_type qp_type;
4513     enum ib_qp_state cur_state, new_state;
4514     int err = -EINVAL;
4515 
4516     if (!mlx5_ib_modify_qp_allowed(dev, qp))
4517         return -EOPNOTSUPP;
4518 
4519     if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
4520         return -EOPNOTSUPP;
4521 
4522     if (ibqp->rwq_ind_tbl)
4523         return -ENOSYS;
4524 
4525     if (udata && udata->inlen) {
4526         if (udata->inlen < offsetofend(typeof(ucmd), ece_options))
4527             return -EINVAL;
4528 
4529         if (udata->inlen > sizeof(ucmd) &&
4530             !ib_is_udata_cleared(udata, sizeof(ucmd),
4531                      udata->inlen - sizeof(ucmd)))
4532             return -EOPNOTSUPP;
4533 
4534         if (ib_copy_from_udata(&ucmd, udata,
4535                        min(udata->inlen, sizeof(ucmd))))
4536             return -EFAULT;
4537 
4538         if (ucmd.comp_mask ||
4539             memchr_inv(&ucmd.burst_info.reserved, 0,
4540                    sizeof(ucmd.burst_info.reserved)))
4541             return -EOPNOTSUPP;
4542 
4543     }
4544 
4545     if (qp->type == IB_QPT_GSI)
4546         return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
4547 
4548     qp_type = (qp->type == MLX5_IB_QPT_HW_GSI) ? IB_QPT_GSI : qp->type;
4549 
4550     if (qp_type == MLX5_IB_QPT_DCT)
4551         return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata);
4552 
4553     mutex_lock(&qp->mutex);
4554 
4555     cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
4556     new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
4557 
4558     if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4559         if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
4560             mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
4561                     attr_mask);
4562             goto out;
4563         }
4564     } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
4565            qp_type != MLX5_IB_QPT_DCI &&
4566            !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
4567                        attr_mask)) {
4568         mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
4569                 cur_state, new_state, qp->type, attr_mask);
4570         goto out;
4571     } else if (qp_type == MLX5_IB_QPT_DCI &&
4572            !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
4573         mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
4574                 cur_state, new_state, qp_type, attr_mask);
4575         goto out;
4576     }
4577 
4578     if ((attr_mask & IB_QP_PORT) &&
4579         (attr->port_num == 0 ||
4580          attr->port_num > dev->num_ports)) {
4581         mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
4582                 attr->port_num, dev->num_ports);
4583         goto out;
4584     }
4585 
4586     if ((attr_mask & IB_QP_PKEY_INDEX) &&
4587         attr->pkey_index >= dev->pkey_table_len) {
4588         mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
4589         goto out;
4590     }
4591 
4592     if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
4593         attr->max_rd_atomic >
4594         (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
4595         mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
4596                 attr->max_rd_atomic);
4597         goto out;
4598     }
4599 
4600     if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
4601         attr->max_dest_rd_atomic >
4602         (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
4603         mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
4604                 attr->max_dest_rd_atomic);
4605         goto out;
4606     }
4607 
4608     if (cur_state == new_state && cur_state == IB_QPS_RESET) {
4609         err = 0;
4610         goto out;
4611     }
4612 
4613     err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
4614                   new_state, &ucmd, &resp, udata);
4615 
4616     /* resp.response_length is set in ECE supported flows only */
4617     if (!err && resp.response_length &&
4618         udata->outlen >= resp.response_length)
4619         /* On copy failure, return -EFAULT and expect userspace to destroy the QP. */
4620         err = ib_copy_to_udata(udata, &resp, resp.response_length);
4621 
4622 out:
4623     mutex_unlock(&qp->mutex);
4624     return err;
4625 }
4626 
4627 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
4628 {
4629     switch (mlx5_state) {
4630     case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
4631     case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
4632     case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
4633     case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
4634     case MLX5_QP_STATE_SQ_DRAINING:
4635     case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
4636     case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
4637     case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
4638     default:             return -1;
4639     }
4640 }
4641 
4642 static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
4643 {
4644     switch (mlx5_mig_state) {
4645     case MLX5_QP_PM_ARMED:      return IB_MIG_ARMED;
4646     case MLX5_QP_PM_REARM:      return IB_MIG_REARM;
4647     case MLX5_QP_PM_MIGRATED:   return IB_MIG_MIGRATED;
4648     default: return -1;
4649     }
4650 }
4651 
4652 static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
4653                 struct rdma_ah_attr *ah_attr, void *path)
4654 {
4655     int port = MLX5_GET(ads, path, vhca_port_num);
4656     int static_rate;
4657 
4658     memset(ah_attr, 0, sizeof(*ah_attr));
4659 
4660     if (!port || port > ibdev->num_ports)
4661         return;
4662 
4663     ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port);
4664 
4665     rdma_ah_set_port_num(ah_attr, port);
4666     rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl));
4667 
4668     rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid));
4669     rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));
4670 
4671     static_rate = MLX5_GET(ads, path, stat_rate);
4672     rdma_ah_set_static_rate(ah_attr, mlx5_to_ib_rate_map(static_rate));
4673     if (MLX5_GET(ads, path, grh) ||
4674         ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
4675         rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
4676                 MLX5_GET(ads, path, src_addr_index),
4677                 MLX5_GET(ads, path, hop_limit),
4678                 MLX5_GET(ads, path, tclass));
4679         rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
4680     }
4681 }
4682 
4683 static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
4684                     struct mlx5_ib_sq *sq,
4685                     u8 *sq_state)
4686 {
4687     int err;
4688 
4689     err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
4690     if (err)
4691         goto out;
4692     sq->state = *sq_state;
4693 
4694 out:
4695     return err;
4696 }
4697 
4698 static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
4699                     struct mlx5_ib_rq *rq,
4700                     u8 *rq_state)
4701 {
4702     void *out;
4703     void *rqc;
4704     int inlen;
4705     int err;
4706 
4707     inlen = MLX5_ST_SZ_BYTES(query_rq_out);
4708     out = kvzalloc(inlen, GFP_KERNEL);
4709     if (!out)
4710         return -ENOMEM;
4711 
4712     err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
4713     if (err)
4714         goto out;
4715 
4716     rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
4717     *rq_state = MLX5_GET(rqc, rqc, state);
4718     rq->state = *rq_state;
4719 
4720 out:
4721     kvfree(out);
4722     return err;
4723 }
4724 
4725 static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
4726                   struct mlx5_ib_qp *qp, u8 *qp_state)
4727 {
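         /*
          * Map the (RQ state, SQ state) pair to an IB QP state.  The
          * MLX5_QP_STATE sentinel means "no new information, keep the
          * cached qp->state"; MLX5_QP_STATE_BAD marks an inconsistent
          * SQ/RQ state combination.
          */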
4728     static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
4729         [MLX5_RQC_STATE_RST] = {
4730             [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
4731             [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4732             [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
4733             [MLX5_SQ_STATE_NA]  = IB_QPS_RESET,
4734         },
4735         [MLX5_RQC_STATE_RDY] = {
4736             [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE,
4737             [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4738             [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
4739             [MLX5_SQ_STATE_NA]  = MLX5_QP_STATE,
4740         },
4741         [MLX5_RQC_STATE_ERR] = {
4742             [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
4743             [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4744             [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
4745             [MLX5_SQ_STATE_NA]  = IB_QPS_ERR,
4746         },
4747         [MLX5_RQ_STATE_NA] = {
4748             [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE,
4749             [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4750             [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
4751             [MLX5_SQ_STATE_NA]  = MLX5_QP_STATE_BAD,
4752         },
4753     };
4754 
4755     *qp_state = sqrq_trans[rq_state][sq_state];
4756 
4757     if (*qp_state == MLX5_QP_STATE_BAD) {
4758         WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
4759              qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
4760              qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
4761         return -EINVAL;
4762     }
4763 
4764     if (*qp_state == MLX5_QP_STATE)
4765         *qp_state = qp->state;
4766 
4767     return 0;
4768 }
4769 
4770 static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
4771                      struct mlx5_ib_qp *qp,
4772                      u8 *raw_packet_qp_state)
4773 {
4774     struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
4775     struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
4776     struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
4777     int err;
4778     u8 sq_state = MLX5_SQ_STATE_NA;
4779     u8 rq_state = MLX5_RQ_STATE_NA;
4780 
4781     if (qp->sq.wqe_cnt) {
4782         err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
4783         if (err)
4784             return err;
4785     }
4786 
4787     if (qp->rq.wqe_cnt) {
4788         err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
4789         if (err)
4790             return err;
4791     }
4792 
4793     return sqrq_state_to_qp_state(sq_state, rq_state, qp,
4794                       raw_packet_qp_state);
4795 }
4796 
4797 static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
4798              struct ib_qp_attr *qp_attr)
4799 {
4800     int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
4801     void *qpc, *pri_path, *alt_path;
4802     u32 *outb;
4803     int err;
4804 
4805     outb = kzalloc(outlen, GFP_KERNEL);
4806     if (!outb)
4807         return -ENOMEM;
4808 
4809     err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
4810     if (err)
4811         goto out;
4812 
4813     qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);
4814 
4815     qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state));
4816     if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING)
4817         qp_attr->sq_draining = 1;
4818 
4819     qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu);
4820     qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state));
4821     qp_attr->qkey = MLX5_GET(qpc, qpc, q_key);
4822     qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn);
4823     qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn);
4824     qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn);
4825 
4826     if (MLX5_GET(qpc, qpc, rre))
4827         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
4828     if (MLX5_GET(qpc, qpc, rwe))
4829         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE;
4830     if (MLX5_GET(qpc, qpc, rae))
4831         qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC;
4832 
4833     qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max);
4834     qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max);
4835     qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak);
4836     qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count);
4837     qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry);
4838 
4839     pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
4840     alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
4841 
4842     if (qp->type == IB_QPT_RC || qp->type == IB_QPT_UC ||
4843         qp->type == IB_QPT_XRC_INI || qp->type == IB_QPT_XRC_TGT) {
4844         to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
4845         to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
4846         qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
4847         qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num);
4848     }
4849 
4850     qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index);
4851     qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num);
4852     qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout);
4853     qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout);
4854 
4855 out:
4856     kfree(outb);
4857     return err;
4858 }
4859 
4860 static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
4861                 struct ib_qp_attr *qp_attr, int qp_attr_mask,
4862                 struct ib_qp_init_attr *qp_init_attr)
4863 {
4864     struct mlx5_core_dct    *dct = &mqp->dct.mdct;
4865     u32 *out;
4866     u32 access_flags = 0;
4867     int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
4868     void *dctc;
4869     int err;
4870     int supported_mask = IB_QP_STATE |
4871                  IB_QP_ACCESS_FLAGS |
4872                  IB_QP_PORT |
4873                  IB_QP_MIN_RNR_TIMER |
4874                  IB_QP_AV |
4875                  IB_QP_PATH_MTU |
4876                  IB_QP_PKEY_INDEX;
4877 
4878     if (qp_attr_mask & ~supported_mask)
4879         return -EINVAL;
4880     if (mqp->state != IB_QPS_RTR)
4881         return -EINVAL;
4882 
4883     out = kzalloc(outlen, GFP_KERNEL);
4884     if (!out)
4885         return -ENOMEM;
4886 
4887     err = mlx5_core_dct_query(dev, dct, out, outlen);
4888     if (err)
4889         goto out;
4890 
4891     dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);
4892 
4893     if (qp_attr_mask & IB_QP_STATE)
4894         qp_attr->qp_state = IB_QPS_RTR;
4895 
4896     if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
4897         if (MLX5_GET(dctc, dctc, rre))
4898             access_flags |= IB_ACCESS_REMOTE_READ;
4899         if (MLX5_GET(dctc, dctc, rwe))
4900             access_flags |= IB_ACCESS_REMOTE_WRITE;
4901         if (MLX5_GET(dctc, dctc, rae))
4902             access_flags |= IB_ACCESS_REMOTE_ATOMIC;
4903         qp_attr->qp_access_flags = access_flags;
4904     }
4905 
4906     if (qp_attr_mask & IB_QP_PORT)
4907         qp_attr->port_num = MLX5_GET(dctc, dctc, port);
4908     if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
4909         qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
4910     if (qp_attr_mask & IB_QP_AV) {
4911         qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
4912         qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
4913         qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
4914         qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
4915     }
4916     if (qp_attr_mask & IB_QP_PATH_MTU)
4917         qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
4918     if (qp_attr_mask & IB_QP_PKEY_INDEX)
4919         qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);
4920 out:
4921     kfree(out);
4922     return err;
4923 }
4924 
4925 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4926              int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
4927 {
4928     struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4929     struct mlx5_ib_qp *qp = to_mqp(ibqp);
4930     int err = 0;
4931     u8 raw_packet_qp_state;
4932 
4933     if (ibqp->rwq_ind_tbl)
4934         return -ENOSYS;
4935 
4936     if (qp->type == IB_QPT_GSI)
4937         return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
4938                         qp_init_attr);
4939 
4940     /* Not all of the output fields are applicable; make sure to zero them */
4941     memset(qp_init_attr, 0, sizeof(*qp_init_attr));
4942     memset(qp_attr, 0, sizeof(*qp_attr));
4943 
4944     if (unlikely(qp->type == MLX5_IB_QPT_DCT))
4945         return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
4946                         qp_attr_mask, qp_init_attr);
4947 
4948     mutex_lock(&qp->mutex);
4949 
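         /*
          * Raw packet and underlay QPs are built from separate SQ and RQ
          * objects, so their state is derived from the two sub-object
          * states rather than from a single QP context.
          */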
4950     if (qp->type == IB_QPT_RAW_PACKET ||
4951         qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4952         err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
4953         if (err)
4954             goto out;
4955         qp->state = raw_packet_qp_state;
4956         qp_attr->port_num = 1;
4957     } else {
4958         err = query_qp_attr(dev, qp, qp_attr);
4959         if (err)
4960             goto out;
4961     }
4962 
4963     qp_attr->qp_state        = qp->state;
4964     qp_attr->cur_qp_state        = qp_attr->qp_state;
4965     qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
4966     qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
4967 
4968     if (!ibqp->uobject) {
4969         qp_attr->cap.max_send_wr  = qp->sq.max_post;
4970         qp_attr->cap.max_send_sge = qp->sq.max_gs;
4971         qp_init_attr->qp_context = ibqp->qp_context;
4972     } else {
4973         qp_attr->cap.max_send_wr  = 0;
4974         qp_attr->cap.max_send_sge = 0;
4975     }
4976 
4977     qp_init_attr->qp_type = qp->type;
4978     qp_init_attr->recv_cq = ibqp->recv_cq;
4979     qp_init_attr->send_cq = ibqp->send_cq;
4980     qp_init_attr->srq = ibqp->srq;
4981     qp_attr->cap.max_inline_data = qp->max_inline_data;
4982 
4983     qp_init_attr->cap        = qp_attr->cap;
4984 
4985     qp_init_attr->create_flags = qp->flags;
4986 
4987     qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
4988         IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
4989 
4990 out:
4991     mutex_unlock(&qp->mutex);
4992     return err;
4993 }
4994 
4995 int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
4996 {
4997     struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
4998     struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
4999 
5000     if (!MLX5_CAP_GEN(dev->mdev, xrc))
5001         return -EOPNOTSUPP;
5002 
5003     return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
5004 }
5005 
5006 int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
5007 {
5008     struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
5009     u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
5010 
5011     return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
5012 }
5013 
5014 static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
5015 {
5016     struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp);
5017     struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device);
5018     struct ib_event event;
5019 
5020     if (rwq->ibwq.event_handler) {
5021         event.device     = rwq->ibwq.device;
5022         event.element.wq = &rwq->ibwq;
5023         switch (type) {
5024         case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
5025             event.event = IB_EVENT_WQ_FATAL;
5026             break;
5027         default:
5028             mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn);
5029             return;
5030         }
5031 
5032         rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context);
5033     }
5034 }
5035 
5036 static int set_delay_drop(struct mlx5_ib_dev *dev)
5037 {
5038     int err = 0;
5039 
5040     mutex_lock(&dev->delay_drop.lock);
5041     if (dev->delay_drop.activate)
5042         goto out;
5043 
5044     err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
5045     if (err)
5046         goto out;
5047 
5048     dev->delay_drop.activate = true;
5049 out:
5050     mutex_unlock(&dev->delay_drop.lock);
5051 
5052     if (!err)
5053         atomic_inc(&dev->delay_drop.rqs_cnt);
5054     return err;
5055 }
5056 
5057 static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
5058               struct ib_wq_init_attr *init_attr)
5059 {
5060     struct mlx5_ib_dev *dev;
5061     int has_net_offloads;
5062     __be64 *rq_pas0;
5063     int ts_format;
5064     void *in;
5065     void *rqc;
5066     void *wq;
5067     int inlen;
5068     int err;
5069 
5070     dev = to_mdev(pd->device);
5071 
5072     ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
5073     if (ts_format < 0)
5074         return ts_format;
5075 
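     /* Room for the RQ page list (PAS) is reserved after the base command */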
5076     inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
5077     in = kvzalloc(inlen, GFP_KERNEL);
5078     if (!in)
5079         return -ENOMEM;
5080 
5081     MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
5082     rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
5083     MLX5_SET(rqc,  rqc, mem_rq_type,
5084          MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
5085     MLX5_SET(rqc, rqc, ts_format, ts_format);
5086     MLX5_SET(rqc, rqc, user_index, rwq->user_index);
5087     MLX5_SET(rqc,  rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
5088     MLX5_SET(rqc,  rqc, state, MLX5_RQC_STATE_RST);
5089     MLX5_SET(rqc,  rqc, flush_in_error_en, 1);
5090     wq = MLX5_ADDR_OF(rqc, rqc, wq);
5091     MLX5_SET(wq, wq, wq_type,
5092          rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ?
5093          MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC);
5094     if (init_attr->create_flags & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
5095         if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
5096             mlx5_ib_dbg(dev, "Scatter end padding is not supported\n");
5097             err = -EOPNOTSUPP;
5098             goto out;
5099         } else {
5100             MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
5101         }
5102     }
5103     MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
5104     if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
5105         /*
5106          * In the firmware, the number of strides in each WQE is
5107          *   "512 * 2^single_wqe_log_num_of_strides".
5108          * User values 3 to 8 are passed to the firmware as 10 to 15,
5109          * and values 9 to 18 as 0 to 9.
5110          */
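             /* e.g. a user value of 9 (512 strides per WQE) maps to firmware value 0 */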
5111         static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
5112                          2,  3,  4,  5,  6,  7,  8, 9 };
5113         MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
5114         MLX5_SET(wq, wq, log_wqe_stride_size,
5115              rwq->single_stride_log_num_of_bytes -
5116              MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
5117         MLX5_SET(wq, wq, log_wqe_num_of_strides,
5118              fw_map[rwq->log_num_strides -
5119                 MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
5120     }
5121     MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
5122     MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
5123     MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
5124     MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
5125     MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
5126     MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
5127     has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
5128     if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
5129         if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
5130             mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
5131             err = -EOPNOTSUPP;
5132             goto out;
5133         }
5134     } else {
5135         MLX5_SET(rqc, rqc, vsd, 1);
5136     }
5137     if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
5138         if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
5139             mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
5140             err = -EOPNOTSUPP;
5141             goto out;
5142         }
5143         MLX5_SET(rqc, rqc, scatter_fcs, 1);
5144     }
5145     if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
5146         if (!(dev->ib_dev.attrs.raw_packet_caps &
5147               IB_RAW_PACKET_CAP_DELAY_DROP)) {
5148             mlx5_ib_dbg(dev, "Delay drop is not supported\n");
5149             err = -EOPNOTSUPP;
5150             goto out;
5151         }
5152         MLX5_SET(rqc, rqc, delay_drop_en, 1);
5153     }
5154     rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
5155     mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
5156     err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
5157     if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
5158         err = set_delay_drop(dev);
5159         if (err) {
5160             mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
5161                      err);
5162             mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5163         } else {
5164             rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
5165         }
5166     }
5167 out:
5168     kvfree(in);
5169     return err;
5170 }
5171 
5172 static int set_user_rq_size(struct mlx5_ib_dev *dev,
5173                 struct ib_wq_init_attr *wq_init_attr,
5174                 struct mlx5_ib_create_wq *ucmd,
5175                 struct mlx5_ib_rwq *rwq)
5176 {
5177     /* Sanity check RQ size before proceeding */
5178     if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
5179         return -EINVAL;
5180 
5181     if (!ucmd->rq_wqe_count)
5182         return -EINVAL;
5183 
5184     rwq->wqe_count = ucmd->rq_wqe_count;
5185     rwq->wqe_shift = ucmd->rq_wqe_shift;
5186     if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
5187         return -EINVAL;
5188 
5189     rwq->log_rq_stride = rwq->wqe_shift;
5190     rwq->log_rq_size = ilog2(rwq->wqe_count);
5191     return 0;
5192 }
5193 
5194 static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
5195 {
5196     if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
5197         (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
5198         return false;
5199 
5200     if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
5201         (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
5202         return false;
5203 
5204     return true;
5205 }
5206 
5207 static int prepare_user_rq(struct ib_pd *pd,
5208                struct ib_wq_init_attr *init_attr,
5209                struct ib_udata *udata,
5210                struct mlx5_ib_rwq *rwq)
5211 {
5212     struct mlx5_ib_dev *dev = to_mdev(pd->device);
5213     struct mlx5_ib_create_wq ucmd = {};
5214     int err;
5215     size_t required_cmd_sz;
5216 
5217     required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
5218                       single_stride_log_num_of_bytes);
5219     if (udata->inlen < required_cmd_sz) {
5220         mlx5_ib_dbg(dev, "invalid inlen\n");
5221         return -EINVAL;
5222     }
5223 
5224     if (udata->inlen > sizeof(ucmd) &&
5225         !ib_is_udata_cleared(udata, sizeof(ucmd),
5226                  udata->inlen - sizeof(ucmd))) {
5227         mlx5_ib_dbg(dev, "inlen is not supported\n");
5228         return -EOPNOTSUPP;
5229     }
5230 
5231     if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
5232         mlx5_ib_dbg(dev, "copy failed\n");
5233         return -EFAULT;
5234     }
5235 
5236     if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) {
5237         mlx5_ib_dbg(dev, "invalid comp mask\n");
5238         return -EOPNOTSUPP;
5239     } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) {
5240         if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) {
5241             mlx5_ib_dbg(dev, "Striding RQ is not supported\n");
5242             return -EOPNOTSUPP;
5243         }
5244         if ((ucmd.single_stride_log_num_of_bytes <
5245             MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) ||
5246             (ucmd.single_stride_log_num_of_bytes >
5247              MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) {
5248             mlx5_ib_dbg(dev, "Invalid log stride size (%u); valid range is %u - %u\n",
5249                     ucmd.single_stride_log_num_of_bytes,
5250                     MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES,
5251                     MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
5252             return -EINVAL;
5253         }
5254         if (!log_of_strides_valid(dev,
5255                       ucmd.single_wqe_log_num_of_strides)) {
5256             mlx5_ib_dbg(
5257                 dev,
5258                 "Invalid log num strides (%u); valid range is %u - %u\n",
5259                 ucmd.single_wqe_log_num_of_strides,
5260                 MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
5261                     MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
5262                     MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
5263                 MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
5264             return -EINVAL;
5265         }
5266         rwq->single_stride_log_num_of_bytes =
5267             ucmd.single_stride_log_num_of_bytes;
5268         rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides;
5269         rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en;
5270         rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ;
5271     }
5272 
5273     err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
5274     if (err) {
5275         mlx5_ib_dbg(dev, "err %d\n", err);
5276         return err;
5277     }
5278 
5279     err = create_user_rq(dev, pd, udata, rwq, &ucmd);
5280     if (err) {
5281         mlx5_ib_dbg(dev, "err %d\n", err);
5282         return err;
5283     }
5284 
5285     rwq->user_index = ucmd.user_index;
5286     return 0;
5287 }
5288 
5289 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
5290                 struct ib_wq_init_attr *init_attr,
5291                 struct ib_udata *udata)
5292 {
5293     struct mlx5_ib_dev *dev;
5294     struct mlx5_ib_rwq *rwq;
5295     struct mlx5_ib_create_wq_resp resp = {};
5296     size_t min_resp_len;
5297     int err;
5298 
5299     if (!udata)
5300         return ERR_PTR(-ENOSYS);
5301 
5302     min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
5303     if (udata->outlen && udata->outlen < min_resp_len)
5304         return ERR_PTR(-EINVAL);
5305 
5306     if (!capable(CAP_SYS_RAWIO) &&
5307         init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
5308         return ERR_PTR(-EPERM);
5309 
5310     dev = to_mdev(pd->device);
5311     switch (init_attr->wq_type) {
5312     case IB_WQT_RQ:
5313         rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
5314         if (!rwq)
5315             return ERR_PTR(-ENOMEM);
5316         err = prepare_user_rq(pd, init_attr, udata, rwq);
5317         if (err)
5318             goto err;
5319         err = create_rq(rwq, pd, init_attr);
5320         if (err)
5321             goto err_user_rq;
5322         break;
5323     default:
5324         mlx5_ib_dbg(dev, "unsupported wq type %d\n",
5325                 init_attr->wq_type);
5326         return ERR_PTR(-EINVAL);
5327     }
5328 
5329     rwq->ibwq.wq_num = rwq->core_qp.qpn;
5330     rwq->ibwq.state = IB_WQS_RESET;
5331     if (udata->outlen) {
5332         resp.response_length = offsetofend(
5333             struct mlx5_ib_create_wq_resp, response_length);
5334         err = ib_copy_to_udata(udata, &resp, resp.response_length);
5335         if (err)
5336             goto err_copy;
5337     }
5338 
5339     rwq->core_qp.event = mlx5_ib_wq_event;
5340     rwq->ibwq.event_handler = init_attr->event_handler;
5341     return &rwq->ibwq;
5342 
5343 err_copy:
5344     mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5345 err_user_rq:
5346     destroy_user_rq(dev, pd, rwq, udata);
5347 err:
5348     kfree(rwq);
5349     return ERR_PTR(err);
5350 }
5351 
5352 int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
5353 {
5354     struct mlx5_ib_dev *dev = to_mdev(wq->device);
5355     struct mlx5_ib_rwq *rwq = to_mrwq(wq);
5356     int ret;
5357 
5358     ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5359     if (ret)
5360         return ret;
5361     destroy_user_rq(dev, wq->pd, rwq, udata);
5362     kfree(rwq);
5363     return 0;
5364 }
5365 
5366 int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
5367                  struct ib_rwq_ind_table_init_attr *init_attr,
5368                  struct ib_udata *udata)
5369 {
5370     struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
5371         to_mrwq_ind_table(ib_rwq_ind_table);
5372     struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
5373     int sz = 1 << init_attr->log_ind_tbl_size;
5374     struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
5375     size_t min_resp_len;
5376     int inlen;
5377     int err;
5378     int i;
5379     u32 *in;
5380     void *rqtc;
5381 
5382     if (udata->inlen > 0 &&
5383         !ib_is_udata_cleared(udata, 0,
5384                  udata->inlen))
5385         return -EOPNOTSUPP;
5386 
5387     if (init_attr->log_ind_tbl_size >
5388         MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
5389         mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than the supported maximum %d\n",
5390                 init_attr->log_ind_tbl_size,
5391                 MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
5392         return -EINVAL;
5393     }
5394 
5395     min_resp_len =
5396         offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
5397     if (udata->outlen && udata->outlen < min_resp_len)
5398         return -EINVAL;
5399 
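         /* One u32 RQ number per table entry follows the base create_rqt command */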
5400     inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
5401     in = kvzalloc(inlen, GFP_KERNEL);
5402     if (!in)
5403         return -ENOMEM;
5404 
5405     rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
5406 
5407     MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
5408     MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
5409 
5410     for (i = 0; i < sz; i++)
5411         MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
5412 
5413     rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
5414     MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
5415 
5416     err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
5417     kvfree(in);
5418     if (err)
5419         return err;
5420 
5421     rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
5422     if (udata->outlen) {
5423         resp.response_length =
5424             offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
5425                     response_length);
5426         err = ib_copy_to_udata(udata, &resp, resp.response_length);
5427         if (err)
5428             goto err_copy;
5429     }
5430 
5431     return 0;
5432 
5433 err_copy:
5434     mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5435     return err;
5436 }
5437 
5438 int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
5439 {
5440     struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
5441     struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
5442 
5443     return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5444 }
5445 
5446 int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
5447               u32 wq_attr_mask, struct ib_udata *udata)
5448 {
5449     struct mlx5_ib_dev *dev = to_mdev(wq->device);
5450     struct mlx5_ib_rwq *rwq = to_mrwq(wq);
5451     struct mlx5_ib_modify_wq ucmd = {};
5452     size_t required_cmd_sz;
5453     int curr_wq_state;
5454     int wq_state;
5455     int inlen;
5456     int err;
5457     void *rqc;
5458     void *in;
5459 
5460     required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
5461     if (udata->inlen < required_cmd_sz)
5462         return -EINVAL;
5463 
5464     if (udata->inlen > sizeof(ucmd) &&
5465         !ib_is_udata_cleared(udata, sizeof(ucmd),
5466                  udata->inlen - sizeof(ucmd)))
5467         return -EOPNOTSUPP;
5468 
5469     if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
5470         return -EFAULT;
5471 
5472     if (ucmd.comp_mask || ucmd.reserved)
5473         return -EOPNOTSUPP;
5474 
5475     inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
5476     in = kvzalloc(inlen, GFP_KERNEL);
5477     if (!in)
5478         return -ENOMEM;
5479 
5480     rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
5481 
5482     curr_wq_state = wq_attr->curr_wq_state;
5483     wq_state = wq_attr->wq_state;
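         /*
          * The IB and device encodings agree for RESET and RDY; only the
          * error state needs explicit translation.
          */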
5484     if (curr_wq_state == IB_WQS_ERR)
5485         curr_wq_state = MLX5_RQC_STATE_ERR;
5486     if (wq_state == IB_WQS_ERR)
5487         wq_state = MLX5_RQC_STATE_ERR;
5488     MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
5489     MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
5490     MLX5_SET(rqc, rqc, state, wq_state);
5491 
5492     if (wq_attr_mask & IB_WQ_FLAGS) {
5493         if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
5494             if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
5495                   MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
5496                 mlx5_ib_dbg(dev,
5497                         "VLAN offloads are not supported\n");
5498                 err = -EOPNOTSUPP;
5499                 goto out;
5500             }
5501             MLX5_SET64(modify_rq_in, in, modify_bitmask,
5502                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
5503             MLX5_SET(rqc, rqc, vsd,
5504                  (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
5505         }
5506 
5507         if (wq_attr->flags_mask & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
5508             mlx5_ib_dbg(dev, "Modifying scatter end padding is not supported\n");
5509             err = -EOPNOTSUPP;
5510             goto out;
5511         }
5512     }
5513 
5514     if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
5515         u16 set_id;
5516 
5517         set_id = mlx5_ib_get_counters_id(dev, 0);
5518         if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
5519             MLX5_SET64(modify_rq_in, in, modify_bitmask,
5520                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
5521             MLX5_SET(rqc, rqc, counter_set_id, set_id);
5522         } else
5523             dev_info_once(
5524                 &dev->ib_dev.dev,
5525                 "Receive WQ counters are not supported on current FW\n");
5526     }
5527 
5528     err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
5529     if (!err)
5530         rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
5531 
5532 out:
5533     kvfree(in);
5534     return err;
5535 }
5536 
5537 struct mlx5_ib_drain_cqe {
5538     struct ib_cqe cqe;
5539     struct completion done;
5540 };
5541 
5542 static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
5543 {
5544     struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
5545                              struct mlx5_ib_drain_cqe,
5546                              cqe);
5547 
5548     complete(&cqe->done);
5549 }
5550 
5551 /* This function returns only once the drain WR has completed */
5552 static void handle_drain_completion(struct ib_cq *cq,
5553                     struct mlx5_ib_drain_cqe *sdrain,
5554                     struct mlx5_ib_dev *dev)
5555 {
5556     struct mlx5_core_dev *mdev = dev->mdev;
5557 
5558     if (cq->poll_ctx == IB_POLL_DIRECT) {
5559         while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
5560             ib_process_cq_direct(cq, -1);
5561         return;
5562     }
5563 
5564     if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5565         struct mlx5_ib_cq *mcq = to_mcq(cq);
5566         bool triggered = false;
5567         unsigned long flags;
5568 
5569         spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
5570         /* Make sure that the CQ handler won't run if it hasn't run yet */
5571         if (!mcq->mcq.reset_notify_added)
5572             mcq->mcq.reset_notify_added = 1;
5573         else
5574             triggered = true;
5575         spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
5576 
5577         if (triggered) {
5578             /* Wait for any scheduled/running task to finish */
5579             switch (cq->poll_ctx) {
5580             case IB_POLL_SOFTIRQ:
5581                 irq_poll_disable(&cq->iop);
5582                 irq_poll_enable(&cq->iop);
5583                 break;
5584             case IB_POLL_WORKQUEUE:
5585                 cancel_work_sync(&cq->work);
5586                 break;
5587             default:
5588                 WARN_ON_ONCE(1);
5589             }
5590         }
5591 
5592         /* Run the CQ handler - this makes sure that the drain WR will
5593          * be processed if it wasn't processed yet.
5594          */
5595         mcq->mcq.comp(&mcq->mcq, NULL);
5596     }
5597 
5598     wait_for_completion(&sdrain->done);
5599 }
5600 
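     /*
      * Drain the SQ: move the QP to the error state, post a zero-length
      * RDMA_WRITE as a marker WR and wait for its completion; once it
      * completes, all previously posted send WRs have been completed or
      * flushed as well.
      */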
5601 void mlx5_ib_drain_sq(struct ib_qp *qp)
5602 {
5603     struct ib_cq *cq = qp->send_cq;
5604     struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5605     struct mlx5_ib_drain_cqe sdrain;
5606     const struct ib_send_wr *bad_swr;
5607     struct ib_rdma_wr swr = {
5608         .wr = {
5609             .next = NULL,
5610             { .wr_cqe   = &sdrain.cqe, },
5611             .opcode = IB_WR_RDMA_WRITE,
5612         },
5613     };
5614     int ret;
5615     struct mlx5_ib_dev *dev = to_mdev(qp->device);
5616     struct mlx5_core_dev *mdev = dev->mdev;
5617 
5618     ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5619     if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5620         WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5621         return;
5622     }
5623 
5624     sdrain.cqe.done = mlx5_ib_drain_qp_done;
5625     init_completion(&sdrain.done);
5626 
5627     ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
5628     if (ret) {
5629         WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5630         return;
5631     }
5632 
5633     handle_drain_completion(cq, &sdrain, dev);
5634 }
5635 
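     /*
      * Drain the RQ: move the QP to the error state, post a marker receive
      * WR and wait for its completion; by then all earlier receive WRs have
      * been completed or flushed as well.
      */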
5636 void mlx5_ib_drain_rq(struct ib_qp *qp)
5637 {
5638     struct ib_cq *cq = qp->recv_cq;
5639     struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5640     struct mlx5_ib_drain_cqe rdrain;
5641     struct ib_recv_wr rwr = {};
5642     const struct ib_recv_wr *bad_rwr;
5643     int ret;
5644     struct mlx5_ib_dev *dev = to_mdev(qp->device);
5645     struct mlx5_core_dev *mdev = dev->mdev;
5646 
5647     ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5648     if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5649         WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5650         return;
5651     }
5652 
5653     rwr.wr_cqe = &rdrain.cqe;
5654     rdrain.cqe.done = mlx5_ib_drain_qp_done;
5655     init_completion(&rdrain.done);
5656 
5657     ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
5658     if (ret) {
5659         WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5660         return;
5661     }
5662 
5663     handle_drain_completion(cq, &rdrain, dev);
5664 }
5665 
5666 /*
5667  * Bind a QP to a counter. If @counter is NULL, bind the QP to
5668  * the default counter.
5669  */
5670 int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
5671 {
5672     struct mlx5_ib_dev *dev = to_mdev(qp->device);
5673     struct mlx5_ib_qp *mqp = to_mqp(qp);
5674     int err = 0;
5675 
5676     mutex_lock(&mqp->mutex);
5677     if (mqp->state == IB_QPS_RESET) {
5678         qp->counter = counter;
5679         goto out;
5680     }
5681 
5682     if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
5683         err = -EOPNOTSUPP;
5684         goto out;
5685     }
5686 
5687     if (mqp->state == IB_QPS_RTS) {
5688         err = __mlx5_ib_qp_set_counter(qp, counter);
5689         if (!err)
5690             qp->counter = counter;
5691 
5692         goto out;
5693     }
5694 
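     /* Too early to bind in HW; record the counter and mark the bind as pending */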
5695     mqp->counter_pending = 1;
5696     qp->counter = counter;
5697 
5698 out:
5699     mutex_unlock(&mqp->mutex);
5700     return err;
5701 }