0001 /*
0002  * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
0003  *
0004  * This program is free software; you can redistribute it and/or
0005  * modify it under the terms of EITHER the GNU General Public License
0006  * version 2 as published by the Free Software Foundation or the BSD
0007  * 2-Clause License. This program is distributed in the hope that it
0008  * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
0009  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
0010  * See the GNU General Public License version 2 for more details at
0011  * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
0012  *
0013  * You should have received a copy of the GNU General Public License
0014  * along with this program available in the file COPYING in the main
0015  * directory of this source tree.
0016  *
0017  * The BSD 2-Clause License
0018  *
0019  *     Redistribution and use in source and binary forms, with or
0020  *     without modification, are permitted provided that the following
0021  *     conditions are met:
0022  *
0023  *      - Redistributions of source code must retain the above
0024  *        copyright notice, this list of conditions and the following
0025  *        disclaimer.
0026  *
0027  *      - Redistributions in binary form must reproduce the above
0028  *        copyright notice, this list of conditions and the following
0029  *        disclaimer in the documentation and/or other materials
0030  *        provided with the distribution.
0031  *
0032  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0033  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0034  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
0035  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
0036  * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
0037  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0038  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
0039  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
0040  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
0041  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0042  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
0043  * OF THE POSSIBILITY OF SUCH DAMAGE.
0044  */
0045 
0046 #include <asm/page.h>
0047 #include <linux/io.h>
0048 #include <linux/wait.h>
0049 #include <rdma/ib_addr.h>
0050 #include <rdma/ib_smi.h>
0051 #include <rdma/ib_user_verbs.h>
0052 
0053 #include "pvrdma.h"
0054 
0055 static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
0056                 struct pvrdma_qp *qp);
0057 
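     /* Resolve the driver CQ structures behind the QP's send and receive CQs. */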
0058 static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
0059                struct pvrdma_cq **recv_cq)
0060 {
0061     *send_cq = to_vcq(qp->ibqp.send_cq);
0062     *recv_cq = to_vcq(qp->ibqp.recv_cq);
0063 }
0064 
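     /*
      * Lock the send and receive CQs together. When they differ, take the
      * locks in ascending cq_handle order so concurrent lockers cannot
      * deadlock; when they are the same CQ, lock it only once.
      */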
0065 static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
0066                 unsigned long *scq_flags,
0067                 unsigned long *rcq_flags)
0068     __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
0069 {
0070     if (scq == rcq) {
0071         spin_lock_irqsave(&scq->cq_lock, *scq_flags);
0072         __acquire(rcq->cq_lock);
0073     } else if (scq->cq_handle < rcq->cq_handle) {
0074         spin_lock_irqsave(&scq->cq_lock, *scq_flags);
0075         spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
0076                      SINGLE_DEPTH_NESTING);
0077     } else {
0078         spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
0079         spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
0080                      SINGLE_DEPTH_NESTING);
0081     }
0082 }
0083 
0084 static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
0085                   unsigned long *scq_flags,
0086                   unsigned long *rcq_flags)
0087     __releases(scq->cq_lock) __releases(rcq->cq_lock)
0088 {
0089     if (scq == rcq) {
0090         __release(rcq->cq_lock);
0091         spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
0092     } else if (scq->cq_handle < rcq->cq_handle) {
0093         spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
0094         spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
0095     } else {
0096         spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
0097         spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
0098     }
0099 }
0100 
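     /*
      * Flush any completions still queued for this QP and, for kernel
      * queue pairs, reset the shared ring-state indices. Used when the
      * QP transitions back to RESET.
      */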
0101 static void pvrdma_reset_qp(struct pvrdma_qp *qp)
0102 {
0103     struct pvrdma_cq *scq, *rcq;
0104     unsigned long scq_flags, rcq_flags;
0105 
0106     /* Clean up cqes */
0107     get_cqs(qp, &scq, &rcq);
0108     pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
0109 
0110     _pvrdma_flush_cqe(qp, scq);
0111     if (scq != rcq)
0112         _pvrdma_flush_cqe(qp, rcq);
0113 
0114     pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
0115 
0116     /*
0117      * Reset the queue pair. The checks are needed because user-space
0118      * queue pairs do not have kernel ring state.
0119      */
0120     if (qp->rq.ring) {
0121         atomic_set(&qp->rq.ring->cons_head, 0);
0122         atomic_set(&qp->rq.ring->prod_tail, 0);
0123     }
0124     if (qp->sq.ring) {
0125         atomic_set(&qp->sq.ring->cons_head, 0);
0126         atomic_set(&qp->sq.ring->prod_tail, 0);
0127     }
0128 }
0129 
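     /*
      * Validate the requested receive queue size against the device caps,
      * round the WR and SGE counts up to powers of two, write the rounded
      * values back into req_cap and compute the number of RQ pages needed.
      */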
0130 static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
0131                   struct ib_qp_cap *req_cap,
0132                   struct pvrdma_qp *qp)
0133 {
0134     if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
0135         req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
0136         dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
0137         return -EINVAL;
0138     }
0139 
0140     qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
0141     qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));
0142 
0143     /* Write back */
0144     req_cap->max_recv_wr = qp->rq.wqe_cnt;
0145     req_cap->max_recv_sge = qp->rq.max_sg;
0146 
0147     qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
0148                          sizeof(struct pvrdma_sge) *
0149                          qp->rq.max_sg);
0150     qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
0151               PAGE_SIZE;
0152 
0153     return 0;
0154 }
0155 
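     /*
      * Validate the requested send queue size against the device caps,
      * round the WR and SGE counts up to powers of two, write the rounded
      * values back into req_cap and compute the number of SQ pages,
      * including the PVRDMA_QP_NUM_HEADER_PAGES reserved at the front.
      */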
0156 static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
0157                   struct pvrdma_qp *qp)
0158 {
0159     if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
0160         req_cap->max_send_sge > dev->dsr->caps.max_sge) {
0161         dev_warn(&dev->pdev->dev, "send queue size invalid\n");
0162         return -EINVAL;
0163     }
0164 
0165     qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
0166     qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));
0167 
0168     /* Write back */
0169     req_cap->max_send_wr = qp->sq.wqe_cnt;
0170     req_cap->max_send_sge = qp->sq.max_sg;
0171 
0172     qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
0173                          sizeof(struct pvrdma_sge) *
0174                          qp->sq.max_sg);
0175     /* Note: one extra page for the header. */
0176     qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
0177               (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
0178                                 PAGE_SIZE;
0179 
0180     return 0;
0181 }
0182 
0183 /**
0184  * pvrdma_create_qp - create queue pair
0185  * @ibqp: queue pair
0186  * @init_attr: queue pair attributes
0187  * @udata: user data
0188  *
0189  * @return: 0 on success, otherwise returns an errno.
0190  */
0191 int pvrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
0192              struct ib_udata *udata)
0193 {
0194     struct pvrdma_qp *qp = to_vqp(ibqp);
0195     struct pvrdma_dev *dev = to_vdev(ibqp->device);
0196     union pvrdma_cmd_req req;
0197     union pvrdma_cmd_resp rsp;
0198     struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
0199     struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
0200     struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
0201     struct pvrdma_create_qp ucmd;
0202     struct pvrdma_create_qp_resp qp_resp = {};
0203     unsigned long flags;
0204     int ret;
0205     bool is_srq = !!init_attr->srq;
0206 
0207     if (init_attr->create_flags) {
0208         dev_warn(&dev->pdev->dev,
0209              "invalid create queuepair flags %#x\n",
0210              init_attr->create_flags);
0211         return -EOPNOTSUPP;
0212     }
0213 
0214     if (init_attr->qp_type != IB_QPT_RC &&
0215         init_attr->qp_type != IB_QPT_UD &&
0216         init_attr->qp_type != IB_QPT_GSI) {
0217         dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
0218              init_attr->qp_type);
0219         return -EOPNOTSUPP;
0220     }
0221 
0222     if (is_srq && !dev->dsr->caps.max_srq) {
0223         dev_warn(&dev->pdev->dev,
0224              "SRQs not supported by device\n");
0225         return -EINVAL;
0226     }
0227 
0228     if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
0229         return -ENOMEM;
0230 
0231     switch (init_attr->qp_type) {
0232     case IB_QPT_GSI:
0233         if (init_attr->port_num == 0 ||
0234             init_attr->port_num > ibqp->device->phys_port_cnt) {
0235             dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
0236             ret = -EINVAL;
0237             goto err_qp;
0238         }
0239         fallthrough;
0240     case IB_QPT_RC:
0241     case IB_QPT_UD:
0242         spin_lock_init(&qp->sq.lock);
0243         spin_lock_init(&qp->rq.lock);
0244         mutex_init(&qp->mutex);
0245         refcount_set(&qp->refcnt, 1);
0246         init_completion(&qp->free);
0247 
0248         qp->state = IB_QPS_RESET;
0249         qp->is_kernel = !udata;
0250 
0251         if (!qp->is_kernel) {
0252             dev_dbg(&dev->pdev->dev,
0253                 "create queuepair from user space\n");
0254 
0255             if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
0256                 ret = -EFAULT;
0257                 goto err_qp;
0258             }
0259 
0260             /* Can userspace receive both the qpn and the qp handle? */
0261             if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
0262                 udata->outlen < sizeof(qp_resp)) {
0263                 dev_warn(&dev->pdev->dev,
0264                      "create queuepair not supported\n");
0265                 ret = -EOPNOTSUPP;
0266                 goto err_qp;
0267             }
0268 
0269             if (!is_srq) {
0270                 /* Pin and map the userspace receive queue buffer. */
0271                 qp->rumem = ib_umem_get(ibqp->device,
0272                             ucmd.rbuf_addr,
0273                             ucmd.rbuf_size, 0);
0274                 if (IS_ERR(qp->rumem)) {
0275                     ret = PTR_ERR(qp->rumem);
0276                     goto err_qp;
0277                 }
0278                 qp->srq = NULL;
0279             } else {
0280                 qp->rumem = NULL;
0281                 qp->srq = to_vsrq(init_attr->srq);
0282             }
0283 
0284             qp->sumem = ib_umem_get(ibqp->device, ucmd.sbuf_addr,
0285                         ucmd.sbuf_size, 0);
0286             if (IS_ERR(qp->sumem)) {
0287                 if (!is_srq)
0288                     ib_umem_release(qp->rumem);
0289                 ret = PTR_ERR(qp->sumem);
0290                 goto err_qp;
0291             }
0292 
0293             qp->npages_send =
0294                 ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
0295             if (!is_srq)
0296                 qp->npages_recv = ib_umem_num_dma_blocks(
0297                     qp->rumem, PAGE_SIZE);
0298             else
0299                 qp->npages_recv = 0;
0300             qp->npages = qp->npages_send + qp->npages_recv;
0301         } else {
0302             ret = pvrdma_set_sq_size(to_vdev(ibqp->device),
0303                          &init_attr->cap, qp);
0304             if (ret)
0305                 goto err_qp;
0306 
0307             ret = pvrdma_set_rq_size(to_vdev(ibqp->device),
0308                          &init_attr->cap, qp);
0309             if (ret)
0310                 goto err_qp;
0311 
0312             qp->npages = qp->npages_send + qp->npages_recv;
0313 
0314             /* Skip header page. */
0315             qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;
0316 
0317             /* Recv queue pages are after send pages. */
0318             qp->rq.offset = qp->npages_send * PAGE_SIZE;
0319         }
0320 
0321         if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
0322             dev_warn(&dev->pdev->dev,
0323                  "overflow pages in queuepair\n");
0324             ret = -EINVAL;
0325             goto err_umem;
0326         }
0327 
0328         ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
0329                        qp->is_kernel);
0330         if (ret) {
0331             dev_warn(&dev->pdev->dev,
0332                  "could not allocate page directory\n");
0333             goto err_umem;
0334         }
0335 
0336         if (!qp->is_kernel) {
0337             pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
0338             if (!is_srq)
0339                 pvrdma_page_dir_insert_umem(&qp->pdir,
0340                                 qp->rumem,
0341                                 qp->npages_send);
0342         } else {
0343             /* Ring state is always the first page. */
0344             qp->sq.ring = qp->pdir.pages[0];
0345             qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
0346         }
0347         break;
0348     default:
0349         ret = -EINVAL;
0350         goto err_qp;
0351     }
0352 
0353     /* Not supported */
0354     init_attr->cap.max_inline_data = 0;
0355 
0356     memset(cmd, 0, sizeof(*cmd));
0357     cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
0358     cmd->pd_handle = to_vpd(ibqp->pd)->pd_handle;
0359     cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
0360     cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
0361     if (is_srq)
0362         cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
0363     else
0364         cmd->srq_handle = 0;
0365     cmd->max_send_wr = init_attr->cap.max_send_wr;
0366     cmd->max_recv_wr = init_attr->cap.max_recv_wr;
0367     cmd->max_send_sge = init_attr->cap.max_send_sge;
0368     cmd->max_recv_sge = init_attr->cap.max_recv_sge;
0369     cmd->max_inline_data = init_attr->cap.max_inline_data;
0370     cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
0371     cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
0372     cmd->is_srq = is_srq;
0373     cmd->lkey = 0;
0374     cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
0375     cmd->total_chunks = qp->npages;
0376     cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
0377     cmd->pdir_dma = qp->pdir.dir_dma;
0378 
0379     dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
0380         cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
0381         cmd->max_recv_sge);
0382 
0383     ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
0384     if (ret < 0) {
0385         dev_warn(&dev->pdev->dev,
0386              "could not create queuepair, error: %d\n", ret);
0387         goto err_pdir;
0388     }
0389 
0390     /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
0391     qp->port = init_attr->port_num;
0392 
0393     if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
0394         qp->ibqp.qp_num = resp_v2->qpn;
0395         qp->qp_handle = resp_v2->qp_handle;
0396     } else {
0397         qp->ibqp.qp_num = resp->qpn;
0398         qp->qp_handle = resp->qpn;
0399     }
0400 
0401     spin_lock_irqsave(&dev->qp_tbl_lock, flags);
0402     dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
0403     spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
0404 
0405     if (udata) {
0406         qp_resp.qpn = qp->ibqp.qp_num;
0407         qp_resp.qp_handle = qp->qp_handle;
0408 
0409         if (ib_copy_to_udata(udata, &qp_resp,
0410                      min(udata->outlen, sizeof(qp_resp)))) {
0411             dev_warn(&dev->pdev->dev,
0412                  "failed to copy back udata\n");
0413             __pvrdma_destroy_qp(dev, qp);
0414             return -EINVAL;
0415         }
0416     }
0417 
0418     return 0;
0419 
0420 err_pdir:
0421     pvrdma_page_dir_cleanup(dev, &qp->pdir);
0422 err_umem:
0423     ib_umem_release(qp->rumem);
0424     ib_umem_release(qp->sumem);
0425 err_qp:
0426     atomic_dec(&dev->num_qps);
0427     return ret;
0428 }
0429 
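     /*
      * Final QP teardown: remove the QP from the handle table, wait for
      * the last reference to be dropped, then release the user memory
      * regions, the page directory and the device QP count.
      */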
0430 static void _pvrdma_free_qp(struct pvrdma_qp *qp)
0431 {
0432     unsigned long flags;
0433     struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
0434 
0435     spin_lock_irqsave(&dev->qp_tbl_lock, flags);
0436     dev->qp_tbl[qp->qp_handle] = NULL;
0437     spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
0438 
0439     if (refcount_dec_and_test(&qp->refcnt))
0440         complete(&qp->free);
0441     wait_for_completion(&qp->free);
0442 
0443     ib_umem_release(qp->rumem);
0444     ib_umem_release(qp->sumem);
0445 
0446     pvrdma_page_dir_cleanup(dev, &qp->pdir);
0447 
0448     atomic_dec(&dev->num_qps);
0449 }
0450 
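     /*
      * Flush the QP's completions while holding both CQ locks, then hand
      * off to _pvrdma_free_qp() for the actual teardown.
      */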
0451 static void pvrdma_free_qp(struct pvrdma_qp *qp)
0452 {
0453     struct pvrdma_cq *scq;
0454     struct pvrdma_cq *rcq;
0455     unsigned long scq_flags, rcq_flags;
0456 
0457     /* In case cq is polling */
0458     get_cqs(qp, &scq, &rcq);
0459     pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
0460 
0461     _pvrdma_flush_cqe(qp, scq);
0462     if (scq != rcq)
0463         _pvrdma_flush_cqe(qp, rcq);
0464 
0465     /*
0466      * We're now unlocking the CQs before clearing out the qp handle; this
0467      * should still be safe: we have destroyed the backend QP and flushed
0468      * the CQEs, so there should be no other completions for this QP.
0469      */
0470     pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
0471 
0472     _pvrdma_free_qp(qp);
0473 }
0474 
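     /*
      * Post a DESTROY_QP command for the given handle; a failure here can
      * only be logged.
      */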
0475 static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
0476                        u32 qp_handle)
0477 {
0478     union pvrdma_cmd_req req;
0479     struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
0480     int ret;
0481 
0482     memset(cmd, 0, sizeof(*cmd));
0483     cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
0484     cmd->qp_handle = qp_handle;
0485 
0486     ret = pvrdma_cmd_post(dev, &req, NULL, 0);
0487     if (ret < 0)
0488         dev_warn(&dev->pdev->dev,
0489              "destroy queuepair failed, error: %d\n", ret);
0490 }
0491 
0492 /**
0493  * pvrdma_destroy_qp - destroy a queue pair
0494  * @qp: the queue pair to destroy
0495  * @udata: user data or null for kernel object
0496  *
0497  * @return: always 0.
0498  */
0499 int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
0500 {
0501     struct pvrdma_qp *vqp = to_vqp(qp);
0502 
0503     _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
0504     pvrdma_free_qp(vqp);
0505 
0506     return 0;
0507 }
0508 
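     /*
      * Error-path teardown used by pvrdma_create_qp() once the device QP
      * has been created but the response cannot be returned to userspace.
      */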
0509 static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
0510                 struct pvrdma_qp *qp)
0511 {
0512     _pvrdma_destroy_qp_work(dev, qp->qp_handle);
0513     _pvrdma_free_qp(qp);
0514 }
0515 
0516 /**
0517  * pvrdma_modify_qp - modify queue pair attributes
0518  * @ibqp: the queue pair
0519  * @attr: the new queue pair's attributes
0520  * @attr_mask: attributes mask
0521  * @udata: user data
0522  *
0523  * @returns 0 on success, otherwise returns an errno.
0524  */
0525 int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
0526              int attr_mask, struct ib_udata *udata)
0527 {
0528     struct pvrdma_dev *dev = to_vdev(ibqp->device);
0529     struct pvrdma_qp *qp = to_vqp(ibqp);
0530     union pvrdma_cmd_req req;
0531     union pvrdma_cmd_resp rsp;
0532     struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
0533     enum ib_qp_state cur_state, next_state;
0534     int ret;
0535 
0536     if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
0537         return -EOPNOTSUPP;
0538 
0539     /* Sanity checking; QP state transitions are serialized by the QP mutex. */
0540     mutex_lock(&qp->mutex);
0541     cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
0542         qp->state;
0543     next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
0544 
0545     if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
0546                 attr_mask)) {
0547         ret = -EINVAL;
0548         goto out;
0549     }
0550 
0551     if (attr_mask & IB_QP_PORT) {
0552         if (attr->port_num == 0 ||
0553             attr->port_num > ibqp->device->phys_port_cnt) {
0554             ret = -EINVAL;
0555             goto out;
0556         }
0557     }
0558 
0559     if (attr_mask & IB_QP_MIN_RNR_TIMER) {
0560         if (attr->min_rnr_timer > 31) {
0561             ret = -EINVAL;
0562             goto out;
0563         }
0564     }
0565 
0566     if (attr_mask & IB_QP_PKEY_INDEX) {
0567         if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
0568             ret = -EINVAL;
0569             goto out;
0570         }
0571     }
0572 
0573     if (attr_mask & IB_QP_QKEY)
0574         qp->qkey = attr->qkey;
0575 
0576     if (cur_state == next_state && cur_state == IB_QPS_RESET) {
0577         ret = 0;
0578         goto out;
0579     }
0580 
0581     qp->state = next_state;
0582     memset(cmd, 0, sizeof(*cmd));
0583     cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
0584     cmd->qp_handle = qp->qp_handle;
0585     cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
0586     cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
0587     cmd->attrs.cur_qp_state =
0588         ib_qp_state_to_pvrdma(attr->cur_qp_state);
0589     cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
0590     cmd->attrs.path_mig_state =
0591         ib_mig_state_to_pvrdma(attr->path_mig_state);
0592     cmd->attrs.qkey = attr->qkey;
0593     cmd->attrs.rq_psn = attr->rq_psn;
0594     cmd->attrs.sq_psn = attr->sq_psn;
0595     cmd->attrs.dest_qp_num = attr->dest_qp_num;
0596     cmd->attrs.qp_access_flags =
0597         ib_access_flags_to_pvrdma(attr->qp_access_flags);
0598     cmd->attrs.pkey_index = attr->pkey_index;
0599     cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
0600     cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
0601     cmd->attrs.sq_draining = attr->sq_draining;
0602     cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
0603     cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
0604     cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
0605     cmd->attrs.port_num = attr->port_num;
0606     cmd->attrs.timeout = attr->timeout;
0607     cmd->attrs.retry_cnt = attr->retry_cnt;
0608     cmd->attrs.rnr_retry = attr->rnr_retry;
0609     cmd->attrs.alt_port_num = attr->alt_port_num;
0610     cmd->attrs.alt_timeout = attr->alt_timeout;
0611     ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
0612     rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
0613     rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
0614 
0615     ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
0616     if (ret < 0) {
0617         dev_warn(&dev->pdev->dev,
0618              "could not modify queuepair, error: %d\n", ret);
0619     } else if (rsp.hdr.err > 0) {
0620         dev_warn(&dev->pdev->dev,
0621              "cannot modify queuepair, error: %d\n", rsp.hdr.err);
0622         ret = -EINVAL;
0623     }
0624 
0625     if (ret == 0 && next_state == IB_QPS_RESET)
0626         pvrdma_reset_qp(qp);
0627 
0628 out:
0629     mutex_unlock(&qp->mutex);
0630 
0631     return ret;
0632 }
0633 
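     /* Return a pointer to send WQE slot n within the QP's page directory. */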
0634 static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
0635 {
0636     return pvrdma_page_dir_get_ptr(&qp->pdir,
0637                        qp->sq.offset + n * qp->sq.wqe_size);
0638 }
0639 
0640 static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
0641 {
0642     return pvrdma_page_dir_get_ptr(&qp->pdir,
0643                        qp->rq.offset + n * qp->rq.wqe_size);
0644 }
0645 
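     /*
      * Fill in the fast-register segment of a send WQE from an IB_WR_REG_MR
      * work request and publish the MR's page list to the device.
      */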
0646 static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
0647                const struct ib_reg_wr *wr)
0648 {
0649     struct pvrdma_user_mr *mr = to_vmr(wr->mr);
0650 
0651     wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
0652     wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
0653     wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
0654     wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
0655     wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
0656     wqe_hdr->wr.fast_reg.access_flags = wr->access;
0657     wqe_hdr->wr.fast_reg.rkey = wr->key;
0658 
0659     return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
0660                         mr->npages);
0661 }
0662 
0663 /**
0664  * pvrdma_post_send - post send work request entries on a QP
0665  * @ibqp: the QP
0666  * @wr: work request list to post
0667  * @bad_wr: the first bad WR returned
0668  *
0669  * @return: 0 on success, otherwise returns an errno.
0670  */
0671 int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
0672              const struct ib_send_wr **bad_wr)
0673 {
0674     struct pvrdma_qp *qp = to_vqp(ibqp);
0675     struct pvrdma_dev *dev = to_vdev(ibqp->device);
0676     unsigned long flags;
0677     struct pvrdma_sq_wqe_hdr *wqe_hdr;
0678     struct pvrdma_sge *sge;
0679     int i, ret;
0680 
0681     /*
0682      * In states lower than RTS, we can fail immediately. In other states,
0683      * just post and let the device figure it out.
0684      */
0685     if (qp->state < IB_QPS_RTS) {
0686         *bad_wr = wr;
0687         return -EINVAL;
0688     }
0689 
0690     spin_lock_irqsave(&qp->sq.lock, flags);
0691 
0692     while (wr) {
0693         unsigned int tail = 0;
0694 
0695         if (unlikely(!pvrdma_idx_ring_has_space(
0696                 qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
0697             dev_warn_ratelimited(&dev->pdev->dev,
0698                          "send queue is full\n");
0699             *bad_wr = wr;
0700             ret = -ENOMEM;
0701             goto out;
0702         }
0703 
0704         if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
0705             dev_warn_ratelimited(&dev->pdev->dev,
0706                          "send SGE overflow\n");
0707             *bad_wr = wr;
0708             ret = -EINVAL;
0709             goto out;
0710         }
0711 
0712         if (unlikely(wr->opcode < 0)) {
0713             dev_warn_ratelimited(&dev->pdev->dev,
0714                          "invalid send opcode\n");
0715             *bad_wr = wr;
0716             ret = -EINVAL;
0717             goto out;
0718         }
0719 
0720         /*
0721          * Only UD and RC queue pairs are supported here; a thorough
0722          * check would consult the opcode table below.
0723          * opcode                      _UD  _UC  _RC
0724          * _SEND                        x    x    x
0725          * _SEND_WITH_IMM               x    x    x
0726          * _RDMA_WRITE                       x    x
0727          * _RDMA_WRITE_WITH_IMM              x    x
0728          * _LOCAL_INV                        x    x
0729          * _SEND_WITH_INV                    x    x
0730          * _RDMA_READ                             x
0731          * _ATOMIC_CMP_AND_SWP                    x
0732          * _ATOMIC_FETCH_AND_ADD                  x
0733          * _MASK_ATOMIC_CMP_AND_SWP               x
0734          * _MASK_ATOMIC_FETCH_AND_ADD             x
0735          * _REG_MR                                x
0736          *
0737          */
0738         if (qp->ibqp.qp_type != IB_QPT_UD &&
0739             qp->ibqp.qp_type != IB_QPT_RC &&
0740             wr->opcode != IB_WR_SEND) {
0741             dev_warn_ratelimited(&dev->pdev->dev,
0742                          "unsupported queuepair type\n");
0743             *bad_wr = wr;
0744             ret = -EINVAL;
0745             goto out;
0746         } else if (qp->ibqp.qp_type == IB_QPT_UD ||
0747                qp->ibqp.qp_type == IB_QPT_GSI) {
0748             if (wr->opcode != IB_WR_SEND &&
0749                 wr->opcode != IB_WR_SEND_WITH_IMM) {
0750                 dev_warn_ratelimited(&dev->pdev->dev,
0751                              "invalid send opcode\n");
0752                 *bad_wr = wr;
0753                 ret = -EINVAL;
0754                 goto out;
0755             }
0756         }
0757 
0758         wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
0759         memset(wqe_hdr, 0, sizeof(*wqe_hdr));
0760         wqe_hdr->wr_id = wr->wr_id;
0761         wqe_hdr->num_sge = wr->num_sge;
0762         wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
0763         wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
0764         if (wr->opcode == IB_WR_SEND_WITH_IMM ||
0765             wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
0766             wqe_hdr->ex.imm_data = wr->ex.imm_data;
0767 
0768         if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
0769             *bad_wr = wr;
0770             ret = -EINVAL;
0771             goto out;
0772         }
0773 
0774         switch (qp->ibqp.qp_type) {
0775         case IB_QPT_GSI:
0776         case IB_QPT_UD:
0777             if (unlikely(!ud_wr(wr)->ah)) {
0778                 dev_warn_ratelimited(&dev->pdev->dev,
0779                              "invalid address handle\n");
0780                 *bad_wr = wr;
0781                 ret = -EINVAL;
0782                 goto out;
0783             }
0784 
0785             /*
0786              * Use qkey from qp context if high order bit set,
0787              * otherwise from work request.
0788              */
0789             wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
0790             wqe_hdr->wr.ud.remote_qkey =
0791                 ud_wr(wr)->remote_qkey & 0x80000000 ?
0792                 qp->qkey : ud_wr(wr)->remote_qkey;
0793             wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;
0794 
0795             break;
0796         case IB_QPT_RC:
0797             switch (wr->opcode) {
0798             case IB_WR_RDMA_READ:
0799             case IB_WR_RDMA_WRITE:
0800             case IB_WR_RDMA_WRITE_WITH_IMM:
0801                 wqe_hdr->wr.rdma.remote_addr =
0802                     rdma_wr(wr)->remote_addr;
0803                 wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
0804                 break;
0805             case IB_WR_LOCAL_INV:
0806             case IB_WR_SEND_WITH_INV:
0807                 wqe_hdr->ex.invalidate_rkey =
0808                     wr->ex.invalidate_rkey;
0809                 break;
0810             case IB_WR_ATOMIC_CMP_AND_SWP:
0811             case IB_WR_ATOMIC_FETCH_AND_ADD:
0812                 wqe_hdr->wr.atomic.remote_addr =
0813                     atomic_wr(wr)->remote_addr;
0814                 wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
0815                 wqe_hdr->wr.atomic.compare_add =
0816                     atomic_wr(wr)->compare_add;
0817                 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
0818                     wqe_hdr->wr.atomic.swap =
0819                         atomic_wr(wr)->swap;
0820                 break;
0821             case IB_WR_REG_MR:
0822                 ret = set_reg_seg(wqe_hdr, reg_wr(wr));
0823                 if (ret < 0) {
0824                     dev_warn_ratelimited(&dev->pdev->dev,
0825                                  "Failed to set fast register work request\n");
0826                     *bad_wr = wr;
0827                     goto out;
0828                 }
0829                 break;
0830             default:
0831                 break;
0832             }
0833 
0834             break;
0835         default:
0836             dev_warn_ratelimited(&dev->pdev->dev,
0837                          "invalid queuepair type\n");
0838             ret = -EINVAL;
0839             *bad_wr = wr;
0840             goto out;
0841         }
0842 
0843         sge = (struct pvrdma_sge *)(wqe_hdr + 1);
0844         for (i = 0; i < wr->num_sge; i++) {
0845             /* Need to check wqe_size 0 or max size */
0846             sge->addr = wr->sg_list[i].addr;
0847             sge->length = wr->sg_list[i].length;
0848             sge->lkey = wr->sg_list[i].lkey;
0849             sge++;
0850         }
0851 
0852         /* Make sure wqe is written before index update */
0853         smp_wmb();
0854 
0855         /* Update shared sq ring */
0856         pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
0857                     qp->sq.wqe_cnt);
0858 
0859         wr = wr->next;
0860     }
0861 
0862     ret = 0;
0863 
0864 out:
0865     spin_unlock_irqrestore(&qp->sq.lock, flags);
0866 
0867     if (!ret)
0868         pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);
0869 
0870     return ret;
0871 }
0872 
0873 /**
0874  * pvrdma_post_recv - post receive work request entries on a QP
0875  * @ibqp: the QP
0876  * @wr: the work request list to post
0877  * @bad_wr: the first bad WR returned
0878  *
0879  * @return: 0 on success, otherwise returns an errno.
0880  */
0881 int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
0882              const struct ib_recv_wr **bad_wr)
0883 {
0884     struct pvrdma_dev *dev = to_vdev(ibqp->device);
0885     unsigned long flags;
0886     struct pvrdma_qp *qp = to_vqp(ibqp);
0887     struct pvrdma_rq_wqe_hdr *wqe_hdr;
0888     struct pvrdma_sge *sge;
0889     int ret = 0;
0890     int i;
0891 
0892     /*
0893      * In the RESET state, we can fail immediately. For other states,
0894      * just post and let the device figure it out.
0895      */
0896     if (qp->state == IB_QPS_RESET) {
0897         *bad_wr = wr;
0898         return -EINVAL;
0899     }
0900 
0901     if (qp->srq) {
0902         dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
0903         *bad_wr = wr;
0904         return -EINVAL;
0905     }
0906 
0907     spin_lock_irqsave(&qp->rq.lock, flags);
0908 
0909     while (wr) {
0910         unsigned int tail = 0;
0911 
0912         if (unlikely(wr->num_sge > qp->rq.max_sg ||
0913                  wr->num_sge < 0)) {
0914             ret = -EINVAL;
0915             *bad_wr = wr;
0916             dev_warn_ratelimited(&dev->pdev->dev,
0917                          "recv SGE overflow\n");
0918             goto out;
0919         }
0920 
0921         if (unlikely(!pvrdma_idx_ring_has_space(
0922                 qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
0923             ret = -ENOMEM;
0924             *bad_wr = wr;
0925             dev_warn_ratelimited(&dev->pdev->dev,
0926                          "recv queue full\n");
0927             goto out;
0928         }
0929 
0930         wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
0931         wqe_hdr->wr_id = wr->wr_id;
0932         wqe_hdr->num_sge = wr->num_sge;
0933         wqe_hdr->total_len = 0;
0934 
0935         sge = (struct pvrdma_sge *)(wqe_hdr + 1);
0936         for (i = 0; i < wr->num_sge; i++) {
0937             sge->addr = wr->sg_list[i].addr;
0938             sge->length = wr->sg_list[i].length;
0939             sge->lkey = wr->sg_list[i].lkey;
0940             sge++;
0941         }
0942 
0943         /* Make sure wqe is written before index update */
0944         smp_wmb();
0945 
0946         /* Update shared rq ring */
0947         pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
0948                     qp->rq.wqe_cnt);
0949 
0950         wr = wr->next;
0951     }
0952 
0953     spin_unlock_irqrestore(&qp->rq.lock, flags);
0954 
0955     pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);
0956 
0957     return ret;
0958 
0959 out:
0960     spin_unlock_irqrestore(&qp->rq.lock, flags);
0961 
0962     return ret;
0963 }
0964 
0965 /**
0966  * pvrdma_query_qp - query a queue pair's attributes
0967  * @ibqp: the queue pair to query
0968  * @attr: the queue pair's attributes
0969  * @attr_mask: attributes mask
0970  * @init_attr: initial queue pair attributes
0971  *
0972  * @returns 0 on success, otherwise returns an errno.
0973  */
0974 int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
0975             int attr_mask, struct ib_qp_init_attr *init_attr)
0976 {
0977     struct pvrdma_dev *dev = to_vdev(ibqp->device);
0978     struct pvrdma_qp *qp = to_vqp(ibqp);
0979     union pvrdma_cmd_req req;
0980     union pvrdma_cmd_resp rsp;
0981     struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
0982     struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
0983     int ret = 0;
0984 
0985     mutex_lock(&qp->mutex);
0986 
0987     if (qp->state == IB_QPS_RESET) {
0988         attr->qp_state = IB_QPS_RESET;
0989         goto out;
0990     }
0991 
0992     memset(cmd, 0, sizeof(*cmd));
0993     cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
0994     cmd->qp_handle = qp->qp_handle;
0995     cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
0996 
0997     ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
0998     if (ret < 0) {
0999         dev_warn(&dev->pdev->dev,
1000              "could not query queuepair, error: %d\n", ret);
1001         goto out;
1002     }
1003 
1004     attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
1005     attr->cur_qp_state =
1006         pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
1007     attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
1008     attr->path_mig_state =
1009         pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
1010     attr->qkey = resp->attrs.qkey;
1011     attr->rq_psn = resp->attrs.rq_psn;
1012     attr->sq_psn = resp->attrs.sq_psn;
1013     attr->dest_qp_num = resp->attrs.dest_qp_num;
1014     attr->qp_access_flags =
1015         pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
1016     attr->pkey_index = resp->attrs.pkey_index;
1017     attr->alt_pkey_index = resp->attrs.alt_pkey_index;
1018     attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
1019     attr->sq_draining = resp->attrs.sq_draining;
1020     attr->max_rd_atomic = resp->attrs.max_rd_atomic;
1021     attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
1022     attr->min_rnr_timer = resp->attrs.min_rnr_timer;
1023     attr->port_num = resp->attrs.port_num;
1024     attr->timeout = resp->attrs.timeout;
1025     attr->retry_cnt = resp->attrs.retry_cnt;
1026     attr->rnr_retry = resp->attrs.rnr_retry;
1027     attr->alt_port_num = resp->attrs.alt_port_num;
1028     attr->alt_timeout = resp->attrs.alt_timeout;
1029     pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
1030     pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
1031     pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
1032 
1033     qp->state = attr->qp_state;
1034 
1035     ret = 0;
1036 
1037 out:
1038     attr->cur_qp_state = attr->qp_state;
1039 
1040     init_attr->event_handler = qp->ibqp.event_handler;
1041     init_attr->qp_context = qp->ibqp.qp_context;
1042     init_attr->send_cq = qp->ibqp.send_cq;
1043     init_attr->recv_cq = qp->ibqp.recv_cq;
1044     init_attr->srq = qp->ibqp.srq;
1045     init_attr->xrcd = NULL;
1046     init_attr->cap = attr->cap;
1047     init_attr->sq_sig_type = 0;
1048     init_attr->qp_type = qp->ibqp.qp_type;
1049     init_attr->create_flags = 0;
1050     init_attr->port_num = qp->port;
1051 
1052     mutex_unlock(&qp->mutex);
1053     return ret;
1054 }