#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
			 enum ib_cq_notify_flags notify_flags)
{
	struct pvrdma_dev *dev = to_vdev(ibcq->device);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	u32 val = cq->cq_handle;
	unsigned long flags;
	int has_data = 0;

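	/* Doorbell value is the CQ handle plus an ARM request (solicited-only or all). */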
	val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

	spin_lock_irqsave(&cq->cq_lock, flags);

	pvrdma_write_uar_cq(dev, val);

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		unsigned int head;

		has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
						    cq->ibcq.cqe, &head);
		if (unlikely(has_data == PVRDMA_INVALID_IDX))
			dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return has_data;
}

/**
 * pvrdma_create_cq - create completion queue
 * @ibcq: allocated CQ
 * @attr: completion queue attributes
 * @udata: user data
 *
 * @return: 0 on success, otherwise negative errno.
 */
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		     struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	int ret;
	int npages;
	unsigned long flags;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
	struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
	struct pvrdma_create_cq_resp cq_resp = {};
	struct pvrdma_create_cq ucmd;
	struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
		udata, struct pvrdma_ucontext, ibucontext);

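	/* struct pvrdma_cqe is shared with the device and must stay 64 bytes. */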
	BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

	if (attr->flags)
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries);
	if (entries < 1 || entries > dev->dsr->caps.max_cqe)
		return -EINVAL;

	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
		return -ENOMEM;

	cq->ibcq.cqe = entries;
	cq->is_kernel = !udata;

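	/* User CQs map the userspace ring buffer; kernel CQs allocate pages below. */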
	if (!cq->is_kernel) {
		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
			ret = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
				       IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(cq->umem)) {
			ret = PTR_ERR(cq->umem);
			goto err_cq;
		}

		npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
	} else {
		/* One extra page for shared ring state */
		npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
			      PAGE_SIZE - 1) / PAGE_SIZE;

		/* Skip header page. */
		cq->offset = PAGE_SIZE;
	}

	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
		dev_warn(&dev->pdev->dev,
			 "overflow pages in completion queue\n");
		ret = -EINVAL;
		goto err_umem;
	}

	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
	if (ret) {
		dev_warn(&dev->pdev->dev,
			 "could not allocate page directory\n");
		goto err_umem;
	}

	/* Ring state is always the first page. */
	if (cq->is_kernel)
		cq->ring_state = cq->pdir.pages[0];
	else
		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

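	/* Initial reference; pvrdma_free_cq() drops it and waits for remaining users. */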
	refcount_set(&cq->refcnt, 1);
	init_completion(&cq->free);
	spin_lock_init(&cq->cq_lock);

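	/* Build and post the CREATE_CQ command to the device. */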
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
	cmd->nchunks = npages;
	cmd->ctx_handle = context ? context->ctx_handle : 0;
	cmd->cqe = entries;
	cmd->pdir_dma = cq->pdir.dir_dma;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create completion queue, error: %d\n", ret);
		goto err_page_dir;
	}

	cq->ibcq.cqe = resp->cqe;
	cq->cq_handle = resp->cq_handle;
	cq_resp.cqn = resp->cq_handle;
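	/* Publish the new CQ in the device's CQ table so completion events can find it. */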
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (!cq->is_kernel) {
		cq->uar = &context->uar;

		/* Copy udata back. */
		if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back udata\n");
			pvrdma_destroy_cq(&cq->ibcq, udata);
			return -EINVAL;
		}
	}

	return 0;

err_page_dir:
	pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
	ib_umem_release(cq->umem);
err_cq:
	atomic_dec(&dev->num_cqs);
	return ret;
}

static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
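	/* Drop our reference and wait until the last holder signals cq->free. */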
	if (refcount_dec_and_test(&cq->refcnt))
		complete(&cq->free);
	wait_for_completion(&cq->free);

	ib_umem_release(cq->umem);

	pvrdma_page_dir_cleanup(dev, &cq->pdir);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 * @udata: user data or null for kernel object
 */
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct pvrdma_cq *vcq = to_vcq(cq);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
	struct pvrdma_dev *dev = to_vdev(cq->device);
	unsigned long flags;
	int ret;

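	/* Post the DESTROY_CQ command; a failure is only logged and teardown continues. */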
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
	cmd->cq_handle = vcq->cq_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&dev->pdev->dev,
			 "could not destroy completion queue, error: %d\n",
			 ret);

	/* free cq's resources */
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[vcq->cq_handle] = NULL;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	pvrdma_free_cq(dev, vcq);
	atomic_dec(&dev->num_cqs);
	return 0;
}

static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
	return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
					&cq->pdir,
					cq->offset +
					sizeof(struct pvrdma_cqe) * i);
}

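/* Remove completions belonging to @qp from a kernel CQ ring; caller holds the CQ lock. */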
void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
	unsigned int head;
	int has_data;

	if (!cq->is_kernel)
		return;

	/* Lock held */
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (unlikely(has_data > 0)) {
		int items;
		int curr;
		int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
				      cq->ibcq.cqe);
		struct pvrdma_cqe *cqe;
		struct pvrdma_cqe *curr_cqe;

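		/*
		 * Walk backwards from the producer tail, compacting CQEs that
		 * belong to other QPs and consuming those that match @qp.
		 */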
		items = (tail > head) ? (tail - head) :
			(cq->ibcq.cqe - head + tail);
		curr = --tail;
		while (items-- > 0) {
			if (curr < 0)
				curr = cq->ibcq.cqe - 1;
			if (tail < 0)
				tail = cq->ibcq.cqe - 1;
			curr_cqe = get_cqe(cq, curr);
			if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
				if (curr != tail) {
					cqe = get_cqe(cq, tail);
					*cqe = *curr_cqe;
				}
				tail--;
			} else {
				pvrdma_idx_ring_inc(
					&cq->ring_state->rx.cons_head,
					cq->ibcq.cqe);
			}
			curr--;
		}
	}
}

static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
			   struct ib_wc *wc)
{
	struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
	int has_data;
	unsigned int head;
	bool tried = false;
	struct pvrdma_cqe *cqe;

retry:
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (has_data == 0) {
		if (tried)
			return -EAGAIN;

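		/* Ring empty: ask the device to post any pending completions, then retry once. */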
		pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

		tried = true;
		goto retry;
	} else if (has_data == PVRDMA_INVALID_IDX) {
		dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
		return -EAGAIN;
	}

	cqe = get_cqe(cq, head);

	/* Read the CQE contents only after the ring index check. */
	rmb();
	if (dev->qp_tbl[cqe->qp & 0xffff])
		*cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
	else
		return -EAGAIN;

	wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
	wc->status = pvrdma_wc_status_to_ib(cqe->status);
	wc->wr_id = cqe->wr_id;
	wc->qp = &(*cur_qp)->ibqp;
	wc->byte_len = cqe->byte_len;
	wc->ex.imm_data = cqe->imm_data;
	wc->src_qp = cqe->src_qp;
	wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->slid;
	wc->sl = cqe->sl;
	wc->dlid_path_bits = cqe->dlid_path_bits;
	wc->port_num = cqe->port_num;
	wc->vendor_err = cqe->vendor_err;
	wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type);

	/* Update shared ring state */
	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

	return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: pointer to work completion array
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct pvrdma_cq *cq = to_vcq(ibcq);
	struct pvrdma_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;

	if (num_entries < 1 || wc == NULL)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
			break;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	/* Ensure we do not return errors from poll_cq */
	return npolled;
}