#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"

static struct workqueue_struct *comp_vector_wq;
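/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 *
 * Return: true on success, else false if the cq is full.
 */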
bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
	struct ib_uverbs_wc *uqueue = NULL;
	struct ib_wc *kqueue = NULL;
	struct rvt_cq_wc *u_wc = NULL;
	struct rvt_k_cq_wc *k_wc = NULL;
	unsigned long flags;
	u32 head;
	u32 next;
	u32 tail;

	spin_lock_irqsave(&cq->lock, flags);

	if (cq->ip) {
		u_wc = cq->queue;
		uqueue = &u_wc->uqueue[0];
		head = RDMA_READ_UAPI_ATOMIC(u_wc->head);
		tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail);
	} else {
		k_wc = cq->kqueue;
		kqueue = &k_wc->kqueue[0];
		head = k_wc->head;
		tail = k_wc->tail;
	}

	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	if (head >= (unsigned)cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else {
		next = head + 1;
	}

	if (unlikely(next == tail || cq->cq_full)) {
		struct rvt_dev_info *rdi = cq->rdi;

		if (!cq->cq_full)
			rvt_pr_err_ratelimited(rdi, "CQ is full!\n");
		cq->cq_full = true;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return false;
	}
	trace_rvt_cq_enter(cq, entry, head);
	if (uqueue) {
		uqueue[head].wr_id = entry->wr_id;
		uqueue[head].status = entry->status;
		uqueue[head].opcode = entry->opcode;
		uqueue[head].vendor_err = entry->vendor_err;
		uqueue[head].byte_len = entry->byte_len;
		uqueue[head].ex.imm_data = entry->ex.imm_data;
		uqueue[head].qp_num = entry->qp->qp_num;
		uqueue[head].src_qp = entry->src_qp;
		uqueue[head].wc_flags = entry->wc_flags;
		uqueue[head].pkey_index = entry->pkey_index;
		uqueue[head].slid = ib_lid_cpu16(entry->slid);
		uqueue[head].sl = entry->sl;
		uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next);
	} else {
		kqueue[head] = *entry;
		k_wc->head = next;
	}

	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED &&
	     (solicited || entry->status != IB_WC_SUCCESS))) {
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		cq->notify = RVT_CQ_NONE;
		cq->triggered++;
		queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
			      &cq->comptask);
	}

	spin_unlock_irqrestore(&cq->lock, flags);
	return true;
}
EXPORT_SYMBOL(rvt_cq_enter);
static void send_complete(struct work_struct *work)
{
	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
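	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries.  If a new completion entry
	 * is added while we are in this routine, queue_work()
	 * won't call us again until we return so we check triggered to
	 * see if we need to call the handler again.
	 */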
	for (;;) {
		u8 triggered = cq->triggered;
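		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ. We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */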
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}
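/**
 * rvt_create_cq - create a completion queue
 * @ibcq: Allocated CQ
 * @attr: creation attributes
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: 0 on success.
 */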
int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		  struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_cq_wc *u_wc = NULL;
	struct rvt_k_cq_wc *k_wc = NULL;
	u32 sz;
	unsigned int entries = attr->cqe;
	int comp_vector = attr->comp_vector;
	int err;

	if (attr->flags)
		return -EOPNOTSUPP;

	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
		return -EINVAL;

	if (comp_vector < 0)
		comp_vector = 0;

	comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
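	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */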
	if (udata && udata->outlen >= sizeof(__u64)) {
		sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
		sz += sizeof(*u_wc);
		u_wc = vmalloc_user(sz);
		if (!u_wc)
			return -ENOMEM;
	} else {
		sz = sizeof(struct ib_wc) * (entries + 1);
		sz += sizeof(*k_wc);
		k_wc = vzalloc_node(sz, rdi->dparms.node);
		if (!k_wc)
			return -ENOMEM;
	}
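	/*
	 * Return the address of the WC as the offset to mmap.
	 * See rvt_mmap() for details.
	 */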
	if (udata && udata->outlen >= sizeof(__u64)) {
		cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc);
		if (IS_ERR(cq->ip)) {
			err = PTR_ERR(cq->ip);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err)
			goto bail_ip;
	}

	spin_lock_irq(&rdi->n_cqs_lock);
	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
		spin_unlock_irq(&rdi->n_cqs_lock);
		err = -ENOMEM;
		goto bail_ip;
	}

	rdi->n_cqs_allocated++;
	spin_unlock_irq(&rdi->n_cqs_lock);

	if (cq->ip) {
		spin_lock_irq(&rdi->pending_lock);
		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
	 * an error.
	 */
	cq->rdi = rdi;
	if (rdi->driver_f.comp_vect_cpu_lookup)
		cq->comp_vector_cpu =
			rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
	else
		cq->comp_vector_cpu =
			cpumask_first(cpumask_of_node(rdi->dparms.node));

	cq->ibcq.cqe = entries;
	cq->notify = RVT_CQ_NONE;
	spin_lock_init(&cq->lock);
	INIT_WORK(&cq->comptask, send_complete);
	if (u_wc)
		cq->queue = u_wc;
	else
		cq->kqueue = k_wc;

	trace_rvt_create_cq(cq, attr);
	return 0;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(u_wc);
	vfree(k_wc);
	return err;
}
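/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 * @udata: user data or NULL for kernel object
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */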
int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_dev_info *rdi = cq->rdi;

	flush_work(&cq->comptask);
	spin_lock_irq(&rdi->n_cqs_lock);
	rdi->n_cqs_allocated--;
	spin_unlock_irq(&rdi->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, rvt_release_mmap_info);
	else
		vfree(cq->kqueue);
	return 0;
}
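/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success.
 */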
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		if (cq->queue) {
			if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) !=
			    RDMA_READ_UAPI_ATOMIC(cq->queue->tail))
				ret = 1;
		} else {
			if (cq->kqueue->head != cq->kqueue->tail)
				ret = 1;
		}
	}

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}
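/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue to resize
 * @cqe: the new number of entries
 * @udata: user data for libibverbs.so
 *
 * Return: 0 on success, otherwise a negative errno.
 */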
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	u32 head, tail, n;
	int ret;
	u32 sz;
	struct rvt_dev_info *rdi = cq->rdi;
	struct rvt_cq_wc *u_wc = NULL;
	struct rvt_cq_wc *old_u_wc = NULL;
	struct rvt_k_cq_wc *k_wc = NULL;
	struct rvt_k_cq_wc *old_k_wc = NULL;

	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
		return -EINVAL;

	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		sz = sizeof(struct ib_uverbs_wc) * (cqe + 1);
		sz += sizeof(*u_wc);
		u_wc = vmalloc_user(sz);
		if (!u_wc)
			return -ENOMEM;
	} else {
		sz = sizeof(struct ib_wc) * (cqe + 1);
		sz += sizeof(*k_wc);
		k_wc = vzalloc_node(sz, rdi->dparms.node);
		if (!k_wc)
			return -ENOMEM;
	}

	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}

	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	if (u_wc) {
		old_u_wc = cq->queue;
		head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head);
		tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail);
	} else {
		old_k_wc = cq->kqueue;
		head = old_k_wc->head;
		tail = old_k_wc->tail;
	}

	if (head > (u32)cq->ibcq.cqe)
		head = (u32)cq->ibcq.cqe;
	if (tail > (u32)cq->ibcq.cqe)
		tail = (u32)cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
	for (n = 0; tail != head; n++) {
		if (u_wc)
			u_wc->uqueue[n] = old_u_wc->uqueue[tail];
		else
			k_wc->kqueue[n] = old_k_wc->kqueue[tail];
		if (tail == (u32)cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	if (u_wc) {
		RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n);
		RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0);
		cq->queue = u_wc;
	} else {
		k_wc->head = n;
		k_wc->tail = 0;
		cq->kqueue = k_wc;
	}
	spin_unlock_irq(&cq->lock);

	if (u_wc)
		vfree(old_u_wc);
	else
		vfree(old_k_wc);

	if (cq->ip) {
		struct rvt_mmap_info *ip = cq->ip;

		rvt_update_mmap_info(rdi, ip, sz, u_wc);

		/*
		 * Return the offset to mmap.
		 * See rvt_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				return ret;
		}

		spin_lock_irq(&rdi->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
		spin_unlock_irq(&rdi->pending_lock);
	}

	return 0;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(u_wc);
	vfree(k_wc);

	return ret;
}
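/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 *
 * Return: the number of completion entries polled.
 */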
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
	struct rvt_k_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel doesn't need a mmap'ed verbs CQ entry */
	if (cq->ip)
		return -EINVAL;

	spin_lock_irqsave(&cq->lock, flags);

	wc = cq->kqueue;
	tail = wc->tail;
	if (tail > (u32)cq->ibcq.cqe)
		tail = (u32)cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;

		trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

	return npolled;
}
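/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 *
 * Return: 0 on success, -ENOMEM if the workqueue could not be allocated.
 */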
int rvt_driver_cq_init(void)
{
	comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
					 0, "rdmavt_cq");
	if (!comp_vector_wq)
		return -ENOMEM;

	return 0;
}
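/**
 * rvt_cq_exit - tear down cq resources
 */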
void rvt_cq_exit(void)
{
	destroy_workqueue(comp_vector_wq);
	comp_vector_wq = NULL;
}