#include <linux/slab.h>
#include <linux/string.h>
#include <linux/sched.h>

#include <asm/io.h>

#include <rdma/uverbs_ioctl.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
#include "mthca_wqe.h"

enum {
        MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE
};

struct mthca_tavor_srq_context {
        __be64 wqe_base_ds;
        __be32 state_pd;
        __be32 lkey;
        __be32 uar;
        __be16 limit_watermark;
        __be16 wqe_cnt;
        u32    reserved[2];
};

struct mthca_arbel_srq_context {
        __be32 state_logsize_srqn;
        __be32 lkey;
        __be32 db_index;
        __be32 logstride_usrpage;
        __be64 wqe_base;
        __be32 eq_pd;
        __be16 limit_watermark;
        __be16 wqe_cnt;
        u16    reserved1;
        __be16 wqe_counter;
        u32    reserved2[3];
};

static void *get_wqe(struct mthca_srq *srq, int n)
{
        if (srq->is_direct)
                return srq->queue.direct.buf + (n << srq->wqe_shift);
        else
                return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf +
                        ((n << srq->wqe_shift) & (PAGE_SIZE - 1));
}

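/*
 * Return a pointer to the location within a WQE that we're using as a
 * link when the WQE is in the free list.  We use the imm field
 * because in the Tavor case, posting a WQE may overwrite the next
 * segment of the previous WQE, but a receive WQE will never touch the
 * imm field.  This avoids corrupting our free list if the previous
 * WQE has already completed and been put on the free list when we
 * post the next WQE.
 */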
static inline int *wqe_to_link(void *wqe)
{
        return (int *) (wqe + offsetof(struct mthca_next_seg, imm));
}

static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
                                         struct mthca_pd *pd,
                                         struct mthca_srq *srq,
                                         struct mthca_tavor_srq_context *context,
                                         struct ib_udata *udata)
{
        struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
                udata, struct mthca_ucontext, ibucontext);

        memset(context, 0, sizeof *context);

        context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
        context->state_pd    = cpu_to_be32(pd->pd_num);
        context->lkey        = cpu_to_be32(srq->mr.ibmr.lkey);

        if (udata)
                context->uar = cpu_to_be32(ucontext->uar.index);
        else
                context->uar = cpu_to_be32(dev->driver_uar.index);
}

static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
                                         struct mthca_pd *pd,
                                         struct mthca_srq *srq,
                                         struct mthca_arbel_srq_context *context,
                                         struct ib_udata *udata)
{
        struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
                udata, struct mthca_ucontext, ibucontext);
        int logsize, max;

        memset(context, 0, sizeof *context);

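        /*
         * Use a temporary int for ilog2(); keeping the value in a
         * plain int avoids a gcc issue seen with ilog2() on sparc64.
         */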
        max = srq->max;
        logsize = ilog2(max);
        context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
        context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
        context->db_index = cpu_to_be32(srq->db_index);
        context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
        if (udata)
                context->logstride_usrpage |= cpu_to_be32(ucontext->uar.index);
        else
                context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
        context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
}

static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
{
        mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue,
                       srq->is_direct, &srq->mr);
        kfree(srq->wrid);
}

static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
                               struct mthca_srq *srq, struct ib_udata *udata)
{
        struct mthca_data_seg *scatter;
        void *wqe;
        int err;
        int i;

        if (udata)
                return 0;

        srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
        if (!srq->wrid)
                return -ENOMEM;

        err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift,
                              MTHCA_MAX_DIRECT_SRQ_SIZE,
                              &srq->queue, &srq->is_direct, pd, 1, &srq->mr);
        if (err) {
                kfree(srq->wrid);
                return err;
        }

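        /*
         * Now initialize the SRQ buffer so that all of the WQEs are
         * linked into the list of free WQEs.  In addition, set the
         * scatter list L_Keys to the sentry value (MTHCA_INVAL_LKEY).
         */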
        for (i = 0; i < srq->max; ++i) {
                struct mthca_next_seg *next;

                next = wqe = get_wqe(srq, i);

                if (i < srq->max - 1) {
                        *wqe_to_link(wqe) = i + 1;
                        next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
                } else {
                        *wqe_to_link(wqe) = -1;
                        next->nda_op = 0;
                }

                for (scatter = wqe + sizeof (struct mthca_next_seg);
                     (void *) scatter < wqe + (1 << srq->wqe_shift);
                     ++scatter)
                        scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
        }

        srq->last = get_wqe(srq, srq->max - 1);

        return 0;
}

int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
                    struct ib_srq_attr *attr, struct mthca_srq *srq,
                    struct ib_udata *udata)
{
        struct mthca_mailbox *mailbox;
        int ds;
        int err;

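        /* Sanity check SRQ size before proceeding */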
        if (attr->max_wr  > dev->limits.max_srq_wqes ||
            attr->max_sge > dev->limits.max_srq_sge)
                return -EINVAL;

        srq->max      = attr->max_wr;
        srq->max_gs   = attr->max_sge;
        srq->counter  = 0;

        if (mthca_is_memfree(dev))
                srq->max = roundup_pow_of_two(srq->max + 1);
        else
                srq->max = srq->max + 1;

        ds = max(64UL,
                 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
                                    srq->max_gs * sizeof (struct mthca_data_seg)));

        if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz))
                return -EINVAL;

        srq->wqe_shift = ilog2(ds);

        srq->srqn = mthca_alloc(&dev->srq_table.alloc);
        if (srq->srqn == -1)
                return -ENOMEM;

        if (mthca_is_memfree(dev)) {
                err = mthca_table_get(dev, dev->srq_table.table, srq->srqn);
                if (err)
                        goto err_out;

                if (!udata) {
                        srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
                                                       srq->srqn, &srq->db);
                        if (srq->db_index < 0) {
                                err = -ENOMEM;
                                goto err_out_icm;
                        }
                }
        }

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
                goto err_out_db;
        }

        err = mthca_alloc_srq_buf(dev, pd, srq, udata);
        if (err)
                goto err_out_mailbox;

        spin_lock_init(&srq->lock);
        srq->refcount = 1;
        init_waitqueue_head(&srq->wait);
        mutex_init(&srq->mutex);

        if (mthca_is_memfree(dev))
                mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata);
        else
                mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata);

        err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
        if (err) {
                mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err);
                goto err_out_free_buf;
        }

        spin_lock_irq(&dev->srq_table.lock);
        if (mthca_array_set(&dev->srq_table.srq,
                            srq->srqn & (dev->limits.num_srqs - 1),
                            srq)) {
                spin_unlock_irq(&dev->srq_table.lock);
                goto err_out_free_srq;
        }
        spin_unlock_irq(&dev->srq_table.lock);

        mthca_free_mailbox(dev, mailbox);

        srq->first_free = 0;
        srq->last_free  = srq->max - 1;

        attr->max_wr    = srq->max - 1;
        attr->max_sge   = srq->max_gs;

        return 0;

err_out_free_srq:
        err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
        if (err)
                mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);

err_out_free_buf:
        if (!udata)
                mthca_free_srq_buf(dev, srq);

err_out_mailbox:
        mthca_free_mailbox(dev, mailbox);

err_out_db:
        if (!udata && mthca_is_memfree(dev))
                mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);

err_out_icm:
        mthca_table_put(dev, dev->srq_table.table, srq->srqn);

err_out:
        mthca_free(&dev->srq_table.alloc, srq->srqn);

        return err;
}

static inline int get_srq_refcount(struct mthca_dev *dev, struct mthca_srq *srq)
{
        int c;

        spin_lock_irq(&dev->srq_table.lock);
        c = srq->refcount;
        spin_unlock_irq(&dev->srq_table.lock);

        return c;
}

void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
{
        struct mthca_mailbox *mailbox;
        int err;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                mthca_warn(dev, "No memory for mailbox to free SRQ.\n");
                return;
        }

        err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn);
        if (err)
                mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);

        spin_lock_irq(&dev->srq_table.lock);
        mthca_array_clear(&dev->srq_table.srq,
                          srq->srqn & (dev->limits.num_srqs - 1));
        --srq->refcount;
        spin_unlock_irq(&dev->srq_table.lock);

        wait_event(srq->wait, !get_srq_refcount(dev, srq));

        if (!srq->ibsrq.uobject) {
                mthca_free_srq_buf(dev, srq);
                if (mthca_is_memfree(dev))
                        mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
        }

        mthca_table_put(dev, dev->srq_table.table, srq->srqn);
        mthca_free(&dev->srq_table.alloc, srq->srqn);
        mthca_free_mailbox(dev, mailbox);
}

int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                     enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
        struct mthca_dev *dev = to_mdev(ibsrq->device);
        struct mthca_srq *srq = to_msrq(ibsrq);
        int ret = 0;

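        /* Resizing SRQs is not supported */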
        if (attr_mask & IB_SRQ_MAX_WR)
                return -EINVAL;

        if (attr_mask & IB_SRQ_LIMIT) {
                u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max;

                if (attr->srq_limit > max_wr)
                        return -EINVAL;

                mutex_lock(&srq->mutex);
                ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit);
                mutex_unlock(&srq->mutex);
        }

        return ret;
}

int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
        struct mthca_dev *dev = to_mdev(ibsrq->device);
        struct mthca_srq *srq = to_msrq(ibsrq);
        struct mthca_mailbox *mailbox;
        struct mthca_arbel_srq_context *arbel_ctx;
        struct mthca_tavor_srq_context *tavor_ctx;
        int err;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);

        err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox);
        if (err)
                goto out;

        if (mthca_is_memfree(dev)) {
                arbel_ctx = mailbox->buf;
                srq_attr->srq_limit = be16_to_cpu(arbel_ctx->limit_watermark);
        } else {
                tavor_ctx = mailbox->buf;
                srq_attr->srq_limit = be16_to_cpu(tavor_ctx->limit_watermark);
        }

        srq_attr->max_wr  = srq->max - 1;
        srq_attr->max_sge = srq->max_gs;

out:
        mthca_free_mailbox(dev, mailbox);

        return err;
}

void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
                     enum ib_event_type event_type)
{
        struct mthca_srq *srq;
        struct ib_event event;

        spin_lock(&dev->srq_table.lock);
        srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
        if (srq)
                ++srq->refcount;
        spin_unlock(&dev->srq_table.lock);

        if (!srq) {
                mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
                return;
        }

        if (!srq->ibsrq.event_handler)
                goto out;

        event.device      = &dev->ib_dev;
        event.event       = event_type;
        event.element.srq = &srq->ibsrq;
        srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);

out:
        spin_lock(&dev->srq_table.lock);
        if (!--srq->refcount)
                wake_up(&srq->wait);
        spin_unlock(&dev->srq_table.lock);
}

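/*
 * This function must be called with IRQs disabled.
 */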
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
{
        int ind;
        struct mthca_next_seg *last_free;

        ind = wqe_addr >> srq->wqe_shift;

        spin_lock(&srq->lock);

        last_free = get_wqe(srq, srq->last_free);
        *wqe_to_link(last_free) = ind;
        last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
        *wqe_to_link(get_wqe(srq, ind)) = -1;
        srq->last_free = ind;

        spin_unlock(&srq->lock);
}

int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                              const struct ib_recv_wr **bad_wr)
{
        struct mthca_dev *dev = to_mdev(ibsrq->device);
        struct mthca_srq *srq = to_msrq(ibsrq);
        unsigned long flags;
        int err = 0;
        int first_ind;
        int ind;
        int next_ind;
        int nreq;
        int i;
        void *wqe;
        void *prev_wqe;

        spin_lock_irqsave(&srq->lock, flags);

        first_ind = srq->first_free;

        for (nreq = 0; wr; wr = wr->next) {
                ind      = srq->first_free;
                wqe      = get_wqe(srq, ind);
                next_ind = *wqe_to_link(wqe);

                if (unlikely(next_ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        break;
                }

                prev_wqe  = srq->last;
                srq->last = wqe;

                ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                /* flags field will always remain 0 */

                wqe += sizeof (struct mthca_next_seg);

                if (unlikely(wr->num_sge > srq->max_gs)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        srq->last = prev_wqe;
                        break;
                }

                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);
                        wqe += sizeof (struct mthca_data_seg);
                }

                if (i < srq->max_gs)
                        mthca_set_data_seg_inval(wqe);

                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                        cpu_to_be32(MTHCA_NEXT_DBD);

                srq->wrid[ind] = wr->wr_id;
                srq->first_free = next_ind;

                ++nreq;
                if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
                        nreq = 0;

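                        /*
                         * Make sure that descriptors are written
                         * before doorbell is rung.
                         */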
                        wmb();

                        mthca_write64(first_ind << srq->wqe_shift, srq->srqn << 8,
                                      dev->kar + MTHCA_RECEIVE_DOORBELL,
                                      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));

                        first_ind = srq->first_free;
                }
        }

        if (likely(nreq)) {
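                /*
                 * Make sure that descriptors are written before
                 * doorbell is rung.
                 */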
                wmb();

                mthca_write64(first_ind << srq->wqe_shift, (srq->srqn << 8) | nreq,
                              dev->kar + MTHCA_RECEIVE_DOORBELL,
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }

        spin_unlock_irqrestore(&srq->lock, flags);
        return err;
}

int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
                              const struct ib_recv_wr **bad_wr)
{
        struct mthca_dev *dev = to_mdev(ibsrq->device);
        struct mthca_srq *srq = to_msrq(ibsrq);
        unsigned long flags;
        int err = 0;
        int ind;
        int next_ind;
        int nreq;
        int i;
        void *wqe;

        spin_lock_irqsave(&srq->lock, flags);

        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                ind      = srq->first_free;
                wqe      = get_wqe(srq, ind);
                next_ind = *wqe_to_link(wqe);

                if (unlikely(next_ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
                        break;
                }

                ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                /* flags field will always remain 0 */

                wqe += sizeof (struct mthca_next_seg);

                if (unlikely(wr->num_sge > srq->max_gs)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                for (i = 0; i < wr->num_sge; ++i) {
                        mthca_set_data_seg(wqe, wr->sg_list + i);
                        wqe += sizeof (struct mthca_data_seg);
                }

                if (i < srq->max_gs)
                        mthca_set_data_seg_inval(wqe);

                srq->wrid[ind] = wr->wr_id;
                srq->first_free = next_ind;
        }

        if (likely(nreq)) {
                srq->counter += nreq;

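                /*
                 * Make sure that descriptors are written before
                 * we write doorbell record.
                 */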
                wmb();
                *srq->db = cpu_to_be32(srq->counter);
        }

        spin_unlock_irqrestore(&srq->lock, flags);
        return err;
}

int mthca_max_srq_sge(struct mthca_dev *dev)
{
        if (mthca_is_memfree(dev))
                return dev->limits.max_sg;

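        /*
         * SRQ allocations are based on powers of 2 for Tavor,
         * (although they only need to be multiples of 16 bytes).
         *
         * Therefore, we need to base the max number of sg entries on
         * the largest power of 2 descriptor size that is <= to the
         * actual max WQE descriptor size, rather than return the
         * max_sg value given by the firmware (which is based on WQE
         * sizes as multiples of 16, not powers of 2).
         *
         * If SRQ allocations are not based on powers of 2, this
         * calculation should be revisited.
         */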
        return min_t(int, dev->limits.max_sg,
                     ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
                      sizeof (struct mthca_next_seg)) /
                      sizeof (struct mthca_data_seg));
}

int mthca_init_srq_table(struct mthca_dev *dev)
{
        int err;

        if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
                return 0;

        spin_lock_init(&dev->srq_table.lock);

        err = mthca_alloc_init(&dev->srq_table.alloc,
                               dev->limits.num_srqs,
                               dev->limits.num_srqs - 1,
                               dev->limits.reserved_srqs);
        if (err)
                return err;

        err = mthca_array_init(&dev->srq_table.srq,
                               dev->limits.num_srqs);
        if (err)
                mthca_alloc_cleanup(&dev->srq_table.alloc);

        return err;
}

void mthca_cleanup_srq_table(struct mthca_dev *dev)
{
        if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
                return;

        mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
        mthca_alloc_cleanup(&dev->srq_table.alloc);
}